1 /* stb_image - v2.23 - public domain image loader - http://nothings.org/stb
2                                   no warranty implied; use at your own risk
3 
4    Do this:
5       #define STB_IMAGE_IMPLEMENTATION
6    before you include this file in *one* C or C++ file to create the implementation.
7 
8    // i.e. it should look like this:
9    #include ...
10    #include ...
11    #include ...
12    #define STB_IMAGE_IMPLEMENTATION
13    #include "stb_image.h"
14 
15    You can #define STBI_ASSERT(x) before the #include to avoid using assert.h.
16    And #define STBI_MALLOC, STBI_REALLOC, and STBI_FREE to avoid using malloc,realloc,free
17 
18 
19    QUICK NOTES:
20       Primarily of interest to game developers and other people who can
21           avoid problematic images and only need the trivial interface
22 
23       JPEG baseline & progressive (12 bpc/arithmetic not supported, same as stock IJG lib)
24       PNG 1/2/4/8/16-bit-per-channel
25 
26       TGA (not sure what subset, if a subset)
      BMP non-RLE (1-bpp supported since 2.17)
28       PSD (composited view only, no extra channels, 8/16 bit-per-channel)
29 
30       GIF (*comp always reports as 4-channel)
31       HDR (radiance rgbE format)
32       PIC (Softimage PIC)
33       PNM (PPM and PGM binary only)
34 
35       Animated GIF still needs a proper API, but here's one way to do it:
36           http://gist.github.com/urraka/685d9a6340b26b830d49
37 
38       - decode from memory or through FILE (define STBI_NO_STDIO to remove code)
39       - decode from arbitrary I/O callbacks
40       - SIMD acceleration on x86/x64 (SSE2) and ARM (NEON)
41 
42    Full documentation under "DOCUMENTATION" below.
43 
44 
45 LICENSE
46 
47   See end of file for license information.
48 
49 RECENT REVISION HISTORY:
50 
51       2.23  (2019-08-11) fix clang static analysis warning
52       2.22  (2019-03-04) gif fixes, fix warnings
53       2.21  (2019-02-25) fix typo in comment
54       2.20  (2019-02-07) support utf8 filenames in Windows; fix warnings and platform ifdefs
55       2.19  (2018-02-11) fix warning
56       2.18  (2018-01-30) fix warnings
57       2.17  (2018-01-29) bugfix, 1-bit BMP, 16-bitness query, fix warnings
58       2.16  (2017-07-23) all functions have 16-bit variants; optimizations; bugfixes
59       2.15  (2017-03-18) fix png-1,2,4; all Imagenet JPGs; no runtime SSE detection on GCC
60       2.14  (2017-03-03) remove deprecated STBI_JPEG_OLD; fixes for Imagenet JPGs
61       2.13  (2016-12-04) experimental 16-bit API, only for PNG so far; fixes
62       2.12  (2016-04-02) fix typo in 2.11 PSD fix that caused crashes
63       2.11  (2016-04-02) 16-bit PNGS; enable SSE2 in non-gcc x64
64                          RGB-format JPEG; remove white matting in PSD;
65                          allocate large structures on the stack;
66                          correct channel count for PNG & BMP
67       2.10  (2016-01-22) avoid warning introduced in 2.09
68       2.09  (2016-01-16) 16-bit TGA; comments in PNM files; STBI_REALLOC_SIZED
69 
70    See end of file for full revision history.
71 
72 
73  ============================    Contributors    =========================
74 
75  Image formats                          Extensions, features
76     Sean Barrett (jpeg, png, bmp)          Jetro Lauha (stbi_info)
77     Nicolas Schulz (hdr, psd)              Martin "SpartanJ" Golini (stbi_info)
78     Jonathan Dummer (tga)                  James "moose2000" Brown (iPhone PNG)
79     Jean-Marc Lienher (gif)                Ben "Disch" Wenger (io callbacks)
80     Tom Seddon (pic)                       Omar Cornut (1/2/4-bit PNG)
81     Thatcher Ulrich (psd)                  Nicolas Guillemot (vertical flip)
82     Ken Miller (pgm, ppm)                  Richard Mitton (16-bit PSD)
83     github:urraka (animated gif)           Junggon Kim (PNM comments)
84     Christopher Forseth (animated gif)     Daniel Gibson (16-bit TGA)
85                                            socks-the-fox (16-bit PNG)
86                                            Jeremy Sawicki (handle all ImageNet JPGs)
87  Optimizations & bugfixes                  Mikhail Morozov (1-bit BMP)
88     Fabian "ryg" Giesen                    Anael Seghezzi (is-16-bit query)
89     Arseny Kapoulkine
90     John-Mark Allen
91     Carmelo J Fdez-Aguera
92 
93  Bug & warning fixes
94     Marc LeBlanc            David Woo          Guillaume George   Martins Mozeiko
95     Christpher Lloyd        Jerry Jansson      Joseph Thomson     Phil Jordan
96     Dave Moore              Roy Eltham         Hayaki Saito       Nathan Reed
97     Won Chun                Luke Graham        Johan Duparc       Nick Verigakis
98     the Horde3D community   Thomas Ruf         Ronny Chevalier    github:rlyeh
99     Janez Zemva             John Bartholomew   Michal Cichon      github:romigrou
100     Jonathan Blow           Ken Hamada         Tero Hanninen      github:svdijk
101     Laurent Gomila          Cort Stratton      Sergio Gonzalez    github:snagar
102     Aruelien Pocheville     Thibault Reuille   Cass Everitt       github:Zelex
103     Ryamond Barbiero        Paul Du Bois       Engin Manap        github:grim210
104     Aldo Culquicondor       Philipp Wiesemann  Dale Weiler        github:sammyhw
105     Oriol Ferrer Mesia      Josh Tobin         Matthew Gregan     github:phprus
106     Julian Raschke          Gregory Mullen     Baldur Karlsson    github:poppolopoppo
107     Christian Floisand      Kevin Schmidt      JR Smith           github:darealshinji
108     Blazej Dariusz Roszkowski                                     github:Michaelangel007
109 */
110 
111 #ifndef STBI_INCLUDE_STB_IMAGE_H
112 #define STBI_INCLUDE_STB_IMAGE_H
113 
114 // DOCUMENTATION
115 //
116 // Limitations:
117 //    - no 12-bit-per-channel JPEG
118 //    - no JPEGs with arithmetic coding
119 //    - GIF always returns *comp=4
120 //
121 // Basic usage (see HDR discussion below for HDR usage):
122 //    int x,y,n;
123 //    unsigned char *data = stbi_load(filename, &x, &y, &n, 0);
124 //    // ... process data if not NULL ...
125 //    // ... x = width, y = height, n = # 8-bit components per pixel ...
126 //    // ... replace '0' with '1'..'4' to force that many components per pixel
127 //    // ... but 'n' will always be the number that it would have been if you said 0
128 //    stbi_image_free(data)
129 //
130 // Standard parameters:
131 //    int *x                 -- outputs image width in pixels
132 //    int *y                 -- outputs image height in pixels
133 //    int *channels_in_file  -- outputs # of image components in image file
134 //    int desired_channels   -- if non-zero, # of image components requested in result
135 //
136 // The return value from an image loader is an 'unsigned char *' which points
137 // to the pixel data, or NULL on an allocation failure or if the image is
138 // corrupt or invalid. The pixel data consists of *y scanlines of *x pixels,
139 // with each pixel consisting of N interleaved 8-bit components; the first
140 // pixel pointed to is top-left-most in the image. There is no padding between
141 // image scanlines or between pixels, regardless of format. The number of
142 // components N is 'desired_channels' if desired_channels is non-zero, or
143 // *channels_in_file otherwise. If desired_channels is non-zero,
144 // *channels_in_file has the number of components that _would_ have been
145 // output otherwise. E.g. if you set desired_channels to 4, you will always
146 // get RGBA output, but you can check *channels_in_file to see if it's trivially
147 // opaque because e.g. there were only 3 channels in the source image.
148 //
149 // An output image with N components has the following components interleaved
150 // in this order in each pixel:
151 //
152 //     N=#comp     components
153 //       1           grey
154 //       2           grey, alpha
155 //       3           red, green, blue
156 //       4           red, green, blue, alpha
157 //
158 // If image loading fails for any reason, the return value will be NULL,
159 // and *x, *y, *channels_in_file will be unchanged. The function
160 // stbi_failure_reason() can be queried for an extremely brief, end-user
161 // unfriendly explanation of why the load failed. Define STBI_NO_FAILURE_STRINGS
162 // to avoid compiling these strings at all, and STBI_FAILURE_USERMSG to get slightly
163 // more user-friendly ones.
164 //
165 // Paletted PNG, BMP, GIF, and PIC images are automatically depalettized.
166 //
167 // ===========================================================================
168 //
169 // UNICODE:
170 //
171 //   If compiling for Windows and you wish to use Unicode filenames, compile
172 //   with
173 //       #define STBI_WINDOWS_UTF8
174 //   and pass utf8-encoded filenames. Call stbi_convert_wchar_to_utf8 to convert
175 //   Windows wchar_t filenames to utf8.
176 //
177 // ===========================================================================
178 //
179 // Philosophy
180 //
181 // stb libraries are designed with the following priorities:
182 //
183 //    1. easy to use
184 //    2. easy to maintain
185 //    3. good performance
186 //
187 // Sometimes I let "good performance" creep up in priority over "easy to maintain",
188 // and for best performance I may provide less-easy-to-use APIs that give higher
189 // performance, in addition to the easy-to-use ones. Nevertheless, it's important
190 // to keep in mind that from the standpoint of you, a client of this library,
191 // all you care about is #1 and #3, and stb libraries DO NOT emphasize #3 above all.
192 //
193 // Some secondary priorities arise directly from the first two, some of which
194 // provide more explicit reasons why performance can't be emphasized.
195 //
196 //    - Portable ("ease of use")
197 //    - Small source code footprint ("easy to maintain")
198 //    - No dependencies ("ease of use")
199 //
200 // ===========================================================================
201 //
202 // I/O callbacks
203 //
204 // I/O callbacks allow you to read from arbitrary sources, like packaged
205 // files or some other source. Data read from callbacks are processed
206 // through a small internal buffer (currently 128 bytes) to try to reduce
207 // overhead.
208 //
209 // The three functions you must define are "read" (reads some bytes of data),
210 // "skip" (skips some bytes of data), "eof" (reports if the stream is at the end).
211 //
212 // ===========================================================================
213 //
214 // SIMD support
215 //
216 // The JPEG decoder will try to automatically use SIMD kernels on x86 when
217 // supported by the compiler. For ARM Neon support, you must explicitly
218 // request it.
219 //
220 // (The old do-it-yourself SIMD API is no longer supported in the current
221 // code.)
222 //
// On x86, SSE2 is used automatically when available: MSVC builds decide with a
// run-time CPUID test, while GCC/Clang builds use it only when the compiler
// itself has SSE2 enabled (e.g. -msse2), with no run-time detection; otherwise
// the generic C versions are used as a fall-back. On ARM targets, the typical
// path is to have separate builds for NEON and non-NEON devices (at least this
// is true for iOS and Android). Therefore, the NEON support is toggled by a
// build flag: define STBI_NEON to get NEON loops.
228 //
229 // If for some reason you do not want to use any of SIMD code, or if
230 // you have issues compiling it, you can disable it entirely by
231 // defining STBI_NO_SIMD.
232 //
233 // ===========================================================================
234 //
235 // HDR image support   (disable by defining STBI_NO_HDR)
236 //
237 // stb_image supports loading HDR images in general, and currently the Radiance
238 // .HDR file format specifically. You can still load any file through the existing
239 // interface; if you attempt to load an HDR file, it will be automatically remapped
240 // to LDR, assuming gamma 2.2 and an arbitrary scale factor defaulting to 1;
241 // both of these constants can be reconfigured through this interface:
242 //
243 //     stbi_hdr_to_ldr_gamma(2.2f);
244 //     stbi_hdr_to_ldr_scale(1.0f);
245 //
// (note, do not use _inverse_ constants; stb_image will invert them
// appropriately).
248 //
249 // Additionally, there is a new, parallel interface for loading files as
250 // (linear) floats to preserve the full dynamic range:
251 //
252 //    float *data = stbi_loadf(filename, &x, &y, &n, 0);
253 //
254 // If you load LDR images through this interface, those images will
255 // be promoted to floating point values, run through the inverse of
256 // constants corresponding to the above:
257 //
258 //     stbi_ldr_to_hdr_scale(1.0f);
259 //     stbi_ldr_to_hdr_gamma(2.2f);
260 //
261 // Finally, given a filename (or an open file or memory block--see header
262 // file for details) containing image data, you can query for the "most
263 // appropriate" interface to use (that is, whether the image is HDR or
264 // not), using:
265 //
266 //     stbi_is_hdr(char *filename);
267 //
268 // ===========================================================================
269 //
270 // iPhone PNG support:
271 //
272 // By default we convert iphone-formatted PNGs back to RGB, even though
273 // they are internally encoded differently. You can disable this conversion
274 // by calling stbi_convert_iphone_png_to_rgb(0), in which case
275 // you will always just get the native iphone "format" through (which
276 // is BGR stored in RGB).
277 //
278 // Call stbi_set_unpremultiply_on_load(1) as well to force a divide per
279 // pixel to remove any premultiplied alpha *only* if the image file explicitly
280 // says there's premultiplied data (currently only happens in iPhone images,
281 // and only if iPhone convert-to-rgb processing is on).
282 //
283 // ===========================================================================
284 //
285 // ADDITIONAL CONFIGURATION
286 //
287 //  - You can suppress implementation of any of the decoders to reduce
288 //    your code footprint by #defining one or more of the following
289 //    symbols before creating the implementation.
290 //
291 //        STBI_NO_JPEG
292 //        STBI_NO_PNG
293 //        STBI_NO_BMP
294 //        STBI_NO_PSD
295 //        STBI_NO_TGA
296 //        STBI_NO_GIF
297 //        STBI_NO_HDR
298 //        STBI_NO_PIC
299 //        STBI_NO_PNM   (.ppm and .pgm)
300 //
301 //  - You can request *only* certain decoders and suppress all other ones
302 //    (this will be more forward-compatible, as addition of new decoders
303 //    doesn't require you to disable them explicitly):
304 //
305 //        STBI_ONLY_JPEG
306 //        STBI_ONLY_PNG
307 //        STBI_ONLY_BMP
308 //        STBI_ONLY_PSD
309 //        STBI_ONLY_TGA
310 //        STBI_ONLY_GIF
311 //        STBI_ONLY_HDR
312 //        STBI_ONLY_PIC
313 //        STBI_ONLY_PNM   (.ppm and .pgm)
314 //
315 //   - If you use STBI_NO_PNG (or _ONLY_ without PNG), and you still
316 //     want the zlib decoder to be available, #define STBI_SUPPORT_ZLIB
317 //
318 
319 
320 #ifndef STBI_NO_STDIO
321 #include <stdio.h>
322 #endif // STBI_NO_STDIO
323 
324 #define STBI_VERSION 1
325 
// Symbolic channel counts. Every value other than STBI_default equals the
// number of interleaved 8-bit components per pixel it stands for.
enum
{
   // "no preference" -- only meaningful as a desired_channels argument
   STBI_default    = 0,

   STBI_grey       = 1,   // grey
   STBI_grey_alpha = 2,   // grey, alpha
   STBI_rgb        = 3,   // red, green, blue
   STBI_rgb_alpha  = 4    // red, green, blue, alpha
};
335 
336 #include <stdlib.h>
337 typedef unsigned char stbi_uc;
338 typedef unsigned short stbi_us;
339 
340 #ifdef __cplusplus
341 extern "C" {
342 #endif
343 
344 #ifndef STBIDEF
345 #ifdef STB_IMAGE_STATIC
346 #define STBIDEF static
347 #else
348 #define STBIDEF extern
349 #endif
350 #endif
351 
352 //////////////////////////////////////////////////////////////////////////////
353 //
354 // PRIMARY API - works on images of any type
355 //
356 
357 //
358 // load image by filename, open file, or memory buffer
359 //
360 
// Client-supplied I/O hooks. Each hook receives the opaque 'user' pointer that
// was handed to the *_from_callbacks entry point.
typedef struct
{
   // fill 'data' with 'size' bytes.  return number of bytes actually read
   int  (*read)(void *user, char *data, int size);

   // skip the next 'n' bytes, or 'unget' the last -n bytes if negative
   void (*skip)(void *user, int n);

   // returns nonzero if we are at end of file/data
   int  (*eof)(void *user);
} stbi_io_callbacks;
367 
368 ////////////////////////////////////
369 //
370 // 8-bits-per-channel interface
371 //
372 
373 STBIDEF stbi_uc *stbi_load_from_memory   (stbi_uc           const *buffer, int len   , int *x, int *y, int *channels_in_file, int desired_channels);
374 STBIDEF stbi_uc *stbi_load_from_callbacks(stbi_io_callbacks const *clbk  , void *user, int *x, int *y, int *channels_in_file, int desired_channels);
375 
376 #ifndef STBI_NO_STDIO
377 STBIDEF stbi_uc *stbi_load            (char const *filename, int *x, int *y, int *channels_in_file, int desired_channels);
378 STBIDEF stbi_uc *stbi_load_from_file  (FILE *f, int *x, int *y, int *channels_in_file, int desired_channels);
379 // for stbi_load_from_file, file pointer is left pointing immediately after image
380 #endif
381 
382 #ifndef STBI_NO_GIF
383 STBIDEF stbi_uc *stbi_load_gif_from_memory(stbi_uc const *buffer, int len, int **delays, int *x, int *y, int *z, int *comp, int req_comp);
384 #endif
385 
386 #ifdef STBI_WINDOWS_UTF8
387 STBIDEF int stbi_convert_wchar_to_utf8(char *buffer, size_t bufferlen, const wchar_t* input);
388 #endif
389 
390 ////////////////////////////////////
391 //
392 // 16-bits-per-channel interface
393 //
394 
395 STBIDEF stbi_us *stbi_load_16_from_memory   (stbi_uc const *buffer, int len, int *x, int *y, int *channels_in_file, int desired_channels);
396 STBIDEF stbi_us *stbi_load_16_from_callbacks(stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *channels_in_file, int desired_channels);
397 
398 #ifndef STBI_NO_STDIO
399 STBIDEF stbi_us *stbi_load_16          (char const *filename, int *x, int *y, int *channels_in_file, int desired_channels);
400 STBIDEF stbi_us *stbi_load_from_file_16(FILE *f, int *x, int *y, int *channels_in_file, int desired_channels);
401 #endif
402 
403 ////////////////////////////////////
404 //
405 // float-per-channel interface
406 //
407 #ifndef STBI_NO_LINEAR
408    STBIDEF float *stbi_loadf_from_memory     (stbi_uc const *buffer, int len, int *x, int *y, int *channels_in_file, int desired_channels);
409    STBIDEF float *stbi_loadf_from_callbacks  (stbi_io_callbacks const *clbk, void *user, int *x, int *y,  int *channels_in_file, int desired_channels);
410 
411    #ifndef STBI_NO_STDIO
412    STBIDEF float *stbi_loadf            (char const *filename, int *x, int *y, int *channels_in_file, int desired_channels);
413    STBIDEF float *stbi_loadf_from_file  (FILE *f, int *x, int *y, int *channels_in_file, int desired_channels);
414    #endif
415 #endif
416 
417 #ifndef STBI_NO_HDR
418    STBIDEF void   stbi_hdr_to_ldr_gamma(float gamma);
419    STBIDEF void   stbi_hdr_to_ldr_scale(float scale);
420 #endif // STBI_NO_HDR
421 
422 #ifndef STBI_NO_LINEAR
423    STBIDEF void   stbi_ldr_to_hdr_gamma(float gamma);
424    STBIDEF void   stbi_ldr_to_hdr_scale(float scale);
425 #endif // STBI_NO_LINEAR
426 
427 // stbi_is_hdr is always defined, but always returns false if STBI_NO_HDR
428 STBIDEF int    stbi_is_hdr_from_callbacks(stbi_io_callbacks const *clbk, void *user);
429 STBIDEF int    stbi_is_hdr_from_memory(stbi_uc const *buffer, int len);
430 #ifndef STBI_NO_STDIO
431 STBIDEF int      stbi_is_hdr          (char const *filename);
432 STBIDEF int      stbi_is_hdr_from_file(FILE *f);
433 #endif // STBI_NO_STDIO
434 
435 
436 // get a VERY brief reason for failure
437 // NOT THREADSAFE
438 STBIDEF const char *stbi_failure_reason  (void);
439 
440 // free the loaded image -- this is just free()
441 STBIDEF void     stbi_image_free      (void *retval_from_stbi_load);
442 
443 // get image dimensions & components without fully decoding
444 STBIDEF int      stbi_info_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp);
445 STBIDEF int      stbi_info_from_callbacks(stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *comp);
446 STBIDEF int      stbi_is_16_bit_from_memory(stbi_uc const *buffer, int len);
447 STBIDEF int      stbi_is_16_bit_from_callbacks(stbi_io_callbacks const *clbk, void *user);
448 
449 #ifndef STBI_NO_STDIO
450 STBIDEF int      stbi_info               (char const *filename,     int *x, int *y, int *comp);
451 STBIDEF int      stbi_info_from_file     (FILE *f,                  int *x, int *y, int *comp);
452 STBIDEF int      stbi_is_16_bit          (char const *filename);
453 STBIDEF int      stbi_is_16_bit_from_file(FILE *f);
454 #endif
455 
456 
457 
458 // for image formats that explicitly notate that they have premultiplied alpha,
459 // we just return the colors as stored in the file. set this flag to force
// unpremultiplication. results are undefined if the unpremultiply overflows.
461 STBIDEF void stbi_set_unpremultiply_on_load(int flag_true_if_should_unpremultiply);
462 
463 // indicate whether we should process iphone images back to canonical format,
464 // or just pass them through "as-is"
465 STBIDEF void stbi_convert_iphone_png_to_rgb(int flag_true_if_should_convert);
466 
467 // flip the image vertically, so the first pixel in the output array is the bottom left
468 STBIDEF void stbi_set_flip_vertically_on_load(int flag_true_if_should_flip);
469 
470 // ZLIB client - used by PNG, available for other purposes
471 
472 STBIDEF char *stbi_zlib_decode_malloc_guesssize(const char *buffer, int len, int initial_size, int *outlen);
473 STBIDEF char *stbi_zlib_decode_malloc_guesssize_headerflag(const char *buffer, int len, int initial_size, int *outlen, int parse_header);
474 STBIDEF char *stbi_zlib_decode_malloc(const char *buffer, int len, int *outlen);
475 STBIDEF int   stbi_zlib_decode_buffer(char *obuffer, int olen, const char *ibuffer, int ilen);
476 
477 STBIDEF char *stbi_zlib_decode_noheader_malloc(const char *buffer, int len, int *outlen);
478 STBIDEF int   stbi_zlib_decode_noheader_buffer(char *obuffer, int olen, const char *ibuffer, int ilen);
479 
480 
481 #ifdef __cplusplus
482 }
483 #endif
484 
485 //
486 //
487 ////   end header file   /////////////////////////////////////////////////////
488 #endif // STBI_INCLUDE_STB_IMAGE_H
489 
490 #ifdef STB_IMAGE_IMPLEMENTATION
491 
492 #if defined(STBI_ONLY_JPEG) || defined(STBI_ONLY_PNG) || defined(STBI_ONLY_BMP) \
493   || defined(STBI_ONLY_TGA) || defined(STBI_ONLY_GIF) || defined(STBI_ONLY_PSD) \
494   || defined(STBI_ONLY_HDR) || defined(STBI_ONLY_PIC) || defined(STBI_ONLY_PNM) \
495   || defined(STBI_ONLY_ZLIB)
496    #ifndef STBI_ONLY_JPEG
497    #define STBI_NO_JPEG
498    #endif
499    #ifndef STBI_ONLY_PNG
500    #define STBI_NO_PNG
501    #endif
502    #ifndef STBI_ONLY_BMP
503    #define STBI_NO_BMP
504    #endif
505    #ifndef STBI_ONLY_PSD
506    #define STBI_NO_PSD
507    #endif
508    #ifndef STBI_ONLY_TGA
509    #define STBI_NO_TGA
510    #endif
511    #ifndef STBI_ONLY_GIF
512    #define STBI_NO_GIF
513    #endif
514    #ifndef STBI_ONLY_HDR
515    #define STBI_NO_HDR
516    #endif
517    #ifndef STBI_ONLY_PIC
518    #define STBI_NO_PIC
519    #endif
520    #ifndef STBI_ONLY_PNM
521    #define STBI_NO_PNM
522    #endif
523 #endif
524 
// The zlib decoder is used by the PNG decoder, so once PNG is disabled zlib is
// dropped as well -- unless the client asked to keep it. "Keep it" can be said
// two ways: STBI_SUPPORT_ZLIB, or STBI_ONLY_ZLIB. The latter has to be checked
// here explicitly because selecting any STBI_ONLY_* symbol defines STBI_NO_PNG,
// which would otherwise strip out the very thing STBI_ONLY_ZLIB requested.
#if defined(STBI_NO_PNG) && !defined(STBI_SUPPORT_ZLIB) && !defined(STBI_ONLY_ZLIB) && !defined(STBI_NO_ZLIB)
#define STBI_NO_ZLIB
#endif
528 
529 
530 #include <stdarg.h>
531 #include <stddef.h> // ptrdiff_t on osx
532 #include <stdlib.h>
533 #include <string.h>
534 #include <limits.h>
535 
536 #if !defined(STBI_NO_LINEAR) || !defined(STBI_NO_HDR)
537 #include <math.h>  // ldexp, pow
538 #endif
539 
540 #ifndef STBI_NO_STDIO
541 #include <stdio.h>
542 #endif
543 
544 #ifndef STBI_ASSERT
545 #include <assert.h>
546 #define STBI_ASSERT(x) assert(x)
547 #endif
548 
549 #ifdef __cplusplus
550 #define STBI_EXTERN extern "C"
551 #else
552 #define STBI_EXTERN extern
553 #endif
554 
555 
556 #ifndef _MSC_VER
557    #ifdef __cplusplus
558    #define stbi_inline inline
559    #else
560    #define stbi_inline
561    #endif
562 #else
563    #define stbi_inline __forceinline
564 #endif
565 
566 
// Fixed-width integer aliases used throughout the implementation.
// Everything except MSVC takes them from <stdint.h>; MSVC builds spell them
// out with the built-in types instead.
#ifndef _MSC_VER
#include <stdint.h>
typedef uint16_t stbi__uint16;
typedef int16_t  stbi__int16;
typedef uint32_t stbi__uint32;
typedef int32_t  stbi__int32;
#else
typedef unsigned short stbi__uint16;
typedef   signed short stbi__int16;
typedef unsigned int   stbi__uint32;
typedef   signed int   stbi__int32;
#endif

// compile-time size check: the array length becomes -1 (a compile error)
// whenever stbi__uint32 is not exactly 4 bytes wide
typedef unsigned char validate_uint32[sizeof(stbi__uint32)==4 ? 1 : -1];
582 
583 #ifdef _MSC_VER
584 #define STBI_NOTUSED(v)  (void)(v)
585 #else
586 #define STBI_NOTUSED(v)  (void)sizeof(v)
587 #endif
588 
589 #ifdef _MSC_VER
590 #define STBI_HAS_LROTL
591 #endif
592 
// stbi_lrot(x,y): rotate x left by y bits.
//
// The portable fallback expects x to be a 32-bit unsigned value. Both shift
// counts are masked to 0..31 so that a rotate count of 0 (or 32) returns x
// unchanged; an unmasked '(x) >> (32 - (y))' would shift by the full width
// of the type for y == 0, which is undefined behaviour in C. For y in 1..31
// the result is the same as the unmasked form.
// Note that the fallback evaluates both arguments more than once.
#ifdef STBI_HAS_LROTL
   #define stbi_lrot(x,y)  _lrotl(x,y)
#else
   #define stbi_lrot(x,y)  (((x) << ((y) & 31)) | ((x) >> ((32 - (y)) & 31)))
#endif
598 
599 #if defined(STBI_MALLOC) && defined(STBI_FREE) && (defined(STBI_REALLOC) || defined(STBI_REALLOC_SIZED))
600 // ok
601 #elif !defined(STBI_MALLOC) && !defined(STBI_FREE) && !defined(STBI_REALLOC) && !defined(STBI_REALLOC_SIZED)
602 // ok
603 #else
604 #error "Must define all or none of STBI_MALLOC, STBI_FREE, and STBI_REALLOC (or STBI_REALLOC_SIZED)."
605 #endif
606 
// Default allocator: when the client supplied no STBI_MALLOC, route all three
// hooks to the C runtime.
#if !defined(STBI_MALLOC)
   #define STBI_MALLOC(size)            malloc(size)
   #define STBI_REALLOC(ptr,new_size)   realloc(ptr,new_size)
   #define STBI_FREE(ptr)               free(ptr)
#endif

// Clients whose reallocator needs the old size define STBI_REALLOC_SIZED
// themselves; otherwise the old size is simply ignored and STBI_REALLOC is used.
#if !defined(STBI_REALLOC_SIZED)
   #define STBI_REALLOC_SIZED(ptr,old_size,new_size) STBI_REALLOC(ptr,new_size)
#endif
616 
617 // x86/x64 detection
618 #if defined(__x86_64__) || defined(_M_X64)
619 #define STBI__X64_TARGET
620 #elif defined(__i386) || defined(_M_IX86)
621 #define STBI__X86_TARGET
622 #endif
623 
624 #if defined(__GNUC__) && defined(STBI__X86_TARGET) && !defined(__SSE2__) && !defined(STBI_NO_SIMD)
625 // gcc doesn't support sse2 intrinsics unless you compile with -msse2,
626 // which in turn means it gets to use SSE2 everywhere. This is unfortunate,
627 // but previous attempts to provide the SSE2 functions with runtime
628 // detection caused numerous issues. The way architecture extensions are
629 // exposed in GCC/Clang is, sadly, not really suited for one-file libs.
630 // New behavior: if compiled with -msse2, we use SSE2 without any
631 // detection; if not, we don't use it at all.
632 #define STBI_NO_SIMD
633 #endif
634 
635 #if defined(__MINGW32__) && defined(STBI__X86_TARGET) && !defined(STBI_MINGW_ENABLE_SSE2) && !defined(STBI_NO_SIMD)
636 // Note that __MINGW32__ doesn't actually mean 32-bit, so we have to avoid STBI__X64_TARGET
637 //
638 // 32-bit MinGW wants ESP to be 16-byte aligned, but this is not in the
639 // Windows ABI and VC++ as well as Windows DLLs don't maintain that invariant.
640 // As a result, enabling SSE2 on 32-bit MinGW is dangerous when not
641 // simultaneously enabling "-mstackrealign".
642 //
643 // See https://github.com/nothings/stb/issues/81 for more information.
644 //
645 // So default to no SSE2 on 32-bit MinGW. If you've read this far and added
646 // -mstackrealign to your build settings, feel free to #define STBI_MINGW_ENABLE_SSE2.
647 #define STBI_NO_SIMD
648 #endif
649 
650 #if !defined(STBI_NO_SIMD) && (defined(STBI__X86_TARGET) || defined(STBI__X64_TARGET))
651 #define STBI_SSE2
652 #include <emmintrin.h>
653 
654 #ifdef _MSC_VER
655 
656 #if _MSC_VER >= 1400  // not VC6
657 #include <intrin.h> // __cpuid
// Run CPUID function 1 through the compiler intrinsic and return element 3 of
// the result array (the inline-asm variant of this function returns EDX for
// the same query).
static int stbi__cpuid3(void)
{
   int regs[4];

   __cpuid(regs, 1);
   return regs[3];
}
664 #else
stbi__cpuid3(void)665 static int stbi__cpuid3(void)
666 {
667    int res;
668    __asm {
669       mov  eax,1
670       cpuid
671       mov  res,edx
672    }
673    return res;
674 }
675 #endif
676 
677 #define STBI_SIMD_ALIGN(type, name) __declspec(align(16)) type name
678 
679 #if !defined(STBI_NO_JPEG) && defined(STBI_SSE2)
// Run-time SSE2 check for MSVC builds: returns 1 when bit 26 of the value
// reported by stbi__cpuid3() is set, 0 otherwise.
static int stbi__sse2_available(void)
{
   const int sse2_bit = 26;
   int edx = stbi__cpuid3();

   return ((edx >> sse2_bit) & 1) != 0;
}
685 #endif
686 
687 #else // assume GCC-style if not VC++
688 #define STBI_SIMD_ALIGN(type, name) type name __attribute__((aligned(16)))
689 
690 #if !defined(STBI_NO_JPEG) && defined(STBI_SSE2)
// GCC-style compilers: no run-time probing. This code is only compiled when
// SSE2 is enabled for the whole translation unit (-msse2), in which case the
// compiler may already emit SSE2 instructions anywhere it likes -- so the
// answer is unconditionally "yes".
static int stbi__sse2_available(void)
{
   return 1;
}
698 #endif
699 
700 #endif
701 #endif
702 
703 // ARM NEON
704 #if defined(STBI_NO_SIMD) && defined(STBI_NEON)
705 #undef STBI_NEON
706 #endif
707 
708 #ifdef STBI_NEON
709 #include <arm_neon.h>
710 // assume GCC or Clang on ARM targets
711 #define STBI_SIMD_ALIGN(type, name) type name __attribute__((aligned(16)))
712 #endif
713 
714 #ifndef STBI_SIMD_ALIGN
715 #define STBI_SIMD_ALIGN(type, name) type name
716 #endif
717 
718 ///////////////////////////////////////////////
719 //
720 //  stbi__context struct and start_xxx functions
721 
722 // stbi__context structure is our basic context used by all images, so it
723 // contains all the IO context, plus some basic image information
724 typedef struct
725 {
726    stbi__uint32 img_x, img_y;
727    int img_n, img_out_n;
728 
729    stbi_io_callbacks io;
730    void *io_user_data;
731 
732    int read_from_callbacks;
733    int buflen;
734    stbi_uc buffer_start[128];
735 
736    stbi_uc *img_buffer, *img_buffer_end;
737    stbi_uc *img_buffer_original, *img_buffer_original_end;
738 } stbi__context;
739 
740 
741 static void stbi__refill_buffer(stbi__context *s);
742 
743 // initialize a memory-decode context
stbi__start_mem(stbi__context * s,stbi_uc const * buffer,int len)744 static void stbi__start_mem(stbi__context *s, stbi_uc const *buffer, int len)
745 {
746    s->io.read = NULL;
747    s->read_from_callbacks = 0;
748    s->img_buffer = s->img_buffer_original = (stbi_uc *) buffer;
749    s->img_buffer_end = s->img_buffer_original_end = (stbi_uc *) buffer+len;
750 }
751 
752 // initialize a callback-based context
stbi__start_callbacks(stbi__context * s,stbi_io_callbacks * c,void * user)753 static void stbi__start_callbacks(stbi__context *s, stbi_io_callbacks *c, void *user)
754 {
755    s->io = *c;
756    s->io_user_data = user;
757    s->buflen = sizeof(s->buffer_start);
758    s->read_from_callbacks = 1;
759    s->img_buffer_original = s->buffer_start;
760    stbi__refill_buffer(s);
761    s->img_buffer_original_end = s->img_buffer_end;
762 }
763 
764 #ifndef STBI_NO_STDIO
765 
// stdio 'read' callback: 'user' is the FILE* to read from. Copies up to
// 'size' bytes into 'data' and returns how many bytes fread delivered.
static int stbi__stdio_read(void *user, char *data, int size)
{
   FILE *stream = (FILE*) user;
   size_t got = fread(data,1,size,stream);

   return (int) got;
}
770 
// stdio 'skip' callback: 'user' is the FILE*. Moves the read position by 'n'
// bytes relative to the current one (a negative 'n' moves backwards).
//
// fseek() clears the stream's end-of-file indicator and will happily position
// past the end of the file, so on its own a skip that runs off the end would
// leave feof() reporting "more data". To keep the 'eof' callback truthful we
// probe one byte after seeking: at or past the end the probe sets the EOF
// indicator; otherwise the byte is pushed straight back so the stream position
// seen by the next read is unchanged.
static void stbi__stdio_skip(void *user, int n)
{
   FILE *f = (FILE*) user;
   int ch;

   fseek(f, n, SEEK_CUR);

   ch = fgetc(f);          // forces feof() to reflect the new position
   if (ch != EOF) {
      ungetc(ch, f);       // still inside the file: give the byte back
   }
}
775 
// stdio 'eof' callback: 'user' is the FILE*. Returns nonzero when no further
// data can be obtained from the stream -- either because the end-of-file
// indicator is set or because the stream is in an error state. Treating a read
// error as end-of-data lets callers that poll this callback stop instead of
// waiting for an EOF that a failing stream will never report.
static int stbi__stdio_eof(void *user)
{
   FILE *f = (FILE*) user;

   return feof(f) || ferror(f);
}
780 
781 static stbi_io_callbacks stbi__stdio_callbacks =
782 {
783    stbi__stdio_read,
784    stbi__stdio_skip,
785    stbi__stdio_eof,
786 };
787 
// Set up 's' to read from the open stdio stream 'f' by going through the
// generic callback path with the stdio adapter table; the FILE* itself is the
// callbacks' user pointer.
static void stbi__start_file(stbi__context *s, FILE *f)
{
   void *user = (void *) f;

   stbi__start_callbacks(s, &stbi__stdio_callbacks, user);
}
792 
793 //static void stop_file(stbi__context *s) { }
794 
795 #endif // !STBI_NO_STDIO
796 
// Reset the read position of 's' to the start of the initially buffered data.
static void stbi__rewind(stbi__context *s)
{
   // A real rewind would go back to the beginning of the stream. Restoring
   // the initial buffer span is enough here, because this is only used after
   // a 'test' function, and those never look at more than 92 bytes.
   s->img_buffer_end = s->img_buffer_original_end;
   s->img_buffer     = s->img_buffer_original;
}
805 
// channel orderings that can be stored in stbi__result_info.channel_order
enum
{
   STBI_ORDER_RGB,   // value 0
   STBI_ORDER_BGR    // value 1
};
811 
// per-load metadata that stbi__load_main initializes and passes to the
// format loaders
typedef struct
{
   int bits_per_channel;   // the postprocess functions expect 8 or 16 here
   int num_channels;
   int channel_order;      // one of the STBI_ORDER_* values
} stbi__result_info;
818 
819 #ifndef STBI_NO_JPEG
820 static int      stbi__jpeg_test(stbi__context *s);
821 static void    *stbi__jpeg_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri);
822 static int      stbi__jpeg_info(stbi__context *s, int *x, int *y, int *comp);
823 #endif
824 
825 #ifndef STBI_NO_PNG
826 static int      stbi__png_test(stbi__context *s);
827 static void    *stbi__png_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri);
828 static int      stbi__png_info(stbi__context *s, int *x, int *y, int *comp);
829 static int      stbi__png_is16(stbi__context *s);
830 #endif
831 
832 #ifndef STBI_NO_BMP
833 static int      stbi__bmp_test(stbi__context *s);
834 static void    *stbi__bmp_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri);
835 static int      stbi__bmp_info(stbi__context *s, int *x, int *y, int *comp);
836 #endif
837 
838 #ifndef STBI_NO_TGA
839 static int      stbi__tga_test(stbi__context *s);
840 static void    *stbi__tga_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri);
841 static int      stbi__tga_info(stbi__context *s, int *x, int *y, int *comp);
842 #endif
843 
844 #ifndef STBI_NO_PSD
845 static int      stbi__psd_test(stbi__context *s);
846 static void    *stbi__psd_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri, int bpc);
847 static int      stbi__psd_info(stbi__context *s, int *x, int *y, int *comp);
848 static int      stbi__psd_is16(stbi__context *s);
849 #endif
850 
851 #ifndef STBI_NO_HDR
852 static int      stbi__hdr_test(stbi__context *s);
853 static float   *stbi__hdr_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri);
854 static int      stbi__hdr_info(stbi__context *s, int *x, int *y, int *comp);
855 #endif
856 
857 #ifndef STBI_NO_PIC
858 static int      stbi__pic_test(stbi__context *s);
859 static void    *stbi__pic_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri);
860 static int      stbi__pic_info(stbi__context *s, int *x, int *y, int *comp);
861 #endif
862 
863 #ifndef STBI_NO_GIF
864 static int      stbi__gif_test(stbi__context *s);
865 static void    *stbi__gif_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri);
866 static void    *stbi__load_gif_main(stbi__context *s, int **delays, int *x, int *y, int *z, int *comp, int req_comp);
867 static int      stbi__gif_info(stbi__context *s, int *x, int *y, int *comp);
868 #endif
869 
870 #ifndef STBI_NO_PNM
871 static int      stbi__pnm_test(stbi__context *s);
872 static void    *stbi__pnm_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri);
873 static int      stbi__pnm_info(stbi__context *s, int *x, int *y, int *comp);
874 #endif
875 
876 // this is not threadsafe
877 static const char *stbi__g_failure_reason;
878 
stbi_failure_reason(void)879 STBIDEF const char *stbi_failure_reason(void)
880 {
881    return stbi__g_failure_reason;
882 }
883 
stbi__err(const char * str)884 static int stbi__err(const char *str)
885 {
886    stbi__g_failure_reason = str;
887    return 0;
888 }
889 
// single choke point for allocations: forwards to the configurable STBI_MALLOC
static void *stbi__malloc(size_t size)
{
   void *block = STBI_MALLOC(size);
   return block;
}
894 
895 // stb_image uses ints pervasively, including for offset calculations.
896 // therefore the largest decoded image size we can support with the
897 // current code, even on 64-bit targets, is INT_MAX. this is not a
898 // significant limitation for the intended use case.
899 //
900 // we do, however, need to make sure our size calculations don't
901 // overflow. hence a few helper functions for size calculations that
902 // multiply integers together, making sure that they're non-negative
903 // and no overflow occurs.
904 
// return 1 if a + b can be formed without exceeding INT_MAX, 0 otherwise.
// a negative b is rejected; a is not sign-checked here.
static int stbi__addsizes_valid(int a, int b)
{
   int headroom;
   if (b < 0) return 0;
   // b is now in [0, INT_MAX], so INT_MAX - b is in that range as well and
   // cannot overflow. Testing "a <= INT_MAX - b" is the overflow-free way
   // of asking "a + b <= INT_MAX".
   headroom = INT_MAX - b;
   return a <= headroom;
}
916 
// returns 1 if a*b can be formed without overflowing int, 0 otherwise.
// negative factors are considered invalid.
static int stbi__mul2sizes_valid(int a, int b)
{
   if (a < 0) return 0;
   if (b < 0) return 0;
   // multiplying by zero can never overflow (and must not reach the division)
   if (b == 0) return 1;
   // a*b <= INT_MAX  <=>  a <= INT_MAX/b, evaluated without overflow
   return a <= INT_MAX / b;
}
926 
// returns 1 if "a*b + add" has no negative terms/factors and doesn't overflow
static int stbi__mad2sizes_valid(int a, int b, int add)
{
   // validate the product first, so a*b below is only evaluated when safe
   if (!stbi__mul2sizes_valid(a, b)) return 0;
   return stbi__addsizes_valid(a*b, add);
}
932 
// returns 1 if "a*b*c + add" has no negative terms/factors and doesn't overflow
static int stbi__mad3sizes_valid(int a, int b, int c, int add)
{
   // each partial product is validated before it is actually computed
   if (!stbi__mul2sizes_valid(a, b)) return 0;
   if (!stbi__mul2sizes_valid(a*b, c)) return 0;
   return stbi__addsizes_valid(a*b*c, add);
}
939 
// returns 1 if "a*b*c*d + add" has no negative terms/factors and doesn't overflow
#if !defined(STBI_NO_LINEAR) || !defined(STBI_NO_HDR)
static int stbi__mad4sizes_valid(int a, int b, int c, int d, int add)
{
   // each partial product is validated before it is actually computed
   if (!stbi__mul2sizes_valid(a, b)) return 0;
   if (!stbi__mul2sizes_valid(a*b, c)) return 0;
   if (!stbi__mul2sizes_valid(a*b*c, d)) return 0;
   return stbi__addsizes_valid(a*b*c*d, add);
}
#endif
948 
// mallocs with size overflow checking
// allocates a*b + add bytes, or returns NULL when that size is invalid
static void *stbi__malloc_mad2(int a, int b, int add)
{
   if (stbi__mad2sizes_valid(a, b, add))
      return stbi__malloc(a*b + add);
   return NULL;
}
955 
// allocates a*b*c + add bytes, or returns NULL when that size is invalid
static void *stbi__malloc_mad3(int a, int b, int c, int add)
{
   if (stbi__mad3sizes_valid(a, b, c, add))
      return stbi__malloc(a*b*c + add);
   return NULL;
}
961 
#if !defined(STBI_NO_LINEAR) || !defined(STBI_NO_HDR)
// allocates a*b*c*d + add bytes, or returns NULL when that size is invalid
static void *stbi__malloc_mad4(int a, int b, int c, int d, int add)
{
   if (stbi__mad4sizes_valid(a, b, c, d, add))
      return stbi__malloc(a*b*c*d + add);
   return NULL;
}
#endif
969 
// stbi__err - error
// stbi__errpf - error returning pointer to float
// stbi__errpuc - error returning pointer to unsigned char
//
// From here on stbi__err is a two-argument macro that selects which message
// reaches the one-argument function: none with STBI_NO_FAILURE_STRINGS, the
// second (y) with STBI_FAILURE_USERMSG, otherwise the first (x).

#ifdef STBI_NO_FAILURE_STRINGS
   #define stbi__err(x,y)  0
#elif defined(STBI_FAILURE_USERMSG)
   #define stbi__err(x,y)  stbi__err(y)
#else
   #define stbi__err(x,y)  stbi__err(x)
#endif

// both evaluate stbi__err for its side effect and yield a NULL of the wanted type
#define stbi__errpf(x,y)   ((float *)(size_t) (stbi__err(x,y)?NULL:NULL))
#define stbi__errpuc(x,y)  ((unsigned char *)(size_t) (stbi__err(x,y)?NULL:NULL))
984 
stbi_image_free(void * retval_from_stbi_load)985 STBIDEF void stbi_image_free(void *retval_from_stbi_load)
986 {
987    STBI_FREE(retval_from_stbi_load);
988 }
989 
990 #ifndef STBI_NO_LINEAR
991 static float   *stbi__ldr_to_hdr(stbi_uc *data, int x, int y, int comp);
992 #endif
993 
994 #ifndef STBI_NO_HDR
995 static stbi_uc *stbi__hdr_to_ldr(float   *data, int x, int y, int comp);
996 #endif
997 
998 static int stbi__vertically_flip_on_load = 0;
999 
stbi_set_flip_vertically_on_load(int flag_true_if_should_flip)1000 STBIDEF void stbi_set_flip_vertically_on_load(int flag_true_if_should_flip)
1001 {
1002     stbi__vertically_flip_on_load = flag_true_if_should_flip;
1003 }
1004 
// Try each compiled-in decoder in turn: the first one whose test accepts the
// stream loads the image. `ri` is reset to defaults first. `bpc` is only
// forwarded to the PSD loader. Returns the decoder's result, or NULL with a
// failure reason set when no decoder recognizes the data.
static void *stbi__load_main(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri, int bpc)
{
   // zero everything so fields added in the future start out defined, then
   // set the defaults: 8 bits per channel (so most loaders needn't touch it)
   // and RGB order (all current input & output use it; BGR exists for later)
   memset(ri, 0, sizeof(*ri));
   ri->num_channels = 0;
   ri->channel_order = STBI_ORDER_RGB;
   ri->bits_per_channel = 8;

   #ifndef STBI_NO_JPEG
   if (stbi__jpeg_test(s)) {
      return stbi__jpeg_load(s,x,y,comp,req_comp, ri);
   }
   #endif
   #ifndef STBI_NO_PNG
   if (stbi__png_test(s)) {
      return stbi__png_load(s,x,y,comp,req_comp, ri);
   }
   #endif
   #ifndef STBI_NO_BMP
   if (stbi__bmp_test(s)) {
      return stbi__bmp_load(s,x,y,comp,req_comp, ri);
   }
   #endif
   #ifndef STBI_NO_GIF
   if (stbi__gif_test(s)) {
      return stbi__gif_load(s,x,y,comp,req_comp, ri);
   }
   #endif
   #ifndef STBI_NO_PSD
   if (stbi__psd_test(s)) {
      return stbi__psd_load(s,x,y,comp,req_comp, ri, bpc);
   }
   #endif
   #ifndef STBI_NO_PIC
   if (stbi__pic_test(s)) {
      return stbi__pic_load(s,x,y,comp,req_comp, ri);
   }
   #endif
   #ifndef STBI_NO_PNM
   if (stbi__pnm_test(s)) {
      return stbi__pnm_load(s,x,y,comp,req_comp, ri);
   }
   #endif

   #ifndef STBI_NO_HDR
   if (stbi__hdr_test(s)) {
      // HDR data is decoded to float and then converted down
      int out_comp;
      float *hdr = stbi__hdr_load(s, x,y,comp,req_comp, ri);
      out_comp = req_comp ? req_comp : *comp;
      return stbi__hdr_to_ldr(hdr, *x, *y, out_comp);
   }
   #endif

   #ifndef STBI_NO_TGA
   // test tga last because it's a crappy test!
   if (stbi__tga_test(s)) {
      return stbi__tga_load(s,x,y,comp,req_comp, ri);
   }
   #endif

   return stbi__errpuc("unknown image type", "Image not of any known type, or corrupt");
}
1049 
stbi__convert_16_to_8(stbi__uint16 * orig,int w,int h,int channels)1050 static stbi_uc *stbi__convert_16_to_8(stbi__uint16 *orig, int w, int h, int channels)
1051 {
1052    int i;
1053    int img_len = w * h * channels;
1054    stbi_uc *reduced;
1055 
1056    reduced = (stbi_uc *) stbi__malloc(img_len);
1057    if (reduced == NULL) return stbi__errpuc("outofmem", "Out of memory");
1058 
1059    for (i = 0; i < img_len; ++i)
1060       reduced[i] = (stbi_uc)((orig[i] >> 8) & 0xFF); // top half of each byte is sufficient approx of 16->8 bit scaling
1061 
1062    STBI_FREE(orig);
1063    return reduced;
1064 }
1065 
stbi__convert_8_to_16(stbi_uc * orig,int w,int h,int channels)1066 static stbi__uint16 *stbi__convert_8_to_16(stbi_uc *orig, int w, int h, int channels)
1067 {
1068    int i;
1069    int img_len = w * h * channels;
1070    stbi__uint16 *enlarged;
1071 
1072    enlarged = (stbi__uint16 *) stbi__malloc(img_len*2);
1073    if (enlarged == NULL) return (stbi__uint16 *) stbi__errpuc("outofmem", "Out of memory");
1074 
1075    for (i = 0; i < img_len; ++i)
1076       enlarged[i] = (stbi__uint16)((orig[i] << 8) + orig[i]); // replicate to high and low byte, maps 0->0, 255->0xffff
1077 
1078    STBI_FREE(orig);
1079    return enlarged;
1080 }
1081 
stbi__vertical_flip(void * image,int w,int h,int bytes_per_pixel)1082 static void stbi__vertical_flip(void *image, int w, int h, int bytes_per_pixel)
1083 {
1084    int row;
1085    size_t bytes_per_row = (size_t)w * bytes_per_pixel;
1086    stbi_uc temp[2048];
1087    stbi_uc *bytes = (stbi_uc *)image;
1088 
1089    for (row = 0; row < (h>>1); row++) {
1090       stbi_uc *row0 = bytes + row*bytes_per_row;
1091       stbi_uc *row1 = bytes + (h - row - 1)*bytes_per_row;
1092       // swap row0 with row1
1093       size_t bytes_left = bytes_per_row;
1094       while (bytes_left) {
1095          size_t bytes_copy = (bytes_left < sizeof(temp)) ? bytes_left : sizeof(temp);
1096          memcpy(temp, row0, bytes_copy);
1097          memcpy(row0, row1, bytes_copy);
1098          memcpy(row1, temp, bytes_copy);
1099          row0 += bytes_copy;
1100          row1 += bytes_copy;
1101          bytes_left -= bytes_copy;
1102       }
1103    }
1104 }
1105 
1106 #ifndef STBI_NO_GIF
stbi__vertical_flip_slices(void * image,int w,int h,int z,int bytes_per_pixel)1107 static void stbi__vertical_flip_slices(void *image, int w, int h, int z, int bytes_per_pixel)
1108 {
1109    int slice;
1110    int slice_size = w * h * bytes_per_pixel;
1111 
1112    stbi_uc *bytes = (stbi_uc *)image;
1113    for (slice = 0; slice < z; ++slice) {
1114       stbi__vertical_flip(bytes, w, h, bytes_per_pixel);
1115       bytes += slice_size;
1116    }
1117 }
1118 #endif
1119 
// Load an image through stbi__load_main and make sure the caller gets 8-bit
// channels: 16-bit results are converted down, and the image is flipped when
// vertical flipping is enabled. Returns NULL when loading or conversion fails.
static unsigned char *stbi__load_and_postprocess_8bit(stbi__context *s, int *x, int *y, int *comp, int req_comp)
{
   stbi__result_info ri;
   void *result = stbi__load_main(s, x, y, comp, req_comp, &ri, 8);

   if (result == NULL)
      return NULL;

   if (ri.bits_per_channel != 8) {
      STBI_ASSERT(ri.bits_per_channel == 16);
      result = stbi__convert_16_to_8((stbi__uint16 *) result, *x, *y, req_comp == 0 ? *comp : req_comp);
      // the conversion allocates a new buffer and can fail; a NULL image
      // must not reach the flip below
      if (result == NULL)
         return NULL;
      ri.bits_per_channel = 8;
   }

   // @TODO: move stbi__convert_format to here

   if (stbi__vertically_flip_on_load) {
      int channels = req_comp ? req_comp : *comp;
      stbi__vertical_flip(result, *x, *y, channels * sizeof(stbi_uc));
   }

   return (unsigned char *) result;
}
1143 
stbi__load_and_postprocess_16bit(stbi__context * s,int * x,int * y,int * comp,int req_comp)1144 static stbi__uint16 *stbi__load_and_postprocess_16bit(stbi__context *s, int *x, int *y, int *comp, int req_comp)
1145 {
1146    stbi__result_info ri;
1147    void *result = stbi__load_main(s, x, y, comp, req_comp, &ri, 16);
1148 
1149    if (result == NULL)
1150       return NULL;
1151 
1152    if (ri.bits_per_channel != 16) {
1153       STBI_ASSERT(ri.bits_per_channel == 8);
1154       result = stbi__convert_8_to_16((stbi_uc *) result, *x, *y, req_comp == 0 ? *comp : req_comp);
1155       ri.bits_per_channel = 16;
1156    }
1157 
1158    // @TODO: move stbi__convert_format16 to here
1159    // @TODO: special case RGB-to-Y (and RGBA-to-YA) for 8-bit-to-16-bit case to keep more precision
1160 
1161    if (stbi__vertically_flip_on_load) {
1162       int channels = req_comp ? req_comp : *comp;
1163       stbi__vertical_flip(result, *x, *y, channels * sizeof(stbi__uint16));
1164    }
1165 
1166    return (stbi__uint16 *) result;
1167 }
1168 
1169 #if !defined(STBI_NO_HDR) && !defined(STBI_NO_LINEAR)
stbi__float_postprocess(float * result,int * x,int * y,int * comp,int req_comp)1170 static void stbi__float_postprocess(float *result, int *x, int *y, int *comp, int req_comp)
1171 {
1172    if (stbi__vertically_flip_on_load && result != NULL) {
1173       int channels = req_comp ? req_comp : *comp;
1174       stbi__vertical_flip(result, *x, *y, channels * sizeof(float));
1175    }
1176 }
1177 #endif
1178 
1179 #ifndef STBI_NO_STDIO
1180 
1181 #if defined(_MSC_VER) && defined(STBI_WINDOWS_UTF8)
1182 STBI_EXTERN __declspec(dllimport) int __stdcall MultiByteToWideChar(unsigned int cp, unsigned long flags, const char *str, int cbmb, wchar_t *widestr, int cchwide);
1183 STBI_EXTERN __declspec(dllimport) int __stdcall WideCharToMultiByte(unsigned int cp, unsigned long flags, const wchar_t *widestr, int cchwide, char *str, int cbmb, const char *defchar, int *used_default);
1184 #endif
1185 
#if defined(_MSC_VER) && defined(STBI_WINDOWS_UTF8)
// Convert the NUL-terminated wide string `input` to UTF-8 in `buffer`
// (capacity `bufferlen` bytes); the result is WideCharToMultiByte's return value.
STBIDEF int stbi_convert_wchar_to_utf8(char *buffer, size_t bufferlen, const wchar_t* input)
{
   int capacity = (int) bufferlen;
   return WideCharToMultiByte(65001 /* UTF8 */, 0, input, -1, buffer, capacity, NULL, NULL);
}
#endif
1192 
// Open `filename` with `mode`, returning the FILE* or 0 on failure.
// With MSVC and STBI_WINDOWS_UTF8 both strings are treated as UTF-8 and
// converted to wide strings first; other MSVC builds (>= 1400) use fopen_s,
// and everything else plain fopen.
static FILE *stbi__fopen(char const *filename, char const *mode)
{
   FILE *f;
#if defined(_MSC_VER) && defined(STBI_WINDOWS_UTF8)
   wchar_t wMode[64];
   wchar_t wFilename[1024];
   // the destination size is a count of wide characters, not of bytes;
   // passing sizeof() alone would let the conversion run past the arrays
   if (0 == MultiByteToWideChar(65001 /* UTF8 */, 0, filename, -1, wFilename, (int) (sizeof(wFilename)/sizeof(*wFilename))))
      return 0;

   if (0 == MultiByteToWideChar(65001 /* UTF8 */, 0, mode, -1, wMode, (int) (sizeof(wMode)/sizeof(*wMode))))
      return 0;

#if _MSC_VER >= 1400
   if (0 != _wfopen_s(&f, wFilename, wMode))
      f = 0;
#else
   f = _wfopen(wFilename, wMode);
#endif

#elif defined(_MSC_VER) && _MSC_VER >= 1400
   if (0 != fopen_s(&f, filename, mode))
      f=0;
#else
   f = fopen(filename, mode);
#endif
   return f;
}
1220 
1221 
stbi_load(char const * filename,int * x,int * y,int * comp,int req_comp)1222 STBIDEF stbi_uc *stbi_load(char const *filename, int *x, int *y, int *comp, int req_comp)
1223 {
1224    FILE *f = stbi__fopen(filename, "rb");
1225    unsigned char *result;
1226    if (!f) return stbi__errpuc("can't fopen", "Unable to open file");
1227    result = stbi_load_from_file(f,x,y,comp,req_comp);
1228    fclose(f);
1229    return result;
1230 }
1231 
stbi_load_from_file(FILE * f,int * x,int * y,int * comp,int req_comp)1232 STBIDEF stbi_uc *stbi_load_from_file(FILE *f, int *x, int *y, int *comp, int req_comp)
1233 {
1234    unsigned char *result;
1235    stbi__context s;
1236    stbi__start_file(&s,f);
1237    result = stbi__load_and_postprocess_8bit(&s,x,y,comp,req_comp);
1238    if (result) {
1239       // need to 'unget' all the characters in the IO buffer
1240       fseek(f, - (int) (s.img_buffer_end - s.img_buffer), SEEK_CUR);
1241    }
1242    return result;
1243 }
1244 
stbi_load_from_file_16(FILE * f,int * x,int * y,int * comp,int req_comp)1245 STBIDEF stbi__uint16 *stbi_load_from_file_16(FILE *f, int *x, int *y, int *comp, int req_comp)
1246 {
1247    stbi__uint16 *result;
1248    stbi__context s;
1249    stbi__start_file(&s,f);
1250    result = stbi__load_and_postprocess_16bit(&s,x,y,comp,req_comp);
1251    if (result) {
1252       // need to 'unget' all the characters in the IO buffer
1253       fseek(f, - (int) (s.img_buffer_end - s.img_buffer), SEEK_CUR);
1254    }
1255    return result;
1256 }
1257 
stbi_load_16(char const * filename,int * x,int * y,int * comp,int req_comp)1258 STBIDEF stbi_us *stbi_load_16(char const *filename, int *x, int *y, int *comp, int req_comp)
1259 {
1260    FILE *f = stbi__fopen(filename, "rb");
1261    stbi__uint16 *result;
1262    if (!f) return (stbi_us *) stbi__errpuc("can't fopen", "Unable to open file");
1263    result = stbi_load_from_file_16(f,x,y,comp,req_comp);
1264    fclose(f);
1265    return result;
1266 }
1267 
1268 
1269 #endif //!STBI_NO_STDIO
1270 
stbi_load_16_from_memory(stbi_uc const * buffer,int len,int * x,int * y,int * channels_in_file,int desired_channels)1271 STBIDEF stbi_us *stbi_load_16_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *channels_in_file, int desired_channels)
1272 {
1273    stbi__context s;
1274    stbi__start_mem(&s,buffer,len);
1275    return stbi__load_and_postprocess_16bit(&s,x,y,channels_in_file,desired_channels);
1276 }
1277 
stbi_load_16_from_callbacks(stbi_io_callbacks const * clbk,void * user,int * x,int * y,int * channels_in_file,int desired_channels)1278 STBIDEF stbi_us *stbi_load_16_from_callbacks(stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *channels_in_file, int desired_channels)
1279 {
1280    stbi__context s;
1281    stbi__start_callbacks(&s, (stbi_io_callbacks *)clbk, user);
1282    return stbi__load_and_postprocess_16bit(&s,x,y,channels_in_file,desired_channels);
1283 }
1284 
stbi_load_from_memory(stbi_uc const * buffer,int len,int * x,int * y,int * comp,int req_comp)1285 STBIDEF stbi_uc *stbi_load_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp)
1286 {
1287    stbi__context s;
1288    stbi__start_mem(&s,buffer,len);
1289    return stbi__load_and_postprocess_8bit(&s,x,y,comp,req_comp);
1290 }
1291 
stbi_load_from_callbacks(stbi_io_callbacks const * clbk,void * user,int * x,int * y,int * comp,int req_comp)1292 STBIDEF stbi_uc *stbi_load_from_callbacks(stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *comp, int req_comp)
1293 {
1294    stbi__context s;
1295    stbi__start_callbacks(&s, (stbi_io_callbacks *) clbk, user);
1296    return stbi__load_and_postprocess_8bit(&s,x,y,comp,req_comp);
1297 }
1298 
1299 #ifndef STBI_NO_GIF
stbi_load_gif_from_memory(stbi_uc const * buffer,int len,int ** delays,int * x,int * y,int * z,int * comp,int req_comp)1300 STBIDEF stbi_uc *stbi_load_gif_from_memory(stbi_uc const *buffer, int len, int **delays, int *x, int *y, int *z, int *comp, int req_comp)
1301 {
1302    unsigned char *result;
1303    stbi__context s;
1304    stbi__start_mem(&s,buffer,len);
1305 
1306    result = (unsigned char*) stbi__load_gif_main(&s, delays, x, y, z, comp, req_comp);
1307    if (stbi__vertically_flip_on_load) {
1308       stbi__vertical_flip_slices( result, *x, *y, *z, *comp );
1309    }
1310 
1311    return result;
1312 }
1313 #endif
1314 
1315 #ifndef STBI_NO_LINEAR
// Produce a float image: HDR input (when supported) is loaded directly and
// postprocessed; anything else is loaded as 8-bit and converted to float.
static float *stbi__loadf_main(stbi__context *s, int *x, int *y, int *comp, int req_comp)
{
   unsigned char *ldr;
   #ifndef STBI_NO_HDR
   if (stbi__hdr_test(s)) {
      stbi__result_info ri;
      float *hdr_data = stbi__hdr_load(s,x,y,comp,req_comp, &ri);
      if (hdr_data != NULL)
         stbi__float_postprocess(hdr_data,x,y,comp,req_comp);
      return hdr_data;
   }
   #endif
   ldr = stbi__load_and_postprocess_8bit(s, x, y, comp, req_comp);
   if (ldr == NULL)
      return stbi__errpf("unknown image type", "Image not of any known type, or corrupt");
   return stbi__ldr_to_hdr(ldr, *x, *y, req_comp ? req_comp : *comp);
}
1333 
stbi_loadf_from_memory(stbi_uc const * buffer,int len,int * x,int * y,int * comp,int req_comp)1334 STBIDEF float *stbi_loadf_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp)
1335 {
1336    stbi__context s;
1337    stbi__start_mem(&s,buffer,len);
1338    return stbi__loadf_main(&s,x,y,comp,req_comp);
1339 }
1340 
stbi_loadf_from_callbacks(stbi_io_callbacks const * clbk,void * user,int * x,int * y,int * comp,int req_comp)1341 STBIDEF float *stbi_loadf_from_callbacks(stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *comp, int req_comp)
1342 {
1343    stbi__context s;
1344    stbi__start_callbacks(&s, (stbi_io_callbacks *) clbk, user);
1345    return stbi__loadf_main(&s,x,y,comp,req_comp);
1346 }
1347 
1348 #ifndef STBI_NO_STDIO
stbi_loadf(char const * filename,int * x,int * y,int * comp,int req_comp)1349 STBIDEF float *stbi_loadf(char const *filename, int *x, int *y, int *comp, int req_comp)
1350 {
1351    float *result;
1352    FILE *f = stbi__fopen(filename, "rb");
1353    if (!f) return stbi__errpf("can't fopen", "Unable to open file");
1354    result = stbi_loadf_from_file(f,x,y,comp,req_comp);
1355    fclose(f);
1356    return result;
1357 }
1358 
stbi_loadf_from_file(FILE * f,int * x,int * y,int * comp,int req_comp)1359 STBIDEF float *stbi_loadf_from_file(FILE *f, int *x, int *y, int *comp, int req_comp)
1360 {
1361    stbi__context s;
1362    stbi__start_file(&s,f);
1363    return stbi__loadf_main(&s,x,y,comp,req_comp);
1364 }
1365 #endif // !STBI_NO_STDIO
1366 
1367 #endif // !STBI_NO_LINEAR
1368 
// these is-hdr-or-not queries are defined regardless of whether STBI_NO_HDR
// is defined, for API simplicity; if STBI_NO_HDR is defined, they always
// report false!
1372 
stbi_is_hdr_from_memory(stbi_uc const * buffer,int len)1373 STBIDEF int stbi_is_hdr_from_memory(stbi_uc const *buffer, int len)
1374 {
1375    #ifndef STBI_NO_HDR
1376    stbi__context s;
1377    stbi__start_mem(&s,buffer,len);
1378    return stbi__hdr_test(&s);
1379    #else
1380    STBI_NOTUSED(buffer);
1381    STBI_NOTUSED(len);
1382    return 0;
1383    #endif
1384 }
1385 
1386 #ifndef STBI_NO_STDIO
stbi_is_hdr(char const * filename)1387 STBIDEF int      stbi_is_hdr          (char const *filename)
1388 {
1389    FILE *f = stbi__fopen(filename, "rb");
1390    int result=0;
1391    if (f) {
1392       result = stbi_is_hdr_from_file(f);
1393       fclose(f);
1394    }
1395    return result;
1396 }
1397 
stbi_is_hdr_from_file(FILE * f)1398 STBIDEF int stbi_is_hdr_from_file(FILE *f)
1399 {
1400    #ifndef STBI_NO_HDR
1401    long pos = ftell(f);
1402    int res;
1403    stbi__context s;
1404    stbi__start_file(&s,f);
1405    res = stbi__hdr_test(&s);
1406    fseek(f, pos, SEEK_SET);
1407    return res;
1408    #else
1409    STBI_NOTUSED(f);
1410    return 0;
1411    #endif
1412 }
1413 #endif // !STBI_NO_STDIO
1414 
stbi_is_hdr_from_callbacks(stbi_io_callbacks const * clbk,void * user)1415 STBIDEF int      stbi_is_hdr_from_callbacks(stbi_io_callbacks const *clbk, void *user)
1416 {
1417    #ifndef STBI_NO_HDR
1418    stbi__context s;
1419    stbi__start_callbacks(&s, (stbi_io_callbacks *) clbk, user);
1420    return stbi__hdr_test(&s);
1421    #else
1422    STBI_NOTUSED(clbk);
1423    STBI_NOTUSED(user);
1424    return 0;
1425    #endif
1426 }
1427 
1428 #ifndef STBI_NO_LINEAR
1429 static float stbi__l2h_gamma=2.2f, stbi__l2h_scale=1.0f;
1430 
stbi_ldr_to_hdr_gamma(float gamma)1431 STBIDEF void   stbi_ldr_to_hdr_gamma(float gamma) { stbi__l2h_gamma = gamma; }
stbi_ldr_to_hdr_scale(float scale)1432 STBIDEF void   stbi_ldr_to_hdr_scale(float scale) { stbi__l2h_scale = scale; }
1433 #endif
1434 
1435 static float stbi__h2l_gamma_i=1.0f/2.2f, stbi__h2l_scale_i=1.0f;
1436 
stbi_hdr_to_ldr_gamma(float gamma)1437 STBIDEF void   stbi_hdr_to_ldr_gamma(float gamma) { stbi__h2l_gamma_i = 1/gamma; }
stbi_hdr_to_ldr_scale(float scale)1438 STBIDEF void   stbi_hdr_to_ldr_scale(float scale) { stbi__h2l_scale_i = 1/scale; }
1439 
1440 
1441 //////////////////////////////////////////////////////////////////////////////
1442 //
1443 // Common code used by all image loaders
1444 //
1445 
// scan modes; the numeric values are 0, 1 and 2 in this order
enum
{
   STBI__SCAN_load = 0,
   STBI__SCAN_type,
   STBI__SCAN_header
};
1452 
// Ask the read callback for up to buflen bytes into the context's internal
// buffer and reset the current window to whatever arrived.
static void stbi__refill_buffer(stbi__context *s)
{
   int n = (s->io.read)(s->io_user_data,(char*)s->buffer_start,s->buflen);
   if (n <= 0) {
      // at end of file (or when the callback reports failure with a negative
      // count, which must not be used as a length): treat same as if from
      // memory, but need to handle case where s->img_buffer isn't pointing
      // to safe memory, e.g. 0-byte file -- so leave one zero byte in the buffer
      s->read_from_callbacks = 0;
      s->img_buffer = s->buffer_start;
      s->img_buffer_end = s->buffer_start+1;
      *s->img_buffer = 0;
   } else {
      s->img_buffer = s->buffer_start;
      s->img_buffer_end = s->buffer_start + n;
   }
}
1468 
stbi__get8(stbi__context * s)1469 stbi_inline static stbi_uc stbi__get8(stbi__context *s)
1470 {
1471    if (s->img_buffer < s->img_buffer_end)
1472       return *s->img_buffer++;
1473    if (s->read_from_callbacks) {
1474       stbi__refill_buffer(s);
1475       return *s->img_buffer++;
1476    }
1477    return 0;
1478 }
1479 
stbi__at_eof(stbi__context * s)1480 stbi_inline static int stbi__at_eof(stbi__context *s)
1481 {
1482    if (s->io.read) {
1483       if (!(s->io.eof)(s->io_user_data)) return 0;
1484       // if feof() is true, check if buffer = end
1485       // special case: we've only got the special 0 character at the end
1486       if (s->read_from_callbacks == 0) return 1;
1487    }
1488 
1489    return s->img_buffer >= s->img_buffer_end;
1490 }
1491 
// Skip `n` bytes of input. A negative `n` moves to the end of the buffered
// data. With callbacks, whatever is not covered by the buffer is skipped
// through the skip callback.
static void stbi__skip(stbi__context *s, int n)
{
   int buffered;

   if (n < 0) {
      s->img_buffer = s->img_buffer_end;
      return;
   }

   buffered = (int) (s->img_buffer_end - s->img_buffer);
   if (s->io.read != NULL && buffered < n) {
      // drop everything buffered and let the callback skip the remainder
      s->img_buffer = s->img_buffer_end;
      (s->io.skip)(s->io_user_data, n - buffered);
      return;
   }

   s->img_buffer += n;
}
1508 
// Copy exactly `n` bytes of input into `buffer`. Returns 1 when all `n`
// bytes were delivered, 0 otherwise (including for a negative `n`).
static int stbi__getn(stbi__context *s, stbi_uc *buffer, int n)
{
   // a negative count can never be satisfied and must not reach memcpy
   if (n < 0) return 0;

   if (s->io.read) {
      int blen = (int) (s->img_buffer_end - s->img_buffer);
      if (blen < n) {
         int res, count;

         // hand over what is buffered, then read the rest straight into the
         // caller's buffer
         memcpy(buffer, s->img_buffer, blen);

         count = (s->io.read)(s->io_user_data, (char*) buffer + blen, n - blen);
         res = (count == (n-blen));
         s->img_buffer = s->img_buffer_end;
         return res;
      }
   }

   // compare against the bytes remaining rather than forming img_buffer+n,
   // which could point past the end of the buffer for a large n
   if (n <= s->img_buffer_end - s->img_buffer) {
      memcpy(buffer, s->img_buffer, n);
      s->img_buffer += n;
      return 1;
   } else
      return 0;
}
1532 
// Read two bytes and combine them big-endian (first byte is the high one).
static int stbi__get16be(stbi__context *s)
{
   int hi = stbi__get8(s);
   int lo = stbi__get8(s);
   return (hi << 8) + lo;
}
1538 
stbi__get32be(stbi__context * s)1539 static stbi__uint32 stbi__get32be(stbi__context *s)
1540 {
1541    stbi__uint32 z = stbi__get16be(s);
1542    return (z << 16) + stbi__get16be(s);
1543 }
1544 
1545 #if defined(STBI_NO_BMP) && defined(STBI_NO_TGA) && defined(STBI_NO_GIF)
1546 // nothing
1547 #else
stbi__get16le(stbi__context * s)1548 static int stbi__get16le(stbi__context *s)
1549 {
1550    int z = stbi__get8(s);
1551    return z + (stbi__get8(s) << 8);
1552 }
1553 #endif
1554 
1555 #ifndef STBI_NO_BMP
stbi__get32le(stbi__context * s)1556 static stbi__uint32 stbi__get32le(stbi__context *s)
1557 {
1558    stbi__uint32 z = stbi__get16le(s);
1559    return z + (stbi__get16le(s) << 16);
1560 }
1561 #endif
1562 
// truncate int to byte without warnings: keep the low 8 bits, then cast
#define STBI__BYTECAST(x)  ((stbi_uc) ((x) & 255))
1564 
1565 
1566 //////////////////////////////////////////////////////////////////////////////
1567 //
1568 //  generic converter from built-in img_n to req_comp
1569 //    individual types do this automatically as much as possible (e.g. jpeg
1570 //    does all cases internally since it needs to colorspace convert anyway,
1571 //    and it never has alpha, so very few cases ). png can automatically
1572 //    interleave an alpha=255 channel, but falls back to this for other cases
1573 //
1574 //  assume data buffer is malloced, so malloc a new one and free that one
1575 //  only failure mode is malloc failing
1576 
stbi__compute_y(int r,int g,int b)1577 static stbi_uc stbi__compute_y(int r, int g, int b)
1578 {
1579    return (stbi_uc) (((r*77) + (g*150) +  (29*b)) >> 8);
1580 }
1581 
// Convert an x-by-y 8-bit image from img_n to req_comp components per pixel.
// When the counts already match, `data` is returned as is. Otherwise a new
// buffer is allocated and `data` is freed, on success and on failure alike.
// Returns NULL with the failure reason set when out of memory or when the
// img_n/req_comp combination is not one of the supported conversions.
static unsigned char *stbi__convert_format(unsigned char *data, int img_n, int req_comp, unsigned int x, unsigned int y)
{
   int i,j;
   unsigned char *good;

   if (req_comp == img_n) return data;
   STBI_ASSERT(req_comp >= 1 && req_comp <= 4);

   good = (unsigned char *) stbi__malloc_mad3(req_comp, x, y, 0);
   if (good == NULL) {
      STBI_FREE(data);
      return stbi__errpuc("outofmem", "Out of memory");
   }

   for (j=0; j < (int) y; ++j) {
      unsigned char *src  = data + j * x * img_n   ;
      unsigned char *dest = good + j * x * req_comp;

      #define STBI__COMBO(a,b)  ((a)*8+(b))
      #define STBI__CASE(a,b)   case STBI__COMBO(a,b): for(i=x-1; i >= 0; --i, src += a, dest += b)
      // convert source image with img_n components to one with req_comp components;
      // avoid switch per pixel, so use switch per scanline and massive macros
      switch (STBI__COMBO(img_n, req_comp)) {
         STBI__CASE(1,2) { dest[0]=src[0]; dest[1]=255;                                     } break;
         STBI__CASE(1,3) { dest[0]=dest[1]=dest[2]=src[0];                                  } break;
         STBI__CASE(1,4) { dest[0]=dest[1]=dest[2]=src[0]; dest[3]=255;                     } break;
         STBI__CASE(2,1) { dest[0]=src[0];                                                  } break;
         STBI__CASE(2,3) { dest[0]=dest[1]=dest[2]=src[0];                                  } break;
         STBI__CASE(2,4) { dest[0]=dest[1]=dest[2]=src[0]; dest[3]=src[1];                  } break;
         STBI__CASE(3,4) { dest[0]=src[0];dest[1]=src[1];dest[2]=src[2];dest[3]=255;        } break;
         STBI__CASE(3,1) { dest[0]=stbi__compute_y(src[0],src[1],src[2]);                   } break;
         STBI__CASE(3,2) { dest[0]=stbi__compute_y(src[0],src[1],src[2]); dest[1] = 255;    } break;
         STBI__CASE(4,1) { dest[0]=stbi__compute_y(src[0],src[1],src[2]);                   } break;
         STBI__CASE(4,2) { dest[0]=stbi__compute_y(src[0],src[1],src[2]); dest[1] = src[3]; } break;
         STBI__CASE(4,3) { dest[0]=src[0];dest[1]=src[1];dest[2]=src[2];                    } break;
         // unsupported combination: with asserts compiled out we must not
         // hand back a buffer that was never filled in, so fail cleanly
         default: STBI_ASSERT(0); STBI_FREE(data); STBI_FREE(good); return stbi__errpuc("unsupported", "Unsupported format conversion");
      }
      #undef STBI__CASE
   }

   STBI_FREE(data);
   return good;
}
1625 
stbi__compute_y_16(int r,int g,int b)1626 static stbi__uint16 stbi__compute_y_16(int r, int g, int b)
1627 {
1628    return (stbi__uint16) (((r*77) + (g*150) +  (29*b)) >> 8);
1629 }
1630 
// Convert a 16-bit-per-channel interleaved image from img_n components per
// pixel to req_comp components per pixel.
//
//   data      source pixels (x*y pixels of img_n 16-bit values each); this
//             function takes ownership and frees it on every path except the
//             req_comp == img_n shortcut, where it is returned unchanged
//   img_n     components per pixel in data
//   req_comp  components per pixel wanted (asserted to be 1..4)
//   x, y      image width and height in pixels
//
// Returns the converted buffer, or the error result of stbi__errpuc (cast to
// stbi__uint16 *) when the allocation fails or when the (img_n, req_comp)
// pair has no conversion case below.
static stbi__uint16 *stbi__convert_format16(stbi__uint16 *data, int img_n, int req_comp, unsigned int x, unsigned int y)
{
   int i,j;
   stbi__uint16 *good;

   if (req_comp == img_n) return data;
   STBI_ASSERT(req_comp >= 1 && req_comp <= 4);

   // Allocate through stbi__malloc_mad3, as the 8-bit converter does, rather
   // than handing stbi__malloc an unchecked 32-bit product that can wrap for
   // large images. The factor 2 is the byte size of one 16-bit component.
   // NOTE(review): stbi__malloc_mad3 is expected to reject products that
   // overflow -- confirm against its definition.
   good = (stbi__uint16 *) stbi__malloc_mad3(req_comp * 2, x, y, 0);
   if (good == NULL) {
      STBI_FREE(data);
      return (stbi__uint16 *) stbi__errpuc("outofmem", "Out of memory");
   }

   for (j=0; j < (int) y; ++j) {
      stbi__uint16 *src  = data + j * x * img_n   ;
      stbi__uint16 *dest = good + j * x * req_comp;

      #define STBI__COMBO(a,b)  ((a)*8+(b))
      #define STBI__CASE(a,b)   case STBI__COMBO(a,b): for(i=x-1; i >= 0; --i, src += a, dest += b)
      // convert source image with img_n components to one with req_comp components;
      // avoid switch per pixel, so use switch per scanline and massive macros
      switch (STBI__COMBO(img_n, req_comp)) {
         STBI__CASE(1,2) { dest[0]=src[0]; dest[1]=0xffff;                                     } break;
         STBI__CASE(1,3) { dest[0]=dest[1]=dest[2]=src[0];                                     } break;
         STBI__CASE(1,4) { dest[0]=dest[1]=dest[2]=src[0]; dest[3]=0xffff;                     } break;
         STBI__CASE(2,1) { dest[0]=src[0];                                                     } break;
         STBI__CASE(2,3) { dest[0]=dest[1]=dest[2]=src[0];                                     } break;
         STBI__CASE(2,4) { dest[0]=dest[1]=dest[2]=src[0]; dest[3]=src[1];                     } break;
         STBI__CASE(3,4) { dest[0]=src[0];dest[1]=src[1];dest[2]=src[2];dest[3]=0xffff;        } break;
         STBI__CASE(3,1) { dest[0]=stbi__compute_y_16(src[0],src[1],src[2]);                   } break;
         STBI__CASE(3,2) { dest[0]=stbi__compute_y_16(src[0],src[1],src[2]); dest[1] = 0xffff; } break;
         STBI__CASE(4,1) { dest[0]=stbi__compute_y_16(src[0],src[1],src[2]);                   } break;
         STBI__CASE(4,2) { dest[0]=stbi__compute_y_16(src[0],src[1],src[2]); dest[1] = src[3]; } break;
         STBI__CASE(4,3) { dest[0]=src[0];dest[1]=src[1];dest[2]=src[2];                       } break;
         default:
            // No conversion exists for this pair. When STBI_ASSERT is compiled
            // out, falling through here would hand the caller a buffer that
            // was never written, so release both buffers and report an error.
            STBI_ASSERT(0);
            STBI_FREE(data);
            STBI_FREE(good);
            return (stbi__uint16 *) stbi__errpuc("unsupported", "Unsupported format conversion");
      }
      #undef STBI__CASE
   }

   STBI_FREE(data);
   return good;
}
1674 
1675 #ifndef STBI_NO_LINEAR
// Expand an 8-bit image into a newly allocated float image.
//
//   data   x*y pixels of comp bytes each; freed before returning (also when
//          the allocation fails). A NULL data yields NULL immediately.
//
// For an odd comp every channel is treated as colour; for an even comp the
// last channel is treated as alpha. Colour channels are normalised to 0..1,
// raised to stbi__l2h_gamma and multiplied by stbi__l2h_scale; the alpha
// channel is only normalised to 0..1.
static float   *stbi__ldr_to_hdr(stbi_uc *data, int x, int y, int comp)
{
   int pixel, pixel_count, c, color_channels;
   float *output;

   if (!data) return NULL;

   output = (float *) stbi__malloc_mad4(x, y, comp, sizeof(float), 0);
   if (output == NULL) {
      STBI_FREE(data);
      return stbi__errpf("outofmem", "Out of memory");
   }

   // number of non-alpha components
   color_channels = (comp & 1) ? comp : comp-1;
   pixel_count = x*y;

   for (pixel=0; pixel < pixel_count; ++pixel) {
      const stbi_uc *in = data + pixel*comp;
      float *out = output + pixel*comp;

      for (c=0; c < color_channels; ++c)
         out[c] = (float) (pow(in[c]/255.0f, stbi__l2h_gamma) * stbi__l2h_scale);

      // trailing alpha channel, if any, stays linear
      if (color_channels < comp)
         out[color_channels] = in[color_channels]/255.0f;
   }

   STBI_FREE(data);
   return output;
}
1698 #endif
1699 
1700 #ifndef STBI_NO_HDR
1701 #define stbi__float2int(x)   ((int) (x))
stbi__hdr_to_ldr(float * data,int x,int y,int comp)1702 static stbi_uc *stbi__hdr_to_ldr(float   *data, int x, int y, int comp)
1703 {
1704    int i,k,n;
1705    stbi_uc *output;
1706    if (!data) return NULL;
1707    output = (stbi_uc *) stbi__malloc_mad3(x, y, comp, 0);
1708    if (output == NULL) { STBI_FREE(data); return stbi__errpuc("outofmem", "Out of memory"); }
1709    // compute number of non-alpha components
1710    if (comp & 1) n = comp; else n = comp-1;
1711    for (i=0; i < x*y; ++i) {
1712       for (k=0; k < n; ++k) {
1713          float z = (float) pow(data[i*comp+k]*stbi__h2l_scale_i, stbi__h2l_gamma_i) * 255 + 0.5f;
1714          if (z < 0) z = 0;
1715          if (z > 255) z = 255;
1716          output[i*comp + k] = (stbi_uc) stbi__float2int(z);
1717       }
1718       if (k < comp) {
1719          float z = data[i*comp+k] * 255 + 0.5f;
1720          if (z < 0) z = 0;
1721          if (z > 255) z = 255;
1722          output[i*comp + k] = (stbi_uc) stbi__float2int(z);
1723       }
1724    }
1725    STBI_FREE(data);
1726    return output;
1727 }
1728 #endif
1729 
1730 //////////////////////////////////////////////////////////////////////////////
1731 //
1732 //  "baseline" JPEG/JFIF decoder
1733 //
1734 //    simple implementation
1735 //      - doesn't support delayed output of y-dimension
1736 //      - simple interface (only one output format: 8-bit interleaved RGB)
1737 //      - doesn't try to recover corrupt jpegs
1738 //      - doesn't allow partial loading, loading multiple at once
1739 //      - still fast on x86 (copying globals into locals doesn't help x86)
1740 //      - allocates lots of intermediate memory (full size of all components)
1741 //        - non-interleaved case requires this anyway
1742 //        - allows good upsampling (see next)
1743 //    high-quality
1744 //      - upsampled channels are bilinearly interpolated, even across blocks
1745 //      - quality integer IDCT derived from IJG's 'slow'
1746 //    performance
1747 //      - fast huffman; reasonable integer IDCT
1748 //      - some SIMD kernels for common paths on targets with SSE2/NEON
1749 //      - uses a lot of intermediate memory, could cache poorly
1750 
1751 #ifndef STBI_NO_JPEG
1752 
// huffman decoding acceleration
// FAST_BITS is the number of leading bits of the code buffer used to index
// the direct lookup tables (stbi__huffman.fast and the per-table fast_ac).
#define FAST_BITS   9  // larger handles more cases; smaller stomps less cache

// One Huffman table as used by the JPEG entropy decoder. Symbols are referred
// to by their index in code-length order; all arrays below except fast[],
// maxcode[] and delta[] are indexed by that symbol index.
typedef struct
{
   // indexed by the top FAST_BITS bits of the code buffer: the symbol index
   // when the code is at most FAST_BITS long, or 255 meaning "not accelerated"
   stbi_uc  fast[1 << FAST_BITS];
   // weirdly, repacking this into AoS is a 10% speed loss, instead of a win
   stbi__uint16 code[256];       // code bits assigned to each symbol index
   stbi_uc  values[256];         // value the decoder returns for each symbol index (filled outside stbi__build_huffman)
   stbi_uc  size[257];           // code length in bits of each symbol index; a 0 entry terminates the list
   unsigned int maxcode[18];     // [len] = largest code of that length + 1, shifted left to 16 bits; [17] is an all-ones sentinel
   int    delta[17];   // old 'firstsymbol' - old 'firstcode'
} stbi__huffman;
1766 
// Decoder state for one JPEG image: input stream, Huffman and dequantization
// tables, per-component buffers, the entropy-coded bit buffer, progressive
// scan parameters and the kernels used for IDCT / colour conversion /
// resampling.
typedef struct
{
   stbi__context *s;                         // input stream; the bit buffer is refilled from it with stbi__get8
   stbi__huffman huff_dc[4];
   stbi__huffman huff_ac[4];
   stbi__uint16 dequant[4][64];
   stbi__int16 fast_ac[4][1 << FAST_BITS];   // per-AC-table lookup built by stbi__build_fast_ac

// sizes for components, interleaved MCUs
   int img_h_max, img_v_max;
   int img_mcu_x, img_mcu_y;
   int img_mcu_w, img_mcu_h;

// definition of jpeg image component
   struct
   {
      int id;
      int h,v;          // NOTE(review): presumably horizontal/vertical sampling factors -- confirm
      int tq;           // NOTE(review): presumably the dequant[] table index -- confirm
      int hd,ha;        // NOTE(review): presumably the huff_dc[] / huff_ac[] table indices -- confirm
      int dc_pred;      // running DC value: each block's decoded DC difference is added to it

      int x,y,w2,h2;
      stbi_uc *data;
      void *raw_data, *raw_coeff;
      stbi_uc *linebuf;
      short   *coeff;   // progressive only
      int      coeff_w, coeff_h; // number of 8x8 coefficient blocks
   } img_comp[4];

   stbi__uint32   code_buffer; // jpeg entropy-coded buffer
   int            code_bits;   // number of valid bits
   unsigned char  marker;      // marker seen while filling entropy buffer
   int            nomore;      // flag if we saw a marker so must stop

   int            progressive;
   int            spec_start;  // first zigzag index coded by the current progressive scan
   int            spec_end;    // last zigzag index coded by the current progressive scan
   int            succ_high;   // 0 on a first scan of the band, nonzero on a refinement scan
   int            succ_low;    // coefficients of the current scan are scaled by 1 << succ_low
   int            eob_run;     // remaining blocks to skip as "end of band" in progressive AC scans
   int            jfif;
   int            app14_color_transform; // Adobe APP14 tag
   int            rgb;

   int scan_n, order[4];
   int restart_interval, todo;

// kernels
   // function pointers; stbi__idct_block has the signature of idct_block_kernel
   void (*idct_block_kernel)(stbi_uc *out, int out_stride, short data[64]);
   void (*YCbCr_to_RGB_kernel)(stbi_uc *out, const stbi_uc *y, const stbi_uc *pcb, const stbi_uc *pcr, int count, int step);
   stbi_uc *(*resample_row_hv_2_kernel)(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs);
} stbi__jpeg;
1820 
// Build the decoding tables of h from a code-length histogram.
//
//   h      table to fill: size[], code[], delta[], maxcode[] and fast[] are
//          written here; values[] is not touched by this function
//   count  16 entries; count[i] is the number of codes that are i+1 bits long
//
// Returns 1 on success. Returns the result of stbi__err when the histogram
// describes more than 256 symbols, or when the code lengths cannot be
// assigned consecutive codes of the stated length.
static int stbi__build_huffman(stbi__huffman *h, int *count)
{
   int i,j,k=0;
   unsigned int code;
   // build size list for each symbol (from JPEG spec)
   for (i=0; i < 16; ++i) {
      for (j=0; j < count[i]; ++j) {
         // code[] and values[] hold 256 symbols and size[] needs one more
         // slot for the 0 terminator; nothing else bounds the sum of
         // count[], so refuse to write a 257th symbol
         if (k >= 256) return stbi__err("bad size list","Corrupt JPEG");
         h->size[k++] = (stbi_uc) (i+1);
      }
   }
   h->size[k] = 0;

   // compute actual symbols (from jpeg spec)
   code = 0;
   k = 0;
   for(j=1; j <= 16; ++j) {
      // compute delta to add to code to compute symbol id
      h->delta[j] = k - code;
      if (h->size[k] == j) {
         while (h->size[k] == j)
            h->code[k++] = (stbi__uint16) (code++);
         // the last code handed out must still fit in j bits
         if (code-1 >= (1u << j)) return stbi__err("bad code lengths","Corrupt JPEG");
      }
      // compute largest code + 1 for this size, preshifted as needed later
      h->maxcode[j] = code << (16-j);
      code <<= 1;
   }
   // j == 17 here: sentinel that stops the length search in the decoder
   h->maxcode[j] = 0xffffffff;

   // build non-spec acceleration table; 255 is flag for not-accelerated
   memset(h->fast, 255, 1 << FAST_BITS);
   for (i=0; i < k; ++i) {
      int s = h->size[i];
      if (s <= FAST_BITS) {
         // every FAST_BITS-bit pattern that starts with this code maps to symbol i
         int c = h->code[i] << (FAST_BITS-s);
         int m = 1 << (FAST_BITS-s);
         for (j=0; j < m; ++j) {
            h->fast[c+j] = (stbi_uc) i;
         }
      }
   }
   return 1;
}
1862 
1863 // build a table that decodes both magnitude and value of small ACs in
1864 // one go.
stbi__build_fast_ac(stbi__int16 * fast_ac,stbi__huffman * h)1865 static void stbi__build_fast_ac(stbi__int16 *fast_ac, stbi__huffman *h)
1866 {
1867    int i;
1868    for (i=0; i < (1 << FAST_BITS); ++i) {
1869       stbi_uc fast = h->fast[i];
1870       fast_ac[i] = 0;
1871       if (fast < 255) {
1872          int rs = h->values[fast];
1873          int run = (rs >> 4) & 15;
1874          int magbits = rs & 15;
1875          int len = h->size[fast];
1876 
1877          if (magbits && len + magbits <= FAST_BITS) {
1878             // magnitude code followed by receive_extend code
1879             int k = ((i << len) & ((1 << FAST_BITS) - 1)) >> (FAST_BITS - magbits);
1880             int m = 1 << (magbits - 1);
1881             if (k < m) k += (~0U << magbits) + 1;
1882             // if the result is small enough, we can fit it in fast_ac table
1883             if (k >= -128 && k <= 127)
1884                fast_ac[i] = (stbi__int16) ((k * 256) + (run * 16) + (len + magbits));
1885          }
1886       }
1887    }
1888 }
1889 
// Append whole bytes to j->code_buffer until it holds more than 24 valid bits.
// Once j->nomore is set, zero bytes are appended instead of reading the
// stream. A 0xff byte followed (after any 0xff fill bytes) by a nonzero byte
// is a marker: it is stored in j->marker, j->nomore is set, and the function
// returns at once without appending anything for it. 0xff followed by 0 is
// appended as a data byte 0xff.
static void stbi__grow_buffer_unsafe(stbi__jpeg *j)
{
   do {
      unsigned int byte = 0;
      if (!j->nomore) byte = stbi__get8(j->s);

      if (byte == 0xff) {
         int next;
         do {
            next = stbi__get8(j->s);
         } while (next == 0xff); // consume fill bytes

         if (next != 0) {
            j->marker = (unsigned char) next;
            j->nomore = 1;
            return;
         }
      }

      j->code_buffer |= byte << (24 - j->code_bits);
      j->code_bits += 8;
   } while (j->code_bits <= 24);
}
1907 
// stbi__bmask[n] == (1 << n) - 1, i.e. a mask selecting the low n bits,
// for n in 0..16
static const stbi__uint32 stbi__bmask[17]={0,1,3,7,15,31,63,127,255,511,1023,2047,4095,8191,16383,32767,65535};
1910 
1911 // decode a jpeg huffman value from the bitstream
stbi__jpeg_huff_decode(stbi__jpeg * j,stbi__huffman * h)1912 stbi_inline static int stbi__jpeg_huff_decode(stbi__jpeg *j, stbi__huffman *h)
1913 {
1914    unsigned int temp;
1915    int c,k;
1916 
1917    if (j->code_bits < 16) stbi__grow_buffer_unsafe(j);
1918 
1919    // look at the top FAST_BITS and determine what symbol ID it is,
1920    // if the code is <= FAST_BITS
1921    c = (j->code_buffer >> (32 - FAST_BITS)) & ((1 << FAST_BITS)-1);
1922    k = h->fast[c];
1923    if (k < 255) {
1924       int s = h->size[k];
1925       if (s > j->code_bits)
1926          return -1;
1927       j->code_buffer <<= s;
1928       j->code_bits -= s;
1929       return h->values[k];
1930    }
1931 
1932    // naive test is to shift the code_buffer down so k bits are
1933    // valid, then test against maxcode. To speed this up, we've
1934    // preshifted maxcode left so that it has (16-k) 0s at the
1935    // end; in other words, regardless of the number of bits, it
1936    // wants to be compared against something shifted to have 16;
1937    // that way we don't need to shift inside the loop.
1938    temp = j->code_buffer >> 16;
1939    for (k=FAST_BITS+1 ; ; ++k)
1940       if (temp < h->maxcode[k])
1941          break;
1942    if (k == 17) {
1943       // error! code not found
1944       j->code_bits -= 16;
1945       return -1;
1946    }
1947 
1948    if (k > j->code_bits)
1949       return -1;
1950 
1951    // convert the huffman code to the symbol id
1952    c = ((j->code_buffer >> (32 - k)) & stbi__bmask[k]) + h->delta[k];
1953    STBI_ASSERT((((j->code_buffer) >> (32 - h->size[c])) & stbi__bmask[h->size[c]]) == h->code[c]);
1954 
1955    // convert the id to a symbol
1956    j->code_bits -= k;
1957    j->code_buffer <<= k;
1958    return h->values[c];
1959 }
1960 
// bias[n] = (-1<<n) + 1
// Added to an n-bit magnitude whose leading bit is 0 to obtain the negative
// value it encodes (see stbi__extend_receive). Valid for n in 0..15.
static const int stbi__jbias[16] = {0,-1,-3,-7,-15,-31,-63,-127,-255,-511,-1023,-2047,-4095,-8191,-16383,-32767};
1963 
1964 // combined JPEG 'receive' and JPEG 'extend', since baseline
1965 // always extends everything it receives.
stbi__extend_receive(stbi__jpeg * j,int n)1966 stbi_inline static int stbi__extend_receive(stbi__jpeg *j, int n)
1967 {
1968    unsigned int k;
1969    int sgn;
1970    if (j->code_bits < n) stbi__grow_buffer_unsafe(j);
1971 
1972    sgn = (stbi__int32)j->code_buffer >> 31; // sign bit is always in MSB
1973    k = stbi_lrot(j->code_buffer, n);
1974    STBI_ASSERT(n >= 0 && n < (int) (sizeof(stbi__bmask)/sizeof(*stbi__bmask)));
1975    j->code_buffer = k & ~stbi__bmask[n];
1976    k &= stbi__bmask[n];
1977    j->code_bits -= n;
1978    return k + (stbi__jbias[n] & ~sgn);
1979 }
1980 
1981 // get some unsigned bits
stbi__jpeg_get_bits(stbi__jpeg * j,int n)1982 stbi_inline static int stbi__jpeg_get_bits(stbi__jpeg *j, int n)
1983 {
1984    unsigned int k;
1985    if (j->code_bits < n) stbi__grow_buffer_unsafe(j);
1986    k = stbi_lrot(j->code_buffer, n);
1987    j->code_buffer = k & ~stbi__bmask[n];
1988    k &= stbi__bmask[n];
1989    j->code_bits -= n;
1990    return k;
1991 }
1992 
stbi__jpeg_get_bit(stbi__jpeg * j)1993 stbi_inline static int stbi__jpeg_get_bit(stbi__jpeg *j)
1994 {
1995    unsigned int k;
1996    if (j->code_bits < 1) stbi__grow_buffer_unsafe(j);
1997    k = j->code_buffer;
1998    j->code_buffer <<= 1;
1999    --j->code_bits;
2000    return k & 0x80000000;
2001 }
2002 
// given a value that's at position X in the zigzag stream,
// where does it appear in the 8x8 matrix coded as row-major?
//
// The table has 15 extra entries beyond the 64 real ones: the block decoders
// add a zero-run of up to 15 to a position that is at most 63 before indexing
// this table, so the index can reach 78. The extra entries all map to 63.
static const stbi_uc stbi__jpeg_dezigzag[64+15] =
{
    0,  1,  8, 16,  9,  2,  3, 10,
   17, 24, 32, 25, 18, 11,  4,  5,
   12, 19, 26, 33, 40, 48, 41, 34,
   27, 20, 13,  6,  7, 14, 21, 28,
   35, 42, 49, 56, 57, 50, 43, 36,
   29, 22, 15, 23, 30, 37, 44, 51,
   58, 59, 52, 45, 38, 31, 39, 46,
   53, 60, 61, 54, 47, 55, 62, 63,
   // let corrupt input sample past end
   63, 63, 63, 63, 63, 63, 63, 63,
   63, 63, 63, 63, 63, 63, 63
};
2019 
2020 // decode one 64-entry block--
stbi__jpeg_decode_block(stbi__jpeg * j,short data[64],stbi__huffman * hdc,stbi__huffman * hac,stbi__int16 * fac,int b,stbi__uint16 * dequant)2021 static int stbi__jpeg_decode_block(stbi__jpeg *j, short data[64], stbi__huffman *hdc, stbi__huffman *hac, stbi__int16 *fac, int b, stbi__uint16 *dequant)
2022 {
2023    int diff,dc,k;
2024    int t;
2025 
2026    if (j->code_bits < 16) stbi__grow_buffer_unsafe(j);
2027    t = stbi__jpeg_huff_decode(j, hdc);
2028    if (t < 0) return stbi__err("bad huffman code","Corrupt JPEG");
2029 
2030    // 0 all the ac values now so we can do it 32-bits at a time
2031    memset(data,0,64*sizeof(data[0]));
2032 
2033    diff = t ? stbi__extend_receive(j, t) : 0;
2034    dc = j->img_comp[b].dc_pred + diff;
2035    j->img_comp[b].dc_pred = dc;
2036    data[0] = (short) (dc * dequant[0]);
2037 
2038    // decode AC components, see JPEG spec
2039    k = 1;
2040    do {
2041       unsigned int zig;
2042       int c,r,s;
2043       if (j->code_bits < 16) stbi__grow_buffer_unsafe(j);
2044       c = (j->code_buffer >> (32 - FAST_BITS)) & ((1 << FAST_BITS)-1);
2045       r = fac[c];
2046       if (r) { // fast-AC path
2047          k += (r >> 4) & 15; // run
2048          s = r & 15; // combined length
2049          j->code_buffer <<= s;
2050          j->code_bits -= s;
2051          // decode into unzigzag'd location
2052          zig = stbi__jpeg_dezigzag[k++];
2053          data[zig] = (short) ((r >> 8) * dequant[zig]);
2054       } else {
2055          int rs = stbi__jpeg_huff_decode(j, hac);
2056          if (rs < 0) return stbi__err("bad huffman code","Corrupt JPEG");
2057          s = rs & 15;
2058          r = rs >> 4;
2059          if (s == 0) {
2060             if (rs != 0xf0) break; // end block
2061             k += 16;
2062          } else {
2063             k += r;
2064             // decode into unzigzag'd location
2065             zig = stbi__jpeg_dezigzag[k++];
2066             data[zig] = (short) (stbi__extend_receive(j,s) * dequant[zig]);
2067          }
2068       }
2069    } while (k < 64);
2070    return 1;
2071 }
2072 
// Decode the DC coefficient of one block in a progressive scan.
//
//   j     decoder state; j->spec_end must be 0 for a DC scan
//   data  the block's 64 coefficients; zeroed and data[0] set on a first
//         scan (j->succ_high == 0), data[0] refined by one bit otherwise
//   hdc   huffman table for the DC difference (first scan only)
//   b     index into j->img_comp selecting whose dc_pred to update
//
// Returns 1 on success, or the result of stbi__err on corrupt input.
static int stbi__jpeg_decode_block_prog_dc(stbi__jpeg *j, short data[64], stbi__huffman *hdc, int b)
{
   int diff,dc;
   int t;
   if (j->spec_end != 0) return stbi__err("can't merge dc and ac", "Corrupt JPEG");

   if (j->code_bits < 16) stbi__grow_buffer_unsafe(j);

   if (j->succ_high == 0) {
      // first scan for DC coefficient, must be first
      memset(data,0,64*sizeof(data[0])); // 0 all the ac values now
      t = stbi__jpeg_huff_decode(j, hdc);
      // a failed decode returns -1 and a corrupt table can return up to 255;
      // either would index outside the tables used by stbi__extend_receive
      if (t < 0 || t > 15) return stbi__err("bad huffman code","Corrupt JPEG");
      diff = t ? stbi__extend_receive(j, t) : 0;

      dc = j->img_comp[b].dc_pred + diff;
      j->img_comp[b].dc_pred = dc;
      // multiply rather than shift: dc may be negative, and left-shifting a
      // negative value is undefined in C
      data[0] = (short) (dc * (1 << j->succ_low));
   } else {
      // refinement scan for DC coefficient
      if (stbi__jpeg_get_bit(j))
         data[0] += (short) (1 << j->succ_low);
   }
   return 1;
}
2097 
2098 // @OPTIMIZE: store non-zigzagged during the decode passes,
2099 // and only de-zigzag when dequantizing
stbi__jpeg_decode_block_prog_ac(stbi__jpeg * j,short data[64],stbi__huffman * hac,stbi__int16 * fac)2100 static int stbi__jpeg_decode_block_prog_ac(stbi__jpeg *j, short data[64], stbi__huffman *hac, stbi__int16 *fac)
2101 {
2102    int k;
2103    if (j->spec_start == 0) return stbi__err("can't merge dc and ac", "Corrupt JPEG");
2104 
2105    if (j->succ_high == 0) {
2106       int shift = j->succ_low;
2107 
2108       if (j->eob_run) {
2109          --j->eob_run;
2110          return 1;
2111       }
2112 
2113       k = j->spec_start;
2114       do {
2115          unsigned int zig;
2116          int c,r,s;
2117          if (j->code_bits < 16) stbi__grow_buffer_unsafe(j);
2118          c = (j->code_buffer >> (32 - FAST_BITS)) & ((1 << FAST_BITS)-1);
2119          r = fac[c];
2120          if (r) { // fast-AC path
2121             k += (r >> 4) & 15; // run
2122             s = r & 15; // combined length
2123             j->code_buffer <<= s;
2124             j->code_bits -= s;
2125             zig = stbi__jpeg_dezigzag[k++];
2126             data[zig] = (short) ((r >> 8) << shift);
2127          } else {
2128             int rs = stbi__jpeg_huff_decode(j, hac);
2129             if (rs < 0) return stbi__err("bad huffman code","Corrupt JPEG");
2130             s = rs & 15;
2131             r = rs >> 4;
2132             if (s == 0) {
2133                if (r < 15) {
2134                   j->eob_run = (1 << r);
2135                   if (r)
2136                      j->eob_run += stbi__jpeg_get_bits(j, r);
2137                   --j->eob_run;
2138                   break;
2139                }
2140                k += 16;
2141             } else {
2142                k += r;
2143                zig = stbi__jpeg_dezigzag[k++];
2144                data[zig] = (short) (stbi__extend_receive(j,s) << shift);
2145             }
2146          }
2147       } while (k <= j->spec_end);
2148    } else {
2149       // refinement scan for these AC coefficients
2150 
2151       short bit = (short) (1 << j->succ_low);
2152 
2153       if (j->eob_run) {
2154          --j->eob_run;
2155          for (k = j->spec_start; k <= j->spec_end; ++k) {
2156             short *p = &data[stbi__jpeg_dezigzag[k]];
2157             if (*p != 0)
2158                if (stbi__jpeg_get_bit(j))
2159                   if ((*p & bit)==0) {
2160                      if (*p > 0)
2161                         *p += bit;
2162                      else
2163                         *p -= bit;
2164                   }
2165          }
2166       } else {
2167          k = j->spec_start;
2168          do {
2169             int r,s;
2170             int rs = stbi__jpeg_huff_decode(j, hac); // @OPTIMIZE see if we can use the fast path here, advance-by-r is so slow, eh
2171             if (rs < 0) return stbi__err("bad huffman code","Corrupt JPEG");
2172             s = rs & 15;
2173             r = rs >> 4;
2174             if (s == 0) {
2175                if (r < 15) {
2176                   j->eob_run = (1 << r) - 1;
2177                   if (r)
2178                      j->eob_run += stbi__jpeg_get_bits(j, r);
2179                   r = 64; // force end of block
2180                } else {
2181                   // r=15 s=0 should write 16 0s, so we just do
2182                   // a run of 15 0s and then write s (which is 0),
2183                   // so we don't have to do anything special here
2184                }
2185             } else {
2186                if (s != 1) return stbi__err("bad huffman code", "Corrupt JPEG");
2187                // sign bit
2188                if (stbi__jpeg_get_bit(j))
2189                   s = bit;
2190                else
2191                   s = -bit;
2192             }
2193 
2194             // advance by r
2195             while (k <= j->spec_end) {
2196                short *p = &data[stbi__jpeg_dezigzag[k++]];
2197                if (*p != 0) {
2198                   if (stbi__jpeg_get_bit(j))
2199                      if ((*p & bit)==0) {
2200                         if (*p > 0)
2201                            *p += bit;
2202                         else
2203                            *p -= bit;
2204                      }
2205                } else {
2206                   if (r == 0) {
2207                      *p = (short) s;
2208                      break;
2209                   }
2210                   --r;
2211                }
2212             }
2213          } while (k <= j->spec_end);
2214       }
2215    }
2216    return 1;
2217 }
2218 
2219 // take a -128..127 value and stbi__clamp it and convert to 0..255
stbi__clamp(int x)2220 stbi_inline static stbi_uc stbi__clamp(int x)
2221 {
2222    // trick to use a single test to catch both cases
2223    if ((unsigned int) x > 255) {
2224       if (x < 0) return 0;
2225       if (x > 255) return 255;
2226    }
2227    return (stbi_uc) x;
2228 }
2229 
// Fixed-point helpers for the integer IDCT, using 12 fractional bits:
//   stbi__f2f(x)  converts a floating-point constant to fixed point
//                 (x * 4096, rounded by adding 0.5 before truncation)
//   stbi__fsh(x)  scales an integer value up by 4096 to the same fixed point
#define stbi__f2f(x)  ((int) (((x) * 4096 + 0.5)))
#define stbi__fsh(x)  ((x) * 4096)

// derived from jidctint -- DCT_ISLOW
//
// One 8-point 1D IDCT on inputs s0..s7. The macro DECLARES its working
// variables (t0..t3, p1..p5, x0..x3), so it must be expanded where
// declarations are allowed and at most once per scope. On exit:
//   x0..x3  hold the even part (computed from s0, s2, s4, s6)
//   t0..t3  hold the odd part  (computed from s1, s3, s5, s7)
// both scaled up by 4096. The caller forms the eight outputs as
// x0+t3, x1+t2, x2+t1, x3+t0, x3-t0, x2-t1, x1-t2, x0-t3 and removes the
// scaling.
#define STBI__IDCT_1D(s0,s1,s2,s3,s4,s5,s6,s7) \
   int t0,t1,t2,t3,p1,p2,p3,p4,p5,x0,x1,x2,x3; \
   p2 = s2;                                    \
   p3 = s6;                                    \
   p1 = (p2+p3) * stbi__f2f(0.5411961f);       \
   t2 = p1 + p3*stbi__f2f(-1.847759065f);      \
   t3 = p1 + p2*stbi__f2f( 0.765366865f);      \
   p2 = s0;                                    \
   p3 = s4;                                    \
   t0 = stbi__fsh(p2+p3);                      \
   t1 = stbi__fsh(p2-p3);                      \
   x0 = t0+t3;                                 \
   x3 = t0-t3;                                 \
   x1 = t1+t2;                                 \
   x2 = t1-t2;                                 \
   t0 = s7;                                    \
   t1 = s5;                                    \
   t2 = s3;                                    \
   t3 = s1;                                    \
   p3 = t0+t2;                                 \
   p4 = t1+t3;                                 \
   p1 = t0+t3;                                 \
   p2 = t1+t2;                                 \
   p5 = (p3+p4)*stbi__f2f( 1.175875602f);      \
   t0 = t0*stbi__f2f( 0.298631336f);           \
   t1 = t1*stbi__f2f( 2.053119869f);           \
   t2 = t2*stbi__f2f( 3.072711026f);           \
   t3 = t3*stbi__f2f( 1.501321110f);           \
   p1 = p5 + p1*stbi__f2f(-0.899976223f);      \
   p2 = p5 + p2*stbi__f2f(-2.562915447f);      \
   p3 = p3*stbi__f2f(-1.961570560f);           \
   p4 = p4*stbi__f2f(-0.390180644f);           \
   t3 += p1+p4;                                \
   t2 += p2+p3;                                \
   t1 += p2+p4;                                \
   t0 += p1+p3;
2270 
// Integer 8x8 inverse DCT.
//
//   out         destination for the 8x8 block of bytes
//   out_stride  distance in bytes between the starts of consecutive output rows
//   data        the 64 input coefficients, row-major
//
// Runs a 1D IDCT down each column into a temporary int block, then a 1D IDCT
// along each row of that block, adding 128 and clamping each result to 0..255.
static void stbi__idct_block(stbi_uc *out, int out_stride, short data[64])
{
   int col, row;
   int val[64];

   // columns
   for (col=0; col < 8; ++col) {
      short *d = data + col;
      int *v = val + col;

      // A column whose seven AC terms are all zero is constant: every output
      // equals the DC term, so the 1D transform can be skipped. The tests are
      // kept as separate comparisons.
      if (d[ 8]==0 && d[16]==0 && d[24]==0 && d[32]==0
           && d[40]==0 && d[48]==0 && d[56]==0) {
         // times 4 matches the 2 extra precision bits kept by the other branch
         int dcterm = d[0]*4;
         v[0] = v[8] = v[16] = v[24] = v[32] = v[40] = v[48] = v[56] = dcterm;
      } else {
         STBI__IDCT_1D(d[ 0],d[ 8],d[16],d[24],d[32],d[40],d[48],d[56])
         // the constants scaled everything by 1<<12; shift back down by 10
         // (with rounding) so 2 extra bits of precision survive
         x0 += 512; x1 += 512; x2 += 512; x3 += 512;
         v[ 0] = (x0+t3) >> 10;
         v[56] = (x0-t3) >> 10;
         v[ 8] = (x1+t2) >> 10;
         v[48] = (x1-t2) >> 10;
         v[16] = (x2+t1) >> 10;
         v[40] = (x2-t1) >> 10;
         v[24] = (x3+t0) >> 10;
         v[32] = (x3-t0) >> 10;
      }
   }

   // rows
   for (row=0; row < 8; ++row) {
      int *v = val + row*8;
      stbi_uc *o = out + row*out_stride;
      // no shortcut here: the column pass has spread the components out
      STBI__IDCT_1D(v[0],v[1],v[2],v[3],v[4],v[5],v[6],v[7])
      // Scale to remove: 1<<12 from the constants, 1<<2 kept by the column
      // pass, and 1<<3 because each pass scales by sqrt(8) -- 1<<17 in total.
      // Rounding adds half of that (65536). The result is -128..127 and must
      // become 0..255, so 128 (pre-scaled by 1<<17) is added before the shift.
      x0 += 65536 + (128<<17);
      x1 += 65536 + (128<<17);
      x2 += 65536 + (128<<17);
      x3 += 65536 + (128<<17);
      o[0] = stbi__clamp((x0+t3) >> 17);
      o[7] = stbi__clamp((x0-t3) >> 17);
      o[1] = stbi__clamp((x1+t2) >> 17);
      o[6] = stbi__clamp((x1-t2) >> 17);
      o[2] = stbi__clamp((x2+t1) >> 17);
      o[5] = stbi__clamp((x2-t1) >> 17);
      o[3] = stbi__clamp((x3+t0) >> 17);
      o[4] = stbi__clamp((x3-t0) >> 17);
   }
}
2329 
2330 #ifdef STBI_SSE2
// sse2 integer IDCT. not the fastest possible implementation but it
// produces bit-identical results to the generic C version so it's
// fully "transparent".
//
//   out        - receives the 8x8 block of 8-bit samples; 8 bytes are
//                written to each of 8 rows
//   out_stride - distance (in stbi_uc elements) between consecutive output rows
//   data       - the 64 input coefficients as 8 rows of 8 shorts; they are
//                read with _mm_load_si128, so data must be 16-byte aligned
//
// overall flow: column pass -> 16-bit transpose -> row pass ->
// saturating pack to 8 bits -> 8-bit transpose -> store.
static void stbi__idct_simd(stbi_uc *out, int out_stride, short data[64])
{
   // This is constructed to match our regular (generic) integer IDCT exactly.
   __m128i row0, row1, row2, row3, row4, row5, row6, row7;
   __m128i tmp; // scratch register used by the interleave macros

   // NOTE: several macros below *declare* new variables (with _l/_h suffixes
   // for the low/high four 32-bit lanes), so each result name may be used
   // only once per scope.

   // dot product constant: even elems=x, odd elems=y
   #define dct_const(x,y)  _mm_setr_epi16((x),(y),(x),(y),(x),(y),(x),(y))

   // out(0) = c0[even]*x + c0[odd]*y   (c0, x, y 16-bit, out 32-bit)
   // out(1) = c1[even]*x + c1[odd]*y
   // (x and y are interleaved so that one _mm_madd_epi16 computes both
   // products and their sum per 32-bit lane)
   #define dct_rot(out0,out1, x,y,c0,c1) \
      __m128i c0##lo = _mm_unpacklo_epi16((x),(y)); \
      __m128i c0##hi = _mm_unpackhi_epi16((x),(y)); \
      __m128i out0##_l = _mm_madd_epi16(c0##lo, c0); \
      __m128i out0##_h = _mm_madd_epi16(c0##hi, c0); \
      __m128i out1##_l = _mm_madd_epi16(c0##lo, c1); \
      __m128i out1##_h = _mm_madd_epi16(c0##hi, c1)

   // out = in << 12  (in 16-bit, out 32-bit)
   // (unpacking against zero puts "in" in the upper 16 bits of each lane,
   // i.e. in << 16; the arithmetic shift right by 4 then yields a
   // sign-extended in << 12)
   #define dct_widen(out, in) \
      __m128i out##_l = _mm_srai_epi32(_mm_unpacklo_epi16(_mm_setzero_si128(), (in)), 4); \
      __m128i out##_h = _mm_srai_epi32(_mm_unpackhi_epi16(_mm_setzero_si128(), (in)), 4)

   // wide add
   #define dct_wadd(out, a, b) \
      __m128i out##_l = _mm_add_epi32(a##_l, b##_l); \
      __m128i out##_h = _mm_add_epi32(a##_h, b##_h)

   // wide sub
   #define dct_wsub(out, a, b) \
      __m128i out##_l = _mm_sub_epi32(a##_l, b##_l); \
      __m128i out##_h = _mm_sub_epi32(a##_h, b##_h)

   // butterfly a/b, add bias, then shift by "s" and pack
   // (out0 = (a+bias+b) >> s, out1 = (a+bias-b) >> s, packed back to
   // 16 bits with signed saturation)
   #define dct_bfly32o(out0, out1, a,b,bias,s) \
      { \
         __m128i abiased_l = _mm_add_epi32(a##_l, bias); \
         __m128i abiased_h = _mm_add_epi32(a##_h, bias); \
         dct_wadd(sum, abiased, b); \
         dct_wsub(dif, abiased, b); \
         out0 = _mm_packs_epi32(_mm_srai_epi32(sum_l, s), _mm_srai_epi32(sum_h, s)); \
         out1 = _mm_packs_epi32(_mm_srai_epi32(dif_l, s), _mm_srai_epi32(dif_h, s)); \
      }

   // 8-bit interleave step (for transposes)
   #define dct_interleave8(a, b) \
      tmp = a; \
      a = _mm_unpacklo_epi8(a, b); \
      b = _mm_unpackhi_epi8(tmp, b)

   // 16-bit interleave step (for transposes)
   #define dct_interleave16(a, b) \
      tmp = a; \
      a = _mm_unpacklo_epi16(a, b); \
      b = _mm_unpackhi_epi16(tmp, b)

   // one 1-D IDCT pass over row0..row7, processing all 8 lanes at once;
   // reads row0..row7 and overwrites them with the results
   #define dct_pass(bias,shift) \
      { \
         /* even part */ \
         dct_rot(t2e,t3e, row2,row6, rot0_0,rot0_1); \
         __m128i sum04 = _mm_add_epi16(row0, row4); \
         __m128i dif04 = _mm_sub_epi16(row0, row4); \
         dct_widen(t0e, sum04); \
         dct_widen(t1e, dif04); \
         dct_wadd(x0, t0e, t3e); \
         dct_wsub(x3, t0e, t3e); \
         dct_wadd(x1, t1e, t2e); \
         dct_wsub(x2, t1e, t2e); \
         /* odd part */ \
         dct_rot(y0o,y2o, row7,row3, rot2_0,rot2_1); \
         dct_rot(y1o,y3o, row5,row1, rot3_0,rot3_1); \
         __m128i sum17 = _mm_add_epi16(row1, row7); \
         __m128i sum35 = _mm_add_epi16(row3, row5); \
         dct_rot(y4o,y5o, sum17,sum35, rot1_0,rot1_1); \
         dct_wadd(x4, y0o, y4o); \
         dct_wadd(x5, y1o, y5o); \
         dct_wadd(x6, y2o, y5o); \
         dct_wadd(x7, y3o, y4o); \
         dct_bfly32o(row0,row7, x0,x7,bias,shift); \
         dct_bfly32o(row1,row6, x1,x6,bias,shift); \
         dct_bfly32o(row2,row5, x2,x5,bias,shift); \
         dct_bfly32o(row3,row4, x3,x4,bias,shift); \
      }

   // fixed-point rotation constants, pre-combined into (even,odd) pairs so
   // each dct_rot needs only multiply-adds
   __m128i rot0_0 = dct_const(stbi__f2f(0.5411961f), stbi__f2f(0.5411961f) + stbi__f2f(-1.847759065f));
   __m128i rot0_1 = dct_const(stbi__f2f(0.5411961f) + stbi__f2f( 0.765366865f), stbi__f2f(0.5411961f));
   __m128i rot1_0 = dct_const(stbi__f2f(1.175875602f) + stbi__f2f(-0.899976223f), stbi__f2f(1.175875602f));
   __m128i rot1_1 = dct_const(stbi__f2f(1.175875602f), stbi__f2f(1.175875602f) + stbi__f2f(-2.562915447f));
   __m128i rot2_0 = dct_const(stbi__f2f(-1.961570560f) + stbi__f2f( 0.298631336f), stbi__f2f(-1.961570560f));
   __m128i rot2_1 = dct_const(stbi__f2f(-1.961570560f), stbi__f2f(-1.961570560f) + stbi__f2f( 3.072711026f));
   __m128i rot3_0 = dct_const(stbi__f2f(-0.390180644f) + stbi__f2f( 2.053119869f), stbi__f2f(-0.390180644f));
   __m128i rot3_1 = dct_const(stbi__f2f(-0.390180644f), stbi__f2f(-0.390180644f) + stbi__f2f( 1.501321110f));

   // rounding biases in column/row passes, see stbi__idct_block for explanation.
   // bias_0: 512 is half of 1<<10, the column-pass shift.
   // bias_1: 65536 is half of 1<<17, the row-pass shift; (128<<17) turns
   //         into +128 on every sample once shifted.
   __m128i bias_0 = _mm_set1_epi32(512);
   __m128i bias_1 = _mm_set1_epi32(65536 + (128<<17));

   // load (aligned loads: data must be 16-byte aligned)
   row0 = _mm_load_si128((const __m128i *) (data + 0*8));
   row1 = _mm_load_si128((const __m128i *) (data + 1*8));
   row2 = _mm_load_si128((const __m128i *) (data + 2*8));
   row3 = _mm_load_si128((const __m128i *) (data + 3*8));
   row4 = _mm_load_si128((const __m128i *) (data + 4*8));
   row5 = _mm_load_si128((const __m128i *) (data + 5*8));
   row6 = _mm_load_si128((const __m128i *) (data + 6*8));
   row7 = _mm_load_si128((const __m128i *) (data + 7*8));

   // column pass
   dct_pass(bias_0, 10);

   {
      // 16bit 8x8 transpose pass 1
      dct_interleave16(row0, row4);
      dct_interleave16(row1, row5);
      dct_interleave16(row2, row6);
      dct_interleave16(row3, row7);

      // transpose pass 2
      dct_interleave16(row0, row2);
      dct_interleave16(row1, row3);
      dct_interleave16(row4, row6);
      dct_interleave16(row5, row7);

      // transpose pass 3
      dct_interleave16(row0, row1);
      dct_interleave16(row2, row3);
      dct_interleave16(row4, row5);
      dct_interleave16(row6, row7);
   }

   // row pass
   dct_pass(bias_1, 17);

   {
      // pack (16-bit -> 8-bit with unsigned saturation, i.e. clamp to 0..255)
      __m128i p0 = _mm_packus_epi16(row0, row1); // a0a1a2a3...a7b0b1b2b3...b7
      __m128i p1 = _mm_packus_epi16(row2, row3);
      __m128i p2 = _mm_packus_epi16(row4, row5);
      __m128i p3 = _mm_packus_epi16(row6, row7);

      // 8bit 8x8 transpose pass 1
      dct_interleave8(p0, p2); // a0e0a1e1...
      dct_interleave8(p1, p3); // c0g0c1g1...

      // transpose pass 2
      dct_interleave8(p0, p1); // a0c0e0g0...
      dct_interleave8(p2, p3); // b0d0f0h0...

      // transpose pass 3
      dct_interleave8(p0, p2); // a0b0c0d0...
      dct_interleave8(p1, p3); // a4b4c4d4...

      // store
      // each register now holds two output rows: the low 8 bytes are stored
      // directly, and _mm_shuffle_epi32(.., 0x4e) swaps the 64-bit halves so
      // the high 8 bytes can be stored as the next row.
      _mm_storel_epi64((__m128i *) out, p0); out += out_stride;
      _mm_storel_epi64((__m128i *) out, _mm_shuffle_epi32(p0, 0x4e)); out += out_stride;
      _mm_storel_epi64((__m128i *) out, p2); out += out_stride;
      _mm_storel_epi64((__m128i *) out, _mm_shuffle_epi32(p2, 0x4e)); out += out_stride;
      _mm_storel_epi64((__m128i *) out, p1); out += out_stride;
      _mm_storel_epi64((__m128i *) out, _mm_shuffle_epi32(p1, 0x4e)); out += out_stride;
      _mm_storel_epi64((__m128i *) out, p3); out += out_stride;
      _mm_storel_epi64((__m128i *) out, _mm_shuffle_epi32(p3, 0x4e));
   }

#undef dct_const
#undef dct_rot
#undef dct_widen
#undef dct_wadd
#undef dct_wsub
#undef dct_bfly32o
#undef dct_interleave8
#undef dct_interleave16
#undef dct_pass
}
2508 
2509 #endif // STBI_SSE2
2510 
2511 #ifdef STBI_NEON
2512 
// NEON integer IDCT. should produce bit-identical
// results to the generic C version.
//
//   out        - receives the 8x8 block of 8-bit samples; 8 bytes are
//                written to each of 8 rows
//   out_stride - distance (in stbi_uc elements) between consecutive output rows
//   data       - the 64 input coefficients, read as 8 rows of 8 shorts
//
// overall flow: DC bias -> column pass -> 16-bit transpose -> row pass ->
// rounding/saturating narrow to 8 bits -> 8-bit transpose -> store.
static void stbi__idct_simd(stbi_uc *out, int out_stride, short data[64])
{
   int16x8_t row0, row1, row2, row3, row4, row5, row6, row7;

   // fixed-point rotation constants, each broadcast to all four lanes
   int16x4_t rot0_0 = vdup_n_s16(stbi__f2f(0.5411961f));
   int16x4_t rot0_1 = vdup_n_s16(stbi__f2f(-1.847759065f));
   int16x4_t rot0_2 = vdup_n_s16(stbi__f2f( 0.765366865f));
   int16x4_t rot1_0 = vdup_n_s16(stbi__f2f( 1.175875602f));
   int16x4_t rot1_1 = vdup_n_s16(stbi__f2f(-0.899976223f));
   int16x4_t rot1_2 = vdup_n_s16(stbi__f2f(-2.562915447f));
   int16x4_t rot2_0 = vdup_n_s16(stbi__f2f(-1.961570560f));
   int16x4_t rot2_1 = vdup_n_s16(stbi__f2f(-0.390180644f));
   int16x4_t rot3_0 = vdup_n_s16(stbi__f2f( 0.298631336f));
   int16x4_t rot3_1 = vdup_n_s16(stbi__f2f( 2.053119869f));
   int16x4_t rot3_2 = vdup_n_s16(stbi__f2f( 3.072711026f));
   int16x4_t rot3_3 = vdup_n_s16(stbi__f2f( 1.501321110f));

// NOTE: the helper macros below *declare* their result as a pair of
// variables, out_l / out_h, holding the low / high four 32-bit lanes,
// so each result name may be used only once per scope.

// out = inq * coeff  (16-bit inputs, 32-bit result)
#define dct_long_mul(out, inq, coeff) \
   int32x4_t out##_l = vmull_s16(vget_low_s16(inq), coeff); \
   int32x4_t out##_h = vmull_s16(vget_high_s16(inq), coeff)

// out = acc + inq * coeff  (acc and out 32-bit)
#define dct_long_mac(out, acc, inq, coeff) \
   int32x4_t out##_l = vmlal_s16(acc##_l, vget_low_s16(inq), coeff); \
   int32x4_t out##_h = vmlal_s16(acc##_h, vget_high_s16(inq), coeff)

// out = inq << 12  (inq 16-bit, out 32-bit)
#define dct_widen(out, inq) \
   int32x4_t out##_l = vshll_n_s16(vget_low_s16(inq), 12); \
   int32x4_t out##_h = vshll_n_s16(vget_high_s16(inq), 12)

// wide add
#define dct_wadd(out, a, b) \
   int32x4_t out##_l = vaddq_s32(a##_l, b##_l); \
   int32x4_t out##_h = vaddq_s32(a##_h, b##_h)

// wide sub
#define dct_wsub(out, a, b) \
   int32x4_t out##_l = vsubq_s32(a##_l, b##_l); \
   int32x4_t out##_h = vsubq_s32(a##_h, b##_h)

// butterfly a/b, then shift using "shiftop" by "s" and pack
#define dct_bfly32o(out0,out1, a,b,shiftop,s) \
   { \
      dct_wadd(sum, a, b); \
      dct_wsub(dif, a, b); \
      out0 = vcombine_s16(shiftop(sum_l, s), shiftop(sum_h, s)); \
      out1 = vcombine_s16(shiftop(dif_l, s), shiftop(dif_h, s)); \
   }

// one 1-D IDCT pass over row0..row7, processing all 8 lanes at once;
// reads row0..row7 and overwrites them with the results
#define dct_pass(shiftop, shift) \
   { \
      /* even part */ \
      int16x8_t sum26 = vaddq_s16(row2, row6); \
      dct_long_mul(p1e, sum26, rot0_0); \
      dct_long_mac(t2e, p1e, row6, rot0_1); \
      dct_long_mac(t3e, p1e, row2, rot0_2); \
      int16x8_t sum04 = vaddq_s16(row0, row4); \
      int16x8_t dif04 = vsubq_s16(row0, row4); \
      dct_widen(t0e, sum04); \
      dct_widen(t1e, dif04); \
      dct_wadd(x0, t0e, t3e); \
      dct_wsub(x3, t0e, t3e); \
      dct_wadd(x1, t1e, t2e); \
      dct_wsub(x2, t1e, t2e); \
      /* odd part */ \
      int16x8_t sum15 = vaddq_s16(row1, row5); \
      int16x8_t sum17 = vaddq_s16(row1, row7); \
      int16x8_t sum35 = vaddq_s16(row3, row5); \
      int16x8_t sum37 = vaddq_s16(row3, row7); \
      int16x8_t sumodd = vaddq_s16(sum17, sum35); \
      dct_long_mul(p5o, sumodd, rot1_0); \
      dct_long_mac(p1o, p5o, sum17, rot1_1); \
      dct_long_mac(p2o, p5o, sum35, rot1_2); \
      dct_long_mul(p3o, sum37, rot2_0); \
      dct_long_mul(p4o, sum15, rot2_1); \
      dct_wadd(sump13o, p1o, p3o); \
      dct_wadd(sump24o, p2o, p4o); \
      dct_wadd(sump23o, p2o, p3o); \
      dct_wadd(sump14o, p1o, p4o); \
      dct_long_mac(x4, sump13o, row7, rot3_0); \
      dct_long_mac(x5, sump24o, row5, rot3_1); \
      dct_long_mac(x6, sump23o, row3, rot3_2); \
      dct_long_mac(x7, sump14o, row1, rot3_3); \
      dct_bfly32o(row0,row7, x0,x7,shiftop,shift); \
      dct_bfly32o(row1,row6, x1,x6,shiftop,shift); \
      dct_bfly32o(row2,row5, x2,x5,shiftop,shift); \
      dct_bfly32o(row3,row4, x3,x4,shiftop,shift); \
   }

   // load
   row0 = vld1q_s16(data + 0*8);
   row1 = vld1q_s16(data + 1*8);
   row2 = vld1q_s16(data + 2*8);
   row3 = vld1q_s16(data + 3*8);
   row4 = vld1q_s16(data + 4*8);
   row5 = vld1q_s16(data + 5*8);
   row6 = vld1q_s16(data + 6*8);
   row7 = vld1q_s16(data + 7*8);

   // add DC bias
   // only lane 0 of row0 (i.e. data[0]) is changed. the extra 1024 is
   // widened by <<12 and narrowed by >>10 in the column pass (becoming 4096
   // in every lane-0 result), then widened by <<12 again in the row pass,
   // which is 128<<17: +128 on every output sample after the total shift of 17.
   row0 = vaddq_s16(row0, vsetq_lane_s16(1024, vdupq_n_s16(0), 0));

   // column pass (rounding narrowing shift by 10)
   dct_pass(vrshrn_n_s32, 10);

   // 16bit 8x8 transpose
   {
// these three map to a single VTRN.16, VTRN.32, and VSWP, respectively.
// whether compilers actually get this is another story, sadly.
#define dct_trn16(x, y) { int16x8x2_t t = vtrnq_s16(x, y); x = t.val[0]; y = t.val[1]; }
#define dct_trn32(x, y) { int32x4x2_t t = vtrnq_s32(vreinterpretq_s32_s16(x), vreinterpretq_s32_s16(y)); x = vreinterpretq_s16_s32(t.val[0]); y = vreinterpretq_s16_s32(t.val[1]); }
#define dct_trn64(x, y) { int16x8_t x0 = x; int16x8_t y0 = y; x = vcombine_s16(vget_low_s16(x0), vget_low_s16(y0)); y = vcombine_s16(vget_high_s16(x0), vget_high_s16(y0)); }

      // pass 1
      dct_trn16(row0, row1); // a0b0a2b2a4b4a6b6
      dct_trn16(row2, row3);
      dct_trn16(row4, row5);
      dct_trn16(row6, row7);

      // pass 2
      dct_trn32(row0, row2); // a0b0c0d0a4b4c4d4
      dct_trn32(row1, row3);
      dct_trn32(row4, row6);
      dct_trn32(row5, row7);

      // pass 3
      dct_trn64(row0, row4); // a0b0c0d0e0f0g0h0
      dct_trn64(row1, row5);
      dct_trn64(row2, row6);
      dct_trn64(row3, row7);

#undef dct_trn16
#undef dct_trn32
#undef dct_trn64
   }

   // row pass
   // vrshrn_n_s32 only supports shifts up to 16, we need
   // 17. so do a non-rounding shift of 16 first then follow
   // up with a rounding shift by 1.
   dct_pass(vshrn_n_s32, 16);

   {
      // pack and round
      // (vqrshrun_n_s16 does the remaining rounding shift by 1 and
      // saturates the signed 16-bit values to unsigned 8 bits)
      uint8x8_t p0 = vqrshrun_n_s16(row0, 1);
      uint8x8_t p1 = vqrshrun_n_s16(row1, 1);
      uint8x8_t p2 = vqrshrun_n_s16(row2, 1);
      uint8x8_t p3 = vqrshrun_n_s16(row3, 1);
      uint8x8_t p4 = vqrshrun_n_s16(row4, 1);
      uint8x8_t p5 = vqrshrun_n_s16(row5, 1);
      uint8x8_t p6 = vqrshrun_n_s16(row6, 1);
      uint8x8_t p7 = vqrshrun_n_s16(row7, 1);

      // again, these can translate into one instruction, but often don't.
#define dct_trn8_8(x, y) { uint8x8x2_t t = vtrn_u8(x, y); x = t.val[0]; y = t.val[1]; }
#define dct_trn8_16(x, y) { uint16x4x2_t t = vtrn_u16(vreinterpret_u16_u8(x), vreinterpret_u16_u8(y)); x = vreinterpret_u8_u16(t.val[0]); y = vreinterpret_u8_u16(t.val[1]); }
#define dct_trn8_32(x, y) { uint32x2x2_t t = vtrn_u32(vreinterpret_u32_u8(x), vreinterpret_u32_u8(y)); x = vreinterpret_u8_u32(t.val[0]); y = vreinterpret_u8_u32(t.val[1]); }

      // sadly can't use interleaved stores here since we only write
      // 8 bytes to each scan line!

      // 8x8 8-bit transpose pass 1
      dct_trn8_8(p0, p1);
      dct_trn8_8(p2, p3);
      dct_trn8_8(p4, p5);
      dct_trn8_8(p6, p7);

      // pass 2
      dct_trn8_16(p0, p2);
      dct_trn8_16(p1, p3);
      dct_trn8_16(p4, p6);
      dct_trn8_16(p5, p7);

      // pass 3
      dct_trn8_32(p0, p4);
      dct_trn8_32(p1, p5);
      dct_trn8_32(p2, p6);
      dct_trn8_32(p3, p7);

      // store (one 8-byte row per register, advancing by out_stride)
      vst1_u8(out, p0); out += out_stride;
      vst1_u8(out, p1); out += out_stride;
      vst1_u8(out, p2); out += out_stride;
      vst1_u8(out, p3); out += out_stride;
      vst1_u8(out, p4); out += out_stride;
      vst1_u8(out, p5); out += out_stride;
      vst1_u8(out, p6); out += out_stride;
      vst1_u8(out, p7);

#undef dct_trn8_8
#undef dct_trn8_16
#undef dct_trn8_32
   }

#undef dct_long_mul
#undef dct_long_mac
#undef dct_widen
#undef dct_wadd
#undef dct_wsub
#undef dct_bfly32o
#undef dct_pass
}
2716 
2717 #endif // STBI_NEON
2718 
2719 #define STBI__MARKER_none  0xff
2720 // if there's a pending marker from the entropy stream, return that
2721 // otherwise, fetch from the stream and get a marker. if there's no
2722 // marker, return 0xff, which is never a valid marker value
stbi__get_marker(stbi__jpeg * j)2723 static stbi_uc stbi__get_marker(stbi__jpeg *j)
2724 {
2725    stbi_uc x;
2726    if (j->marker != STBI__MARKER_none) { x = j->marker; j->marker = STBI__MARKER_none; return x; }
2727    x = stbi__get8(j->s);
2728    if (x != 0xff) return STBI__MARKER_none;
2729    while (x == 0xff)
2730       x = stbi__get8(j->s); // consume repeated 0xff fill bytes
2731    return x;
2732 }
2733 
2734 // in each scan, we'll have scan_n components, and the order
2735 // of the components is specified by order[]
2736 #define STBI__RESTART(x)     ((x) >= 0xd0 && (x) <= 0xd7)
2737 
2738 // after a restart interval, stbi__jpeg_reset the entropy decoder and
2739 // the dc prediction
stbi__jpeg_reset(stbi__jpeg * j)2740 static void stbi__jpeg_reset(stbi__jpeg *j)
2741 {
2742    j->code_bits = 0;
2743    j->code_buffer = 0;
2744    j->nomore = 0;
2745    j->img_comp[0].dc_pred = j->img_comp[1].dc_pred = j->img_comp[2].dc_pred = j->img_comp[3].dc_pred = 0;
2746    j->marker = STBI__MARKER_none;
2747    j->todo = j->restart_interval ? j->restart_interval : 0x7fffffff;
2748    j->eob_run = 0;
2749    // no more than 1<<31 MCUs if no restart_interal? that's plenty safe,
2750    // since we don't even allow 1<<30 pixels
2751 }
2752 
// Decode the entropy-coded data of one scan.
//
// Baseline scans are Huffman-decoded and run through the IDCT block by block,
// writing straight into each component's pixel plane. Progressive scans only
// accumulate coefficients into each component's coeff buffer.
//
// Returns 0 if a block fails to decode, 1 otherwise. Note that 1 is also
// returned when a restart interval runs out and no restart marker follows:
// the scan simply stops early, yielding corrupt data rather than no data.
static int stbi__parse_entropy_coded_data(stbi__jpeg *z)
{
   stbi__jpeg_reset(z);

   // ---- baseline, non-interleaved (a single component in the scan) ----
   if (!z->progressive && z->scan_n == 1) {
      int bx,by;
      STBI_SIMD_ALIGN(short, data[64]);
      int n = z->order[0];
      // blocks are visited in plain scanline order; the block count depends
      // only on the component's own pixel size, not on interleaved MCU layout
      int blocks_w = (z->img_comp[n].x+7) >> 3;
      int blocks_h = (z->img_comp[n].y+7) >> 3;
      for (by=0; by < blocks_h; ++by) {
         for (bx=0; bx < blocks_w; ++bx) {
            int ha = z->img_comp[n].ha;
            if (!stbi__jpeg_decode_block(z, data, z->huff_dc+z->img_comp[n].hd, z->huff_ac+ha, z->fast_ac[ha], n, z->dequant[z->img_comp[n].tq])) return 0;
            z->idct_block_kernel(z->img_comp[n].data+z->img_comp[n].w2*by*8+bx*8, z->img_comp[n].w2, data);
            // here every block is an MCU, so count down the restart interval
            if (--z->todo > 0) continue;
            if (z->code_bits < 24) stbi__grow_buffer_unsafe(z);
            // no restart marker where one is due: bail out, keeping what we have
            if (!STBI__RESTART(z->marker)) return 1;
            stbi__jpeg_reset(z);
         }
      }
      return 1;
   }

   // ---- baseline, interleaved ----
   if (!z->progressive) {
      int mx,my,k,bx,by;
      STBI_SIMD_ALIGN(short, data[64]);
      for (my=0; my < z->img_mcu_y; ++my) {
         for (mx=0; mx < z->img_mcu_x; ++mx) {
            // one interleaved MCU: walk the scan's components in order[] order
            for (k=0; k < z->scan_n; ++k) {
               int n = z->order[k];
               // each component contributes h x v blocks to the MCU
               for (by=0; by < z->img_comp[n].v; ++by) {
                  for (bx=0; bx < z->img_comp[n].h; ++bx) {
                     int px = (mx*z->img_comp[n].h + bx)*8;
                     int py = (my*z->img_comp[n].v + by)*8;
                     int ha = z->img_comp[n].ha;
                     if (!stbi__jpeg_decode_block(z, data, z->huff_dc+z->img_comp[n].hd, z->huff_ac+ha, z->fast_ac[ha], n, z->dequant[z->img_comp[n].tq])) return 0;
                     z->idct_block_kernel(z->img_comp[n].data+z->img_comp[n].w2*py+px, z->img_comp[n].w2, data);
                  }
               }
            }
            // the whole interleaved MCU is done; count down the restart interval
            if (--z->todo > 0) continue;
            if (z->code_bits < 24) stbi__grow_buffer_unsafe(z);
            if (!STBI__RESTART(z->marker)) return 1;
            stbi__jpeg_reset(z);
         }
      }
      return 1;
   }

   // ---- progressive, non-interleaved ----
   if (z->scan_n == 1) {
      int bx,by;
      int n = z->order[0];
      // same scanline-order walk as the baseline single-component case
      int blocks_w = (z->img_comp[n].x+7) >> 3;
      int blocks_h = (z->img_comp[n].y+7) >> 3;
      for (by=0; by < blocks_h; ++by) {
         for (bx=0; bx < blocks_w; ++bx) {
            short *block = z->img_comp[n].coeff + 64 * (bx + by * z->img_comp[n].coeff_w);
            int ok;
            // spec_start == 0 selects the DC decoder, anything else the AC decoder
            if (z->spec_start == 0) {
               ok = stbi__jpeg_decode_block_prog_dc(z, block, &z->huff_dc[z->img_comp[n].hd], n);
            } else {
               int ha = z->img_comp[n].ha;
               ok = stbi__jpeg_decode_block_prog_ac(z, block, &z->huff_ac[ha], z->fast_ac[ha]);
            }
            if (!ok) return 0;
            // here every block is an MCU, so count down the restart interval
            if (--z->todo > 0) continue;
            if (z->code_bits < 24) stbi__grow_buffer_unsafe(z);
            if (!STBI__RESTART(z->marker)) return 1;
            stbi__jpeg_reset(z);
         }
      }
      return 1;
   }

   // ---- progressive, interleaved (only the DC decoder is used here) ----
   {
      int mx,my,k,bx,by;
      for (my=0; my < z->img_mcu_y; ++my) {
         for (mx=0; mx < z->img_mcu_x; ++mx) {
            // one interleaved MCU: walk the scan's components in order[] order
            for (k=0; k < z->scan_n; ++k) {
               int n = z->order[k];
               // each component contributes h x v blocks to the MCU
               for (by=0; by < z->img_comp[n].v; ++by) {
                  for (bx=0; bx < z->img_comp[n].h; ++bx) {
                     int col = (mx*z->img_comp[n].h + bx);
                     int row = (my*z->img_comp[n].v + by);
                     short *block = z->img_comp[n].coeff + 64 * (col + row * z->img_comp[n].coeff_w);
                     if (!stbi__jpeg_decode_block_prog_dc(z, block, &z->huff_dc[z->img_comp[n].hd], n))
                        return 0;
                  }
               }
            }
            // the whole interleaved MCU is done; count down the restart interval
            if (--z->todo > 0) continue;
            if (z->code_bits < 24) stbi__grow_buffer_unsafe(z);
            if (!STBI__RESTART(z->marker)) return 1;
            stbi__jpeg_reset(z);
         }
      }
      return 1;
   }
}
2876 
// Multiply each of the 64 coefficients in data, in place, by the
// corresponding entry of the dequantization table.
static void stbi__jpeg_dequantize(short *data, stbi__uint16 *dequant)
{
   int remaining;
   for (remaining = 64; remaining > 0; --remaining, ++data, ++dequant)
      *data = (short) (*data * *dequant);
}
2883 
// Final step for progressive images: dequantize every accumulated
// coefficient block and run it through the IDCT into the component's
// pixel plane. Does nothing for non-progressive images.
static void stbi__jpeg_finish(stbi__jpeg *z)
{
   int comp;

   if (!z->progressive) return;

   for (comp=0; comp < z->s->img_n; ++comp) {
      int bx,by;
      // number of 8x8 blocks covering this component's pixels
      int blocks_w = (z->img_comp[comp].x+7) >> 3;
      int blocks_h = (z->img_comp[comp].y+7) >> 3;
      for (by=0; by < blocks_h; ++by) {
         for (bx=0; bx < blocks_w; ++bx) {
            short *block = z->img_comp[comp].coeff + 64 * (bx + by * z->img_comp[comp].coeff_w);
            stbi__jpeg_dequantize(block, z->dequant[z->img_comp[comp].tq]);
            z->idct_block_kernel(z->img_comp[comp].data+z->img_comp[comp].w2*by*8+bx*8, z->img_comp[comp].w2, block);
         }
      }
   }
}
2902 
// Process one marker segment other than the frame/scan headers:
//   0xDD       DRI  - restart interval
//   0xDB       DQT  - quantization table(s)
//   0xC4       DHT  - huffman table(s)
//   0xE0-0xEF  APPn - application data (JFIF APP0 and Adobe APP14 are inspected)
//   0xFE       COM  - comment (skipped)
//
//   z - decoder state; tables and flags are updated in place
//   m - marker code, or STBI__MARKER_none if no marker was found
//
// Returns 1 if the segment was consumed successfully, 0 on failure
// (malformed segment, unknown marker, or no marker at all).
static int stbi__process_marker(stbi__jpeg *z, int m)
{
   int L;
   switch (m) {
      case STBI__MARKER_none: // no marker found
         return stbi__err("expected marker","Corrupt JPEG");

      case 0xDD: // DRI - specify restart interval
         if (stbi__get16be(z->s) != 4) return stbi__err("bad DRI len","Corrupt JPEG");
         z->restart_interval = stbi__get16be(z->s);
         return 1;

      case 0xDB: // DQT - define quantization table
         L = stbi__get16be(z->s)-2;
         // a single segment may carry several tables back to back
         while (L > 0) {
            int q = stbi__get8(z->s);
            int p = q >> 4, sixteen = (p != 0); // precision: 0 = 8-bit entries, 1 = 16-bit
            int t = q & 15,i;                    // destination table index
            if (p != 0 && p != 1) return stbi__err("bad DQT type","Corrupt JPEG");
            if (t > 3) return stbi__err("bad DQT table","Corrupt JPEG");

            // entries arrive in zigzag order; store them de-zigzagged
            for (i=0; i < 64; ++i)
               z->dequant[t][stbi__jpeg_dezigzag[i]] = (stbi__uint16)(sixteen ? stbi__get16be(z->s) : stbi__get8(z->s));
            L -= (sixteen ? 129 : 65);
         }
         // the tables must account for the segment length exactly
         return L==0;

      case 0xC4: // DHT - define huffman table
         L = stbi__get16be(z->s)-2;
         // a single segment may carry several tables back to back
         while (L > 0) {
            stbi_uc *v;
            int sizes[16],i,n=0;
            int q = stbi__get8(z->s);
            int tc = q >> 4; // table class: 0 = DC, 1 = AC
            int th = q & 15; // destination table index
            if (tc > 1 || th > 3) return stbi__err("bad DHT header","Corrupt JPEG");
            // sizes[i] = number of codes of length i+1; n = total symbol count
            for (i=0; i < 16; ++i) {
               sizes[i] = stbi__get8(z->s);
               n += sizes[i];
            }
            // the 16 counts can sum to as much as 16*255, but only 256
            // distinct symbol values exist; reject larger totals here so
            // neither the table build nor the copy loop below can write
            // past the end of the huffman table storage.
            if (n > 256) return stbi__err("bad DHT header","Corrupt JPEG");
            L -= 17;
            if (tc == 0) {
               if (!stbi__build_huffman(z->huff_dc+th, sizes)) return 0;
               v = z->huff_dc[th].values;
            } else {
               if (!stbi__build_huffman(z->huff_ac+th, sizes)) return 0;
               v = z->huff_ac[th].values;
            }
            for (i=0; i < n; ++i)
               v[i] = stbi__get8(z->s);
            // AC tables additionally get a fast lookup table
            if (tc != 0)
               stbi__build_fast_ac(z->fast_ac[th], z->huff_ac + th);
            L -= n;
         }
         // the tables must account for the segment length exactly
         return L==0;
   }

   // check for comment block or APP blocks
   if ((m >= 0xE0 && m <= 0xEF) || m == 0xFE) {
      L = stbi__get16be(z->s);
      // the length field counts its own two bytes
      if (L < 2) {
         if (m == 0xFE)
            return stbi__err("bad COM len","Corrupt JPEG");
         else
            return stbi__err("bad APP len","Corrupt JPEG");
      }
      L -= 2;

      if (m == 0xE0 && L >= 5) { // JFIF APP0 segment
         static const unsigned char tag[5] = {'J','F','I','F','\0'};
         int ok = 1;
         int i;
         // all 5 tag bytes are always consumed, match or not
         for (i=0; i < 5; ++i)
            if (stbi__get8(z->s) != tag[i])
               ok = 0;
         L -= 5;
         if (ok)
            z->jfif = 1;
      } else if (m == 0xEE && L >= 12) { // Adobe APP14 segment
         static const unsigned char tag[6] = {'A','d','o','b','e','\0'};
         int ok = 1;
         int i;
         // all 6 tag bytes are always consumed, match or not
         for (i=0; i < 6; ++i)
            if (stbi__get8(z->s) != tag[i])
               ok = 0;
         L -= 6;
         if (ok) {
            stbi__get8(z->s); // version
            stbi__get16be(z->s); // flags0
            stbi__get16be(z->s); // flags1
            z->app14_color_transform = stbi__get8(z->s); // color transform
            L -= 6;
         }
      }

      // skip whatever remains of the segment
      stbi__skip(z->s, L);
      return 1;
   }

   return stbi__err("unknown marker","Corrupt JPEG");
}
3004 
3005 // after we see SOS
stbi__process_scan_header(stbi__jpeg * z)3006 static int stbi__process_scan_header(stbi__jpeg *z)
3007 {
3008    int i;
3009    int Ls = stbi__get16be(z->s);
3010    z->scan_n = stbi__get8(z->s);
3011    if (z->scan_n < 1 || z->scan_n > 4 || z->scan_n > (int) z->s->img_n) return stbi__err("bad SOS component count","Corrupt JPEG");
3012    if (Ls != 6+2*z->scan_n) return stbi__err("bad SOS len","Corrupt JPEG");
3013    for (i=0; i < z->scan_n; ++i) {
3014       int id = stbi__get8(z->s), which;
3015       int q = stbi__get8(z->s);
3016       for (which = 0; which < z->s->img_n; ++which)
3017          if (z->img_comp[which].id == id)
3018             break;
3019       if (which == z->s->img_n) return 0; // no match
3020       z->img_comp[which].hd = q >> 4;   if (z->img_comp[which].hd > 3) return stbi__err("bad DC huff","Corrupt JPEG");
3021       z->img_comp[which].ha = q & 15;   if (z->img_comp[which].ha > 3) return stbi__err("bad AC huff","Corrupt JPEG");
3022       z->order[i] = which;
3023    }
3024 
3025    {
3026       int aa;
3027       z->spec_start = stbi__get8(z->s);
3028       z->spec_end   = stbi__get8(z->s); // should be 63, but might be 0
3029       aa = stbi__get8(z->s);
3030       z->succ_high = (aa >> 4);
3031       z->succ_low  = (aa & 15);
3032       if (z->progressive) {
3033          if (z->spec_start > 63 || z->spec_end > 63  || z->spec_start > z->spec_end || z->succ_high > 13 || z->succ_low > 13)
3034             return stbi__err("bad SOS", "Corrupt JPEG");
3035       } else {
3036          if (z->spec_start != 0) return stbi__err("bad SOS","Corrupt JPEG");
3037          if (z->succ_high != 0 || z->succ_low != 0) return stbi__err("bad SOS","Corrupt JPEG");
3038          z->spec_end = 63;
3039       }
3040    }
3041 
3042    return 1;
3043 }
3044 
// Free the per-component buffers (raw_data, raw_coeff, linebuf) of the first
// ncomp components and clear the pointers that referred to them.
// Returns why unchanged, so callers can free and return in one statement.
static int stbi__free_jpeg_components(stbi__jpeg *z, int ncomp, int why)
{
   int c = 0;
   while (c < ncomp) {
      // data points into raw_data, so both are cleared together
      if (z->img_comp[c].raw_data != NULL) {
         STBI_FREE(z->img_comp[c].raw_data);
         z->img_comp[c].raw_data = NULL;
         z->img_comp[c].data = NULL;
      }
      // likewise coeff and raw_coeff
      if (z->img_comp[c].raw_coeff != NULL) {
         STBI_FREE(z->img_comp[c].raw_coeff);
         z->img_comp[c].raw_coeff = NULL;
         z->img_comp[c].coeff = NULL;
      }
      if (z->img_comp[c].linebuf != NULL) {
         STBI_FREE(z->img_comp[c].linebuf);
         z->img_comp[c].linebuf = NULL;
      }
      ++c;
   }
   return why;
}
3066 
// Parse a JPEG frame header (SOF segment): sample precision, image size,
// component count, and each component's id, sampling factors and
// quantization-table index.
//
//   z    - decoder state; input is read from z->s, results go to z->s->img_*
//          and z->img_comp[]
//   scan - with STBI__SCAN_load the per-component decode buffers are sized
//          and allocated as well; any other mode returns as soon as the
//          header fields have been read and validated
//
// Returns 1 on success. On failure returns the result of stbi__err() (callers
// test it for zero); if an allocation fails, the buffers obtained so far are
// released through stbi__free_jpeg_components() first.
static int stbi__process_frame_header(stbi__jpeg *z, int scan)
{
   stbi__context *s = z->s;
   int Lf,p,i,q, h_max=1,v_max=1,c;
   Lf = stbi__get16be(s);         if (Lf < 11) return stbi__err("bad SOF len","Corrupt JPEG"); // JPEG
   p  = stbi__get8(s);            if (p != 8) return stbi__err("only 8-bit","JPEG format not supported: 8-bit only"); // JPEG baseline
   s->img_y = stbi__get16be(s);   if (s->img_y == 0) return stbi__err("no header height", "JPEG format not supported: delayed height"); // Legal, but we don't handle it--but neither does IJG
   s->img_x = stbi__get16be(s);   if (s->img_x == 0) return stbi__err("0 width","Corrupt JPEG"); // JPEG requires
   c = stbi__get8(s);
   if (c != 3 && c != 1 && c != 4) return stbi__err("bad component count","Corrupt JPEG");
   s->img_n = c;
   // clear the buffer pointers up front so they never hold garbage
   for (i=0; i < c; ++i) {
      z->img_comp[i].data = NULL;
      z->img_comp[i].linebuf = NULL;
   }

   // segment length must be exactly the fixed 8 bytes plus 3 bytes per component
   if (Lf != 8+3*s->img_n) return stbi__err("bad SOF len","Corrupt JPEG");

   z->rgb = 0;
   for (i=0; i < s->img_n; ++i) {
      static const unsigned char rgb[3] = { 'R', 'G', 'B' };
      z->img_comp[i].id = stbi__get8(s);
      // count components whose id spells 'R','G','B' in order; 3 matches
      // means the file stores RGB directly
      if (s->img_n == 3 && z->img_comp[i].id == rgb[i])
         ++z->rgb;
      q = stbi__get8(s);
      z->img_comp[i].h = (q >> 4);  if (!z->img_comp[i].h || z->img_comp[i].h > 4) return stbi__err("bad H","Corrupt JPEG");
      z->img_comp[i].v = q & 15;    if (!z->img_comp[i].v || z->img_comp[i].v > 4) return stbi__err("bad V","Corrupt JPEG");
      z->img_comp[i].tq = stbi__get8(s);  if (z->img_comp[i].tq > 3) return stbi__err("bad TQ","Corrupt JPEG");
   }

   if (scan != STBI__SCAN_load) return 1;

   if (!stbi__mad3sizes_valid(s->img_x, s->img_y, s->img_n, 0)) return stbi__err("too large", "Image too large to decode");

   for (i=0; i < s->img_n; ++i) {
      if (z->img_comp[i].h > h_max) h_max = z->img_comp[i].h;
      if (z->img_comp[i].v > v_max) v_max = z->img_comp[i].v;
   }

   // every plane's sampling factor must divide the maximum evenly: the
   // upsampling stage derives its expansion factors with integer division
   // (h_max / h, v_max / v), so a fractional ratio such as 3:2 would be
   // truncated and the resamplers would read more samples per row than the
   // plane actually holds. Nothing has been allocated yet, so a plain error
   // return is sufficient here.
   for (i=0; i < s->img_n; ++i) {
      if (h_max % z->img_comp[i].h != 0) return stbi__err("bad H","Corrupt JPEG");
      if (v_max % z->img_comp[i].v != 0) return stbi__err("bad V","Corrupt JPEG");
   }

   // compute interleaved mcu info
   z->img_h_max = h_max;
   z->img_v_max = v_max;
   z->img_mcu_w = h_max * 8;
   z->img_mcu_h = v_max * 8;
   // these sizes can't be more than 17 bits
   z->img_mcu_x = (s->img_x + z->img_mcu_w-1) / z->img_mcu_w;
   z->img_mcu_y = (s->img_y + z->img_mcu_h-1) / z->img_mcu_h;

   for (i=0; i < s->img_n; ++i) {
      // number of effective pixels (e.g. for non-interleaved MCU)
      z->img_comp[i].x = (s->img_x * z->img_comp[i].h + h_max-1) / h_max;
      z->img_comp[i].y = (s->img_y * z->img_comp[i].v + v_max-1) / v_max;
      // to simplify generation, we'll allocate enough memory to decode
      // the bogus oversized data from using interleaved MCUs and their
      // big blocks (e.g. a 16x16 iMCU on an image of width 33); we won't
      // discard the extra data until colorspace conversion
      //
      // img_mcu_x, img_mcu_y: <=17 bits; comp[i].h and .v are <=4 (checked earlier)
      // so these muls can't overflow with 32-bit ints (which we require)
      z->img_comp[i].w2 = z->img_mcu_x * z->img_comp[i].h * 8;
      z->img_comp[i].h2 = z->img_mcu_y * z->img_comp[i].v * 8;
      z->img_comp[i].coeff = 0;
      z->img_comp[i].raw_coeff = 0;
      z->img_comp[i].linebuf = NULL;
      // 15 spare bytes so the working pointer can be rounded up to 16
      z->img_comp[i].raw_data = stbi__malloc_mad2(z->img_comp[i].w2, z->img_comp[i].h2, 15);
      if (z->img_comp[i].raw_data == NULL)
         return stbi__free_jpeg_components(z, i+1, stbi__err("outofmem", "Out of memory"));
      // align blocks for idct using mmx/sse
      z->img_comp[i].data = (stbi_uc*) (((size_t) z->img_comp[i].raw_data + 15) & ~15);
      if (z->progressive) {
         // w2, h2 are multiples of 8 (see above)
         z->img_comp[i].coeff_w = z->img_comp[i].w2 / 8;
         z->img_comp[i].coeff_h = z->img_comp[i].h2 / 8;
         z->img_comp[i].raw_coeff = stbi__malloc_mad3(z->img_comp[i].w2, z->img_comp[i].h2, sizeof(short), 15);
         if (z->img_comp[i].raw_coeff == NULL)
            return stbi__free_jpeg_components(z, i+1, stbi__err("outofmem", "Out of memory"));
         z->img_comp[i].coeff = (short*) (((size_t) z->img_comp[i].raw_coeff + 15) & ~15);
      }
   }

   return 1;
}
3149 
// Marker classification helpers. 'x' is a marker code as obtained from
// stbi__get_marker(), i.e. the byte that follows 0xff in the stream.
// use comparisons since in some cases we handle more than one case (e.g. SOF)
// DNL: define number of lines
#define stbi__DNL(x)         ((x) == 0xdc)
// SOI: start of image
#define stbi__SOI(x)         ((x) == 0xd8)
// EOI: end of image
#define stbi__EOI(x)         ((x) == 0xd9)
// SOF: start of frame; three frame types are accepted
#define stbi__SOF(x)         ((x) == 0xc0 || (x) == 0xc1 || (x) == 0xc2)
// SOS: start of scan
#define stbi__SOS(x)         ((x) == 0xda)

// the one accepted SOF type that marks the file as progressive
#define stbi__SOF_progressive(x)   ((x) == 0xc2)
3158 
// Read a JPEG stream up to and including its frame header.
// Resets the per-file marker state and requires SOI as the first marker; in
// STBI__SCAN_type mode that is all that is checked. Otherwise every marker
// ahead of the SOF is handed to stbi__process_marker(), and the SOF itself is
// parsed by stbi__process_frame_header() using the same 'scan' mode.
// Returns 1 on success; failures return 0 or the result of stbi__err().
static int stbi__decode_jpeg_header(stbi__jpeg *z, int scan)
{
   int marker;

   z->jfif = 0;
   z->app14_color_transform = -1; // valid values are 0,1,2
   z->marker = STBI__MARKER_none; // no marker cached yet

   marker = stbi__get_marker(z);
   if (!stbi__SOI(marker))
      return stbi__err("no SOI","Corrupt JPEG");
   if (scan == STBI__SCAN_type)
      return 1;

   for (marker = stbi__get_marker(z); !stbi__SOF(marker); ) {
      if (!stbi__process_marker(z, marker))
         return 0;
      // some files have extra padding after their blocks, so keep scanning
      // until a marker turns up or the input is exhausted
      for (marker = stbi__get_marker(z); marker == STBI__MARKER_none; marker = stbi__get_marker(z)) {
         if (stbi__at_eof(z->s))
            return stbi__err("no SOF", "Corrupt JPEG");
      }
   }

   z->progressive = stbi__SOF_progressive(marker);
   return stbi__process_frame_header(z, scan) ? 1 : 0;
}
3182 
3183 // decode image to YCbCr format
stbi__decode_jpeg_image(stbi__jpeg * j)3184 static int stbi__decode_jpeg_image(stbi__jpeg *j)
3185 {
3186    int m;
3187    for (m = 0; m < 4; m++) {
3188       j->img_comp[m].raw_data = NULL;
3189       j->img_comp[m].raw_coeff = NULL;
3190    }
3191    j->restart_interval = 0;
3192    if (!stbi__decode_jpeg_header(j, STBI__SCAN_load)) return 0;
3193    m = stbi__get_marker(j);
3194    while (!stbi__EOI(m)) {
3195       if (stbi__SOS(m)) {
3196          if (!stbi__process_scan_header(j)) return 0;
3197          if (!stbi__parse_entropy_coded_data(j)) return 0;
3198          if (j->marker == STBI__MARKER_none ) {
3199             // handle 0s at the end of image data from IP Kamera 9060
3200             while (!stbi__at_eof(j->s)) {
3201                int x = stbi__get8(j->s);
3202                if (x == 255) {
3203                   j->marker = stbi__get8(j->s);
3204                   break;
3205                }
3206             }
3207             // if we reach eof without hitting a marker, stbi__get_marker() below will fail and we'll eventually return 0
3208          }
3209       } else if (stbi__DNL(m)) {
3210          int Ld = stbi__get16be(j->s);
3211          stbi__uint32 NL = stbi__get16be(j->s);
3212          if (Ld != 4) return stbi__err("bad DNL len", "Corrupt JPEG");
3213          if (NL != j->s->img_y) return stbi__err("bad DNL height", "Corrupt JPEG");
3214       } else {
3215          if (!stbi__process_marker(j, m)) return 0;
3216       }
3217       m = stbi__get_marker(j);
3218    }
3219    if (j->progressive)
3220       stbi__jpeg_finish(j);
3221    return 1;
3222 }
3223 
3224 // static jfif-centered resampling (across block boundaries)
3225 
// Signature shared by all row resamplers. A resampler receives an output
// buffer, two source rows, the number of source samples 'w' and the
// horizontal expansion factor 'hs', and returns a pointer to the resulting
// row. That pointer is either 'out' or, when no resampling is needed, one of
// the input rows unchanged.
typedef stbi_uc *(*resample_row_func)(stbi_uc *out, stbi_uc *in0, stbi_uc *in1,
                                    int w, int hs);

// shift right by 2 (divide by 4) and narrow to a byte
#define stbi__div4(x) ((stbi_uc) ((x) >> 2))
3230 
// 1:1 "resampler": nothing to expand, so the nearer input row is handed back
// untouched and 'out' is never written.
static stbi_uc *resample_row_1(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs)
{
   // only in_near matters; mark the rest as deliberately unused
   STBI_NOTUSED(hs);
   STBI_NOTUSED(w);
   STBI_NOTUSED(in_far);
   STBI_NOTUSED(out);
   return in_near;
}
3239 
// Vertical 2x upsample: produces one output row of 'w' samples, each a
// rounded 3:1 blend of the nearer and the farther source row.
// Returns 'out'.
static stbi_uc* stbi__resample_row_v_2(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs)
{
   int x = 0;
   STBI_NOTUSED(hs);
   while (x < w) {
      int blend = 3*in_near[x] + in_far[x] + 2; // +2 rounds the divide by 4
      out[x] = stbi__div4(blend);
      ++x;
   }
   return out;
}
3249 
// Horizontal 2x upsample: writes 2*w samples to 'out' from the 'w' samples of
// in_near. Interior outputs are rounded 3:1 blends of a sample and its left
// or right neighbor; the two outermost outputs copy the edge samples.
// in_far is not used. Returns 'out'.
static stbi_uc*  stbi__resample_row_h_2(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs)
{
   int k;
   stbi_uc *src = in_near;

   STBI_NOTUSED(in_far);
   STBI_NOTUSED(hs);

   if (w == 1) {
      // a single sample has no neighbor to interpolate with
      out[0] = out[1] = src[0];
      return out;
   }

   // left edge
   out[0] = src[0];
   out[1] = stbi__div4(src[0]*3 + src[1] + 2);

   // interior samples: each yields one output leaning left, one leaning right
   k = 1;
   while (k < w-1) {
      int center = 3*src[k]+2;
      out[k*2+0] = stbi__div4(center+src[k-1]);
      out[k*2+1] = stbi__div4(center+src[k+1]);
      ++k;
   }

   // right edge (k is w-1 here whenever w >= 2)
   out[k*2+0] = stbi__div4(src[w-2]*3 + src[w-1] + 2);
   out[k*2+1] = src[w-1];

   return out;
}
3277 
3278 #define stbi__div16(x) ((stbi_uc) ((x) >> 4))
3279 
// 2x2 upsample of one output row: each source column is first blended 3:1
// vertically (near:far), then neighboring columns are blended 3:1
// horizontally. Writes 2*w samples to 'out' and returns it.
static stbi_uc *stbi__resample_row_hv_2(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs)
{
   int x, prev, cur;

   STBI_NOTUSED(hs);

   // vertically filtered first column (scaled by 4)
   cur = 3*in_near[0] + in_far[0];
   if (w == 1) {
      out[0] = out[1] = stbi__div4(cur + 2);
      return out;
   }

   // left edge has no left neighbor: vertical filter only
   out[0] = stbi__div4(cur+2);
   for (x = 1; x < w; ++x) {
      prev = cur;
      cur = 3*in_near[x]+in_far[x];
      // right half of column x-1, then left half of column x (scaled by 16)
      out[x*2-1] = stbi__div16(3*prev + cur + 8);
      out[x*2  ] = stbi__div16(3*cur + prev + 8);
   }
   // right edge, again vertical filter only
   out[w*2-1] = stbi__div4(cur+2);

   return out;
}
3303 
3304 #if defined(STBI_SSE2) || defined(STBI_NEON)
// SIMD (SSE2 or NEON) version of the 2x2 row upsampler.
// Columns are processed eight at a time while at least one further column
// remains after the group; the rest of the row, including the right edge, is
// finished by the scalar code at the bottom. Writes 2*w samples to 'out' and
// returns it; 'hs' is unused.
static stbi_uc *stbi__resample_row_hv_2_simd(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs)
{
   // need to generate 2x2 samples for every one in input
   int i=0,t0,t1;

   if (w == 1) {
      out[0] = out[1] = stbi__div4(3*in_near[0] + in_far[0] + 2);
      return out;
   }

   // t1 carries the vertically filtered value of the column just left of
   // the current position; for the first column it is that column itself
   t1 = 3*in_near[0] + in_far[0];
   // process groups of 8 pixels for as long as we can.
   // note we can't handle the last pixel in a row in this loop
   // because we need to handle the filter boundary conditions.
   for (; i < ((w-1) & ~7); i += 8) {
#if defined(STBI_SSE2)
      // load and perform the vertical filtering pass
      // this uses 3*x + y = 4*x + (y - x)
      __m128i zero  = _mm_setzero_si128();
      __m128i farb  = _mm_loadl_epi64((__m128i *) (in_far + i));
      __m128i nearb = _mm_loadl_epi64((__m128i *) (in_near + i));
      __m128i farw  = _mm_unpacklo_epi8(farb, zero);
      __m128i nearw = _mm_unpacklo_epi8(nearb, zero);
      __m128i diff  = _mm_sub_epi16(farw, nearw);
      __m128i nears = _mm_slli_epi16(nearw, 2);
      __m128i curr  = _mm_add_epi16(nears, diff); // current row

      // horizontal filter works the same based on shifted vers of current
      // row. "prev" is current row shifted right by 1 pixel; we need to
      // insert the previous pixel value (from t1).
      // "next" is current row shifted left by 1 pixel, with first pixel
      // of next block of 8 pixels added in.
      __m128i prv0 = _mm_slli_si128(curr, 2);
      __m128i nxt0 = _mm_srli_si128(curr, 2);
      __m128i prev = _mm_insert_epi16(prv0, t1, 0);
      // column i+8 exists because the loop bound keeps i+8 <= w-1
      __m128i next = _mm_insert_epi16(nxt0, 3*in_near[i+8] + in_far[i+8], 7);

      // horizontal filter, polyphase implementation since it's convenient:
      // even pixels = 3*cur + prev = cur*4 + (prev - cur)
      // odd  pixels = 3*cur + next = cur*4 + (next - cur)
      // note the shared term.
      __m128i bias  = _mm_set1_epi16(8);
      __m128i curs = _mm_slli_epi16(curr, 2);
      __m128i prvd = _mm_sub_epi16(prev, curr);
      __m128i nxtd = _mm_sub_epi16(next, curr);
      __m128i curb = _mm_add_epi16(curs, bias);
      __m128i even = _mm_add_epi16(prvd, curb);
      __m128i odd  = _mm_add_epi16(nxtd, curb);

      // interleave even and odd pixels, then undo scaling.
      __m128i int0 = _mm_unpacklo_epi16(even, odd);
      __m128i int1 = _mm_unpackhi_epi16(even, odd);
      __m128i de0  = _mm_srli_epi16(int0, 4);
      __m128i de1  = _mm_srli_epi16(int1, 4);

      // pack and write output
      __m128i outv = _mm_packus_epi16(de0, de1);
      _mm_storeu_si128((__m128i *) (out + i*2), outv);
#elif defined(STBI_NEON)
      // load and perform the vertical filtering pass
      // this uses 3*x + y = 4*x + (y - x)
      uint8x8_t farb  = vld1_u8(in_far + i);
      uint8x8_t nearb = vld1_u8(in_near + i);
      int16x8_t diff  = vreinterpretq_s16_u16(vsubl_u8(farb, nearb));
      int16x8_t nears = vreinterpretq_s16_u16(vshll_n_u8(nearb, 2));
      int16x8_t curr  = vaddq_s16(nears, diff); // current row

      // horizontal filter works the same based on shifted vers of current
      // row. "prev" is current row shifted right by 1 pixel; we need to
      // insert the previous pixel value (from t1).
      // "next" is current row shifted left by 1 pixel, with first pixel
      // of next block of 8 pixels added in.
      int16x8_t prv0 = vextq_s16(curr, curr, 7);
      int16x8_t nxt0 = vextq_s16(curr, curr, 1);
      int16x8_t prev = vsetq_lane_s16(t1, prv0, 0);
      int16x8_t next = vsetq_lane_s16(3*in_near[i+8] + in_far[i+8], nxt0, 7);

      // horizontal filter, polyphase implementation since it's convenient:
      // even pixels = 3*cur + prev = cur*4 + (prev - cur)
      // odd  pixels = 3*cur + next = cur*4 + (next - cur)
      // note the shared term.
      int16x8_t curs = vshlq_n_s16(curr, 2);
      int16x8_t prvd = vsubq_s16(prev, curr);
      int16x8_t nxtd = vsubq_s16(next, curr);
      int16x8_t even = vaddq_s16(curs, prvd);
      int16x8_t odd  = vaddq_s16(curs, nxtd);

      // undo scaling and round, then store with even/odd phases interleaved
      uint8x8x2_t o;
      o.val[0] = vqrshrun_n_s16(even, 4);
      o.val[1] = vqrshrun_n_s16(odd,  4);
      vst2_u8(out + i*2, o);
#endif

      // "previous" value for next iter
      t1 = 3*in_near[i+7] + in_far[i+7];
   }

   // scalar continuation: write the left-hand output of column i.
   // If the vector loop never ran (i == 0), t0 equals t1 here and the
   // expression reduces to (t1 + 2) >> 2, the plain left-edge value.
   t0 = t1;
   t1 = 3*in_near[i] + in_far[i];
   out[i*2] = stbi__div16(3*t1 + t0 + 8);

   // remaining columns, same recurrence as the scalar kernel
   for (++i; i < w; ++i) {
      t0 = t1;
      t1 = 3*in_near[i]+in_far[i];
      out[i*2-1] = stbi__div16(3*t0 + t1 + 8);
      out[i*2  ] = stbi__div16(3*t1 + t0 + 8);
   }
   // right edge: vertical filter only
   out[w*2-1] = stbi__div4(t1+2);

   STBI_NOTUSED(hs);

   return out;
}
3419 #endif
3420 
// Fallback resampler for expansion factors without a dedicated kernel:
// nearest-neighbor, i.e. each of the 'w' samples of in_near is repeated 'hs'
// times in 'out'. in_far is not used. Returns 'out'.
static stbi_uc *stbi__resample_row_generic(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs)
{
   int src = 0;
   STBI_NOTUSED(in_far);
   while (src < w) {
      int rep;
      for (rep = 0; rep < hs; ++rep)
         out[src*hs + rep] = in_near[src];
      ++src;
   }
   return out;
}
3431 
3432 // this is a reduced-precision calculation of YCbCr-to-RGB introduced
3433 // to make sure the code produces the same results in both SIMD and scalar
3434 #define stbi__float2fixed(x)  (((int) ((x) * 4096.0f + 0.5f)) << 8)
// Scalar YCbCr -> RGB conversion of one row.
//   out          - destination; advanced by 'step' bytes per pixel
//   y, pcb, pcr  - the three source planes, 'count' samples each
// Four bytes (R, G, B, 255) are stored for every pixel regardless of 'step',
// so with step == 3 the fourth byte lands on the following pixel and, for the
// last pixel, one byte past it: the destination must have room for that.
static void stbi__YCbCr_to_RGB_row(stbi_uc *out, const stbi_uc *y, const stbi_uc *pcb, const stbi_uc *pcr, int count, int step)
{
   int px;
   for (px = 0; px < count; ++px, out += step) {
      // chroma is re-centered on zero; luma is put in fixed point with 20
      // fractional bits and pre-biased by one half so the final shift rounds
      int cb = pcb[px] - 128;
      int cr = pcr[px] - 128;
      int luma = (y[px] << 20) + (1<<19);
      int red, green, blue;

      red   = luma +  cr* stbi__float2fixed(1.40200f);
      // the cb contribution to green is masked to its upper 16 bits as part
      // of the deliberately reduced-precision calculation
      green = luma + (cr*-stbi__float2fixed(0.71414f)) + ((cb*-stbi__float2fixed(0.34414f)) & 0xffff0000);
      blue  = luma                                     +   cb* stbi__float2fixed(1.77200f);

      red   >>= 20;
      green >>= 20;
      blue  >>= 20;

      // clamp to 0..255
      if (red < 0) red = 0; else if (red > 255) red = 255;
      if (green < 0) green = 0; else if (green > 255) green = 255;
      if (blue < 0) blue = 0; else if (blue > 255) blue = 255;

      out[0] = (stbi_uc)red;
      out[1] = (stbi_uc)green;
      out[2] = (stbi_uc)blue;
      out[3] = 255;
   }
}
3459 
3460 #if defined(STBI_SSE2) || defined(STBI_NEON)
// SIMD (SSE2 and/or NEON) YCbCr -> RGB conversion of one row.
// Only step == 4 is vectorized, eight pixels per iteration. Leftover pixels,
// and every pixel when step != 4, go through the scalar loop at the bottom,
// which repeats the scalar kernel's arithmetic. As in the scalar kernel, four
// bytes (R, G, B, 255) are stored per pixel whatever 'step' is.
static void stbi__YCbCr_to_RGB_simd(stbi_uc *out, stbi_uc const *y, stbi_uc const *pcb, stbi_uc const *pcr, int count, int step)
{
   int i = 0;

#ifdef STBI_SSE2
   // step == 3 is pretty ugly on the final interleave, and i'm not convinced
   // it's useful in practice (you wouldn't use it for textures, for example).
   // so just accelerate step == 4 case.
   if (step == 4) {
      // this is a fairly straightforward implementation and not super-optimized.
      __m128i signflip  = _mm_set1_epi8(-0x80);
      // color coefficients scaled by 4096
      __m128i cr_const0 = _mm_set1_epi16(   (short) ( 1.40200f*4096.0f+0.5f));
      __m128i cr_const1 = _mm_set1_epi16( - (short) ( 0.71414f*4096.0f+0.5f));
      __m128i cb_const0 = _mm_set1_epi16( - (short) ( 0.34414f*4096.0f+0.5f));
      __m128i cb_const1 = _mm_set1_epi16(   (short) ( 1.77200f*4096.0f+0.5f));
      __m128i y_bias = _mm_set1_epi8((char) (unsigned char) 128);
      __m128i xw = _mm_set1_epi16(255); // alpha channel

      for (; i+7 < count; i += 8) {
         // load
         __m128i y_bytes = _mm_loadl_epi64((__m128i *) (y+i));
         __m128i cr_bytes = _mm_loadl_epi64((__m128i *) (pcr+i));
         __m128i cb_bytes = _mm_loadl_epi64((__m128i *) (pcb+i));
         __m128i cr_biased = _mm_xor_si128(cr_bytes, signflip); // -128
         __m128i cb_biased = _mm_xor_si128(cb_bytes, signflip); // -128

         // unpack to short (and left-shift cr, cb by 8)
         // y lands in the high byte with 128 below it, which acts as the
         // one-half rounding term once shifted down
         __m128i yw  = _mm_unpacklo_epi8(y_bias, y_bytes);
         __m128i crw = _mm_unpacklo_epi8(_mm_setzero_si128(), cr_biased);
         __m128i cbw = _mm_unpacklo_epi8(_mm_setzero_si128(), cb_biased);

         // color transform
         __m128i yws = _mm_srli_epi16(yw, 4);
         __m128i cr0 = _mm_mulhi_epi16(cr_const0, crw);
         __m128i cb0 = _mm_mulhi_epi16(cb_const0, cbw);
         __m128i cb1 = _mm_mulhi_epi16(cbw, cb_const1);
         __m128i cr1 = _mm_mulhi_epi16(crw, cr_const1);
         __m128i rws = _mm_add_epi16(cr0, yws);
         __m128i gwt = _mm_add_epi16(cb0, yws);
         __m128i bws = _mm_add_epi16(yws, cb1);
         __m128i gws = _mm_add_epi16(gwt, cr1);

         // descale
         __m128i rw = _mm_srai_epi16(rws, 4);
         __m128i bw = _mm_srai_epi16(bws, 4);
         __m128i gw = _mm_srai_epi16(gws, 4);

         // back to byte, set up for transpose
         // (the unsigned-saturating pack also performs the 0..255 clamp)
         __m128i brb = _mm_packus_epi16(rw, bw);
         __m128i gxb = _mm_packus_epi16(gw, xw);

         // transpose to interleave channels
         __m128i t0 = _mm_unpacklo_epi8(brb, gxb);
         __m128i t1 = _mm_unpackhi_epi8(brb, gxb);
         __m128i o0 = _mm_unpacklo_epi16(t0, t1);
         __m128i o1 = _mm_unpackhi_epi16(t0, t1);

         // store
         _mm_storeu_si128((__m128i *) (out + 0), o0);
         _mm_storeu_si128((__m128i *) (out + 16), o1);
         out += 32;
      }
   }
#endif

#ifdef STBI_NEON
   // in this version, step=3 support would be easy to add. but is there demand?
   if (step == 4) {
      // this is a fairly straightforward implementation and not super-optimized.
      uint8x8_t signflip = vdup_n_u8(0x80);
      // color coefficients scaled by 4096
      int16x8_t cr_const0 = vdupq_n_s16(   (short) ( 1.40200f*4096.0f+0.5f));
      int16x8_t cr_const1 = vdupq_n_s16( - (short) ( 0.71414f*4096.0f+0.5f));
      int16x8_t cb_const0 = vdupq_n_s16( - (short) ( 0.34414f*4096.0f+0.5f));
      int16x8_t cb_const1 = vdupq_n_s16(   (short) ( 1.77200f*4096.0f+0.5f));

      for (; i+7 < count; i += 8) {
         // load
         uint8x8_t y_bytes  = vld1_u8(y + i);
         uint8x8_t cr_bytes = vld1_u8(pcr + i);
         uint8x8_t cb_bytes = vld1_u8(pcb + i);
         int8x8_t cr_biased = vreinterpret_s8_u8(vsub_u8(cr_bytes, signflip));
         int8x8_t cb_biased = vreinterpret_s8_u8(vsub_u8(cb_bytes, signflip));

         // expand to s16
         int16x8_t yws = vreinterpretq_s16_u16(vshll_n_u8(y_bytes, 4));
         int16x8_t crw = vshll_n_s8(cr_biased, 7);
         int16x8_t cbw = vshll_n_s8(cb_biased, 7);

         // color transform
         int16x8_t cr0 = vqdmulhq_s16(crw, cr_const0);
         int16x8_t cb0 = vqdmulhq_s16(cbw, cb_const0);
         int16x8_t cr1 = vqdmulhq_s16(crw, cr_const1);
         int16x8_t cb1 = vqdmulhq_s16(cbw, cb_const1);
         int16x8_t rws = vaddq_s16(yws, cr0);
         int16x8_t gws = vaddq_s16(vaddq_s16(yws, cb0), cr1);
         int16x8_t bws = vaddq_s16(yws, cb1);

         // undo scaling, round, convert to byte
         uint8x8x4_t o;
         o.val[0] = vqrshrun_n_s16(rws, 4);
         o.val[1] = vqrshrun_n_s16(gws, 4);
         o.val[2] = vqrshrun_n_s16(bws, 4);
         o.val[3] = vdup_n_u8(255);

         // store, interleaving r/g/b/a
         vst4_u8(out, o);
         out += 8*4;
      }
   }
#endif

   // scalar path for whatever the vector loops above did not cover
   for (; i < count; ++i) {
      int y_fixed = (y[i] << 20) + (1<<19); // rounding
      int r,g,b;
      int cr = pcr[i] - 128;
      int cb = pcb[i] - 128;
      r = y_fixed + cr* stbi__float2fixed(1.40200f);
      g = y_fixed + cr*-stbi__float2fixed(0.71414f) + ((cb*-stbi__float2fixed(0.34414f)) & 0xffff0000);
      b = y_fixed                                   +   cb* stbi__float2fixed(1.77200f);
      r >>= 20;
      g >>= 20;
      b >>= 20;
      // clamp to 0..255; the unsigned compare catches both negative and >255
      if ((unsigned) r > 255) { if (r < 0) r = 0; else r = 255; }
      if ((unsigned) g > 255) { if (g < 0) g = 0; else g = 255; }
      if ((unsigned) b > 255) { if (b < 0) b = 0; else b = 255; }
      out[0] = (stbi_uc)r;
      out[1] = (stbi_uc)g;
      out[2] = (stbi_uc)b;
      out[3] = 255;
      out += step;
   }
}
3593 #endif
3594 
3595 // set up the kernels
stbi__setup_jpeg(stbi__jpeg * j)3596 static void stbi__setup_jpeg(stbi__jpeg *j)
3597 {
3598    j->idct_block_kernel = stbi__idct_block;
3599    j->YCbCr_to_RGB_kernel = stbi__YCbCr_to_RGB_row;
3600    j->resample_row_hv_2_kernel = stbi__resample_row_hv_2;
3601 
3602 #ifdef STBI_SSE2
3603    if (stbi__sse2_available()) {
3604       j->idct_block_kernel = stbi__idct_simd;
3605       j->YCbCr_to_RGB_kernel = stbi__YCbCr_to_RGB_simd;
3606       j->resample_row_hv_2_kernel = stbi__resample_row_hv_2_simd;
3607    }
3608 #endif
3609 
3610 #ifdef STBI_NEON
3611    j->idct_block_kernel = stbi__idct_simd;
3612    j->YCbCr_to_RGB_kernel = stbi__YCbCr_to_RGB_simd;
3613    j->resample_row_hv_2_kernel = stbi__resample_row_hv_2_simd;
3614 #endif
3615 }
3616 
3617 // clean up the temporary component buffers
stbi__cleanup_jpeg(stbi__jpeg * j)3618 static void stbi__cleanup_jpeg(stbi__jpeg *j)
3619 {
3620    stbi__free_jpeg_components(j, j->s->img_n, 0);
3621 }
3622 
// Per-component upsampling state used while converting decoded planes into
// output rows.
typedef struct
{
   resample_row_func resample; // row kernel chosen for this component's hs/vs
   stbi_uc *line0,*line1;      // the two source rows passed to the kernel
   int hs,vs;   // expansion factor in each axis
   int w_lores; // horizontal pixels pre-expansion
   int ystep;   // how far through vertical expansion we are
   int ypos;    // which pre-expansion row we're on
} stbi__resample;
3632 
3633 // fast 0..255 * 0..255 => 0..255 rounded multiplication
stbi__blinn_8x8(stbi_uc x,stbi_uc y)3634 static stbi_uc stbi__blinn_8x8(stbi_uc x, stbi_uc y)
3635 {
3636    unsigned int t = x*y + 128;
3637    return (stbi_uc) ((t + (t >>8)) >> 8);
3638 }
3639 
// Decode a complete JPEG and return it as interleaved 8-bit pixels.
//   z            - decoder state; input comes from z->s and the kernel
//                  function pointers in z must already be set
//   out_x, out_y - receive the image width and height (written unconditionally
//                  on success)
//   comp         - if non-NULL, receives 3 for files with >= 3 components,
//                  otherwise 1 (the file's layout, not the output's)
//   req_comp     - 0 to output that default component count, or 1..4 to force
//                  a specific count
// Returns the pixel buffer (img_y rows of img_x pixels, n bytes per pixel) on
// success. On failure returns NULL or the result of stbi__errpuc(). The
// temporary component buffers are released on every path that gets past the
// req_comp check.
static stbi_uc *load_jpeg_image(stbi__jpeg *z, int *out_x, int *out_y, int *comp, int req_comp)
{
   int n, decode_n, is_rgb;
   z->s->img_n = 0; // make stbi__cleanup_jpeg safe

   // validate req_comp
   if (req_comp < 0 || req_comp > 4) return stbi__errpuc("bad req_comp", "Internal error");

   // load a jpeg image from whichever source, but leave in YCbCr format
   if (!stbi__decode_jpeg_image(z)) { stbi__cleanup_jpeg(z); return NULL; }

   // determine actual number of components to generate
   n = req_comp ? req_comp : z->s->img_n >= 3 ? 3 : 1;

   // a 3-component file is taken as plain RGB (no color conversion) when
   // z->rgb reached 3, or when app14_color_transform is 0 and z->jfif is clear
   is_rgb = z->s->img_n == 3 && (z->rgb == 3 || (z->app14_color_transform == 0 && !z->jfif));

   // for 1- or 2-channel output from a non-RGB 3-component file only the
   // first plane is read below, so only that one needs resampling
   if (z->s->img_n == 3 && n < 3 && !is_rgb)
      decode_n = 1;
   else
      decode_n = z->s->img_n;

   // resample and color-convert
   {
      int k;
      unsigned int i,j;
      stbi_uc *output;
      stbi_uc *coutput[4] = { NULL, NULL, NULL, NULL };

      stbi__resample res_comp[4];

      for (k=0; k < decode_n; ++k) {
         stbi__resample *r = &res_comp[k];

         // allocate line buffer big enough for upsampling off the edges
         // with upsample factor of 4
         z->img_comp[k].linebuf = (stbi_uc *) stbi__malloc(z->s->img_x + 3);
         if (!z->img_comp[k].linebuf) { stbi__cleanup_jpeg(z); return stbi__errpuc("outofmem", "Out of memory"); }

         // NOTE(review): integer division -- a sampling factor that does not
         // divide the maximum evenly would be truncated here; confirm such
         // files are rejected before this point
         r->hs      = z->img_h_max / z->img_comp[k].h;
         r->vs      = z->img_v_max / z->img_comp[k].v;
         r->ystep   = r->vs >> 1;
         r->w_lores = (z->s->img_x + r->hs-1) / r->hs;
         r->ypos    = 0;
         r->line0   = r->line1 = z->img_comp[k].data;

         // pick a dedicated kernel for the 1x and 2x cases, else the
         // nearest-neighbor fallback
         if      (r->hs == 1 && r->vs == 1) r->resample = resample_row_1;
         else if (r->hs == 1 && r->vs == 2) r->resample = stbi__resample_row_v_2;
         else if (r->hs == 2 && r->vs == 1) r->resample = stbi__resample_row_h_2;
         else if (r->hs == 2 && r->vs == 2) r->resample = z->resample_row_hv_2_kernel;
         else                               r->resample = stbi__resample_row_generic;
      }

      // can't error after this so, this is safe
      // NOTE(review): the trailing 1 requests one byte beyond n*img_x*img_y;
      // presumably it absorbs the unconditional out[3] / out[1] stores made
      // for the very last pixel when n is 3 or 1 -- confirm
      output = (stbi_uc *) stbi__malloc_mad3(n, z->s->img_x, z->s->img_y, 1);
      if (!output) { stbi__cleanup_jpeg(z); return stbi__errpuc("outofmem", "Out of memory"); }

      // now go ahead and resample
      for (j=0; j < z->s->img_y; ++j) {
         stbi_uc *out = output + n * z->s->img_x * j;
         for (k=0; k < decode_n; ++k) {
            stbi__resample *r = &res_comp[k];
            // in the second half of a vertical expansion step line1 is the
            // nearer source row, otherwise line0 is
            int y_bot = r->ystep >= (r->vs >> 1);
            coutput[k] = r->resample(z->img_comp[k].linebuf,
                                     y_bot ? r->line1 : r->line0,
                                     y_bot ? r->line0 : r->line1,
                                     r->w_lores, r->hs);
            // after vs output rows move on to the next source row; line1
            // stops advancing once the component's last row is reached
            if (++r->ystep >= r->vs) {
               r->ystep = 0;
               r->line0 = r->line1;
               if (++r->ypos < z->img_comp[k].y)
                  r->line1 += z->img_comp[k].w2;
            }
         }
         if (n >= 3) {
            // 3- or 4-channel output
            stbi_uc *y = coutput[0];
            if (z->s->img_n == 3) {
               if (is_rgb) {
                  // planes already hold R, G, B: just interleave
                  for (i=0; i < z->s->img_x; ++i) {
                     out[0] = y[i];
                     out[1] = coutput[1][i];
                     out[2] = coutput[2][i];
                     out[3] = 255;
                     out += n;
                  }
               } else {
                  z->YCbCr_to_RGB_kernel(out, y, coutput[1], coutput[2], z->s->img_x, n);
               }
            } else if (z->s->img_n == 4) {
               if (z->app14_color_transform == 0) { // CMYK
                  // scale each of the first three planes by the fourth
                  for (i=0; i < z->s->img_x; ++i) {
                     stbi_uc m = coutput[3][i];
                     out[0] = stbi__blinn_8x8(coutput[0][i], m);
                     out[1] = stbi__blinn_8x8(coutput[1][i], m);
                     out[2] = stbi__blinn_8x8(coutput[2][i], m);
                     out[3] = 255;
                     out += n;
                  }
               } else if (z->app14_color_transform == 2) { // YCCK
                  // convert the first three planes to RGB in place, then
                  // invert and scale by the fourth plane
                  z->YCbCr_to_RGB_kernel(out, y, coutput[1], coutput[2], z->s->img_x, n);
                  for (i=0; i < z->s->img_x; ++i) {
                     stbi_uc m = coutput[3][i];
                     out[0] = stbi__blinn_8x8(255 - out[0], m);
                     out[1] = stbi__blinn_8x8(255 - out[1], m);
                     out[2] = stbi__blinn_8x8(255 - out[2], m);
                     out += n;
                  }
               } else { // YCbCr + alpha?  Ignore the fourth channel for now
                  z->YCbCr_to_RGB_kernel(out, y, coutput[1], coutput[2], z->s->img_x, n);
               }
            } else
               // single-component file: replicate it into R, G and B
               for (i=0; i < z->s->img_x; ++i) {
                  out[0] = out[1] = out[2] = y[i];
                  out[3] = 255; // not used if n==3
                  out += n;
               }
         } else {
            // 1- or 2-channel output
            if (is_rgb) {
               if (n == 1)
                  for (i=0; i < z->s->img_x; ++i)
                     *out++ = stbi__compute_y(coutput[0][i], coutput[1][i], coutput[2][i]);
               else {
                  for (i=0; i < z->s->img_x; ++i, out += 2) {
                     out[0] = stbi__compute_y(coutput[0][i], coutput[1][i], coutput[2][i]);
                     out[1] = 255;
                  }
               }
            } else if (z->s->img_n == 4 && z->app14_color_transform == 0) {
               // CMYK: same per-channel scaling as above, then reduce to one value
               for (i=0; i < z->s->img_x; ++i) {
                  stbi_uc m = coutput[3][i];
                  stbi_uc r = stbi__blinn_8x8(coutput[0][i], m);
                  stbi_uc g = stbi__blinn_8x8(coutput[1][i], m);
                  stbi_uc b = stbi__blinn_8x8(coutput[2][i], m);
                  out[0] = stbi__compute_y(r, g, b);
                  out[1] = 255;
                  out += n;
               }
            } else if (z->s->img_n == 4 && z->app14_color_transform == 2) {
               // YCCK: inverted first plane scaled by the fourth
               for (i=0; i < z->s->img_x; ++i) {
                  out[0] = stbi__blinn_8x8(255 - coutput[0][i], coutput[3][i]);
                  out[1] = 255;
                  out += n;
               }
            } else {
               // everything else: copy the first plane as-is
               stbi_uc *y = coutput[0];
               if (n == 1)
                  for (i=0; i < z->s->img_x; ++i) out[i] = y[i];
               else
                  for (i=0; i < z->s->img_x; ++i) { *out++ = y[i]; *out++ = 255; }
            }
         }
      }
      stbi__cleanup_jpeg(z);
      *out_x = z->s->img_x;
      *out_y = z->s->img_y;
      if (comp) *comp = z->s->img_n >= 3 ? 3 : 1; // report original components, not output
      return output;
   }
}
3798 
// Entry point for loading a JPEG from stream 's'.
// Allocates a temporary stbi__jpeg decoder state, installs the kernels,
// decodes through load_jpeg_image() and frees the state again.
//   x, y, comp, req_comp - forwarded unchanged to load_jpeg_image()
//   ri                   - unused here
// Returns the pixel buffer from load_jpeg_image(), or a failure value
// (the result of stbi__errpuc() if the decoder state cannot be allocated).
static void *stbi__jpeg_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri)
{
   unsigned char* result;
   stbi__jpeg* j = (stbi__jpeg*) stbi__malloc(sizeof(stbi__jpeg));
   STBI_NOTUSED(ri);
   // the allocation can fail; bail out before touching the state
   if (!j) return stbi__errpuc("outofmem", "Out of memory");
   j->s = s;
   stbi__setup_jpeg(j);
   result = load_jpeg_image(j, x,y,comp,req_comp);
   STBI_FREE(j);
   return result;
}
3810 
// Check whether stream 's' starts like a JPEG (i.e. with an SOI marker).
// The stream is rewound afterwards so another decoder can inspect it.
// Returns nonzero if it does. Returns the result of stbi__err() when the
// marker is missing or the temporary decoder state cannot be allocated; in
// the latter case nothing has been read, so no rewind is needed.
static int stbi__jpeg_test(stbi__context *s)
{
   int r;
   stbi__jpeg* j = (stbi__jpeg*)stbi__malloc(sizeof(stbi__jpeg));
   // the allocation can fail; bail out before touching the state
   if (!j) return stbi__err("outofmem", "Out of memory");
   j->s = s;
   stbi__setup_jpeg(j);
   r = stbi__decode_jpeg_header(j, STBI__SCAN_type);
   stbi__rewind(s);
   STBI_FREE(j);
   return r;
}
3822 
// Read the JPEG header through 'j' and report the image dimensions and
// channel count.  Each of x, y and comp is optional (may be NULL).
// The reported channel count is 3 when the file has three or more
// components and 1 otherwise.
// Returns 1 on success; on a header failure the stream is rewound and 0 is
// returned.
static int stbi__jpeg_info_raw(stbi__jpeg *j, int *x, int *y, int *comp)
{
   if (stbi__decode_jpeg_header(j, STBI__SCAN_header)) {
      stbi__context *ctx = j->s;
      if (x != NULL) *x = ctx->img_x;
      if (y != NULL) *y = ctx->img_y;
      if (comp != NULL) {
         if (ctx->img_n >= 3)
            *comp = 3;
         else
            *comp = 1;
      }
      return 1;
   }

   stbi__rewind( j->s );
   return 0;
}
3834 
// Report JPEG dimensions/channel count for 's' without decoding the image.
// Wraps stbi__jpeg_info_raw() with a temporary heap-allocated decoder state.
// Returns the result of stbi__jpeg_info_raw(), or the failure value of
// stbi__err() if the temporary state cannot be allocated.
static int stbi__jpeg_info(stbi__context *s, int *x, int *y, int *comp)
{
   int result;
   stbi__jpeg* j = (stbi__jpeg*) (stbi__malloc(sizeof(stbi__jpeg)));
   // the store to j->s below would otherwise dereference NULL
   if (j == NULL) return stbi__err("outofmem", "Out of memory");
   j->s = s;
   result = stbi__jpeg_info_raw(j, x, y, comp);
   STBI_FREE(j);
   return result;
}
3844 #endif
3845 
3846 // public domain zlib decode    v0.2  Sean Barrett 2006-11-18
3847 //    simple implementation
3848 //      - all input must be provided in an upfront buffer
3849 //      - all output is written to a single output buffer (can malloc/realloc)
3850 //    performance
3851 //      - fast huffman
3852 
3853 #ifndef STBI_NO_ZLIB
3854 
// fast-way is faster to check than jpeg huffman, but slow way is slower
// Codes of at most STBI__ZFAST_BITS bits are resolved with a single lookup in
// stbi__zhuffman.fast; STBI__ZFAST_MASK extracts the lookup index from the
// low bits of the bit buffer.
#define STBI__ZFAST_BITS  9 // accelerate all cases in default tables
#define STBI__ZFAST_MASK  ((1 << STBI__ZFAST_BITS) - 1)

// zlib-style huffman encoding
// (jpegs packs from left, zlib from right, so can't share code)
// Filled in by stbi__zbuild_huffman().
typedef struct
{
   stbi__uint16 fast[1 << STBI__ZFAST_BITS]; // (code length << 9) | symbol for short codes; 0 means "use the slow path"
   stbi__uint16 firstcode[16];               // first code value of each code length
   int maxcode[17];                          // one past the last code of each length, pre-shifted to 16 bits; [16] is a sentinel
   stbi__uint16 firstsymbol[16];             // index into size[]/value[] of the first symbol of each code length
   stbi_uc  size[288];                       // code length of each entry, in code order
   stbi__uint16 value[288];                  // symbol of each entry, in code order
} stbi__zhuffman;
3870 
stbi__bitreverse16(int n)3871 stbi_inline static int stbi__bitreverse16(int n)
3872 {
3873   n = ((n & 0xAAAA) >>  1) | ((n & 0x5555) << 1);
3874   n = ((n & 0xCCCC) >>  2) | ((n & 0x3333) << 2);
3875   n = ((n & 0xF0F0) >>  4) | ((n & 0x0F0F) << 4);
3876   n = ((n & 0xFF00) >>  8) | ((n & 0x00FF) << 8);
3877   return n;
3878 }
3879 
stbi__bit_reverse(int v,int bits)3880 stbi_inline static int stbi__bit_reverse(int v, int bits)
3881 {
3882    STBI_ASSERT(bits <= 16);
3883    // to bit reverse n bits, reverse 16 and shift
3884    // e.g. 11 bits, bit reverse and shift away 5
3885    return stbi__bitreverse16(v) >> (16-bits);
3886 }
3887 
// Build the decode tables in 'z' from a list of per-symbol code lengths.
//   sizelist - code length of each symbol (0 = symbol unused)
//   num      - number of entries in sizelist
// Codes are assigned in the DEFLATE canonical order: by increasing length,
// and within one length by increasing symbol index.
// Returns 1 on success; if a length is used by more symbols than it has
// codes, or the code space overflows, returns the result of stbi__err().
static int stbi__zbuild_huffman(stbi__zhuffman *z, const stbi_uc *sizelist, int num)
{
   int count_by_len[17], next_code[16];
   int len, sym, code, assigned;

   // histogram of code lengths; length 0 (unused symbols) is not counted
   memset(count_by_len, 0, sizeof(count_by_len));
   memset(z->fast, 0, sizeof(z->fast));
   for (sym = 0; sym < num; ++sym)
      count_by_len[sizelist[sym]] += 1;
   count_by_len[0] = 0;
   for (len = 1; len < 16; ++len) {
      if (count_by_len[len] > (1 << len))
         return stbi__err("bad sizes", "Corrupt PNG");
   }

   // first code / first table slot for every code length
   code = 0;
   assigned = 0;
   for (len = 1; len < 16; ++len) {
      int count = count_by_len[len];
      next_code[len] = code;
      z->firstcode[len] = (stbi__uint16) code;
      z->firstsymbol[len] = (stbi__uint16) assigned;
      code += count;
      if (count != 0 && code > (1 << len))
         return stbi__err("bad codelengths","Corrupt PNG");
      z->maxcode[len] = code << (16 - len); // preshift for inner loop
      code <<= 1;
      assigned += count;
   }
   z->maxcode[16] = 0x10000; // sentinel

   // place every used symbol in the code-ordered tables and, for short
   // codes, in every fast-table slot whose low bits match the code
   for (sym = 0; sym < num; ++sym) {
      int slot, j;
      stbi__uint16 packed;
      int nbits = sizelist[sym];
      if (nbits == 0)
         continue;

      slot = next_code[nbits] - z->firstcode[nbits] + z->firstsymbol[nbits];
      packed = (stbi__uint16) ((nbits << 9) | sym);
      z->size [slot] = (stbi_uc) nbits;
      z->value[slot] = (stbi__uint16) sym;
      if (nbits <= STBI__ZFAST_BITS) {
         for (j = stbi__bit_reverse(next_code[nbits], nbits); j < (1 << STBI__ZFAST_BITS); j += (1 << nbits))
            z->fast[j] = packed;
      }
      next_code[nbits] += 1;
   }
   return 1;
}
3934 
3935 // zlib-from-memory implementation for PNG reading
3936 //    because PNG allows splitting the zlib stream arbitrarily,
3937 //    and it's annoying structurally to have PNG call ZLIB call PNG,
3938 //    we require PNG read all the IDATs and combine them into a single
3939 //    memory buffer
3940 
// State of one zlib/DEFLATE decode: input cursor, bit buffer, output buffer
// and the two Huffman tables of the block currently being decoded.
typedef struct
{
   stbi_uc *zbuffer, *zbuffer_end; // next unread input byte / one past the last input byte
   int num_bits;                   // number of bits currently held in code_buffer
   stbi__uint32 code_buffer;       // pending input bits; the next bit to consume is the lowest one

   char *zout;                     // next output position
   char *zout_start;               // start of the output buffer
   char *zout_end;                 // one past the end of the output buffer
   int   z_expandable;             // nonzero if stbi__zexpand() may realloc the output buffer

   stbi__zhuffman z_length, z_distance; // literal/length table and distance table
} stbi__zbuf;
3954 
stbi__zget8(stbi__zbuf * z)3955 stbi_inline static stbi_uc stbi__zget8(stbi__zbuf *z)
3956 {
3957    if (z->zbuffer >= z->zbuffer_end) return 0;
3958    return *z->zbuffer++;
3959 }
3960 
// Refill the bit buffer: append input bytes above the bits already held,
// one byte at a time, until more than 24 bits are available.
// At least one byte is always appended.
static void stbi__fill_bits(stbi__zbuf *z)
{
   for (;;) {
      unsigned int incoming;
      // no stray bits may sit above the num_bits valid ones
      STBI_ASSERT(z->code_buffer < (1U << z->num_bits));
      incoming = (unsigned int) stbi__zget8(z);
      z->code_buffer |= incoming << z->num_bits;
      z->num_bits += 8;
      if (z->num_bits > 24)
         break;
   }
}
3969 
stbi__zreceive(stbi__zbuf * z,int n)3970 stbi_inline static unsigned int stbi__zreceive(stbi__zbuf *z, int n)
3971 {
3972    unsigned int k;
3973    if (z->num_bits < n) stbi__fill_bits(z);
3974    k = z->code_buffer & ((1 << n) - 1);
3975    z->code_buffer >>= n;
3976    z->num_bits -= n;
3977    return k;
3978 }
3979 
// Decode one symbol whose code is longer than STBI__ZFAST_BITS (or has no
// fast-table entry).  Walks the per-length maxcode[] limits to find the code
// length, then maps the code to its slot in the code-ordered tables.
// On success consumes the code's bits from 'a' and returns the symbol.
// Returns -1, without consuming anything, if the bits do not form a valid code.
static int stbi__zhuffman_decode_slowpath(stbi__zbuf *a, stbi__zhuffman *z)
{
   int b,s,k;
   // not resolved by fast table, so compute it the slow way
   // use jpeg approach, which requires MSbits at top
   k = stbi__bit_reverse(a->code_buffer, 16);
   // terminates at s == 16 at the latest: stbi__zbuild_huffman() stores
   // 0x10000 in maxcode[16] and k is at most 0xffff
   for (s=STBI__ZFAST_BITS+1; ; ++s)
      if (k < z->maxcode[s])
         break;
   if (s >= 16) return -1; // invalid code!
   // code size is s, so:
   b = (k >> (16-s)) - z->firstcode[s] + z->firstsymbol[s];
   // b and size[b] are derived from the input stream: report an inconsistent
   // table as a decode failure instead of asserting or indexing out of range
   if (b < 0 || b >= (int) (sizeof(z->size) / sizeof(z->size[0]))) return -1;
   if (z->size[b] != s) return -1;
   a->code_buffer >>= s;
   a->num_bits -= s;
   return z->value[b];
}
3997 
stbi__zhuffman_decode(stbi__zbuf * a,stbi__zhuffman * z)3998 stbi_inline static int stbi__zhuffman_decode(stbi__zbuf *a, stbi__zhuffman *z)
3999 {
4000    int b,s;
4001    if (a->num_bits < 16) stbi__fill_bits(a);
4002    b = z->fast[a->code_buffer & STBI__ZFAST_MASK];
4003    if (b) {
4004       s = b >> 9;
4005       a->code_buffer >>= s;
4006       a->num_bits -= s;
4007       return b & 511;
4008    }
4009    return stbi__zhuffman_decode_slowpath(a, z);
4010 }
4011 
// Grow the output buffer so that at least n more bytes fit after 'zout'.
//   zout - the caller's current output position; it is stored into z->zout
//          first because callers keep it in a local while decoding
//   n    - number of additional bytes needed
// The capacity is doubled until it is large enough.  On success zout_start,
// zout and zout_end point into the (possibly moved) buffer and 1 is returned.
// Fails with the result of stbi__err() when the buffer is not expandable,
// when the required size would no longer fit in an int, or when the
// reallocation fails; the old buffer is left in z->zout_start in those cases.
static int stbi__zexpand(stbi__zbuf *z, char *zout, int n)  // need to make room for n bytes
{
   char *q;
   unsigned int cur, limit, old_limit;
   // sizes are reported to callers as int, so never grow past what an int can hold
   const unsigned int max_limit = ((unsigned int) -1) >> 1;
   z->zout = zout;
   if (!z->z_expandable) return stbi__err("output buffer limit","Corrupt PNG");
   cur   = (unsigned int) (z->zout     - z->zout_start);
   limit = old_limit = (unsigned int) (z->zout_end - z->zout_start);
   if (n < 0 || cur > max_limit || (unsigned int) n > max_limit - cur)
      return stbi__err("outofmem", "Out of memory");
   // a zero-sized initial buffer would never grow by doubling
   if (limit == 0) limit = 1;
   while (cur + (unsigned int) n > limit) {
      // doubling again would overflow: give up instead of wrapping around
      if (limit > max_limit / 2) return stbi__err("outofmem", "Out of memory");
      limit *= 2;
   }
   q = (char *) STBI_REALLOC_SIZED(z->zout_start, old_limit, limit);
   STBI_NOTUSED(old_limit);
   if (q == NULL) return stbi__err("outofmem", "Out of memory");
   z->zout_start = q;
   z->zout       = q + cur;
   z->zout_end   = q + limit;
   return 1;
}
4030 
// Base match length for each length symbol, indexed by (symbol - 257).
// The trailing zero entries are padding.
static const int stbi__zlength_base[31] = {
   3,4,5,6,7,8,9,10,11,13,
   15,17,19,23,27,31,35,43,51,59,
   67,83,99,115,131,163,195,227,258,0,0 };

// Number of extra bits read after each length symbol and added to its base.
static const int stbi__zlength_extra[31]=
{ 0,0,0,0,0,0,0,0,1,1,1,1,2,2,2,2,3,3,3,3,4,4,4,4,5,5,5,5,0,0,0 };

// Base match distance for each distance symbol.
// The trailing zero entries are padding.
static const int stbi__zdist_base[32] = { 1,2,3,4,5,7,9,13,17,25,33,49,65,97,129,193,
257,385,513,769,1025,1537,2049,3073,4097,6145,8193,12289,16385,24577,0,0};

// Number of extra bits read after each distance symbol and added to its base.
// Only 30 values are listed; the last two entries are implicitly zero.
static const int stbi__zdist_extra[32] =
{ 0,0,0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,8,8,9,9,10,10,11,11,12,12,13,13};
4044 
// Decode one Huffman-compressed block using the tables already stored in
// a->z_length and a->z_distance.
// Symbols below 256 are literal bytes, 256 ends the block, and larger
// symbols start a (length, distance) copy from earlier output.
// The output position is kept in a local and written back to a->zout at the
// end of the block (and by stbi__zexpand() whenever the buffer is grown).
// Returns 1 at end of block; on corrupt data or a failed buffer expansion
// returns the failure value (0 or the result of stbi__err()).
static int stbi__parse_huffman_block(stbi__zbuf *a)
{
   char *zout = a->zout;
   for(;;) {
      int z = stbi__zhuffman_decode(a, &a->z_length);
      if (z < 256) {
         if (z < 0) return stbi__err("bad huffman code","Corrupt PNG"); // error in huffman codes
         if (zout >= a->zout_end) {
            if (!stbi__zexpand(a, zout, 1)) return 0;
            zout = a->zout;
         }
         *zout++ = (char) z;
      } else {
         stbi_uc *p;
         int len,dist;
         if (z == 256) {
            a->zout = zout;
            return 1;
         }
         // per DEFLATE, length symbols 286 and 287 must not appear in compressed
         // data; they only exist as padding in the fixed table
         if (z >= 286) return stbi__err("bad huffman code","Corrupt PNG");
         z -= 257;
         len = stbi__zlength_base[z];
         if (stbi__zlength_extra[z]) len += stbi__zreceive(a, stbi__zlength_extra[z]);
         z = stbi__zhuffman_decode(a, &a->z_distance);
         // per DEFLATE, distance symbols 30 and 31 must not appear either; their
         // table entries are 0 and a distance of 0 would copy not-yet-written output
         if (z < 0 || z >= 30) return stbi__err("bad huffman code","Corrupt PNG");
         dist = stbi__zdist_base[z];
         if (stbi__zdist_extra[z]) dist += stbi__zreceive(a, stbi__zdist_extra[z]);
         if (zout - a->zout_start < dist) return stbi__err("bad dist","Corrupt PNG");
         // compare lengths rather than forming a pointer past the end of the buffer
         if (len > a->zout_end - zout) {
            if (!stbi__zexpand(a, zout, len)) return 0;
            zout = a->zout;
         }
         p = (stbi_uc *) (zout - dist);
         if (dist == 1) { // run of one byte; common in images.
            stbi_uc v = *p;
            if (len) { do *zout++ = v; while (--len); }
         } else {
            // byte-by-byte on purpose: source and destination may overlap
            if (len) { do *zout++ = *p++; while (--len); }
         }
      }
   }
}
4086 
// Read the table description at the start of a dynamic-Huffman block and
// build a->z_length and a->z_distance from it.
// The description is itself Huffman-coded: first the lengths of the 19
// code-length codes are read (in the length_dezigzag order), then the
// literal/length and distance code lengths are decoded with that table,
// where symbols 16/17/18 stand for runs.
// Returns 1 on success, otherwise the failure value (0 or the result of
// stbi__err()).
static int stbi__compute_huffman_codes(stbi__zbuf *a)
{
   static const stbi_uc length_dezigzag[19] = { 16,17,18,0,8,7,9,6,10,5,11,4,12,3,13,2,14,1,15 };
   stbi__zhuffman z_codelength;
   stbi_uc lencodes[286+32+137];//padding for maximum single op
   stbi_uc codelength_sizes[19];
   int hlit, hdist, hclen, ntot;
   int idx, filled;

   // header fields, in stream order
   hlit  = stbi__zreceive(a,5) + 257;
   hdist = stbi__zreceive(a,5) + 1;
   hclen = stbi__zreceive(a,4) + 4;
   ntot  = hlit + hdist;

   memset(codelength_sizes, 0, sizeof(codelength_sizes));
   for (idx = 0; idx < hclen; ++idx) {
      int bits3 = stbi__zreceive(a,3);
      codelength_sizes[length_dezigzag[idx]] = (stbi_uc) bits3;
   }
   if (!stbi__zbuild_huffman(&z_codelength, codelength_sizes, 19)) return 0;

   filled = 0;
   while (filled < ntot) {
      int run;
      stbi_uc fill;
      int sym = stbi__zhuffman_decode(a, &z_codelength);
      if (sym < 0 || sym >= 19) return stbi__err("bad codelengths", "Corrupt PNG");

      // 0..15: a literal code length
      if (sym < 16) {
         lencodes[filled++] = (stbi_uc) sym;
         continue;
      }

      // 16: repeat the previous length; 17/18: short/long run of zeros
      fill = 0;
      switch (sym) {
         case 16:
            run = stbi__zreceive(a,2)+3;
            if (filled == 0) return stbi__err("bad codelengths", "Corrupt PNG");
            fill = lencodes[filled-1];
            break;
         case 17:
            run = stbi__zreceive(a,3)+3;
            break;
         default:
            STBI_ASSERT(sym == 18);
            run = stbi__zreceive(a,7)+11;
            break;
      }
      if (ntot - filled < run) return stbi__err("bad codelengths", "Corrupt PNG");
      memset(lencodes+filled, fill, run);
      filled += run;
   }
   if (filled != ntot) return stbi__err("bad codelengths","Corrupt PNG");
   if (!stbi__zbuild_huffman(&a->z_length, lencodes, hlit)) return 0;
   if (!stbi__zbuild_huffman(&a->z_distance, lencodes+hlit, hdist)) return 0;
   return 1;
}
4135 
// Decode one stored (uncompressed) block.
// Drops the bits up to the next byte boundary, reads the 4-byte header
// (LEN and its one's complement NLEN, both little-endian) partly from the
// bit buffer and partly from the input, then copies LEN bytes straight
// from the input to the output.
// Returns 1 on success, otherwise the failure value (0 or the result of
// stbi__err()).
static int stbi__parse_uncompressed_block(stbi__zbuf *a)
{
   stbi_uc header[4];
   int len,nlen,k;
   if (a->num_bits & 7)
      stbi__zreceive(a, a->num_bits & 7); // discard
   // drain the bit-packed data into header
   k = 0;
   while (a->num_bits > 0) {
      // never write past header[] even if the bit buffer state is inconsistent
      if (k >= 4) return stbi__err("zlib corrupt","Corrupt PNG");
      header[k++] = (stbi_uc) (a->code_buffer & 255); // suppress MSVC run-time check
      a->code_buffer >>= 8;
      a->num_bits -= 8;
   }
   // the state comes from decoding untrusted data: fail rather than assert
   if (a->num_bits < 0) return stbi__err("zlib corrupt","Corrupt PNG");
   // now fill header the normal way
   while (k < 4)
      header[k++] = stbi__zget8(a);
   len  = header[1] * 256 + header[0];
   nlen = header[3] * 256 + header[2];
   if (nlen != (len ^ 0xffff)) return stbi__err("zlib corrupt","Corrupt PNG");
   // compare lengths rather than forming pointers past the end of a buffer
   if (len > a->zbuffer_end - a->zbuffer) return stbi__err("read past buffer","Corrupt PNG");
   if (len > a->zout_end - a->zout)
      if (!stbi__zexpand(a, a->zout, len)) return 0;
   memcpy(a->zout, a->zbuffer, len);
   a->zbuffer += len;
   a->zout += len;
   return 1;
}
4164 
// Read and validate the two-byte zlib stream header (CMF, FLG).
// Checks, in this order: the 16-bit header value is a multiple of 31, no
// preset dictionary is requested, and the compression method is 8.
// Returns 1 if the header is acceptable, otherwise the result of stbi__err().
static int stbi__parse_zlib_header(stbi__zbuf *a)
{
   int cmf, flg, method;

   cmf    = stbi__zget8(a);
   method = cmf & 15;
   /* the upper nibble of cmf (cinfo) is not needed */
   flg    = stbi__zget8(a);

   if (((cmf << 8) + flg) % 31 != 0)
      return stbi__err("bad zlib header","Corrupt PNG"); // zlib spec
   if ((flg & 32) != 0)
      return stbi__err("no preset dict","Corrupt PNG"); // preset dictionary not allowed in png
   if (method != 8)
      return stbi__err("bad compression","Corrupt PNG"); // DEFLATE required for png
   // window = 1 << (8 + cinfo)... but who cares, we fully buffer output
   return 1;
}
4177 
// Code lengths of the fixed literal/length table used for block type 1:
// symbols 0..143 use 8 bits, 144..255 use 9, 256..279 use 7, 280..287 use 8.
static const stbi_uc stbi__zdefault_length[288] =
{
   8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
   8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
   8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
   8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
   8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,
   9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,
   9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,
   9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,
   7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, 7,7,7,7,7,7,7,7,8,8,8,8,8,8,8,8
};
// Code lengths of the fixed distance table used for block type 1: every
// symbol uses 5 bits.
static const stbi_uc stbi__zdefault_distance[32] =
{
   5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5
};
4194 /*
4195 Init algorithm:
4196 {
4197    int i;   // use <= to match clearly with spec
4198    for (i=0; i <= 143; ++i)     stbi__zdefault_length[i]   = 8;
4199    for (   ; i <= 255; ++i)     stbi__zdefault_length[i]   = 9;
4200    for (   ; i <= 279; ++i)     stbi__zdefault_length[i]   = 7;
4201    for (   ; i <= 287; ++i)     stbi__zdefault_length[i]   = 8;
4202 
4203    for (i=0; i <=  31; ++i)     stbi__zdefault_distance[i] = 5;
4204 }
4205 */
4206 
// Decode a whole zlib/DEFLATE stream into the output buffer set up in 'a'.
//   parse_header - nonzero to expect and validate the 2-byte zlib header
// Blocks are decoded one after another until the one flagged as final.
// Block type 0 is stored data, 1 uses the fixed Huffman tables, 2 carries
// its own tables, and 3 is rejected.
// Returns 1 on success and 0 on any failure.
static int stbi__parse_zlib(stbi__zbuf *a, int parse_header)
{
   int is_final, block_type;

   if (parse_header && !stbi__parse_zlib_header(a))
      return 0;

   a->num_bits = 0;
   a->code_buffer = 0;
   for (;;) {
      is_final   = stbi__zreceive(a,1);
      block_type = stbi__zreceive(a,2);

      switch (block_type) {
         case 0:
            if (!stbi__parse_uncompressed_block(a)) return 0;
            break;
         case 3:
            return 0;
         case 1:
            // use fixed code lengths
            if (!stbi__zbuild_huffman(&a->z_length  , stbi__zdefault_length  , 288)) return 0;
            if (!stbi__zbuild_huffman(&a->z_distance, stbi__zdefault_distance,  32)) return 0;
            if (!stbi__parse_huffman_block(a)) return 0;
            break;
         default:
            if (!stbi__compute_huffman_codes(a)) return 0;
            if (!stbi__parse_huffman_block(a)) return 0;
            break;
      }

      if (is_final)
         return 1;
   }
}
4234 
// Point 'a' at the output buffer and run the decoder.
//   obuf, olen   - output buffer and its capacity in bytes
//   exp          - nonzero if the buffer may be grown with realloc
//   parse_header - nonzero to expect a zlib header before the DEFLATE data
// The caller must have set a->zbuffer / a->zbuffer_end beforehand.
// Returns the result of stbi__parse_zlib().
static int stbi__do_zlib(stbi__zbuf *a, char *obuf, int olen, int exp, int parse_header)
{
   a->z_expandable = exp;
   a->zout_start   = obuf;
   a->zout         = a->zout_start;
   a->zout_end     = obuf + olen;

   return stbi__parse_zlib(a, parse_header);
}
4244 
stbi_zlib_decode_malloc_guesssize(const char * buffer,int len,int initial_size,int * outlen)4245 STBIDEF char *stbi_zlib_decode_malloc_guesssize(const char *buffer, int len, int initial_size, int *outlen)
4246 {
4247    stbi__zbuf a;
4248    char *p = (char *) stbi__malloc(initial_size);
4249    if (p == NULL) return NULL;
4250    a.zbuffer = (stbi_uc *) buffer;
4251    a.zbuffer_end = (stbi_uc *) buffer + len;
4252    if (stbi__do_zlib(&a, p, initial_size, 1, 1)) {
4253       if (outlen) *outlen = (int) (a.zout - a.zout_start);
4254       return a.zout_start;
4255    } else {
4256       STBI_FREE(a.zout_start);
4257       return NULL;
4258    }
4259 }
4260 
stbi_zlib_decode_malloc(char const * buffer,int len,int * outlen)4261 STBIDEF char *stbi_zlib_decode_malloc(char const *buffer, int len, int *outlen)
4262 {
4263    return stbi_zlib_decode_malloc_guesssize(buffer, len, 16384, outlen);
4264 }
4265 
stbi_zlib_decode_malloc_guesssize_headerflag(const char * buffer,int len,int initial_size,int * outlen,int parse_header)4266 STBIDEF char *stbi_zlib_decode_malloc_guesssize_headerflag(const char *buffer, int len, int initial_size, int *outlen, int parse_header)
4267 {
4268    stbi__zbuf a;
4269    char *p = (char *) stbi__malloc(initial_size);
4270    if (p == NULL) return NULL;
4271    a.zbuffer = (stbi_uc *) buffer;
4272    a.zbuffer_end = (stbi_uc *) buffer + len;
4273    if (stbi__do_zlib(&a, p, initial_size, 1, parse_header)) {
4274       if (outlen) *outlen = (int) (a.zout - a.zout_start);
4275       return a.zout_start;
4276    } else {
4277       STBI_FREE(a.zout_start);
4278       return NULL;
4279    }
4280 }
4281 
stbi_zlib_decode_buffer(char * obuffer,int olen,char const * ibuffer,int ilen)4282 STBIDEF int stbi_zlib_decode_buffer(char *obuffer, int olen, char const *ibuffer, int ilen)
4283 {
4284    stbi__zbuf a;
4285    a.zbuffer = (stbi_uc *) ibuffer;
4286    a.zbuffer_end = (stbi_uc *) ibuffer + ilen;
4287    if (stbi__do_zlib(&a, obuffer, olen, 0, 1))
4288       return (int) (a.zout - a.zout_start);
4289    else
4290       return -1;
4291 }
4292 
stbi_zlib_decode_noheader_malloc(char const * buffer,int len,int * outlen)4293 STBIDEF char *stbi_zlib_decode_noheader_malloc(char const *buffer, int len, int *outlen)
4294 {
4295    stbi__zbuf a;
4296    char *p = (char *) stbi__malloc(16384);
4297    if (p == NULL) return NULL;
4298    a.zbuffer = (stbi_uc *) buffer;
4299    a.zbuffer_end = (stbi_uc *) buffer+len;
4300    if (stbi__do_zlib(&a, p, 16384, 1, 0)) {
4301       if (outlen) *outlen = (int) (a.zout - a.zout_start);
4302       return a.zout_start;
4303    } else {
4304       STBI_FREE(a.zout_start);
4305       return NULL;
4306    }
4307 }
4308 
stbi_zlib_decode_noheader_buffer(char * obuffer,int olen,const char * ibuffer,int ilen)4309 STBIDEF int stbi_zlib_decode_noheader_buffer(char *obuffer, int olen, const char *ibuffer, int ilen)
4310 {
4311    stbi__zbuf a;
4312    a.zbuffer = (stbi_uc *) ibuffer;
4313    a.zbuffer_end = (stbi_uc *) ibuffer + ilen;
4314    if (stbi__do_zlib(&a, obuffer, olen, 0, 0))
4315       return (int) (a.zout - a.zout_start);
4316    else
4317       return -1;
4318 }
4319 #endif
4320 
4321 // public domain "baseline" PNG decoder   v0.10  Sean Barrett 2006-11-18
4322 //    simple implementation
//      - 1/2/4/8/16-bit samples (the original v0.10 handled only 8-bit)
4324 //      - no CRC checking
4325 //      - allocates lots of intermediate memory
4326 //        - avoids problem of streaming data between subsystems
4327 //        - avoids explicit window management
4328 //    performance
4329 //      - uses stb_zlib, a PD zlib implementation with fast huffman decoding
4330 
4331 #ifndef STBI_NO_PNG
// Header of one PNG chunk as read by stbi__get_chunk_header().
typedef struct
{
   stbi__uint32 length; // first big-endian 32-bit word of the chunk header
   stbi__uint32 type;   // second big-endian 32-bit word of the chunk header
} stbi__pngchunk;
4337 
stbi__get_chunk_header(stbi__context * s)4338 static stbi__pngchunk stbi__get_chunk_header(stbi__context *s)
4339 {
4340    stbi__pngchunk c;
4341    c.length = stbi__get32be(s);
4342    c.type   = stbi__get32be(s);
4343    return c;
4344 }
4345 
// Consume bytes from 's' and compare them with the 8-byte PNG signature.
// Stops at the first mismatch.
// Returns 1 if all eight bytes match, otherwise the result of stbi__err().
static int stbi__check_png_header(stbi__context *s)
{
   static const stbi_uc png_sig[8] = { 137,80,78,71,13,10,26,10 };
   const stbi_uc *expected = png_sig;
   const stbi_uc *sig_end  = png_sig + 8;

   while (expected != sig_end) {
      if (stbi__get8(s) != *expected)
         return stbi__err("bad png sig","Not a PNG");
      ++expected;
   }
   return 1;
}
4354 
// Working state of the PNG decoder.
typedef struct
{
   stbi__context *s;                // input stream and image info (img_x, img_y, img_n)
   stbi_uc *idata, *expanded, *out; // 'out' receives the decoded pixels; NOTE(review): idata/expanded presumably hold the compressed and inflated image data - confirm against the chunk parser
   int depth;                       // NOTE(review): presumably bits per sample of the file - confirm against the chunk parser
} stbi__png;
4361 
4362 
// Scanline filter types.  Values 0..4 are the filter bytes that may prefix a
// scanline in the file; the remaining two are internal.
enum {
   STBI__F_none=0,   // bytes are stored unmodified
   STBI__F_sub=1,    // each byte was stored relative to the corresponding byte of the pixel to its left
   STBI__F_up=2,     // each byte was stored relative to the byte above it
   STBI__F_avg=3,    // each byte was stored relative to the average of left and above
   STBI__F_paeth=4,  // each byte was stored relative to the paeth predictor of left, above and upper-left
   // synthetic filters used for first scanline to avoid needing a dummy row of 0s
   STBI__F_avg_first,
   STBI__F_paeth_first
};
4373 
4374 static stbi_uc first_row_filter[5] =
4375 {
4376    STBI__F_none,
4377    STBI__F_sub,
4378    STBI__F_none,
4379    STBI__F_avg_first,
4380    STBI__F_paeth_first
4381 };
4382 
// Paeth predictor: of a, b and c, return the one closest to a + b - c.
// Ties are resolved in the order a, then b, then c.
static int stbi__paeth(int a, int b, int c)
{
   int estimate = a + b - c;
   int dist_a = abs(estimate - a);
   int dist_b = abs(estimate - b);
   int dist_c = abs(estimate - c);

   if (dist_a > dist_b || dist_a > dist_c) {
      // a is not the closest; pick between b and c, b winning ties
      return (dist_b <= dist_c) ? b : c;
   }
   return a;
}
4393 
// Multiplier, indexed by bit depth, that stretches a full-range sample of
// that depth to 255 (1-bit: 0xff, 2-bit: 0x55, 4-bit: 0x11, 8-bit: 0x01).
// Entries for other depths are 0.
static const stbi_uc stbi__depth_scale_table[9] = { 0, 0xff, 0x55, 0, 0x11, 0,0,0, 0x01 };
4395 
4396 // create the png data from post-deflated data
stbi__create_png_image_raw(stbi__png * a,stbi_uc * raw,stbi__uint32 raw_len,int out_n,stbi__uint32 x,stbi__uint32 y,int depth,int color)4397 static int stbi__create_png_image_raw(stbi__png *a, stbi_uc *raw, stbi__uint32 raw_len, int out_n, stbi__uint32 x, stbi__uint32 y, int depth, int color)
4398 {
4399    int bytes = (depth == 16? 2 : 1);
4400    stbi__context *s = a->s;
4401    stbi__uint32 i,j,stride = x*out_n*bytes;
4402    stbi__uint32 img_len, img_width_bytes;
4403    int k;
4404    int img_n = s->img_n; // copy it into a local for later
4405 
4406    int output_bytes = out_n*bytes;
4407    int filter_bytes = img_n*bytes;
4408    int width = x;
4409 
4410    STBI_ASSERT(out_n == s->img_n || out_n == s->img_n+1);
4411    a->out = (stbi_uc *) stbi__malloc_mad3(x, y, output_bytes, 0); // extra bytes to write off the end into
4412    if (!a->out) return stbi__err("outofmem", "Out of memory");
4413 
4414    if (!stbi__mad3sizes_valid(img_n, x, depth, 7)) return stbi__err("too large", "Corrupt PNG");
4415    img_width_bytes = (((img_n * x * depth) + 7) >> 3);
4416    img_len = (img_width_bytes + 1) * y;
4417 
4418    // we used to check for exact match between raw_len and img_len on non-interlaced PNGs,
4419    // but issue #276 reported a PNG in the wild that had extra data at the end (all zeros),
4420    // so just check for raw_len < img_len always.
4421    if (raw_len < img_len) return stbi__err("not enough pixels","Corrupt PNG");
4422 
4423    for (j=0; j < y; ++j) {
4424       stbi_uc *cur = a->out + stride*j;
4425       stbi_uc *prior;
4426       int filter = *raw++;
4427 
4428       if (filter > 4)
4429          return stbi__err("invalid filter","Corrupt PNG");
4430 
4431       if (depth < 8) {
4432          STBI_ASSERT(img_width_bytes <= x);
4433          cur += x*out_n - img_width_bytes; // store output to the rightmost img_len bytes, so we can decode in place
4434          filter_bytes = 1;
4435          width = img_width_bytes;
4436       }
4437       prior = cur - stride; // bugfix: need to compute this after 'cur +=' computation above
4438 
4439       // if first row, use special filter that doesn't sample previous row
4440       if (j == 0) filter = first_row_filter[filter];
4441 
4442       // handle first byte explicitly
4443       for (k=0; k < filter_bytes; ++k) {
4444          switch (filter) {
4445             case STBI__F_none       : cur[k] = raw[k]; break;
4446             case STBI__F_sub        : cur[k] = raw[k]; break;
4447             case STBI__F_up         : cur[k] = STBI__BYTECAST(raw[k] + prior[k]); break;
4448             case STBI__F_avg        : cur[k] = STBI__BYTECAST(raw[k] + (prior[k]>>1)); break;
4449             case STBI__F_paeth      : cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(0,prior[k],0)); break;
4450             case STBI__F_avg_first  : cur[k] = raw[k]; break;
4451             case STBI__F_paeth_first: cur[k] = raw[k]; break;
4452          }
4453       }
4454 
4455       if (depth == 8) {
4456          if (img_n != out_n)
4457             cur[img_n] = 255; // first pixel
4458          raw += img_n;
4459          cur += out_n;
4460          prior += out_n;
4461       } else if (depth == 16) {
4462          if (img_n != out_n) {
4463             cur[filter_bytes]   = 255; // first pixel top byte
4464             cur[filter_bytes+1] = 255; // first pixel bottom byte
4465          }
4466          raw += filter_bytes;
4467          cur += output_bytes;
4468          prior += output_bytes;
4469       } else {
4470          raw += 1;
4471          cur += 1;
4472          prior += 1;
4473       }
4474 
4475       // this is a little gross, so that we don't switch per-pixel or per-component
4476       if (depth < 8 || img_n == out_n) {
4477          int nk = (width - 1)*filter_bytes;
4478          #define STBI__CASE(f) \
4479              case f:     \
4480                 for (k=0; k < nk; ++k)
4481          switch (filter) {
4482             // "none" filter turns into a memcpy here; make that explicit.
4483             case STBI__F_none:         memcpy(cur, raw, nk); break;
4484             STBI__CASE(STBI__F_sub)          { cur[k] = STBI__BYTECAST(raw[k] + cur[k-filter_bytes]); } break;
4485             STBI__CASE(STBI__F_up)           { cur[k] = STBI__BYTECAST(raw[k] + prior[k]); } break;
4486             STBI__CASE(STBI__F_avg)          { cur[k] = STBI__BYTECAST(raw[k] + ((prior[k] + cur[k-filter_bytes])>>1)); } break;
4487             STBI__CASE(STBI__F_paeth)        { cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(cur[k-filter_bytes],prior[k],prior[k-filter_bytes])); } break;
4488             STBI__CASE(STBI__F_avg_first)    { cur[k] = STBI__BYTECAST(raw[k] + (cur[k-filter_bytes] >> 1)); } break;
4489             STBI__CASE(STBI__F_paeth_first)  { cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(cur[k-filter_bytes],0,0)); } break;
4490          }
4491          #undef STBI__CASE
4492          raw += nk;
4493       } else {
4494          STBI_ASSERT(img_n+1 == out_n);
4495          #define STBI__CASE(f) \
4496              case f:     \
4497                 for (i=x-1; i >= 1; --i, cur[filter_bytes]=255,raw+=filter_bytes,cur+=output_bytes,prior+=output_bytes) \
4498                    for (k=0; k < filter_bytes; ++k)
4499          switch (filter) {
4500             STBI__CASE(STBI__F_none)         { cur[k] = raw[k]; } break;
4501             STBI__CASE(STBI__F_sub)          { cur[k] = STBI__BYTECAST(raw[k] + cur[k- output_bytes]); } break;
4502             STBI__CASE(STBI__F_up)           { cur[k] = STBI__BYTECAST(raw[k] + prior[k]); } break;
4503             STBI__CASE(STBI__F_avg)          { cur[k] = STBI__BYTECAST(raw[k] + ((prior[k] + cur[k- output_bytes])>>1)); } break;
4504             STBI__CASE(STBI__F_paeth)        { cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(cur[k- output_bytes],prior[k],prior[k- output_bytes])); } break;
4505             STBI__CASE(STBI__F_avg_first)    { cur[k] = STBI__BYTECAST(raw[k] + (cur[k- output_bytes] >> 1)); } break;
4506             STBI__CASE(STBI__F_paeth_first)  { cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(cur[k- output_bytes],0,0)); } break;
4507          }
4508          #undef STBI__CASE
4509 
4510          // the loop above sets the high byte of the pixels' alpha, but for
4511          // 16 bit png files we also need the low byte set. we'll do that here.
4512          if (depth == 16) {
4513             cur = a->out + stride*j; // start at the beginning of the row again
4514             for (i=0; i < x; ++i,cur+=output_bytes) {
4515                cur[filter_bytes+1] = 255;
4516             }
4517          }
4518       }
4519    }
4520 
4521    // we make a separate pass to expand bits to pixels; for performance,
4522    // this could run two scanlines behind the above code, so it won't
4523    // intefere with filtering but will still be in the cache.
4524    if (depth < 8) {
4525       for (j=0; j < y; ++j) {
4526          stbi_uc *cur = a->out + stride*j;
4527          stbi_uc *in  = a->out + stride*j + x*out_n - img_width_bytes;
4528          // unpack 1/2/4-bit into a 8-bit buffer. allows us to keep the common 8-bit path optimal at minimal cost for 1/2/4-bit
4529          // png guarante byte alignment, if width is not multiple of 8/4/2 we'll decode dummy trailing data that will be skipped in the later loop
4530          stbi_uc scale = (color == 0) ? stbi__depth_scale_table[depth] : 1; // scale grayscale values to 0..255 range
4531 
4532          // note that the final byte might overshoot and write more data than desired.
4533          // we can allocate enough data that this never writes out of memory, but it
4534          // could also overwrite the next scanline. can it overwrite non-empty data
4535          // on the next scanline? yes, consider 1-pixel-wide scanlines with 1-bit-per-pixel.
4536          // so we need to explicitly clamp the final ones
4537 
4538          if (depth == 4) {
4539             for (k=x*img_n; k >= 2; k-=2, ++in) {
4540                *cur++ = scale * ((*in >> 4)       );
4541                *cur++ = scale * ((*in     ) & 0x0f);
4542             }
4543             if (k > 0) *cur++ = scale * ((*in >> 4)       );
4544          } else if (depth == 2) {
4545             for (k=x*img_n; k >= 4; k-=4, ++in) {
4546                *cur++ = scale * ((*in >> 6)       );
4547                *cur++ = scale * ((*in >> 4) & 0x03);
4548                *cur++ = scale * ((*in >> 2) & 0x03);
4549                *cur++ = scale * ((*in     ) & 0x03);
4550             }
4551             if (k > 0) *cur++ = scale * ((*in >> 6)       );
4552             if (k > 1) *cur++ = scale * ((*in >> 4) & 0x03);
4553             if (k > 2) *cur++ = scale * ((*in >> 2) & 0x03);
4554          } else if (depth == 1) {
4555             for (k=x*img_n; k >= 8; k-=8, ++in) {
4556                *cur++ = scale * ((*in >> 7)       );
4557                *cur++ = scale * ((*in >> 6) & 0x01);
4558                *cur++ = scale * ((*in >> 5) & 0x01);
4559                *cur++ = scale * ((*in >> 4) & 0x01);
4560                *cur++ = scale * ((*in >> 3) & 0x01);
4561                *cur++ = scale * ((*in >> 2) & 0x01);
4562                *cur++ = scale * ((*in >> 1) & 0x01);
4563                *cur++ = scale * ((*in     ) & 0x01);
4564             }
4565             if (k > 0) *cur++ = scale * ((*in >> 7)       );
4566             if (k > 1) *cur++ = scale * ((*in >> 6) & 0x01);
4567             if (k > 2) *cur++ = scale * ((*in >> 5) & 0x01);
4568             if (k > 3) *cur++ = scale * ((*in >> 4) & 0x01);
4569             if (k > 4) *cur++ = scale * ((*in >> 3) & 0x01);
4570             if (k > 5) *cur++ = scale * ((*in >> 2) & 0x01);
4571             if (k > 6) *cur++ = scale * ((*in >> 1) & 0x01);
4572          }
4573          if (img_n != out_n) {
4574             int q;
4575             // insert alpha = 255
4576             cur = a->out + stride*j;
4577             if (img_n == 1) {
4578                for (q=x-1; q >= 0; --q) {
4579                   cur[q*2+1] = 255;
4580                   cur[q*2+0] = cur[q];
4581                }
4582             } else {
4583                STBI_ASSERT(img_n == 3);
4584                for (q=x-1; q >= 0; --q) {
4585                   cur[q*4+3] = 255;
4586                   cur[q*4+2] = cur[q*3+2];
4587                   cur[q*4+1] = cur[q*3+1];
4588                   cur[q*4+0] = cur[q*3+0];
4589                }
4590             }
4591          }
4592       }
4593    } else if (depth == 16) {
4594       // force the image data from big-endian to platform-native.
4595       // this is done in a separate pass due to the decoding relying
4596       // on the data being untouched, but could probably be done
4597       // per-line during decode if care is taken.
4598       stbi_uc *cur = a->out;
4599       stbi__uint16 *cur16 = (stbi__uint16*)cur;
4600 
4601       for(i=0; i < x*y*out_n; ++i,cur16++,cur+=2) {
4602          *cur16 = (cur[0] << 8) | cur[1];
4603       }
4604    }
4605 
4606    return 1;
4607 }
4608 
// Produces the decoded image in a->out from the inflated scanline data.
//
// Non-interlaced images are handed straight to stbi__create_png_image_raw.
// For interlaced images each of the 7 passes is decoded as its own small
// image (stbi__create_png_image_raw leaves it in a->out) and its pixels are
// then scattered to their final positions in a full-size buffer, which
// becomes a->out once all passes are done.
//
//   image_data, image_data_len - inflated scanline data and its length in bytes
//   out_n                      - channels per output pixel
//   depth                      - bits per channel (16 means 2 bytes per channel in the output)
//   color, interlaced          - passed in by the caller from the parsed header
//
// Returns 1 on success, 0 on failure (out of memory, or a pass failed to decode).
static int stbi__create_png_image(stbi__png *a, stbi_uc *image_data, stbi__uint32 image_data_len, int out_n, int depth, int color, int interlaced)
{
   int bytes = (depth == 16 ? 2 : 1);
   int out_bytes = out_n * bytes;
   stbi_uc *final;
   int p;
   if (!interlaced)
      return stbi__create_png_image_raw(a, image_data, image_data_len, out_n, a->s->img_x, a->s->img_y, depth, color);

   // de-interlacing
   final = (stbi_uc *) stbi__malloc_mad3(a->s->img_x, a->s->img_y, out_bytes, 0);
   // the allocation can fail (or be refused on size overflow); writing into NULL below would crash
   if (!final) return stbi__err("outofmem", "Out of memory");
   for (p=0; p < 7; ++p) {
      // per-pass origin and spacing of the pixels belonging to that pass
      static const int xorig[] = { 0,4,0,2,0,1,0 };
      static const int yorig[] = { 0,0,4,0,2,0,1 };
      static const int xspc[]  = { 8,8,4,4,2,2,1 };
      static const int yspc[]  = { 8,8,8,4,4,2,2 };
      int i,j,x,y;
      // size of this pass, rounding up; e.g. for p == 1 (xorig 4, xspc 8)
      // an image 4 wide gives x = 0, 5 wide gives x = 1, 12 wide gives x = 1
      x = (a->s->img_x - xorig[p] + xspc[p]-1) / xspc[p];
      y = (a->s->img_y - yorig[p] + yspc[p]-1) / yspc[p];
      if (x && y) {
         // bytes this pass occupies in image_data: packed row bytes plus one filter byte per row
         stbi__uint32 img_len = ((((a->s->img_n * x * depth) + 7) >> 3) + 1) * y;
         if (!stbi__create_png_image_raw(a, image_data, image_data_len, out_n, x, y, depth, color)) {
            STBI_FREE(final);
            return 0;
         }
         // scatter the pass pixels to their positions in the full image
         for (j=0; j < y; ++j) {
            for (i=0; i < x; ++i) {
               int out_y = j*yspc[p]+yorig[p];
               int out_x = i*xspc[p]+xorig[p];
               memcpy(final + out_y*a->s->img_x*out_bytes + out_x*out_bytes,
                      a->out + (j*x+i)*out_bytes, out_bytes);
            }
         }
         STBI_FREE(a->out);
         a->out = NULL; // don't leave a dangling pointer behind between passes
         image_data += img_len;
         image_data_len -= img_len;
      }
   }
   a->out = final;

   return 1;
}
4652 
// Applies color-key transparency to an 8-bit-per-channel image in z->out.
// The buffer must already hold out_n (2 or 4) channels per pixel.
//   out_n == 2: alpha becomes 0 where the gray value equals tc[0], 255 elsewhere
//   out_n == 4: alpha becomes 0 where all three color channels equal tc[0..2];
//               other pixels keep the alpha they already have
// Always returns 1.
static int stbi__compute_transparency(stbi__png *z, stbi_uc tc[3], int out_n)
{
   stbi__context *ctx = z->s;
   stbi__uint32 remaining = ctx->img_x * ctx->img_y;
   stbi_uc *px = z->out;

   STBI_ASSERT(out_n == 2 || out_n == 4);

   if (out_n == 2) {
      for (; remaining > 0; --remaining, px += 2)
         px[1] = (px[0] == tc[0]) ? 0 : 255;
      return 1;
   }

   for (; remaining > 0; --remaining, px += 4) {
      if (px[0] == tc[0] && px[1] == tc[1] && px[2] == tc[2])
         px[3] = 0;
   }
   return 1;
}
4677 
// 16-bit-per-channel counterpart of stbi__compute_transparency: z->out is
// treated as an array of stbi__uint16 with out_n (2 or 4) channels per pixel.
//   out_n == 2: alpha becomes 0 where the gray value equals tc[0], 65535 elsewhere
//   out_n == 4: alpha becomes 0 where all three color channels equal tc[0..2];
//               other pixels keep the alpha they already have
// Always returns 1.
static int stbi__compute_transparency16(stbi__png *z, stbi__uint16 tc[3], int out_n)
{
   stbi__context *ctx = z->s;
   stbi__uint32 remaining = ctx->img_x * ctx->img_y;
   stbi__uint16 *px = (stbi__uint16*) z->out;

   STBI_ASSERT(out_n == 2 || out_n == 4);

   if (out_n == 2) {
      while (remaining > 0) {
         px[1] = (px[0] == tc[0]) ? 0 : 65535;
         px += 2;
         --remaining;
      }
   } else {
      while (remaining > 0) {
         int is_key = (px[0] == tc[0] && px[1] == tc[1] && px[2] == tc[2]);
         if (is_key)
            px[3] = 0;
         px += 4;
         --remaining;
      }
   }
   return 1;
}
4702 
// Replaces the one-byte-per-pixel palette indices in a->out with expanded
// color data looked up in `palette` (4 bytes per entry).
//   pal_img_n == 3: writes the first three bytes of each entry
//   otherwise:      writes all four bytes of each entry
// The old a->out buffer is freed and a->out points at the new buffer.
// `len` is not used. Returns 1 on success, 0 if the allocation fails.
static int stbi__expand_png_palette(stbi__png *a, stbi_uc *palette, int len, int pal_img_n)
{
   stbi__uint32 idx, npixels = a->s->img_x * a->s->img_y;
   stbi_uc *indices = a->out;
   stbi_uc *expanded, *dst;

   STBI_NOTUSED(len);

   expanded = (stbi_uc *) stbi__malloc_mad2(npixels, pal_img_n, 0);
   if (expanded == NULL) return stbi__err("outofmem", "Out of memory");

   // from here until the old buffer is released, returning early would leak
   dst = expanded;

   if (pal_img_n == 3) {
      for (idx = 0; idx < npixels; ++idx, dst += 3) {
         int base = indices[idx]*4;
         dst[0] = palette[base  ];
         dst[1] = palette[base+1];
         dst[2] = palette[base+2];
      }
   } else {
      for (idx = 0; idx < npixels; ++idx, dst += 4) {
         int base = indices[idx]*4;
         dst[0] = palette[base  ];
         dst[1] = palette[base+1];
         dst[2] = palette[base+2];
         dst[3] = palette[base+3];
      }
   }

   STBI_FREE(a->out);
   a->out = expanded;

   return 1;
}
4739 
// File-scope settings, both off by default.
// stbi__unpremultiply_on_load: when nonzero, stbi__de_iphone also divides the
// color channels of 4-channel images by alpha while swapping channels.
static int stbi__unpremultiply_on_load = 0;
// stbi__de_iphone_flag: when nonzero, PNGs containing a CgBI chunk are passed
// through stbi__de_iphone after decoding (only when more than 2 output channels).
static int stbi__de_iphone_flag = 0;
4742 
// Stores the flag consulted by stbi__de_iphone: nonzero requests that
// 4-channel pixels be un-premultiplied (color divided by alpha) during the
// channel swap. The setting is a file-scope variable.
STBIDEF void stbi_set_unpremultiply_on_load(int flag_true_if_should_unpremultiply)
{
   stbi__unpremultiply_on_load = flag_true_if_should_unpremultiply;
}
4747 
// Stores the flag checked after decoding a PNG that contained a CgBI chunk:
// nonzero requests the channel swap performed by stbi__de_iphone.
// The setting is a file-scope variable.
STBIDEF void stbi_convert_iphone_png_to_rgb(int flag_true_if_should_convert)
{
   stbi__de_iphone_flag = flag_true_if_should_convert;
}
4752 
// Swaps channel 0 and channel 2 of every pixel in z->out (bgr -> rgb).
// For 4-channel images, when stbi__unpremultiply_on_load is set, the three
// color channels are additionally divided by alpha (rounded to nearest);
// pixels with alpha 0 are only swapped.
static void stbi__de_iphone(stbi__png *z)
{
   stbi__context *ctx = z->s;
   stbi__uint32 n, npixels = ctx->img_x * ctx->img_y;
   stbi_uc *px = z->out;

   if (ctx->img_out_n == 3) {
      // convert bgr to rgb
      for (n = 0; n < npixels; ++n, px += 3) {
         stbi_uc first = px[0];
         px[0] = px[2];
         px[2] = first;
      }
      return;
   }

   STBI_ASSERT(ctx->img_out_n == 4);

   if (!stbi__unpremultiply_on_load) {
      // convert bgr to rgb
      for (n = 0; n < npixels; ++n, px += 4) {
         stbi_uc first = px[0];
         px[0] = px[2];
         px[2] = first;
      }
      return;
   }

   // convert bgr to rgb and unpremultiply
   for (n = 0; n < npixels; ++n, px += 4) {
      stbi_uc alpha = px[3];
      stbi_uc first = px[0];
      if (alpha == 0) {
         // nothing to divide by: plain swap
         px[0] = px[2];
         px[2] = first;
      } else {
         stbi_uc half = alpha / 2; // added before dividing so the result rounds to nearest
         px[0] = (px[2] * 255 + half) / alpha;
         px[1] = (px[1] * 255 + half) / alpha;
         px[2] = (first * 255 + half) / alpha;
      }
   }
}
4795 
// Packs four chunk-type characters into one unsigned value, first character
// in the most significant byte; used as case labels when switching on a
// chunk header's type field.
#define STBI__PNG_TYPE(a,b,c,d)  (((unsigned) (a) << 24) + ((unsigned) (b) << 16) + ((unsigned) (c) << 8) + (unsigned) (d))
4797 
// Walks the PNG stream chunk by chunk and, depending on `scan`, stops early
// or decodes the whole image.
//
//   scan == STBI__SCAN_type:   only the PNG signature is checked.
//   scan == STBI__SCAN_header: returns as soon as size and component count
//                              are known (s->img_x, s->img_y, s->img_n, z->depth).
//   scan == STBI__SCAN_load:   on IEND the collected IDAT data is inflated and
//                              the decoded pixels are left in z->out.
//   req_comp: requested channel count; used to decide whether an alpha
//             channel is added during decoding and the palette expansion width.
//
// Returns 1 on success, 0 on failure. Most failure paths record a reason
// through stbi__err. z->idata / z->expanded / z->out may still be allocated
// on failure; stbi__do_png frees them after calling this function.
static int stbi__parse_png_file(stbi__png *z, int scan, int req_comp)
{
   // palette: up to 256 entries of 4 bytes each (r,g,b,a)
   // pal_img_n: 0 = not paletted, 3 = paletted, 4 = paletted with tRNS alpha
   stbi_uc palette[1024], pal_img_n=0;
   // has_trans: a color-key tRNS was seen on a non-paletted image; tc/tc16 hold the key
   stbi_uc has_trans=0, tc[3]={0};
   stbi__uint16 tc16[3];
   // ioff: bytes of IDAT data collected so far; idata_limit: capacity of z->idata
   stbi__uint32 ioff=0, idata_limit=0, i, pal_len=0;
   // first: still waiting for IHDR
   int first=1,k,interlace=0, color=0, is_iphone=0;
   stbi__context *s = z->s;

   z->expanded = NULL;
   z->idata = NULL;
   z->out = NULL;

   if (!stbi__check_png_header(s)) return 0;

   if (scan == STBI__SCAN_type) return 1;

   for (;;) {
      stbi__pngchunk c = stbi__get_chunk_header(s);
      switch (c.type) {
         // CgBI marks the file as an "iphone" PNG; the chunk body is skipped
         case STBI__PNG_TYPE('C','g','B','I'):
            is_iphone = 1;
            stbi__skip(s, c.length);
            break;
         case STBI__PNG_TYPE('I','H','D','R'): {
            int comp,filter;
            if (!first) return stbi__err("multiple IHDR","Corrupt PNG");
            first = 0;
            if (c.length != 13) return stbi__err("bad IHDR len","Corrupt PNG");
            s->img_x = stbi__get32be(s); if (s->img_x > (1 << 24)) return stbi__err("too large","Very large image (corrupt?)");
            s->img_y = stbi__get32be(s); if (s->img_y > (1 << 24)) return stbi__err("too large","Very large image (corrupt?)");
            z->depth = stbi__get8(s);  if (z->depth != 1 && z->depth != 2 && z->depth != 4 && z->depth != 8 && z->depth != 16)  return stbi__err("1/2/4/8/16-bit only","PNG not supported: 1/2/4/8/16-bit only");
            color = stbi__get8(s);  if (color > 6)         return stbi__err("bad ctype","Corrupt PNG");
            if (color == 3 && z->depth == 16)                  return stbi__err("bad ctype","Corrupt PNG");
            // color type 3 is paletted; any other type with bit 0 set is rejected
            if (color == 3) pal_img_n = 3; else if (color & 1) return stbi__err("bad ctype","Corrupt PNG");
            comp  = stbi__get8(s);  if (comp) return stbi__err("bad comp method","Corrupt PNG");
            filter= stbi__get8(s);  if (filter) return stbi__err("bad filter method","Corrupt PNG");
            interlace = stbi__get8(s); if (interlace>1) return stbi__err("bad interlace method","Corrupt PNG");
            if (!s->img_x || !s->img_y) return stbi__err("0-pixel image","Corrupt PNG");
            if (!pal_img_n) {
               // bit 1 of color type: rgb (3 channels) vs gray (1); bit 2: alpha channel present
               s->img_n = (color & 2 ? 3 : 1) + (color & 4 ? 1 : 0);
               // reject images whose x*y*n would exceed 1<<30 (done by division so nothing overflows)
               if ((1 << 30) / s->img_x / s->img_n < s->img_y) return stbi__err("too large", "Image too large to decode");
               if (scan == STBI__SCAN_header) return 1;
            } else {
               // if paletted, then pal_n is our final components, and
               // img_n is # components to decompress/filter.
               s->img_n = 1;
               if ((1 << 30) / s->img_x / 4 < s->img_y) return stbi__err("too large","Corrupt PNG");
               // if SCAN_header, have to scan to see if we have a tRNS
            }
            break;
         }

         case STBI__PNG_TYPE('P','L','T','E'):  {
            if (first) return stbi__err("first not IHDR", "Corrupt PNG");
            if (c.length > 256*3) return stbi__err("invalid PLTE","Corrupt PNG");
            pal_len = c.length / 3;
            if (pal_len * 3 != c.length) return stbi__err("invalid PLTE","Corrupt PNG");
            // stored as 4 bytes per entry; alpha defaults to opaque and may be overwritten by tRNS
            for (i=0; i < pal_len; ++i) {
               palette[i*4+0] = stbi__get8(s);
               palette[i*4+1] = stbi__get8(s);
               palette[i*4+2] = stbi__get8(s);
               palette[i*4+3] = 255;
            }
            break;
         }

         case STBI__PNG_TYPE('t','R','N','S'): {
            if (first) return stbi__err("first not IHDR", "Corrupt PNG");
            if (z->idata) return stbi__err("tRNS after IDAT","Corrupt PNG");
            if (pal_img_n) {
               // paletted: tRNS supplies one alpha byte per palette entry
               if (scan == STBI__SCAN_header) { s->img_n = 4; return 1; }
               if (pal_len == 0) return stbi__err("tRNS before PLTE","Corrupt PNG");
               if (c.length > pal_len) return stbi__err("bad tRNS len","Corrupt PNG");
               pal_img_n = 4;
               for (i=0; i < c.length; ++i)
                  palette[i*4+3] = stbi__get8(s);
            } else {
               // non-paletted: tRNS supplies a single color key, one 16-bit value per channel;
               // only allowed when the image has no alpha channel (img_n odd)
               if (!(s->img_n & 1)) return stbi__err("tRNS with alpha","Corrupt PNG");
               if (c.length != (stbi__uint32) s->img_n*2) return stbi__err("bad tRNS len","Corrupt PNG");
               has_trans = 1;
               if (z->depth == 16) {
                  for (k = 0; k < s->img_n; ++k) tc16[k] = (stbi__uint16)stbi__get16be(s); // copy the values as-is
               } else {
                  for (k = 0; k < s->img_n; ++k) tc[k] = (stbi_uc)(stbi__get16be(s) & 255) * stbi__depth_scale_table[z->depth]; // non 8-bit images will be larger
               }
            }
            break;
         }

         case STBI__PNG_TYPE('I','D','A','T'): {
            if (first) return stbi__err("first not IHDR", "Corrupt PNG");
            if (pal_img_n && !pal_len) return stbi__err("no PLTE","Corrupt PNG");
            // a paletted header scan that found no tRNS ends here
            if (scan == STBI__SCAN_header) { s->img_n = pal_img_n; return 1; }
            // NOTE(review): this overflow guard returns 0 without recording an error reason
            if ((int)(ioff + c.length) < (int)ioff) return 0;
            // grow z->idata (doubling) until this chunk fits after the data already collected
            if (ioff + c.length > idata_limit) {
               stbi__uint32 idata_limit_old = idata_limit;
               stbi_uc *p;
               if (idata_limit == 0) idata_limit = c.length > 4096 ? c.length : 4096;
               while (ioff + c.length > idata_limit)
                  idata_limit *= 2;
               STBI_NOTUSED(idata_limit_old);
               p = (stbi_uc *) STBI_REALLOC_SIZED(z->idata, idata_limit_old, idata_limit); if (p == NULL) return stbi__err("outofmem", "Out of memory");
               z->idata = p;
            }
            if (!stbi__getn(s, z->idata+ioff,c.length)) return stbi__err("outofdata","Corrupt PNG");
            ioff += c.length;
            break;
         }

         case STBI__PNG_TYPE('I','E','N','D'): {
            stbi__uint32 raw_len, bpl;
            if (first) return stbi__err("first not IHDR", "Corrupt PNG");
            if (scan != STBI__SCAN_load) return 1;
            if (z->idata == NULL) return stbi__err("no IDAT","Corrupt PNG");
            // initial guess for decoded data size to avoid unnecessary reallocs
            bpl = (s->img_x * z->depth + 7) / 8; // bytes per line, per component
            raw_len = bpl * s->img_y * s->img_n /* pixels */ + s->img_y /* filter mode per row */;
            // raw_len is both the size guess passed in and receives the actual inflated length
            z->expanded = (stbi_uc *) stbi_zlib_decode_malloc_guesssize_headerflag((char *) z->idata, ioff, raw_len, (int *) &raw_len, !is_iphone);
            if (z->expanded == NULL) return 0; // zlib should set error
            STBI_FREE(z->idata); z->idata = NULL;
            // decode with one extra (alpha) channel when the caller asked for exactly img_n+1
            // channels on a non-paletted image, or when a color key needs an alpha channel
            if ((req_comp == s->img_n+1 && req_comp != 3 && !pal_img_n) || has_trans)
               s->img_out_n = s->img_n+1;
            else
               s->img_out_n = s->img_n;
            if (!stbi__create_png_image(z, z->expanded, raw_len, s->img_out_n, z->depth, color, interlace)) return 0;
            if (has_trans) {
               if (z->depth == 16) {
                  if (!stbi__compute_transparency16(z, tc16, s->img_out_n)) return 0;
               } else {
                  if (!stbi__compute_transparency(z, tc, s->img_out_n)) return 0;
               }
            }
            if (is_iphone && stbi__de_iphone_flag && s->img_out_n > 2)
               stbi__de_iphone(z);
            if (pal_img_n) {
               // pal_img_n == 3 or 4
               s->img_n = pal_img_n; // record the actual colors we had
               s->img_out_n = pal_img_n;
               if (req_comp >= 3) s->img_out_n = req_comp;
               if (!stbi__expand_png_palette(z, palette, pal_len, s->img_out_n))
                  return 0;
            } else if (has_trans) {
               // non-paletted image with tRNS -> source image has (constant) alpha
               ++s->img_n;
            }
            STBI_FREE(z->expanded); z->expanded = NULL;
            return 1;
         }

         default:
            // if critical, fail
            if (first) return stbi__err("first not IHDR", "Corrupt PNG");
            // bit 29 of the packed type is bit 5 (0x20) of the first type character;
            // when it is clear the chunk is treated as one we must understand
            if ((c.type & (1 << 29)) == 0) {
               #ifndef STBI_NO_FAILURE_STRINGS
               // not threadsafe
               static char invalid_chunk[] = "XXXX PNG chunk not known";
               invalid_chunk[0] = STBI__BYTECAST(c.type >> 24);
               invalid_chunk[1] = STBI__BYTECAST(c.type >> 16);
               invalid_chunk[2] = STBI__BYTECAST(c.type >>  8);
               invalid_chunk[3] = STBI__BYTECAST(c.type >>  0);
               #endif
               return stbi__err(invalid_chunk, "PNG not supported: unknown PNG chunk type");
            }
            stbi__skip(s, c.length);
            break;
      }
      // end of PNG chunk, read and skip CRC
      stbi__get32be(s);
   }
}
4969 
// Decodes a whole PNG through stbi__parse_png_file and hands the pixel buffer
// to the caller.
//   x, y     - receive the image size
//   n        - if non-NULL, receives the channel count recorded for the file (img_n)
//   req_comp - 0 to keep the decoded channel count, or 1..4 to convert to it
//   ri       - receives bits_per_channel (8 for depths below 8, else the file's depth)
// Returns the pixel buffer, or NULL on failure. Any decoder buffers still
// held by `p` are released before a normal return.
static void *stbi__do_png(stbi__png *p, int *x, int *y, int *n, int req_comp, stbi__result_info *ri)
{
   void *pixels = NULL;

   if (req_comp < 0 || req_comp > 4) return stbi__errpuc("bad req_comp", "Internal error");

   if (stbi__parse_png_file(p, STBI__SCAN_load, req_comp)) {
      stbi__context *ctx = p->s;

      ri->bits_per_channel = (p->depth < 8) ? 8 : p->depth;

      // take ownership of the decoded buffer so the cleanup below doesn't free it
      pixels = p->out;
      p->out = NULL;

      if (req_comp != 0 && req_comp != ctx->img_out_n) {
         if (ri->bits_per_channel == 8)
            pixels = stbi__convert_format((unsigned char *) pixels, ctx->img_out_n, req_comp, ctx->img_x, ctx->img_y);
         else
            pixels = stbi__convert_format16((stbi__uint16 *) pixels, ctx->img_out_n, req_comp, ctx->img_x, ctx->img_y);
         ctx->img_out_n = req_comp;
         if (pixels == NULL) return pixels;
      }

      *x = ctx->img_x;
      *y = ctx->img_y;
      if (n) *n = ctx->img_n;
   }

   STBI_FREE(p->out);      p->out      = NULL;
   STBI_FREE(p->expanded); p->expanded = NULL;
   STBI_FREE(p->idata);    p->idata    = NULL;

   return pixels;
}
4999 
// PNG entry point for loading: wraps the context in a decoder state and
// forwards everything to stbi__do_png.
static void *stbi__png_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri)
{
   stbi__png png;
   png.s = s;
   return stbi__do_png(&png, x, y, comp, req_comp, ri);
}
5006 
// Reports whether the stream passes stbi__check_png_header, then rewinds
// the stream regardless of the outcome.
static int stbi__png_test(stbi__context *s)
{
   int is_png = stbi__check_png_header(s);
   stbi__rewind(s);
   return is_png;
}
5014 
// Runs a header-only parse and reports size and channel count through
// whichever of x, y, comp are non-NULL. Returns 1 on success; on failure
// the stream is rewound and 0 is returned.
static int stbi__png_info_raw(stbi__png *p, int *x, int *y, int *comp)
{
   if (stbi__parse_png_file(p, STBI__SCAN_header, 0)) {
      stbi__context *ctx = p->s;
      if (x) *x = ctx->img_x;
      if (y) *y = ctx->img_y;
      if (comp) *comp = ctx->img_n;
      return 1;
   }
   stbi__rewind( p->s );
   return 0;
}
5026 
// PNG entry point for querying image info: wraps the context in a decoder
// state and forwards to stbi__png_info_raw.
static int stbi__png_info(stbi__context *s, int *x, int *y, int *comp)
{
   stbi__png png;
   png.s = s;
   return stbi__png_info_raw(&png, x, y, comp);
}
5033 
// Returns 1 when the stream is a PNG whose header declares 16 bits per
// channel, 0 otherwise. The stream is rewound on the "not 16-bit" paths
// (directly here, or inside stbi__png_info_raw when the header parse fails).
static int stbi__png_is16(stbi__context *s)
{
   stbi__png png;
   png.s = s;
   if (!stbi__png_info_raw(&png, NULL, NULL, NULL))
      return 0;
   if (png.depth == 16)
      return 1;
   stbi__rewind(png.s);
   return 0;
}
5046 #endif
5047 
5048 // Microsoft/Windows BMP image
5049 
5050 #ifndef STBI_NO_BMP
// Checks for the 'B','M' signature followed by a recognised info-header
// size (12, 40, 56, 108 or 124). Consumes bytes from the stream and does
// not rewind it. Returns 1 on a match, 0 otherwise.
static int stbi__bmp_test_raw(stbi__context *s)
{
   int header_size;
   if (stbi__get8(s) != 'B') return 0;
   if (stbi__get8(s) != 'M') return 0;
   stbi__get32le(s); // discard filesize
   stbi__get16le(s); // discard reserved
   stbi__get16le(s); // discard reserved
   stbi__get32le(s); // discard data offset
   header_size = stbi__get32le(s);
   switch (header_size) {
      case 12:
      case 40:
      case 56:
      case 108:
      case 124:
         return 1;
      default:
         return 0;
   }
}
5065 
// Reports whether the stream looks like a BMP (see stbi__bmp_test_raw),
// then rewinds the stream regardless of the outcome.
static int stbi__bmp_test(stbi__context *s)
{
   int is_bmp = stbi__bmp_test_raw(s);
   stbi__rewind(s);
   return is_bmp;
}
5072 
5073 
// returns the index (0..31) of the highest set bit of z, or -1 when z is 0
static int stbi__high_bit(unsigned int z)
{
   int bit = 0;
   int step;
   if (z == 0) return -1;
   // binary search: test the upper half of the remaining range, halving each time
   for (step = 16; step > 0; step >>= 1) {
      if ((z >> step) != 0) {
         bit += step;
         z >>= step;
      }
   }
   return bit;
}
5086 
// returns the number of set bits in a (0..32)
static int stbi__bitcount(unsigned int a)
{
   int count = 0;
   while (a != 0) {
      a &= a - 1; // clears the lowest set bit
      ++count;
   }
   return count;
}
5096 
// extract an arbitrarily-aligned N-bit value (N=bits)
// from v, and then make it 8-bits long and fractionally
// extend it to the full 0..255 range.
//
//   v     - pixel value already masked down to one channel
//   shift - right shift (left shift when negative) that puts the channel's
//           highest bit at bit 7
//   bits  - number of bits in the channel
//
// The top `bits` bits of the aligned value are replicated to fill 8 bits:
// multiplying by mul_table[bits] lays copies of the value side by side and
// shift_table[bits] drops the excess low bits.
// Channels wider than 8 bits are treated as 8-bit: after alignment the value
// already holds the top 8 bits, so it is returned unchanged. Without this
// clamp `8-bits` would be a negative shift count and the table lookups
// would read out of bounds.
static int stbi__shiftsigned(unsigned int v, int shift, int bits)
{
   static const unsigned int mul_table[9] = {
      0,
      0xff/*0b11111111*/, 0x55/*0b01010101*/, 0x49/*0b01001001*/, 0x11/*0b00010001*/,
      0x21/*0b00100001*/, 0x41/*0b01000001*/, 0x81/*0b10000001*/, 0x01/*0b00000001*/,
   };
   static const unsigned int shift_table[9] = {
      0, 0,0,1,0,2,4,6,0,
   };
   // validate before bits is used as a shift count / table index
   STBI_ASSERT(bits >= 0);
   if (bits > 8) bits = 8;
   if (shift < 0)
      v <<= -shift;
   else
      v >>= shift;
   STBI_ASSERT(v < 256);
   v >>= (8-bits);
   return (int) ((unsigned) v * mul_table[bits]) >> shift_table[bits];
}
5119 
// Header values collected by stbi__bmp_parse_header for the BMP loader.
typedef struct
{
   int bpp;            // bits per pixel, as read from the header
   int offset;         // data offset field read from the file header
   int hsz;            // info header size (12, 40, 56, 108 or 124)
   unsigned int mr;    // red channel mask (0 when none was set)
   unsigned int mg;    // green channel mask
   unsigned int mb;    // blue channel mask
   unsigned int ma;    // alpha channel mask
   unsigned int all_a; // set to 0 by the header parser for uncompressed 32bpp with a 40/56-byte header
} stbi__bmp_data;
5125 
// Reads the BMP file header and info header from `s`.
//
// On success fills info->offset, info->hsz, info->bpp and the channel masks
// info->mr/mg/mb/ma (left at 0 when the header provides none), stores the
// image size in s->img_x / s->img_y, and returns a non-NULL dummy pointer.
// info->all_a is only written (set to 0) for uncompressed 32bpp images with
// a 40- or 56-byte header; callers initialise it beforehand.
//
// Returns NULL, with the reason recorded through stbi__errpuc, when the
// signature or header size is unknown, or when the compression method is
// anything other than 0 (uncompressed) or 3 (bitfields).
static void *stbi__bmp_parse_header(stbi__context *s, stbi__bmp_data *info)
{
   int hsz;
   if (stbi__get8(s) != 'B' || stbi__get8(s) != 'M') return stbi__errpuc("not BMP", "Corrupt BMP");
   stbi__get32le(s); // discard filesize
   stbi__get16le(s); // discard reserved
   stbi__get16le(s); // discard reserved
   info->offset = stbi__get32le(s);
   info->hsz = hsz = stbi__get32le(s);
   info->mr = info->mg = info->mb = info->ma = 0;

   if (hsz != 12 && hsz != 40 && hsz != 56 && hsz != 108 && hsz != 124) return stbi__errpuc("unknown BMP", "BMP type not supported: unknown");
   if (hsz == 12) {
      // the 12-byte header stores the size as 16-bit values
      s->img_x = stbi__get16le(s);
      s->img_y = stbi__get16le(s);
   } else {
      s->img_x = stbi__get32le(s);
      s->img_y = stbi__get32le(s);
   }
   // next 16-bit field must be 1 (presumably the plane count)
   if (stbi__get16le(s) != 1) return stbi__errpuc("bad BMP", "bad BMP");
   info->bpp = stbi__get16le(s);
   if (hsz != 12) {
      int compress = stbi__get32le(s);
      if (compress == 1 || compress == 2) return stbi__errpuc("BMP RLE", "BMP type not supported: RLE");
      // Only uncompressed (0) and bitfields (3) data is stored as plain pixels.
      // Anything else (e.g. embedded JPEG/PNG) would otherwise be decoded as
      // raw pixel data for 24bpp or 108/124-byte-header files.
      if (compress != 0 && compress != 3) return stbi__errpuc("BMP compression", "BMP type not supported: unsupported compression");
      stbi__get32le(s); // discard sizeof
      stbi__get32le(s); // discard hres
      stbi__get32le(s); // discard vres
      stbi__get32le(s); // discard colorsused
      stbi__get32le(s); // discard max important
      if (hsz == 40 || hsz == 56) {
         if (hsz == 56) {
            // the 56-byte header carries four extra 32-bit fields, which are ignored
            stbi__get32le(s);
            stbi__get32le(s);
            stbi__get32le(s);
            stbi__get32le(s);
         }
         if (info->bpp == 16 || info->bpp == 32) {
            if (compress == 0) {
               // no masks in the file: use the default layouts
               if (info->bpp == 32) {
                  info->mr = 0xffu << 16;
                  info->mg = 0xffu <<  8;
                  info->mb = 0xffu <<  0;
                  info->ma = 0xffu << 24;
                  info->all_a = 0; // if all_a is 0 at end, then we loaded alpha channel but it was all 0
               } else {
                  info->mr = 31u << 10;
                  info->mg = 31u <<  5;
                  info->mb = 31u <<  0;
               }
            } else {
               // compress == 3: three explicit channel masks follow
               info->mr = stbi__get32le(s);
               info->mg = stbi__get32le(s);
               info->mb = stbi__get32le(s);
               // not documented, but generated by photoshop and handled by mspaint
               if (info->mr == info->mg && info->mg == info->mb) {
                  // all three masks identical: rejected
                  return stbi__errpuc("bad BMP", "bad BMP");
               }
            }
         }
      } else {
         int i;
         if (hsz != 108 && hsz != 124)
            return stbi__errpuc("bad BMP", "bad BMP");
         // the 108/124-byte headers always carry four channel masks
         info->mr = stbi__get32le(s);
         info->mg = stbi__get32le(s);
         info->mb = stbi__get32le(s);
         info->ma = stbi__get32le(s);
         stbi__get32le(s); // discard color space
         for (i=0; i < 12; ++i)
            stbi__get32le(s); // discard color space parameters
         if (hsz == 124) {
            stbi__get32le(s); // discard rendering intent
            stbi__get32le(s); // discard offset of profile data
            stbi__get32le(s); // discard size of profile data
            stbi__get32le(s); // discard reserved
         }
      }
   }
   return (void *) 1;
}
5208 
5209 
stbi__bmp_load(stbi__context * s,int * x,int * y,int * comp,int req_comp,stbi__result_info * ri)5210 static void *stbi__bmp_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri)
5211 {
5212    stbi_uc *out;
5213    unsigned int mr=0,mg=0,mb=0,ma=0, all_a;
5214    stbi_uc pal[256][4];
5215    int psize=0,i,j,width;
5216    int flip_vertically, pad, target;
5217    stbi__bmp_data info;
5218    STBI_NOTUSED(ri);
5219 
5220    info.all_a = 255;
5221    if (stbi__bmp_parse_header(s, &info) == NULL)
5222       return NULL; // error code already set
5223 
5224    flip_vertically = ((int) s->img_y) > 0;
5225    s->img_y = abs((int) s->img_y);
5226 
5227    mr = info.mr;
5228    mg = info.mg;
5229    mb = info.mb;
5230    ma = info.ma;
5231    all_a = info.all_a;
5232 
5233    if (info.hsz == 12) {
5234       if (info.bpp < 24)
5235          psize = (info.offset - 14 - 24) / 3;
5236    } else {
5237       if (info.bpp < 16)
5238          psize = (info.offset - 14 - info.hsz) >> 2;
5239    }
5240 
5241    if (info.bpp == 24 && ma == 0xff000000)
5242       s->img_n = 3;
5243    else
5244       s->img_n = ma ? 4 : 3;
5245    if (req_comp && req_comp >= 3) // we can directly decode 3 or 4
5246       target = req_comp;
5247    else
5248       target = s->img_n; // if they want monochrome, we'll post-convert
5249 
5250    // sanity-check size
5251    if (!stbi__mad3sizes_valid(target, s->img_x, s->img_y, 0))
5252       return stbi__errpuc("too large", "Corrupt BMP");
5253 
5254    out = (stbi_uc *) stbi__malloc_mad3(target, s->img_x, s->img_y, 0);
5255    if (!out) return stbi__errpuc("outofmem", "Out of memory");
5256    if (info.bpp < 16) {
5257       int z=0;
5258       if (psize == 0 || psize > 256) { STBI_FREE(out); return stbi__errpuc("invalid", "Corrupt BMP"); }
5259       for (i=0; i < psize; ++i) {
5260          pal[i][2] = stbi__get8(s);
5261          pal[i][1] = stbi__get8(s);
5262          pal[i][0] = stbi__get8(s);
5263          if (info.hsz != 12) stbi__get8(s);
5264          pal[i][3] = 255;
5265       }
5266       stbi__skip(s, info.offset - 14 - info.hsz - psize * (info.hsz == 12 ? 3 : 4));
5267       if (info.bpp == 1) width = (s->img_x + 7) >> 3;
5268       else if (info.bpp == 4) width = (s->img_x + 1) >> 1;
5269       else if (info.bpp == 8) width = s->img_x;
5270       else { STBI_FREE(out); return stbi__errpuc("bad bpp", "Corrupt BMP"); }
5271       pad = (-width)&3;
5272       if (info.bpp == 1) {
5273          for (j=0; j < (int) s->img_y; ++j) {
5274             int bit_offset = 7, v = stbi__get8(s);
5275             for (i=0; i < (int) s->img_x; ++i) {
5276                int color = (v>>bit_offset)&0x1;
5277                out[z++] = pal[color][0];
5278                out[z++] = pal[color][1];
5279                out[z++] = pal[color][2];
5280                if (target == 4) out[z++] = 255;
5281                if (i+1 == (int) s->img_x) break;
5282                if((--bit_offset) < 0) {
5283                   bit_offset = 7;
5284                   v = stbi__get8(s);
5285                }
5286             }
5287             stbi__skip(s, pad);
5288          }
5289       } else {
5290          for (j=0; j < (int) s->img_y; ++j) {
5291             for (i=0; i < (int) s->img_x; i += 2) {
5292                int v=stbi__get8(s),v2=0;
5293                if (info.bpp == 4) {
5294                   v2 = v & 15;
5295                   v >>= 4;
5296                }
5297                out[z++] = pal[v][0];
5298                out[z++] = pal[v][1];
5299                out[z++] = pal[v][2];
5300                if (target == 4) out[z++] = 255;
5301                if (i+1 == (int) s->img_x) break;
5302                v = (info.bpp == 8) ? stbi__get8(s) : v2;
5303                out[z++] = pal[v][0];
5304                out[z++] = pal[v][1];
5305                out[z++] = pal[v][2];
5306                if (target == 4) out[z++] = 255;
5307             }
5308             stbi__skip(s, pad);
5309          }
5310       }
5311    } else {
5312       int rshift=0,gshift=0,bshift=0,ashift=0,rcount=0,gcount=0,bcount=0,acount=0;
5313       int z = 0;
5314       int easy=0;
5315       stbi__skip(s, info.offset - 14 - info.hsz);
5316       if (info.bpp == 24) width = 3 * s->img_x;
5317       else if (info.bpp == 16) width = 2*s->img_x;
5318       else /* bpp = 32 and pad = 0 */ width=0;
5319       pad = (-width) & 3;
5320       if (info.bpp == 24) {
5321          easy = 1;
5322       } else if (info.bpp == 32) {
5323          if (mb == 0xff && mg == 0xff00 && mr == 0x00ff0000 && ma == 0xff000000)
5324             easy = 2;
5325       }
5326       if (!easy) {
5327          if (!mr || !mg || !mb) { STBI_FREE(out); return stbi__errpuc("bad masks", "Corrupt BMP"); }
5328          // right shift amt to put high bit in position #7
5329          rshift = stbi__high_bit(mr)-7; rcount = stbi__bitcount(mr);
5330          gshift = stbi__high_bit(mg)-7; gcount = stbi__bitcount(mg);
5331          bshift = stbi__high_bit(mb)-7; bcount = stbi__bitcount(mb);
5332          ashift = stbi__high_bit(ma)-7; acount = stbi__bitcount(ma);
5333       }
5334       for (j=0; j < (int) s->img_y; ++j) {
5335          if (easy) {
5336             for (i=0; i < (int) s->img_x; ++i) {
5337                unsigned char a;
5338                out[z+2] = stbi__get8(s);
5339                out[z+1] = stbi__get8(s);
5340                out[z+0] = stbi__get8(s);
5341                z += 3;
5342                a = (easy == 2 ? stbi__get8(s) : 255);
5343                all_a |= a;
5344                if (target == 4) out[z++] = a;
5345             }
5346          } else {
5347             int bpp = info.bpp;
5348             for (i=0; i < (int) s->img_x; ++i) {
5349                stbi__uint32 v = (bpp == 16 ? (stbi__uint32) stbi__get16le(s) : stbi__get32le(s));
5350                unsigned int a;
5351                out[z++] = STBI__BYTECAST(stbi__shiftsigned(v & mr, rshift, rcount));
5352                out[z++] = STBI__BYTECAST(stbi__shiftsigned(v & mg, gshift, gcount));
5353                out[z++] = STBI__BYTECAST(stbi__shiftsigned(v & mb, bshift, bcount));
5354                a = (ma ? stbi__shiftsigned(v & ma, ashift, acount) : 255);
5355                all_a |= a;
5356                if (target == 4) out[z++] = STBI__BYTECAST(a);
5357             }
5358          }
5359          stbi__skip(s, pad);
5360       }
5361    }
5362 
5363    // if alpha channel is all 0s, replace with all 255s
5364    if (target == 4 && all_a == 0)
5365       for (i=4*s->img_x*s->img_y-1; i >= 0; i -= 4)
5366          out[i] = 255;
5367 
5368    if (flip_vertically) {
5369       stbi_uc t;
5370       for (j=0; j < (int) s->img_y>>1; ++j) {
5371          stbi_uc *p1 = out +      j     *s->img_x*target;
5372          stbi_uc *p2 = out + (s->img_y-1-j)*s->img_x*target;
5373          for (i=0; i < (int) s->img_x*target; ++i) {
5374             t = p1[i]; p1[i] = p2[i]; p2[i] = t;
5375          }
5376       }
5377    }
5378 
5379    if (req_comp && req_comp != target) {
5380       out = stbi__convert_format(out, target, req_comp, s->img_x, s->img_y);
5381       if (out == NULL) return out; // stbi__convert_format frees input on failure
5382    }
5383 
5384    *x = s->img_x;
5385    *y = s->img_y;
5386    if (comp) *comp = s->img_n;
5387    return out;
5388 }
5389 #endif
5390 
5391 // Targa Truevision - TGA
5392 // by Jonathan Dummer
5393 #ifndef STBI_NO_TGA
5394 // returns STBI_rgb or whatever, 0 on error
stbi__tga_get_comp(int bits_per_pixel,int is_grey,int * is_rgb16)5395 static int stbi__tga_get_comp(int bits_per_pixel, int is_grey, int* is_rgb16)
5396 {
5397    // only RGB or RGBA (incl. 16bit) or grey allowed
5398    if (is_rgb16) *is_rgb16 = 0;
5399    switch(bits_per_pixel) {
5400       case 8:  return STBI_grey;
5401       case 16: if(is_grey) return STBI_grey_alpha;
5402                // fallthrough
5403       case 15: if(is_rgb16) *is_rgb16 = 1;
5404                return STBI_rgb;
5405       case 24: // fallthrough
5406       case 32: return bits_per_pixel/8;
5407       default: return 0;
5408    }
5409 }
5410 
// Parses just enough of a TGA header to report the image dimensions and
// component count. Each of x, y and comp may be NULL.
// Returns 1 on success (the stream is left positioned after the header
// bytes consumed here); returns 0 after rewinding the stream when the
// header does not describe a supported TGA.
static int stbi__tga_info(stbi__context *s, int *x, int *y, int *comp)
{
    int width, height, ncomp, image_type, pixel_bits, palette_bits;
    int entry_bits, colormap_type;

    stbi__get8(s);                      // first header byte is not needed here
    colormap_type = stbi__get8(s);
    if (colormap_type > 1) goto reject; // only RGB or indexed allowed

    image_type = stbi__get8(s);
    if (colormap_type == 1) {
        // colormapped (paletted) image: only image types 1 and 9 are valid
        if (!(image_type == 1 || image_type == 9)) goto reject;
        stbi__skip(s,4);                // index of first colormap entry + number of entries
        entry_bits = stbi__get8(s);     // bits per palette color entry
        if (!(entry_bits == 8 || entry_bits == 15 || entry_bits == 16 ||
              entry_bits == 24 || entry_bits == 32))
            goto reject;
        stbi__skip(s,4);                // image x and y origin
        palette_bits = entry_bits;
    } else {
        // image without colormap: RGB or grey, with or without RLE
        if (!(image_type == 2 || image_type == 3 || image_type == 10 || image_type == 11))
            goto reject;
        stbi__skip(s,9);                // colormap specification and image x/y origin
        palette_bits = 0;
    }

    width = stbi__get16le(s);
    if (width < 1) goto reject;
    height = stbi__get16le(s);
    if (height < 1) goto reject;

    pixel_bits = stbi__get8(s);
    stbi__get8(s);                      // descriptor byte (alpha bits) is ignored
    if (palette_bits != 0) {
        // with a colormap, pixel_bits is the size of an index: 8 or 16 only
        if (pixel_bits != 8 && pixel_bits != 16) goto reject;
        ncomp = stbi__tga_get_comp(palette_bits, 0, NULL);
    } else {
        ncomp = stbi__tga_get_comp(pixel_bits, (image_type == 3) || (image_type == 11), NULL);
    }
    if (!ncomp) goto reject;

    if (x) *x = width;
    if (y) *y = height;
    if (comp) *comp = ncomp;
    return 1;                           // seems to have passed everything

reject:
    stbi__rewind(s);
    return 0;
}
5475 
// Checks whether the stream starts with a plausible TGA header.
// Always rewinds the stream before returning; returns 1 when every header
// field checked here is acceptable, 0 otherwise.
static int stbi__tga_test(stbi__context *s)
{
   int ok = 0;
   int value, color_type;

   do {
      stbi__get8(s);                  // first header byte is discarded
      color_type = stbi__get8(s);
      if (color_type > 1) break;      // only RGB or indexed allowed

      value = stbi__get8(s);          // image type
      if (color_type == 1) {
         // colormapped image: image type must be 1 or 9
         if (!(value == 1 || value == 9)) break;
         stbi__skip(s,4);             // index of first colormap entry + number of entries
         value = stbi__get8(s);       // bits per palette color entry
         if (!(value == 8 || value == 15 || value == 16 || value == 24 || value == 32)) break;
         stbi__skip(s,4);             // image x and y origin
      } else {
         // no colormap: RGB or grey, with or without RLE
         if (!(value == 2 || value == 3 || value == 10 || value == 11)) break;
         stbi__skip(s,9);             // colormap specification and image x/y origin
      }

      if (stbi__get16le(s) < 1) break;   // width
      if (stbi__get16le(s) < 1) break;   // height

      value = stbi__get8(s);          // bits per pixel
      // for colormapped images, bits per pixel is the size of an index
      if (color_type == 1 && value != 8 && value != 16) break;
      if (!(value == 8 || value == 15 || value == 16 || value == 24 || value == 32)) break;

      ok = 1;
   } while (0);

   stbi__rewind(s);
   return ok;
}
5506 
5507 // read 16bit value and convert to 24bit RGB
// Reads one little-endian 16-bit pixel from s and expands its three 5-bit
// channels to 8 bits each, writing them to out[0..2] in R, G, B order (so
// no swap is needed afterwards).
static void stbi__tga_read_rgb16(stbi__context *s, stbi_uc* out)
{
   int packed = stbi__get16le(s) & 0xffff;
   int shift = 10;   // red sits in bits 10..14, green in 5..9, blue in 0..4
   int c;

   for (c = 0; c < 3; ++c, shift -= 5) {
      int five = (packed >> shift) & 31;
      out[c] = (stbi_uc)((five * 255)/31);
   }

   // The top bit is deliberately ignored: treating it as alpha made 16-bit
   // test images completely translucent, so every 15/16-bit TGA is handled
   // as RGB without alpha.
}
5526 
// Decodes a complete TGA image from s.
//
//   x, y:     receive the image width and height (written before the size
//             validation, so they are set even on some failure paths)
//   comp:     optional; receives the component count found in the file
//   req_comp: desired components per pixel, or 0 to keep the file's count
//   ri:       unused by this loader
//
// Returns the decoded pixel buffer (after stbi__convert_format() when a
// different component count was requested). On a malformed header, an
// oversized image, an allocation failure or a bad palette, everything
// allocated here is released and the stbi__errpuc() result is returned.
static void *stbi__tga_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri)
{
   //   read in the TGA header stuff
   int tga_offset = stbi__get8(s);
   int tga_indexed = stbi__get8(s);
   int tga_image_type = stbi__get8(s);
   int tga_is_RLE = 0;
   int tga_palette_start = stbi__get16le(s);
   int tga_palette_len = stbi__get16le(s);
   int tga_palette_bits = stbi__get8(s);
   int tga_x_origin = stbi__get16le(s);
   int tga_y_origin = stbi__get16le(s);
   int tga_width = stbi__get16le(s);
   int tga_height = stbi__get16le(s);
   int tga_bits_per_pixel = stbi__get8(s);
   int tga_comp, tga_rgb16=0;
   int tga_inverted = stbi__get8(s);
   // int tga_alpha_bits = tga_inverted & 15; // the 4 lowest bits - unused (useless?)
   //   image data
   unsigned char *tga_data;
   unsigned char *tga_palette = NULL;
   int i, j;
   unsigned char raw_data[4] = {0};
   int RLE_count = 0;
   int RLE_repeating = 0;
   int read_next_pixel = 1;
   STBI_NOTUSED(ri);
   STBI_NOTUSED(tga_x_origin); // @TODO
   STBI_NOTUSED(tga_y_origin); // @TODO

   //   do a tiny bit of preprocessing
   if ( tga_image_type >= 8 )
   {
      // image types 8 and up are the RLE variants of types 0..3
      tga_image_type -= 8;
      tga_is_RLE = 1;
   }
   // rows must be flipped after decoding when bit 5 of the descriptor is clear
   tga_inverted = 1 - ((tga_inverted >> 5) & 1);

   //   If I'm paletted, then I'll use the number of bits from the palette
   if ( tga_indexed ) tga_comp = stbi__tga_get_comp(tga_palette_bits, 0, &tga_rgb16);
   else tga_comp = stbi__tga_get_comp(tga_bits_per_pixel, (tga_image_type == 3), &tga_rgb16);

   if(!tga_comp) // shouldn't really happen, stbi__tga_test() should have ensured basic consistency
      return stbi__errpuc("bad format", "Can't find out TGA pixelformat");

   //   tga info
   *x = tga_width;
   *y = tga_height;
   if (comp) *comp = tga_comp;

   if (!stbi__mad3sizes_valid(tga_width, tga_height, tga_comp, 0))
      return stbi__errpuc("too large", "Corrupt TGA");

   tga_data = (unsigned char*)stbi__malloc_mad3(tga_width, tga_height, tga_comp, 0);
   if (!tga_data) return stbi__errpuc("outofmem", "Out of memory");

   // skip to the data's starting position (offset usually = 0)
   stbi__skip(s, tga_offset );

   if ( !tga_indexed && !tga_is_RLE && !tga_rgb16 ) {
      // fast path: raw, unpaletted 8/24/32-bit data can be read a row at a
      // time straight into its final (already flipped) position
      for (i=0; i < tga_height; ++i) {
         int row = tga_inverted ? tga_height -i - 1 : i;
         stbi_uc *tga_row = tga_data + row*tga_width*tga_comp;
         stbi__getn(s, tga_row, tga_width * tga_comp);
      }
   } else  {
      //   do I need to load a palette?
      if ( tga_indexed)
      {
         // A palette needs at least one entry: out-of-range indices are
         // remapped to entry 0 below, so an empty palette would make every
         // lookup read past the end of the allocation.
         if (tga_palette_len == 0) {
            STBI_FREE(tga_data);
            return stbi__errpuc("bad palette", "Corrupt TGA");
         }

         //   any data to skip? (offset usually = 0)
         stbi__skip(s, tga_palette_start );
         //   load the palette
         tga_palette = (unsigned char*)stbi__malloc_mad2(tga_palette_len, tga_comp, 0);
         if (!tga_palette) {
            STBI_FREE(tga_data);
            return stbi__errpuc("outofmem", "Out of memory");
         }
         if (tga_rgb16) {
            // 15/16-bit palette entries are expanded to 8-bit RGB one by one
            stbi_uc *pal_entry = tga_palette;
            STBI_ASSERT(tga_comp == STBI_rgb);
            for (i=0; i < tga_palette_len; ++i) {
               stbi__tga_read_rgb16(s, pal_entry);
               pal_entry += tga_comp;
            }
         } else if (!stbi__getn(s, tga_palette, tga_palette_len * tga_comp)) {
               STBI_FREE(tga_data);
               STBI_FREE(tga_palette);
               return stbi__errpuc("bad palette", "Corrupt TGA");
         }
      }
      //   load the data
      for (i=0; i < tga_width * tga_height; ++i)
      {
         //   if I'm in RLE mode, do I need to get a new RLE packet header?
         if ( tga_is_RLE )
         {
            if ( RLE_count == 0 )
            {
               //   yep, get the next byte as a RLE command
               int RLE_cmd = stbi__get8(s);
               RLE_count = 1 + (RLE_cmd & 127);
               RLE_repeating = RLE_cmd >> 7;
               read_next_pixel = 1;
            } else if ( !RLE_repeating )
            {
               // inside a raw packet every pixel is stored explicitly
               read_next_pixel = 1;
            }
         } else
         {
            read_next_pixel = 1;
         }
         //   OK, if I need to read a pixel, do it now
         if ( read_next_pixel )
         {
            //   load however much data we did have
            if ( tga_indexed )
            {
               // read in index, then perform the lookup
               int pal_idx = (tga_bits_per_pixel == 8) ? stbi__get8(s) : stbi__get16le(s);
               if ( pal_idx >= tga_palette_len ) {
                  // invalid index
                  pal_idx = 0;
               }
               pal_idx *= tga_comp;
               for (j = 0; j < tga_comp; ++j) {
                  raw_data[j] = tga_palette[pal_idx+j];
               }
            } else if(tga_rgb16) {
               STBI_ASSERT(tga_comp == STBI_rgb);
               stbi__tga_read_rgb16(s, raw_data);
            } else {
               //   read in the data raw
               for (j = 0; j < tga_comp; ++j) {
                  raw_data[j] = stbi__get8(s);
               }
            }
            //   clear the reading flag for the next pixel
            read_next_pixel = 0;
         } // end of reading a pixel

         // copy data (for a repeating RLE packet this re-emits the last pixel read)
         for (j = 0; j < tga_comp; ++j)
           tga_data[i*tga_comp+j] = raw_data[j];

         //   in case we're in RLE mode, keep counting down
         --RLE_count;
      }
      //   do I need to invert the image?
      if ( tga_inverted )
      {
         // swap row j with row (height-1-j), byte by byte
         for (j = 0; j*2 < tga_height; ++j)
         {
            int index1 = j * tga_width * tga_comp;
            int index2 = (tga_height - 1 - j) * tga_width * tga_comp;
            for (i = tga_width * tga_comp; i > 0; --i)
            {
               unsigned char temp = tga_data[index1];
               tga_data[index1] = tga_data[index2];
               tga_data[index2] = temp;
               ++index1;
               ++index2;
            }
         }
      }
      //   clear my palette, if I had one
      if ( tga_palette != NULL )
      {
         STBI_FREE( tga_palette );
      }
   }

   // swap RGB - if the source data was RGB16, it already is in the right order
   if (tga_comp >= 3 && !tga_rgb16)
   {
      unsigned char* tga_pixel = tga_data;
      for (i=0; i < tga_width * tga_height; ++i)
      {
         unsigned char temp = tga_pixel[0];
         tga_pixel[0] = tga_pixel[2];
         tga_pixel[2] = temp;
         tga_pixel += tga_comp;
      }
   }

   // convert to target component count
   if (req_comp && req_comp != tga_comp)
      tga_data = stbi__convert_format(tga_data, tga_comp, req_comp, tga_width, tga_height);

   //   the things I do to get rid of an error message, and yet keep
   //   Microsoft's C compilers happy... [8^(
   tga_palette_start = tga_palette_len = tga_palette_bits =
         tga_x_origin = tga_y_origin = 0;
   STBI_NOTUSED(tga_palette_start);
   //   OK, done
   return tga_data;
}
5723 #endif
5724 
5725 // *************************************************************************************************
5726 // Photoshop PSD loader -- PD by Thatcher Ulrich, integration by Nicolas Schulz, tweaked by STB
5727 
5728 #ifndef STBI_NO_PSD
// Returns 1 when the stream begins with the big-endian signature
// 0x38425053 ("8BPS"), 0 otherwise. The stream is rewound either way.
static int stbi__psd_test(stbi__context *s)
{
   stbi__uint32 signature = stbi__get32be(s);
   stbi__rewind(s);
   return (signature == 0x38425053) ? 1 : 0;
}
5735 
// Decodes one RLE-compressed channel of pixelCount samples from s.
// Samples are written to p with a stride of 4 bytes, i.e. into one channel
// of an interleaved 4-channel buffer.
// Returns 1 on success, 0 when a run would extend past pixelCount.
static int stbi__psd_decode_rle(stbi__context *s, stbi_uc *p, int pixelCount)
{
   int written = 0;

   for (;;) {
      int room = pixelCount - written;
      int code, run, k;

      if (room <= 0) break;

      code = stbi__get8(s);
      if (code == 128)
         continue;                  // no-op marker

      if (code < 128) {
         // literal run: the next code+1 bytes are copied as they are
         run = code + 1;
         if (run > room) return 0;  // corrupt data
         for (k = 0; k < run; ++k, p += 4)
            *p = stbi__get8(s);
      } else {
         // repeat run: code is a negative 8-bit count, so the next byte is
         // replicated 257-code times
         stbi_uc fill;
         run = 257 - code;
         if (run > room) return 0;  // corrupt data
         fill = stbi__get8(s);
         for (k = 0; k < run; ++k, p += 4)
            *p = fill;
      }
      written += run;
   }

   return 1;
}
5773 
stbi__psd_load(stbi__context * s,int * x,int * y,int * comp,int req_comp,stbi__result_info * ri,int bpc)5774 static void *stbi__psd_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri, int bpc)
5775 {
5776    int pixelCount;
5777    int channelCount, compression;
5778    int channel, i;
5779    int bitdepth;
5780    int w,h;
5781    stbi_uc *out;
5782    STBI_NOTUSED(ri);
5783 
5784    // Check identifier
5785    if (stbi__get32be(s) != 0x38425053)   // "8BPS"
5786       return stbi__errpuc("not PSD", "Corrupt PSD image");
5787 
5788    // Check file type version.
5789    if (stbi__get16be(s) != 1)
5790       return stbi__errpuc("wrong version", "Unsupported version of PSD image");
5791 
5792    // Skip 6 reserved bytes.
5793    stbi__skip(s, 6 );
5794 
5795    // Read the number of channels (R, G, B, A, etc).
5796    channelCount = stbi__get16be(s);
5797    if (channelCount < 0 || channelCount > 16)
5798       return stbi__errpuc("wrong channel count", "Unsupported number of channels in PSD image");
5799 
5800    // Read the rows and columns of the image.
5801    h = stbi__get32be(s);
5802    w = stbi__get32be(s);
5803 
5804    // Make sure the depth is 8 bits.
5805    bitdepth = stbi__get16be(s);
5806    if (bitdepth != 8 && bitdepth != 16)
5807       return stbi__errpuc("unsupported bit depth", "PSD bit depth is not 8 or 16 bit");
5808 
5809    // Make sure the color mode is RGB.
5810    // Valid options are:
5811    //   0: Bitmap
5812    //   1: Grayscale
5813    //   2: Indexed color
5814    //   3: RGB color
5815    //   4: CMYK color
5816    //   7: Multichannel
5817    //   8: Duotone
5818    //   9: Lab color
5819    if (stbi__get16be(s) != 3)
5820       return stbi__errpuc("wrong color format", "PSD is not in RGB color format");
5821 
5822    // Skip the Mode Data.  (It's the palette for indexed color; other info for other modes.)
5823    stbi__skip(s,stbi__get32be(s) );
5824 
5825    // Skip the image resources.  (resolution, pen tool paths, etc)
5826    stbi__skip(s, stbi__get32be(s) );
5827 
5828    // Skip the reserved data.
5829    stbi__skip(s, stbi__get32be(s) );
5830 
5831    // Find out if the data is compressed.
5832    // Known values:
5833    //   0: no compression
5834    //   1: RLE compressed
5835    compression = stbi__get16be(s);
5836    if (compression > 1)
5837       return stbi__errpuc("bad compression", "PSD has an unknown compression format");
5838 
5839    // Check size
5840    if (!stbi__mad3sizes_valid(4, w, h, 0))
5841       return stbi__errpuc("too large", "Corrupt PSD");
5842 
5843    // Create the destination image.
5844 
5845    if (!compression && bitdepth == 16 && bpc == 16) {
5846       out = (stbi_uc *) stbi__malloc_mad3(8, w, h, 0);
5847       ri->bits_per_channel = 16;
5848    } else
5849       out = (stbi_uc *) stbi__malloc(4 * w*h);
5850 
5851    if (!out) return stbi__errpuc("outofmem", "Out of memory");
5852    pixelCount = w*h;
5853 
5854    // Initialize the data to zero.
5855    //memset( out, 0, pixelCount * 4 );
5856 
5857    // Finally, the image data.
5858    if (compression) {
5859       // RLE as used by .PSD and .TIFF
5860       // Loop until you get the number of unpacked bytes you are expecting:
5861       //     Read the next source byte into n.
5862       //     If n is between 0 and 127 inclusive, copy the next n+1 bytes literally.
5863       //     Else if n is between -127 and -1 inclusive, copy the next byte -n+1 times.
5864       //     Else if n is 128, noop.
5865       // Endloop
5866 
5867       // The RLE-compressed data is preceded by a 2-byte data count for each row in the data,
5868       // which we're going to just skip.
5869       stbi__skip(s, h * channelCount * 2 );
5870 
5871       // Read the RLE data by channel.
5872       for (channel = 0; channel < 4; channel++) {
5873          stbi_uc *p;
5874 
5875          p = out+channel;
5876          if (channel >= channelCount) {
5877             // Fill this channel with default data.
5878             for (i = 0; i < pixelCount; i++, p += 4)
5879                *p = (channel == 3 ? 255 : 0);
5880          } else {
5881             // Read the RLE data.
5882             if (!stbi__psd_decode_rle(s, p, pixelCount)) {
5883                STBI_FREE(out);
5884                return stbi__errpuc("corrupt", "bad RLE data");
5885             }
5886          }
5887       }
5888 
5889    } else {
5890       // We're at the raw image data.  It's each channel in order (Red, Green, Blue, Alpha, ...)
5891       // where each channel consists of an 8-bit (or 16-bit) value for each pixel in the image.
5892 
5893       // Read the data by channel.
5894       for (channel = 0; channel < 4; channel++) {
5895          if (channel >= channelCount) {
5896             // Fill this channel with default data.
5897             if (bitdepth == 16 && bpc == 16) {
5898                stbi__uint16 *q = ((stbi__uint16 *) out) + channel;
5899                stbi__uint16 val = channel == 3 ? 65535 : 0;
5900                for (i = 0; i < pixelCount; i++, q += 4)
5901                   *q = val;
5902             } else {
5903                stbi_uc *p = out+channel;
5904                stbi_uc val = channel == 3 ? 255 : 0;
5905                for (i = 0; i < pixelCount; i++, p += 4)
5906                   *p = val;
5907             }
5908          } else {
5909             if (ri->bits_per_channel == 16) {    // output bpc
5910                stbi__uint16 *q = ((stbi__uint16 *) out) + channel;
5911                for (i = 0; i < pixelCount; i++, q += 4)
5912                   *q = (stbi__uint16) stbi__get16be(s);
5913             } else {
5914                stbi_uc *p = out+channel;
5915                if (bitdepth == 16) {  // input bpc
5916                   for (i = 0; i < pixelCount; i++, p += 4)
5917                      *p = (stbi_uc) (stbi__get16be(s) >> 8);
5918                } else {
5919                   for (i = 0; i < pixelCount; i++, p += 4)
5920                      *p = stbi__get8(s);
5921                }
5922             }
5923          }
5924       }
5925    }
5926 
5927    // remove weird white matte from PSD
5928    if (channelCount >= 4) {
5929       if (ri->bits_per_channel == 16) {
5930          for (i=0; i < w*h; ++i) {
5931             stbi__uint16 *pixel = (stbi__uint16 *) out + 4*i;
5932             if (pixel[3] != 0 && pixel[3] != 65535) {
5933                float a = pixel[3] / 65535.0f;
5934                float ra = 1.0f / a;
5935                float inv_a = 65535.0f * (1 - ra);
5936                pixel[0] = (stbi__uint16) (pixel[0]*ra + inv_a);
5937                pixel[1] = (stbi__uint16) (pixel[1]*ra + inv_a);
5938                pixel[2] = (stbi__uint16) (pixel[2]*ra + inv_a);
5939             }
5940          }
5941       } else {
5942          for (i=0; i < w*h; ++i) {
5943             unsigned char *pixel = out + 4*i;
5944             if (pixel[3] != 0 && pixel[3] != 255) {
5945                float a = pixel[3] / 255.0f;
5946                float ra = 1.0f / a;
5947                float inv_a = 255.0f * (1 - ra);
5948                pixel[0] = (unsigned char) (pixel[0]*ra + inv_a);
5949                pixel[1] = (unsigned char) (pixel[1]*ra + inv_a);
5950                pixel[2] = (unsigned char) (pixel[2]*ra + inv_a);
5951             }
5952          }
5953       }
5954    }
5955 
5956    // convert to desired output format
5957    if (req_comp && req_comp != 4) {
5958       if (ri->bits_per_channel == 16)
5959          out = (stbi_uc *) stbi__convert_format16((stbi__uint16 *) out, 4, req_comp, w, h);
5960       else
5961          out = stbi__convert_format(out, 4, req_comp, w, h);
5962       if (out == NULL) return out; // stbi__convert_format frees input on failure
5963    }
5964 
5965    if (comp) *comp = 4;
5966    *y = h;
5967    *x = w;
5968 
5969    return out;
5970 }
5971 #endif
5972 
5973 // *************************************************************************************************
5974 // Softimage PIC loader
5975 // by Tom Seddon
5976 //
5977 // See http://softimage.wiki.softimage.com/index.php/INFO:_PIC_file_format
5978 // See http://ozviz.wasp.uwa.edu.au/~pbourke/dataformats/softimagepic/
5979 
5980 #ifndef STBI_NO_PIC
// Reads bytes from s and compares them with the first four bytes of str.
// Returns 1 when all four match; returns 0 at the first mismatch, leaving
// the remaining bytes unread.
static int stbi__pic_is4(stbi__context *s,const char *str)
{
   int matched = 0;

   while (matched < 4) {
      stbi_uc got = stbi__get8(s);
      if (got != (stbi_uc) str[matched])
         return 0;
      ++matched;
   }

   return 1;
}
5990 
// Checks the two signatures of a PIC header: the 4-byte magic at the start
// and the "PICT" tag that follows 84 further bytes.
// Returns 1 when both match, 0 otherwise. Does not rewind the stream.
static int stbi__pic_test_core(stbi__context *s)
{
   int to_skip = 84;

   if (!stbi__pic_is4(s,"\x53\x80\xF6\x34"))
      return 0;

   // the bytes between the magic number and the "PICT" tag are not inspected
   while (to_skip-- > 0)
      stbi__get8(s);

   return stbi__pic_is4(s,"PICT") ? 1 : 0;
}
6006 
6007 typedef struct
6008 {
6009    stbi_uc size,type,channel;
6010 } stbi__pic_packet;
6011 
// Reads one byte from s for every channel selected in the mask `channel`
// (bit 0x80 >> i selects dest[i], for i = 0..3); unselected bytes of dest
// are left untouched.
// Returns dest, or the stbi__errpuc() result when the stream ends first.
static stbi_uc *stbi__readval(stbi__context *s, int channel, stbi_uc *dest)
{
   int idx;

   for (idx = 0; idx < 4; ++idx) {
      if (!(channel & (0x80 >> idx)))
         continue;
      if (stbi__at_eof(s)) return stbi__errpuc("bad file","PIC file too short");
      dest[idx] = stbi__get8(s);
   }

   return dest;
}
6025 
// Copies src[i] to dest[i] for every channel selected in the mask `channel`
// (bit 0x80 >> i selects byte i, for i = 0..3); other bytes of dest are
// left untouched.
static void stbi__copyval(int channel,stbi_uc *dest,const stbi_uc *src)
{
   int idx = 0;
   int bit;

   for (bit = 0x80; bit >= 0x10; bit >>= 1, ++idx) {
      if (channel & bit)
         dest[idx] = src[idx];
   }
}
6034 
// Decodes the packet list and pixel data of a PIC image.
//
//   width, height: image dimensions in pixels
//   comp:          receives 4 when any packet carries the alpha channel
//                  (mask bit 0x10), otherwise 3
//   result:        caller-provided buffer of width*height RGBA pixels; only
//                  the channels named by the packets are overwritten
//
// Returns result on success. On a malformed or truncated file it returns
// the stbi__errpuc() result (or 0 when stbi__readval() already reported
// the error); result is not freed here.
static stbi_uc *stbi__pic_load_core(stbi__context *s,int width,int height,int *comp, stbi_uc *result)
{
   stbi__pic_packet packets[10];
   int channels_seen = 0;
   int packet_count = 0;
   int more, row;

   // Read the chain of packet descriptors. This will (should...) cater for
   // even some bizarre stuff like having data for the same channel in
   // multiple packets.
   do {
      stbi__pic_packet *pk;

      if (packet_count == sizeof(packets)/sizeof(packets[0]))
         return stbi__errpuc("bad format","too many packets");

      pk = &packets[packet_count];
      ++packet_count;

      more        = stbi__get8(s);   // non-zero when another descriptor follows
      pk->size    = stbi__get8(s);
      pk->type    = stbi__get8(s);
      pk->channel = stbi__get8(s);

      channels_seen |= pk->channel;

      if (stbi__at_eof(s))          return stbi__errpuc("bad file","file too short (reading packets)");
      if (pk->size != 8)  return stbi__errpuc("bad format","packet isn't 8bpp");
   } while (more);

   // has alpha channel?
   if (channels_seen & 0x10)
      *comp = 4;
   else
      *comp = 3;

   // Every scanline stores the data of each packet in turn.
   for (row = 0; row < height; ++row) {
      int p;

      for (p = 0; p < packet_count; ++p) {
         const stbi__pic_packet *pk = &packets[p];
         stbi_uc *dest = result + row*width*4;
         int left = width;
         int k;

         if (pk->type == 0) {
            // uncompressed: one value per pixel
            for (k = 0; k < width; ++k, dest += 4)
               if (!stbi__readval(s,pk->channel,dest))
                  return 0;
         } else if (pk->type == 1) {
            // pure RLE: (count, value) pairs; an oversized run is clipped
            // to the end of the scanline
            while (left > 0) {
               stbi_uc value[4];
               int count = stbi__get8(s);

               if (stbi__at_eof(s))   return stbi__errpuc("bad file","file too short (pure read count)");

               if (count > left)
                  count = left;

               if (!stbi__readval(s,pk->channel,value))  return 0;

               for (k = 0; k < count; ++k, dest += 4)
                  stbi__copyval(pk->channel,dest,value);
               left -= count;
            }
         } else if (pk->type == 2) {
            // mixed RLE: codes >= 128 introduce a repeated value, lower
            // codes introduce a run of raw values
            while (left > 0) {
               int count = stbi__get8(s);

               if (stbi__at_eof(s))  return stbi__errpuc("bad file","file too short (mixed read count)");

               if (count >= 128) { // Repeated
                  stbi_uc value[4];

                  // code 128 means the real count follows as a 16-bit value
                  if (count == 128)
                     count = stbi__get16be(s);
                  else
                     count -= 127;
                  if (count > left)
                     return stbi__errpuc("bad file","scanline overrun");

                  if (!stbi__readval(s,pk->channel,value))
                     return 0;

                  for (k = 0; k < count; ++k, dest += 4)
                     stbi__copyval(pk->channel,dest,value);
               } else { // Raw
                  ++count;
                  if (count > left) return stbi__errpuc("bad file","scanline overrun");

                  for (k = 0; k < count; ++k, dest += 4)
                     if (!stbi__readval(s,pk->channel,dest))
                        return 0;
               }
               left -= count;
            }
         } else {
            return stbi__errpuc("bad format","packet has bad compression type");
         }
      }
   }

   return result;
}
6144 
// Load a Softimage PIC image into a freshly allocated pixel buffer.
//
//   s         input stream, positioned at the start of the file
//   px, py    receive the image width and height
//   comp      optional; receives the component count reported by
//             stbi__pic_load_core
//   req_comp  requested number of output components, or 0 to use *comp
//   ri        unused
//
// Returns the pixel buffer, or NULL on failure (truncated header, image too
// large, out of memory, or a decode error reported by stbi__pic_load_core).
static void *stbi__pic_load(stbi__context *s,int *px,int *py,int *comp,int req_comp, stbi__result_info *ri)
{
   stbi_uc *result;
   int i, x,y, internal_comp;
   STBI_NOTUSED(ri);

   if (!comp) comp = &internal_comp;

   // skip the 92 header bytes that precede the dimensions
   for (i=0; i<92; ++i)
      stbi__get8(s);

   x = stbi__get16be(s);
   y = stbi__get16be(s);
   if (stbi__at_eof(s))  return stbi__errpuc("bad file","file too short (pic header)");
   if (!stbi__mad3sizes_valid(x, y, 4, 0)) return stbi__errpuc("too large", "PIC image too large to decode");

   stbi__get32be(s); //skip `ratio'
   stbi__get16be(s); //skip `fields'
   stbi__get16be(s); //skip `pad'

   // intermediate buffer is RGBA
   result = (stbi_uc *) stbi__malloc_mad3(x, y, 4, 0);
   // the allocation can fail; do not hand a NULL pointer to memset
   if (!result) return stbi__errpuc("outofmem", "Out of memory");
   memset(result, 0xff, x*y*4);

   *px = x;
   *py = y;

   if (!stbi__pic_load_core(s,x,y,comp, result)) {
      // Decoding failed: *comp may not have been written and there is no
      // buffer left to convert, so stop here instead of falling through to
      // stbi__convert_format with a NULL source.
      STBI_FREE(result);
      return 0;
   }

   if (req_comp == 0) req_comp = *comp;
   result=stbi__convert_format(result,4,req_comp,x,y);

   return result;
}
6180 
// Probe the stream for a PIC image; the stream is rewound afterwards whatever
// the outcome. Returns the result of stbi__pic_test_core.
static int stbi__pic_test(stbi__context *s)
{
   int is_pic;

   is_pic = stbi__pic_test_core(s);
   stbi__rewind(s);

   return is_pic;
}
6187 #endif
6188 
6189 // *************************************************************************************************
6190 // GIF loader -- public domain by Jean-Marc Lienher -- simplified/shrunk by stb
6191 
6192 #ifndef STBI_NO_GIF
// One entry of the GIF LZW dictionary. Entries form a backwards-linked list:
// following `prefix` repeatedly walks from the last byte of the decoded string
// towards its first byte.
typedef struct
{
   stbi__int16 prefix;  // code of the preceding string, or -1 for a single-byte root entry
   stbi_uc first;       // first byte of the string this code expands to
   stbi_uc suffix;      // last byte of the string this code expands to (the palette index emitted for this entry)
} stbi__gif_lzw;
6199 
// Decoder state for one GIF stream; it persists across frames of an animation.
typedef struct
{
   int w,h;                      // logical screen size read from the GIF header
   stbi_uc *out;                 // output buffer (always 4 components)
   stbi_uc *background;          // The current "background" as far as a gif is concerned
   stbi_uc *history;             // one byte per pixel; non-zero if the pixel was written by the current frame
   int flags, bgindex, ratio, transparent, eflags; // header flags, background colour index, aspect ratio byte,
                                                   // transparent palette index (-1 if none), graphic-control-extension flags
   stbi_uc  pal[256][4];         // global colour table
   stbi_uc lpal[256][4];         // local (per-frame) colour table
   stbi__gif_lzw codes[8192];    // LZW dictionary
   stbi_uc *color_table;         // colour table used by the frame being decoded (points at pal or lpal)
   int parse, step;              // remaining interlace passes, and byte distance between rows written in the current pass
   int lflags;                   // flags byte of the current image descriptor
   int start_x, start_y;         // top-left of the frame rectangle, as byte offsets (x within a row, y as a row start)
   int max_x, max_y;             // exclusive end of the frame rectangle, same units as start_x/start_y
   int cur_x, cur_y;             // current write position, same units as start_x/start_y
   int line_size;                // bytes per output row (w * 4)
   int delay;                    // frame delay from the graphic control extension, multiplied by 10
} stbi__gif;
6219 
// Check for the six-byte GIF signature ("GIF87a" or "GIF89a") at the current
// stream position. Consumes the bytes it inspects and does not rewind.
// Returns 1 on a match and 0 otherwise.
static int stbi__gif_test_raw(stbi__context *s)
{
   const char *magic = "GIF8";
   int k, version;

   // stop reading at the first byte that does not match
   for (k = 0; k < 4; ++k) {
      if (stbi__get8(s) != magic[k])
         return 0;
   }

   version = stbi__get8(s);
   if (!(version == '9' || version == '7'))
      return 0;

   return stbi__get8(s) == 'a';
}
6229 
// Probe the stream for a GIF signature; the stream is always rewound
// afterwards. Returns 1 if the signature matches, 0 otherwise.
static int stbi__gif_test(stbi__context *s)
{
   int is_gif;

   is_gif = stbi__gif_test_raw(s);
   stbi__rewind(s);

   return is_gif;
}
6236 
// Read num_entries three-byte colours from the stream into pal.
// The three bytes are stored in reverse order (first byte read goes to index
// 2, last to index 0). Index 3 is the alpha: 0 for the entry whose index
// equals transp, 255 for every other entry (pass -1 for no transparent entry).
static void stbi__gif_parse_colortable(stbi__context *s, stbi_uc pal[256][4], int num_entries, int transp)
{
   int idx = 0;

   while (idx < num_entries) {
      stbi_uc *entry = pal[idx];

      entry[2] = stbi__get8(s);
      entry[1] = stbi__get8(s);
      entry[0] = stbi__get8(s);

      if (transp == idx)
         entry[3] = 0;
      else
         entry[3] = 255;

      ++idx;
   }
}
6247 
// Parse the GIF signature and logical screen descriptor into g.
//
//   comp     optional; set to 4
//   is_info  when non-zero, stop after the fixed-size header and do not read
//            the global colour table
//
// Returns 1 on success, or the result of stbi__err if the signature is wrong.
static int stbi__gif_header(stbi__context *s, stbi__gif *g, int *comp, int is_info)
{
   const char *sig = "GIF8";
   stbi_uc ver;
   int k;

   // signature bytes are checked one at a time, stopping at the first mismatch
   for (k = 0; k < 4; ++k) {
      if (stbi__get8(s) != sig[k])
         return stbi__err("not GIF", "Corrupt GIF");
   }

   ver = stbi__get8(s);
   if (!(ver == '7' || ver == '9'))
      return stbi__err("not GIF", "Corrupt GIF");
   if (stbi__get8(s) != 'a')
      return stbi__err("not GIF", "Corrupt GIF");

   stbi__g_failure_reason = "";

   // logical screen descriptor
   g->w       = stbi__get16le(s);
   g->h       = stbi__get16le(s);
   g->flags   = stbi__get8(s);
   g->bgindex = stbi__get8(s);
   g->ratio   = stbi__get8(s);
   g->transparent = -1;

   // can't actually tell whether it's 3 or 4 until we parse the comments
   if (comp != 0)
      *comp = 4;

   if (!is_info) {
      // bit 7 of the flags signals a global colour table; the low three bits give its size
      if (g->flags & 0x80)
         stbi__gif_parse_colortable(s,g->pal, 2 << (g->flags & 7), -1);
   }

   return 1;
}
6275 
// Read just enough of a GIF header to report its dimensions.
//
//   x, y  optional; receive the logical screen width and height
//   comp  optional; forwarded to stbi__gif_header
//
// Returns 1 on success. Returns 0 if the temporary decoder state cannot be
// allocated or if the header is invalid (the stream is rewound in that case).
static int stbi__gif_info_raw(stbi__context *s, int *x, int *y, int *comp)
{
   // stbi__gif is large, so it is heap-allocated rather than placed on the stack
   stbi__gif* g = (stbi__gif*) stbi__malloc(sizeof(stbi__gif));
   // stbi__gif_header writes through g, so a failed allocation must be caught here
   if (!g) return stbi__err("outofmem", "Out of memory");
   if (!stbi__gif_header(s, g, comp, 1)) {
      STBI_FREE(g);
      stbi__rewind( s );
      return 0;
   }
   if (x) *x = g->w;
   if (y) *y = g->h;
   STBI_FREE(g);
   return 1;
}
6289 
// Emit the pixels for one LZW code into g->out, advancing the write cursor.
// The dictionary chain is linked from last byte to first, so the prefix is
// emitted recursively before this entry's own byte.
static void stbi__out_gif_code(stbi__gif *g, stbi__uint16 code)
{
   stbi__gif_lzw *entry = &g->codes[code];
   stbi_uc *dst, *color;
   int offset;

   // recurse to decode the prefixes, since the linked-list is backwards,
   // and working backwards through an interleaved image would be nasty
   if (entry->prefix >= 0)
      stbi__out_gif_code(g, entry->prefix);

   // past the bottom of the frame rectangle: drop the pixel
   if (g->cur_y >= g->max_y)
      return;

   offset = g->cur_x + g->cur_y;
   dst = g->out + offset;
   g->history[offset / 4] = 1;   // mark the pixel as touched by this frame

   color = g->color_table + entry->suffix * 4;
   if (color[3] > 128) { // don't render transparent pixels;
      dst[0] = color[2];
      dst[1] = color[1];
      dst[2] = color[0];
      dst[3] = color[3];
   }
   g->cur_x += 4;

   // still inside the current row
   if (g->cur_x < g->max_x)
      return;

   // wrap to the next row of the current pass
   g->cur_x = g->start_x;
   g->cur_y += g->step;

   // ran off the bottom while passes remain: start the next pass
   while (g->cur_y >= g->max_y && g->parse > 0) {
      g->step = (1 << g->parse) * g->line_size;
      g->cur_y = g->start_y + (g->step >> 1);
      --g->parse;
   }
}
6326 
// Decode the LZW-compressed raster data of one GIF frame into g->out.
//
// Reads the minimum code size byte, then the sequence of length-prefixed data
// sub-blocks, extracting variable-width codes least-significant-bit first and
// emitting pixels through stbi__out_gif_code.
//
// Returns g->out when the data ends (zero-length sub-block or end-of-stream
// code), or NULL if the code size is greater than 12 or the code stream is
// invalid.
static stbi_uc *stbi__process_gif_raster(stbi__context *s, stbi__gif *g)
{
   stbi_uc lzw_cs;
   stbi__int32 len, init_code;
   stbi__uint32 first;
   stbi__int32 codesize, codemask, avail, oldcode, bits, valid_bits, clear;
   stbi__gif_lzw *p;

   lzw_cs = stbi__get8(s);
   if (lzw_cs > 12) return NULL;
   clear = 1 << lzw_cs;              // the clear code; clear+1 is the end-of-stream code
   first = 1;                        // stays set until the first clear code has been seen
   codesize = lzw_cs + 1;
   codemask = (1 << codesize) - 1;
   bits = 0;                         // bit accumulator
   valid_bits = 0;                   // number of unread bits currently in the accumulator
   // root entries: every code below `clear` stands for itself
   for (init_code = 0; init_code < clear; init_code++) {
      g->codes[init_code].prefix = -1;
      g->codes[init_code].first = (stbi_uc) init_code;
      g->codes[init_code].suffix = (stbi_uc) init_code;
   }

   // support no starting clear code
   avail = clear+2;                  // next free dictionary slot
   oldcode = -1;                     // previously decoded code, -1 if none since the last clear

   len = 0;                          // bytes remaining in the current data sub-block
   for(;;) {
      if (valid_bits < codesize) {
         // not enough bits for a code yet: pull one more byte from the stream
         if (len == 0) {
            len = stbi__get8(s); // start new block
            if (len == 0)
               return g->out;
         }
         --len;
         bits |= (stbi__int32) stbi__get8(s) << valid_bits;
         valid_bits += 8;
      } else {
         stbi__int32 code = bits & codemask;
         bits >>= codesize;
         valid_bits -= codesize;
         // @OPTIMIZE: is there some way we can accelerate the non-clear path?
         if (code == clear) {  // clear code
            // reset the dictionary to its initial state
            codesize = lzw_cs + 1;
            codemask = (1 << codesize) - 1;
            avail = clear + 2;
            oldcode = -1;
            first = 0;
         } else if (code == clear + 1) { // end of stream code
            // skip the rest of this sub-block and any that follow, up to the terminator
            stbi__skip(s, len);
            while ((len = stbi__get8(s)) > 0)
               stbi__skip(s,len);
            return g->out;
         } else if (code <= avail) {
            // a data code arriving before any clear code is rejected
            if (first) {
               return stbi__errpuc("no clear code", "Corrupt GIF");
            }

            if (oldcode >= 0) {
               // add a new dictionary entry: the previous string plus one byte
               p = &g->codes[avail++];
               if (avail > 8192) {
                  return stbi__errpuc("too many codes", "Corrupt GIF");
               }

               p->prefix = (stbi__int16) oldcode;
               p->first = g->codes[oldcode].first;
               // avail has already been incremented here; when code refers to the
               // entry just created, g->codes[code].first is the p->first set above
               p->suffix = (code == avail) ? p->first : g->codes[code].first;
            } else if (code == avail)
               return stbi__errpuc("illegal code in raster", "Corrupt GIF");

            stbi__out_gif_code(g, (stbi__uint16) code);

            // widen the code size once the dictionary fills the current width
            if ((avail & codemask) == 0 && avail <= 0x0FFF) {
               codesize++;
               codemask = (1 << codesize) - 1;
            }

            oldcode = code;
         } else {
            return stbi__errpuc("illegal code in raster", "Corrupt GIF");
         }
      }
   }
}
6411 
6412 // this function is designed to support animated gifs, although stb_image doesn't support it
6413 // two back is the image from two frames ago, used for a very specific disposal format
// Decode the next frame of a GIF stream into g->out.
//
//   s         input stream
//   g         decoder state; on the first call (g->out == 0) the header is
//             parsed and the out/background/history buffers are allocated
//   comp      forwarded to stbi__gif_header on the first call
//   req_comp  unused
//   two_back  pixels to restore from when the previous frame's disposal
//             method is 3; may be 0, in which case disposal 2 is used instead
//
// Returns g->out when a frame was decoded, the value (stbi_uc *) s when the
// stream terminator (0x3B) is reached, or NULL on error.
static stbi_uc *stbi__gif_load_next(stbi__context *s, stbi__gif *g, int *comp, int req_comp, stbi_uc *two_back)
{
   int dispose;
   int first_frame;
   int pi;
   int pcount;
   STBI_NOTUSED(req_comp);

   // on first frame, any non-written pixels get the background colour (non-transparent)
   first_frame = 0;
   if (g->out == 0) {
      if (!stbi__gif_header(s, g, comp,0)) return 0; // stbi__g_failure_reason set by stbi__gif_header
      if (!stbi__mad3sizes_valid(4, g->w, g->h, 0))
         return stbi__errpuc("too large", "GIF image is too large");
      pcount = g->w * g->h;
      g->out = (stbi_uc *) stbi__malloc(4 * pcount);
      g->background = (stbi_uc *) stbi__malloc(4 * pcount);
      g->history = (stbi_uc *) stbi__malloc(pcount);
      // buffers that did get allocated are left in g for the caller to free
      if (!g->out || !g->background || !g->history)
         return stbi__errpuc("outofmem", "Out of memory");

      // image is treated as "transparent" at the start - ie, nothing overwrites the current background;
      // background colour is only used for pixels that are not rendered first frame, after that "background"
      // color refers to the color that was there the previous frame.
      memset(g->out, 0x00, 4 * pcount);
      memset(g->background, 0x00, 4 * pcount); // state of the background (starts transparent)
      memset(g->history, 0x00, pcount);        // pixels that were affected previous frame
      first_frame = 1;
   } else {
      // second frame - how do we dispose of the previous one?
      // the disposal method is bits 2-4 of the graphic control extension flags
      dispose = (g->eflags & 0x1C) >> 2;
      pcount = g->w * g->h;

      if ((dispose == 3) && (two_back == 0)) {
         dispose = 2; // if I don't have an image to revert back to, default to the old background
      }

      if (dispose == 3) { // use previous graphic
         for (pi = 0; pi < pcount; ++pi) {
            if (g->history[pi]) {
               memcpy( &g->out[pi * 4], &two_back[pi * 4], 4 );
            }
         }
      } else if (dispose == 2) {
         // restore what was changed last frame to background before that frame;
         for (pi = 0; pi < pcount; ++pi) {
            if (g->history[pi]) {
               memcpy( &g->out[pi * 4], &g->background[pi * 4], 4 );
            }
         }
      } else {
         // This is a non-disposal case either way, so just
         // leave the pixels as is, and they will become the new background
         // 1: do not dispose
         // 0:  not specified.
      }

      // background is what out is after the undoing of the previous frame;
      memcpy( g->background, g->out, 4 * g->w * g->h );
   }

   // clear my history;
   memset( g->history, 0x00, g->w * g->h );        // pixels that were affected previous frame

   // walk the blocks of the stream until a frame is decoded, the stream ends, or an error occurs
   for (;;) {
      int tag = stbi__get8(s);
      switch (tag) {
         case 0x2C: /* Image Descriptor */
         {
            stbi__int32 x, y, w, h;
            stbi_uc *o;

            x = stbi__get16le(s);
            y = stbi__get16le(s);
            w = stbi__get16le(s);
            h = stbi__get16le(s);
            // the frame rectangle must lie inside the logical screen
            if (((x + w) > (g->w)) || ((y + h) > (g->h)))
               return stbi__errpuc("bad Image Descriptor", "Corrupt GIF");

            // all positions below are byte offsets into g->out
            g->line_size = g->w * 4;
            g->start_x = x * 4;
            g->start_y = y * g->line_size;
            g->max_x   = g->start_x + w * 4;
            g->max_y   = g->start_y + h * g->line_size;
            g->cur_x   = g->start_x;
            g->cur_y   = g->start_y;

            // if the width of the specified rectangle is 0, that means
            // we may not see *any* pixels or the image is malformed;
            // to make sure this is caught, move the current y down to
            // max_y (which is what out_gif_code checks).
            if (w == 0)
               g->cur_y = g->max_y;

            g->lflags = stbi__get8(s);

            // bit 6 of the descriptor flags selects the interlaced row order
            if (g->lflags & 0x40) {
               g->step = 8 * g->line_size; // first interlaced spacing
               g->parse = 3;
            } else {
               g->step = g->line_size;
               g->parse = 0;
            }

            // prefer a local colour table, fall back to the global one
            if (g->lflags & 0x80) {
               stbi__gif_parse_colortable(s,g->lpal, 2 << (g->lflags & 7), g->eflags & 0x01 ? g->transparent : -1);
               g->color_table = (stbi_uc *) g->lpal;
            } else if (g->flags & 0x80) {
               g->color_table = (stbi_uc *) g->pal;
            } else
               return stbi__errpuc("missing color table", "Corrupt GIF");

            o = stbi__process_gif_raster(s, g);
            if (!o) return NULL;

            // if this was the first frame,
            pcount = g->w * g->h;
            if (first_frame && (g->bgindex > 0)) {
               // if first frame, any pixel not drawn to gets the background color
               // NOTE(review): palette entries are stored in the reverse byte order of
               // the pixels stbi__out_gif_code writes, so this straight 4-byte copy
               // looks like it swaps the first and third channels - confirm.
               for (pi = 0; pi < pcount; ++pi) {
                  if (g->history[pi] == 0) {
                     g->pal[g->bgindex][3] = 255; // just in case it was made transparent, undo that; It will be reset next frame if need be;
                     memcpy( &g->out[pi * 4], &g->pal[g->bgindex], 4 );
                  }
               }
            }

            return o;
         }

         case 0x21: // Comment Extension.
         {
            int len;
            int ext = stbi__get8(s);
            if (ext == 0xF9) { // Graphic Control Extension.
               len = stbi__get8(s);
               if (len == 4) {
                  g->eflags = stbi__get8(s);
                  g->delay = 10 * stbi__get16le(s); // delay - 1/100th of a second, saving as 1/1000ths.

                  // unset old transparent
                  if (g->transparent >= 0) {
                     g->pal[g->transparent][3] = 255;
                  }
                  // bit 0 of the flags says whether a transparent index follows
                  if (g->eflags & 0x01) {
                     g->transparent = stbi__get8(s);
                     if (g->transparent >= 0) {
                        g->pal[g->transparent][3] = 0;
                     }
                  } else {
                     // don't need transparent
                     stbi__skip(s, 1);
                     g->transparent = -1;
                  }
               } else {
                  // unexpected block size: skip the block and go read the next tag
                  stbi__skip(s, len);
                  break;
               }
            }
            // skip the remaining sub-blocks of the extension up to the zero-length terminator
            while ((len = stbi__get8(s)) != 0) {
               stbi__skip(s, len);
            }
            break;
         }

         case 0x3B: // gif stream termination code
            return (stbi_uc *) s; // using '1' causes warning on some compilers

         default:
            return stbi__errpuc("unknown code", "Corrupt GIF");
      }
   }
}
6587 
stbi__load_gif_main(stbi__context * s,int ** delays,int * x,int * y,int * z,int * comp,int req_comp)6588 static void *stbi__load_gif_main(stbi__context *s, int **delays, int *x, int *y, int *z, int *comp, int req_comp)
6589 {
6590    if (stbi__gif_test(s)) {
6591       int layers = 0;
6592       stbi_uc *u = 0;
6593       stbi_uc *out = 0;
6594       stbi_uc *two_back = 0;
6595       stbi__gif g;
6596       int stride;
6597       memset(&g, 0, sizeof(g));
6598       if (delays) {
6599          *delays = 0;
6600       }
6601 
6602       do {
6603          u = stbi__gif_load_next(s, &g, comp, req_comp, two_back);
6604          if (u == (stbi_uc *) s) u = 0;  // end of animated gif marker
6605 
6606          if (u) {
6607             *x = g.w;
6608             *y = g.h;
6609             ++layers;
6610             stride = g.w * g.h * 4;
6611 
6612             if (out) {
6613                out = (stbi_uc*) STBI_REALLOC( out, layers * stride );
6614                if (delays) {
6615                   *delays = (int*) STBI_REALLOC( *delays, sizeof(int) * layers );
6616                }
6617             } else {
6618                out = (stbi_uc*)stbi__malloc( layers * stride );
6619                if (delays) {
6620                   *delays = (int*) stbi__malloc( layers * sizeof(int) );
6621                }
6622             }
6623             memcpy( out + ((layers - 1) * stride), u, stride );
6624             if (layers >= 2) {
6625                two_back = out - 2 * stride;
6626             }
6627 
6628             if (delays) {
6629                (*delays)[layers - 1U] = g.delay;
6630             }
6631          }
6632       } while (u != 0);
6633 
6634       // free temp buffer;
6635       STBI_FREE(g.out);
6636       STBI_FREE(g.history);
6637       STBI_FREE(g.background);
6638 
6639       // do the final conversion after loading everything;
6640       if (req_comp && req_comp != 4)
6641          out = stbi__convert_format(out, 4, req_comp, layers * g.w, g.h);
6642 
6643       *z = layers;
6644       return out;
6645    } else {
6646       return stbi__errpuc("not GIF", "Image was not as a gif type.");
6647    }
6648 }
6649 
// Load the first frame of a GIF.
//
//   x, y      receive the image dimensions on success
//   comp      forwarded to stbi__gif_load_next
//   req_comp  requested components per pixel; 0 or 4 keeps the 4-component output
//   ri        unused
//
// Returns the pixel buffer, or NULL if no frame could be decoded.
static void *stbi__gif_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri)
{
   stbi_uc *frame = 0;
   stbi__gif g;
   memset(&g, 0, sizeof(g));
   STBI_NOTUSED(ri);

   frame = stbi__gif_load_next(s, &g, comp, req_comp, 0);
   if (frame == (stbi_uc *) s)
      frame = 0;  // end of animated gif marker

   if (!frame) {
      // if there was an error and we allocated an image buffer, free it!
      if (g.out)
         STBI_FREE(g.out);
   } else {
      *x = g.w;
      *y = g.h;

      // conversion happens only after a successful load so that the same
      // can be done for multiple frames.
      if (req_comp && req_comp != 4)
         frame = stbi__convert_format(frame, 4, req_comp, g.w, g.h);
   }

   // free buffers needed for multiple frame loading;
   STBI_FREE(g.history);
   STBI_FREE(g.background);

   return frame;
}
6678 
// Report the dimensions and component count of a GIF without decoding it.
// Thin wrapper around stbi__gif_info_raw; returns its result unchanged.
static int stbi__gif_info(stbi__context *s, int *x, int *y, int *comp)
{
   int ok = stbi__gif_info_raw(s,x,y,comp);
   return ok;
}
6683 #endif
6684 
6685 // *************************************************************************************************
6686 // Radiance RGBE HDR loader
6687 // originally by Nicolas Schulz
6688 #ifndef STBI_NO_HDR
// Compare the next bytes of the stream against a NUL-terminated signature.
// Returns 1 and rewinds the stream on a full match; returns 0 at the first
// mismatching byte without rewinding.
static int stbi__hdr_test_core(stbi__context *s, const char *signature)
{
   const char *expected = signature;

   while (*expected) {
      if (stbi__get8(s) != *expected)
          return 0;
      ++expected;
   }

   stbi__rewind(s);
   return 1;
}
6698 
// Probe the stream for either accepted HDR signature line ("#?RADIANCE" or
// "#?RGBE"). The stream is rewound after every attempt.
// Returns 1 if one of them matches, 0 otherwise.
static int stbi__hdr_test(stbi__context* s)
{
   int matched;

   matched = stbi__hdr_test_core(s, "#?RADIANCE\n");
   stbi__rewind(s);
   if (matched)
      return matched;

   // first signature did not match; try the alternative one
   matched = stbi__hdr_test_core(s, "#?RGBE\n");
   stbi__rewind(s);
   return matched;
}
6709 
6710 #define STBI__HDR_BUFLEN  1024
// Read one text line from the stream into buffer (which must hold
// STBI__HDR_BUFLEN bytes) and NUL-terminate it. The newline is consumed but
// not stored. A line that fills the buffer is truncated and the rest of it is
// discarded. Returns buffer.
static char *stbi__hdr_gettoken(stbi__context *z, char *buffer)
{
   int used = 0;
   char ch = (char) stbi__get8(z);

   for (;;) {
      if (stbi__at_eof(z) || ch == '\n')
         break;

      buffer[used++] = ch;

      if (used == STBI__HDR_BUFLEN-1) {
         // buffer is full: flush to end of line
         while (!stbi__at_eof(z) && stbi__get8(z) != '\n') {
            /* discard */
         }
         break;
      }

      ch = (char) stbi__get8(z);
   }

   buffer[used] = 0;
   return buffer;
}
6732 
// Convert one 4-byte RGBE pixel (three mantissas plus a shared exponent) into
// req_comp floats at output.
//   req_comp 1: average of the three channels
//   req_comp 2: average, then 1
//   req_comp 3: the three channels
//   req_comp 4: the three channels, then 1
// An exponent byte of 0 yields zero for the colour channels.
static void stbi__hdr_convert(float *output, stbi_uc *input, int req_comp)
{
   float scale;

   if (input[3] == 0) {
      // zero exponent: colour channels are all zero
      if (req_comp == 4) {
         output[3] = 1;
         output[0] = output[1] = output[2] = 0;
      } else if (req_comp == 3) {
         output[0] = output[1] = output[2] = 0;
      } else if (req_comp == 2) {
         output[1] = 1;
         output[0] = 0;
      } else if (req_comp == 1) {
         output[0] = 0;
      }
      return;
   }

   // Exponent
   scale = (float) ldexp(1.0f, input[3] - (int)(128 + 8));

   if (req_comp > 2) {
      output[0] = input[0] * scale;
      output[1] = input[1] * scale;
      output[2] = input[2] * scale;
   } else {
      output[0] = (input[0] + input[1] + input[2]) * scale / 3;
   }

   if (req_comp == 2) output[1] = 1;
   if (req_comp == 4) output[3] = 1;
}
6759 
// Load a Radiance RGBE (.hdr) image as floating-point pixels.
//
//   s         input stream, positioned at the start of the file
//   x, y      receive the image width and height
//   comp      optional; set to 3
//   req_comp  requested floats per pixel, or 0 for 3
//   ri        unused
//
// Only files whose header contains "FORMAT=32-bit_rle_rgbe" and whose
// resolution line has the form "-Y <height> +X <width>" are accepted.
// Returns width*height*req_comp floats, or NULL on error.
static float *stbi__hdr_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri)
{
   char buffer[STBI__HDR_BUFLEN];
   char *token;
   int valid = 0;
   int width, height;
   stbi_uc *scanline;
   float *hdr_data;
   int len;
   unsigned char count, value;
   int i, j, k, c1,c2, z;
   const char *headerToken;
   STBI_NOTUSED(ri);

   // Check identifier
   headerToken = stbi__hdr_gettoken(s,buffer);
   if (strcmp(headerToken, "#?RADIANCE") != 0 && strcmp(headerToken, "#?RGBE") != 0)
      return stbi__errpf("not HDR", "Corrupt HDR image");

   // Parse header (ends at the first empty line)
   for(;;) {
      token = stbi__hdr_gettoken(s,buffer);
      if (token[0] == 0) break;
      if (strcmp(token, "FORMAT=32-bit_rle_rgbe") == 0) valid = 1;
   }

   if (!valid)    return stbi__errpf("unsupported format", "Unsupported HDR format");

   // Parse width and height
   // can't use sscanf() if we're not using stdio!
   token = stbi__hdr_gettoken(s,buffer);
   if (strncmp(token, "-Y ", 3))  return stbi__errpf("unsupported data layout", "Unsupported HDR format");
   token += 3;
   height = (int) strtol(token, &token, 10);
   while (*token == ' ') ++token;
   if (strncmp(token, "+X ", 3))  return stbi__errpf("unsupported data layout", "Unsupported HDR format");
   token += 3;
   width = (int) strtol(token, NULL, 10);

   *x = width;
   *y = height;

   if (comp) *comp = 3;
   if (req_comp == 0) req_comp = 3;

   if (!stbi__mad4sizes_valid(width, height, req_comp, sizeof(float), 0))
      return stbi__errpf("too large", "HDR image is too large");

   // Read data
   hdr_data = (float *) stbi__malloc_mad4(width, height, req_comp, sizeof(float), 0);
   if (!hdr_data)
      return stbi__errpf("outofmem", "Out of memory");

   // Load image data
   // image data is stored as some number of scanlines; widths outside
   // [8, 32768) are read as flat (uncompressed) data
   if ( width < 8 || width >= 32768) {
      // Read flat data
      for (j=0; j < height; ++j) {
         for (i=0; i < width; ++i) {
            stbi_uc rgbe[4];
           main_decode_loop:
            stbi__getn(s, rgbe, 4);
            stbi__hdr_convert(hdr_data + j * width * req_comp + i * req_comp, rgbe, req_comp);
         }
      }
   } else {
      // Read RLE-encoded data
      scanline = NULL;

      for (j = 0; j < height; ++j) {
         c1 = stbi__get8(s);
         c2 = stbi__get8(s);
         len = stbi__get8(s);
         if (c1 != 2 || c2 != 2 || (len & 0x80)) {
            // not run-length encoded, so we have to actually use THIS data as a decoded
            // pixel (note this can't be a valid pixel--one of RGB must be >= 128)
            stbi_uc rgbe[4];
            rgbe[0] = (stbi_uc) c1;
            rgbe[1] = (stbi_uc) c2;
            rgbe[2] = (stbi_uc) len;
            rgbe[3] = (stbi_uc) stbi__get8(s);
            stbi__hdr_convert(hdr_data, rgbe, req_comp);
            // continue in the flat-data loop above, starting at the second pixel
            i = 1;
            j = 0;
            STBI_FREE(scanline);
            goto main_decode_loop; // yes, this makes no sense
         }
         len <<= 8;
         len |= stbi__get8(s);
         if (len != width) { STBI_FREE(hdr_data); STBI_FREE(scanline); return stbi__errpf("invalid decoded scanline length", "corrupt HDR"); }
         if (scanline == NULL) {
            scanline = (stbi_uc *) stbi__malloc_mad2(width, 4, 0);
            if (!scanline) {
               STBI_FREE(hdr_data);
               return stbi__errpf("outofmem", "Out of memory");
            }
         }

         // the four components of a scanline are stored one after another,
         // each run-length encoded on its own
         for (k = 0; k < 4; ++k) {
            int nleft;
            i = 0;
            while ((nleft = width - i) > 0) {
               count = stbi__get8(s);
               if (count > 128) {
                  // Run (count - 128 is at least 1 here)
                  value = stbi__get8(s);
                  count -= 128;
                  if (count > nleft) { STBI_FREE(hdr_data); STBI_FREE(scanline); return stbi__errpf("corrupt", "bad RLE data in HDR"); }
                  for (z = 0; z < count; ++z)
                     scanline[i++ * 4 + k] = value;
               } else {
                  // Dump
                  // A zero count makes no progress; stbi__get8 yields 0 at end of
                  // file, so without this check a truncated file would loop forever.
                  if (count == 0 || count > nleft) { STBI_FREE(hdr_data); STBI_FREE(scanline); return stbi__errpf("corrupt", "bad RLE data in HDR"); }
                  for (z = 0; z < count; ++z)
                     scanline[i++ * 4 + k] = stbi__get8(s);
               }
            }
         }
         for (i=0; i < width; ++i)
            stbi__hdr_convert(hdr_data+(j*width + i)*req_comp, scanline + i*4, req_comp);
      }
      if (scanline)
         STBI_FREE(scanline);
   }

   return hdr_data;
}
6887 
// Report the dimensions of a Radiance HDR image without decoding the pixels.
//
//   x, y, comp  optional; on success receive width, height and 3
//
// Returns 1 on success. Returns 0 after rewinding the stream if the signature,
// the FORMAT line, or the "-Y <h> +X <w>" resolution line is missing.
static int stbi__hdr_info(stbi__context *s, int *x, int *y, int *comp)
{
   char line[STBI__HDR_BUFLEN];
   char *cursor;
   int format_seen = 0;
   int unused;

   // let the code below write unconditionally
   if (!x) x = &unused;
   if (!y) y = &unused;
   if (!comp) comp = &unused;

   if (stbi__hdr_test(s) == 0)
      goto not_hdr;

   // header lines run until the first empty one
   for(;;) {
      cursor = stbi__hdr_gettoken(s,line);
      if (cursor[0] == 0) break;
      if (strcmp(cursor, "FORMAT=32-bit_rle_rgbe") == 0) format_seen = 1;
   }
   if (!format_seen)
      goto not_hdr;

   // resolution line
   cursor = stbi__hdr_gettoken(s,line);
   if (strncmp(cursor, "-Y ", 3) != 0)
      goto not_hdr;
   cursor += 3;
   *y = (int) strtol(cursor, &cursor, 10);

   while (*cursor == ' ') ++cursor;
   if (strncmp(cursor, "+X ", 3) != 0)
      goto not_hdr;
   cursor += 3;
   *x = (int) strtol(cursor, NULL, 10);

   *comp = 3;
   return 1;

not_hdr:
   stbi__rewind( s );
   return 0;
}
6931 #endif // STBI_NO_HDR
6932 
6933 #ifndef STBI_NO_BMP
// Report the dimensions and component count of a BMP by parsing its header.
// The stream is rewound whether or not parsing succeeds.
//
//   x, y, comp  optional outputs
//
// Returns 1 on success, 0 if the header could not be parsed.
static int stbi__bmp_info(stbi__context *s, int *x, int *y, int *comp)
{
   stbi__bmp_data hdr;
   void *parsed;

   hdr.all_a = 255;
   parsed = stbi__bmp_parse_header(s, &hdr);
   stbi__rewind( s );

   if (parsed == NULL)
      return 0;

   if (x) *x = s->img_x;
   if (y) *y = s->img_y;
   if (comp) {
      // 24 bpp with an alpha mask of 0xff000000 is reported as 3 components;
      // otherwise any non-zero alpha mask means 4
      int three_only = (hdr.bpp == 24 && hdr.ma == 0xff000000);
      *comp = three_only ? 3 : (hdr.ma ? 4 : 3);
   }
   return 1;
}
6954 #endif
6955 
6956 #ifndef STBI_NO_PSD
// Report the dimensions of a PSD file from its header.
//
//   x, y, comp  optional; on success receive width, height and 4
//
// Returns 1 on success. Returns 0 after rewinding the stream if the signature,
// version, channel count, bit depth or colour mode is not accepted. Note that
// *x and *y are written before the depth and colour mode are checked.
static int stbi__psd_info(stbi__context *s, int *x, int *y, int *comp)
{
   int channels, bits, unused;

   if (!x) x = &unused;
   if (!y) y = &unused;
   if (!comp) comp = &unused;

   // signature
   if (stbi__get32be(s) != 0x38425053)
      goto reject;
   // version must be 1
   if (stbi__get16be(s) != 1)
      goto reject;

   stbi__skip(s, 6);

   channels = stbi__get16be(s);
   if (channels < 0 || channels > 16)
      goto reject;

   *y = stbi__get32be(s);
   *x = stbi__get32be(s);

   bits = stbi__get16be(s);
   if (!(bits == 8 || bits == 16))
      goto reject;

   // colour mode must be 3
   if (stbi__get16be(s) != 3)
      goto reject;

   *comp = 4;
   return 1;

reject:
   stbi__rewind( s );
   return 0;
}
6991 
// Check whether the stream holds a PSD file with 16 bits per channel.
// Returns 1 if so (the stream is left after the depth field in that case);
// otherwise rewinds the stream and returns 0.
static int stbi__psd_is16(stbi__context *s)
{
   int channels, bits;

   // signature
   if (stbi__get32be(s) != 0x38425053)
      goto reject;
   // version must be 1
   if (stbi__get16be(s) != 1)
      goto reject;

   stbi__skip(s, 6);

   channels = stbi__get16be(s);
   if (channels < 0 || channels > 16)
      goto reject;

   // the two 32-bit dimension fields are not needed here
   (void) stbi__get32be(s);
   (void) stbi__get32be(s);

   bits = stbi__get16be(s);
   if (bits != 16)
      goto reject;

   return 1;

reject:
   stbi__rewind( s );
   return 0;
}
7018 #endif
7019 
7020 #ifndef STBI_NO_PIC
// Report the dimensions and component count of a Softimage PIC file.
//
//   x, y, comp  optional; on success receive width, height and 3 or 4
//               (4 when any packet's channel mask has bit 0x10 set)
//
// Returns 1 on success. On every failure path the stream is rewound and 0 is
// returned. Note that *x and *y are written before the size is validated.
static int stbi__pic_info(stbi__context *s, int *x, int *y, int *comp)
{
   int act_comp=0,num_packets=0,chained,dummy;
   stbi__pic_packet packets[10];

   if (!x) x = &dummy;
   if (!y) y = &dummy;
   if (!comp) comp = &dummy;

   // magic number
   if (!stbi__pic_is4(s,"\x53\x80\xF6\x34")) {
      rewind_fail:
      stbi__rewind(s);
      return 0;
   }

   stbi__skip(s, 88);

   *x = stbi__get16be(s);
   *y = stbi__get16be(s);
   if (stbi__at_eof(s))
      goto rewind_fail;
   // reject images with more than 2^28 pixels
   if ( (*x) != 0 && (1 << 28) / (*x) < (*y))
      goto rewind_fail;

   stbi__skip(s, 8);

   // read the chain of packet descriptors
   do {
      stbi__pic_packet *packet;

      // Too many packets. Rewind like every other failure path so the next
      // format probe starts from the beginning of the stream (this path
      // previously returned without rewinding).
      if (num_packets==sizeof(packets)/sizeof(packets[0]))
         goto rewind_fail;

      packet = &packets[num_packets++];
      chained = stbi__get8(s);
      packet->size    = stbi__get8(s);
      packet->type    = stbi__get8(s);
      packet->channel = stbi__get8(s);
      act_comp |= packet->channel;

      if (stbi__at_eof(s))
         goto rewind_fail;
      // only 8 bits per channel is accepted
      if (packet->size != 8)
         goto rewind_fail;
   } while (chained);

   *comp = (act_comp & 0x10 ? 4 : 3);

   return 1;
}
7077 #endif
7078 
7079 // *************************************************************************************************
7080 // Portable Gray Map and Portable Pixel Map loader
7081 // by Ken Miller
7082 //
7083 // PGM: http://netpbm.sourceforge.net/doc/pgm.html
7084 // PPM: http://netpbm.sourceforge.net/doc/ppm.html
7085 //
7086 // Known limitations:
//    Header comments ('#' to end of line) are only skipped between header tokens
7088 //    Does not support ASCII image data (formats P2 and P3)
7089 //    Does not support 16-bit-per-channel
7090 
7091 #ifndef STBI_NO_PNM
7092 
// Check whether the stream begins with a binary PGM ("P5") or PPM ("P6")
// identifier. On a match the two bytes stay consumed and 1 is returned;
// otherwise the stream is rewound and 0 is returned.
static int      stbi__pnm_test(stbi__context *s)
{
   char magic0 = (char) stbi__get8(s);
   char magic1 = (char) stbi__get8(s);

   if (magic0 == 'P' && (magic1 == '5' || magic1 == '6'))
      return 1;

   stbi__rewind( s );
   return 0;
}
7104 
// Decode a binary PGM/PPM image.
//   s        input stream
//   x, y     receive the image width and height
//   comp     optional; receives the number of components in the file
//   req_comp if non-zero and different from the file's component count, the
//            pixels are converted to this many components
//   ri       unused by this loader
// Returns the pixel buffer, or NULL (via stbi__errpuc) on a bad header,
// an oversized image, an allocation failure, or truncated pixel data.
static void *stbi__pnm_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri)
{
   stbi_uc *out;
   STBI_NOTUSED(ri);

   if (!stbi__pnm_info(s, (int *)&s->img_x, (int *)&s->img_y, (int *)&s->img_n))
      return 0;

   *x = s->img_x;
   *y = s->img_y;
   if (comp) *comp = s->img_n;

   if (!stbi__mad3sizes_valid(s->img_n, s->img_x, s->img_y, 0))
      return stbi__errpuc("too large", "PNM too large");

   out = (stbi_uc *) stbi__malloc_mad3(s->img_n, s->img_x, s->img_y, 0);
   if (!out) return stbi__errpuc("outofmem", "Out of memory");

   // A short read would leave part of the buffer uninitialised, so treat it
   // as a decode failure instead of handing back garbage pixels.
   // NOTE(review): relies on stbi__getn returning 0 when the data runs out.
   if (!stbi__getn(s, out, s->img_n * s->img_x * s->img_y)) {
      STBI_FREE(out);
      return stbi__errpuc("bad PNM", "PNM file truncated");
   }

   if (req_comp && req_comp != s->img_n) {
      // stbi__convert_format frees its input on failure and returns NULL,
      // so its result can be returned directly.
      out = stbi__convert_format(out, s->img_n, req_comp, s->img_x, s->img_y);
   }
   return out;
}
7130 
// Return 1 if c is one of the six whitespace characters recognised in a
// PNM header (space, tab, newline, vertical tab, form feed, carriage
// return), 0 otherwise.
static int      stbi__pnm_isspace(char c)
{
   switch (c) {
      case ' ':
      case '\t':
      case '\n':
      case '\v':
      case '\f':
      case '\r':
         return 1;
      default:
         return 0;
   }
}
7135 
// Advance past whitespace and '#' comments in a PNM header.
//   s  input stream
//   c  in/out: the current look-ahead character; on return it holds the
//      first character that is neither whitespace nor part of a comment
//      (or whatever was last read when the stream ran out)
static void     stbi__pnm_skip_whitespace(stbi__context *s, char *c)
{
   int saw_comment;

   do {
      // Consume a run of whitespace.
      while (!stbi__at_eof(s) && stbi__pnm_isspace(*c))
         *c = (char) stbi__get8(s);

      // A '#' starts a comment that extends to the end of the line; after
      // it, go round again in case more whitespace or comments follow.
      saw_comment = !(stbi__at_eof(s) || *c != '#');
      if (saw_comment) {
         while (!stbi__at_eof(s) && *c != '\n' && *c != '\r' )
            *c = (char) stbi__get8(s);
      }
   } while (saw_comment);
}
7149 
// Return 1 if c is an ASCII decimal digit, 0 otherwise.
static int      stbi__pnm_isdigit(char c)
{
   return !(c < '0' || c > '9');
}
7154 
// Parse an unsigned decimal integer from a PNM header.
//   s  input stream
//   c  in/out: the current look-ahead character; on return it holds the
//      first character after the digits
// Returns the parsed value (0 if *c is not a digit). If the digits would
// not fit in a 32-bit int, parsing stops and the result of stbi__err is
// returned instead of overflowing.
static int      stbi__pnm_getinteger(stbi__context *s, char *c)
{
   const int int_limit = 2147483647;   // largest value of a 32-bit signed int
   int value = 0;

   while (!stbi__at_eof(s) && stbi__pnm_isdigit(*c)) {
      int digit = *c - '0';

      // value*10 + digit > int_limit, rearranged so nothing overflows.
      // Signed overflow is undefined behaviour and the header is untrusted.
      if (value > (int_limit - digit) / 10)
         return stbi__err("integer parse overflow", "Parsing an integer in the PPM header overflowed a 32-bit int");

      value = value*10 + digit;
      *c = (char) stbi__get8(s);
   }

   return value;
}
7166 
// Parse a binary PGM/PPM header from the start of the stream.
//   s          input stream; rewound first, and again on any failure
//   x, y, comp optional outputs (any of them may be NULL): width, height
//              and component count (1 for "P5", 3 for "P6")
// Returns 1 for an acceptable header. Returns 0 if the identifier is not
// "P5"/"P6", and the result of stbi__err if a dimension is not positive or
// the maximum sample value exceeds 255.
static int      stbi__pnm_info(stbi__context *s, int *x, int *y, int *comp)
{
   int maxv, dummy;
   char c, p, t;

   if (!x) x = &dummy;
   if (!y) y = &dummy;
   if (!comp) comp = &dummy;

   stbi__rewind(s);

   // Get identifier
   p = (char) stbi__get8(s);
   t = (char) stbi__get8(s);
   if (p != 'P' || (t != '5' && t != '6')) {
       stbi__rewind(s);
       return 0;
   }

   *comp = (t == '6') ? 3 : 1;  // '5' is 1-component .pgm; '6' is 3-component .ppm

   c = (char) stbi__get8(s);
   stbi__pnm_skip_whitespace(s, &c);

   // Each dimension is validated right after it is read: x and y may both
   // point at the shared dummy, so the value must be checked before the
   // next read overwrites it.
   *x = stbi__pnm_getinteger(s, &c); // read width
   if (*x <= 0) {
      stbi__rewind(s);
      return stbi__err("invalid width", "PPM image header had an invalid width");
   }
   stbi__pnm_skip_whitespace(s, &c);

   *y = stbi__pnm_getinteger(s, &c); // read height
   if (*y <= 0) {
      stbi__rewind(s);
      return stbi__err("invalid height", "PPM image header had an invalid height");
   }
   stbi__pnm_skip_whitespace(s, &c);

   maxv = stbi__pnm_getinteger(s, &c);  // read max value

   if (maxv > 255) {
      // Rewind like the identifier failure path so that a later format
      // probe starts from the beginning of the stream.
      stbi__rewind(s);
      return stbi__err("max value > 255", "PPM image not 8-bit");
   }
   return 1;
}
7204 #endif
7205 
// Identify the image in the stream by trying each compiled-in format's info
// routine in a fixed order; the first one that accepts the stream wins.
//   s          input stream
//   x, y, comp outputs forwarded unchanged to each format's info routine
// Returns 1 if some format accepted the stream, otherwise the result of
// stbi__err.
static int stbi__info_main(stbi__context *s, int *x, int *y, int *comp)
{
#if !defined(STBI_NO_JPEG)
   if (stbi__jpeg_info(s, x, y, comp)) { return 1; }
#endif

#if !defined(STBI_NO_PNG)
   if (stbi__png_info(s, x, y, comp)) { return 1; }
#endif

#if !defined(STBI_NO_GIF)
   if (stbi__gif_info(s, x, y, comp)) { return 1; }
#endif

#if !defined(STBI_NO_BMP)
   if (stbi__bmp_info(s, x, y, comp)) { return 1; }
#endif

#if !defined(STBI_NO_PSD)
   if (stbi__psd_info(s, x, y, comp)) { return 1; }
#endif

#if !defined(STBI_NO_PIC)
   if (stbi__pic_info(s, x, y, comp)) { return 1; }
#endif

#if !defined(STBI_NO_PNM)
   if (stbi__pnm_info(s, x, y, comp)) { return 1; }
#endif

#if !defined(STBI_NO_HDR)
   if (stbi__hdr_info(s, x, y, comp)) { return 1; }
#endif

   // TGA is deliberately tried last: its test is the least reliable one.
#if !defined(STBI_NO_TGA)
   if (stbi__tga_info(s, x, y, comp)) { return 1; }
#endif

   return stbi__err("unknown image type", "Image not of any known type, or corrupt");
}
7247 
// Report whether the stream holds a 16-bit image, asking the PNG check and
// then the PSD check (whichever are compiled in).
// Returns 1 if either check accepts the stream, 0 otherwise.
static int stbi__is_16_main(stbi__context *s)
{
#if !defined(STBI_NO_PNG)
   if (stbi__png_is16(s)) { return 1; }
#endif

#if !defined(STBI_NO_PSD)
   if (stbi__psd_is16(s)) { return 1; }
#endif

   return 0;
}
7260 
7261 #ifndef STBI_NO_STDIO
stbi_info(char const * filename,int * x,int * y,int * comp)7262 STBIDEF int stbi_info(char const *filename, int *x, int *y, int *comp)
7263 {
7264     FILE *f = stbi__fopen(filename, "rb");
7265     int result;
7266     if (!f) return stbi__err("can't fopen", "Unable to open file");
7267     result = stbi_info_from_file(f, x, y, comp);
7268     fclose(f);
7269     return result;
7270 }
7271 
stbi_info_from_file(FILE * f,int * x,int * y,int * comp)7272 STBIDEF int stbi_info_from_file(FILE *f, int *x, int *y, int *comp)
7273 {
7274    int r;
7275    stbi__context s;
7276    long pos = ftell(f);
7277    stbi__start_file(&s, f);
7278    r = stbi__info_main(&s,x,y,comp);
7279    fseek(f,pos,SEEK_SET);
7280    return r;
7281 }
7282 
stbi_is_16_bit(char const * filename)7283 STBIDEF int stbi_is_16_bit(char const *filename)
7284 {
7285     FILE *f = stbi__fopen(filename, "rb");
7286     int result;
7287     if (!f) return stbi__err("can't fopen", "Unable to open file");
7288     result = stbi_is_16_bit_from_file(f);
7289     fclose(f);
7290     return result;
7291 }
7292 
stbi_is_16_bit_from_file(FILE * f)7293 STBIDEF int stbi_is_16_bit_from_file(FILE *f)
7294 {
7295    int r;
7296    stbi__context s;
7297    long pos = ftell(f);
7298    stbi__start_file(&s, f);
7299    r = stbi__is_16_main(&s);
7300    fseek(f,pos,SEEK_SET);
7301    return r;
7302 }
7303 #endif // !STBI_NO_STDIO
7304 
stbi_info_from_memory(stbi_uc const * buffer,int len,int * x,int * y,int * comp)7305 STBIDEF int stbi_info_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp)
7306 {
7307    stbi__context s;
7308    stbi__start_mem(&s,buffer,len);
7309    return stbi__info_main(&s,x,y,comp);
7310 }
7311 
stbi_info_from_callbacks(stbi_io_callbacks const * c,void * user,int * x,int * y,int * comp)7312 STBIDEF int stbi_info_from_callbacks(stbi_io_callbacks const *c, void *user, int *x, int *y, int *comp)
7313 {
7314    stbi__context s;
7315    stbi__start_callbacks(&s, (stbi_io_callbacks *) c, user);
7316    return stbi__info_main(&s,x,y,comp);
7317 }
7318 
stbi_is_16_bit_from_memory(stbi_uc const * buffer,int len)7319 STBIDEF int stbi_is_16_bit_from_memory(stbi_uc const *buffer, int len)
7320 {
7321    stbi__context s;
7322    stbi__start_mem(&s,buffer,len);
7323    return stbi__is_16_main(&s);
7324 }
7325 
stbi_is_16_bit_from_callbacks(stbi_io_callbacks const * c,void * user)7326 STBIDEF int stbi_is_16_bit_from_callbacks(stbi_io_callbacks const *c, void *user)
7327 {
7328    stbi__context s;
7329    stbi__start_callbacks(&s, (stbi_io_callbacks *) c, user);
7330    return stbi__is_16_main(&s);
7331 }
7332 
7333 #endif // STB_IMAGE_IMPLEMENTATION
7334 
7335 /*
7336    revision history:
7337       2.20  (2019-02-07) support utf8 filenames in Windows; fix warnings and platform ifdefs
7338       2.19  (2018-02-11) fix warning
7339       2.18  (2018-01-30) fix warnings
      2.17  (2018-01-29) change stbi__shiftsigned to avoid clang -O2 bug
7341                          1-bit BMP
7342                          *_is_16_bit api
7343                          avoid warnings
7344       2.16  (2017-07-23) all functions have 16-bit variants;
7345                          STBI_NO_STDIO works again;
7346                          compilation fixes;
7347                          fix rounding in unpremultiply;
7348                          optimize vertical flip;
7349                          disable raw_len validation;
7350                          documentation fixes
7351       2.15  (2017-03-18) fix png-1,2,4 bug; now all Imagenet JPGs decode;
7352                          warning fixes; disable run-time SSE detection on gcc;
7353                          uniform handling of optional "return" values;
7354                          thread-safe initialization of zlib tables
7355       2.14  (2017-03-03) remove deprecated STBI_JPEG_OLD; fixes for Imagenet JPGs
7356       2.13  (2016-11-29) add 16-bit API, only supported for PNG right now
7357       2.12  (2016-04-02) fix typo in 2.11 PSD fix that caused crashes
7358       2.11  (2016-04-02) allocate large structures on the stack
7359                          remove white matting for transparent PSD
7360                          fix reported channel count for PNG & BMP
7361                          re-enable SSE2 in non-gcc 64-bit
7362                          support RGB-formatted JPEG
7363                          read 16-bit PNGs (only as 8-bit)
7364       2.10  (2016-01-22) avoid warning introduced in 2.09 by STBI_REALLOC_SIZED
7365       2.09  (2016-01-16) allow comments in PNM files
7366                          16-bit-per-pixel TGA (not bit-per-component)
7367                          info() for TGA could break due to .hdr handling
                         info() for BMP now shares code instead of sloppy parse
7369                          can use STBI_REALLOC_SIZED if allocator doesn't support realloc
7370                          code cleanup
7371       2.08  (2015-09-13) fix to 2.07 cleanup, reading RGB PSD as RGBA
7372       2.07  (2015-09-13) fix compiler warnings
7373                          partial animated GIF support
7374                          limited 16-bpc PSD support
7375                          #ifdef unused functions
7376                          bug with < 92 byte PIC,PNM,HDR,TGA
7377       2.06  (2015-04-19) fix bug where PSD returns wrong '*comp' value
7378       2.05  (2015-04-19) fix bug in progressive JPEG handling, fix warning
7379       2.04  (2015-04-15) try to re-enable SIMD on MinGW 64-bit
7380       2.03  (2015-04-12) extra corruption checking (mmozeiko)
7381                          stbi_set_flip_vertically_on_load (nguillemot)
7382                          fix NEON support; fix mingw support
7383       2.02  (2015-01-19) fix incorrect assert, fix warning
7384       2.01  (2015-01-17) fix various warnings; suppress SIMD on gcc 32-bit without -msse2
7385       2.00b (2014-12-25) fix STBI_MALLOC in progressive JPEG
7386       2.00  (2014-12-25) optimize JPG, including x86 SSE2 & NEON SIMD (ryg)
7387                          progressive JPEG (stb)
7388                          PGM/PPM support (Ken Miller)
7389                          STBI_MALLOC,STBI_REALLOC,STBI_FREE
7390                          GIF bugfix -- seemingly never worked
7391                          STBI_NO_*, STBI_ONLY_*
7392       1.48  (2014-12-14) fix incorrectly-named assert()
7393       1.47  (2014-12-14) 1/2/4-bit PNG support, both direct and paletted (Omar Cornut & stb)
7394                          optimize PNG (ryg)
7395                          fix bug in interlaced PNG with user-specified channel count (stb)
7396       1.46  (2014-08-26)
7397               fix broken tRNS chunk (colorkey-style transparency) in non-paletted PNG
7398       1.45  (2014-08-16)
7399               fix MSVC-ARM internal compiler error by wrapping malloc
7400       1.44  (2014-08-07)
7401               various warning fixes from Ronny Chevalier
7402       1.43  (2014-07-15)
7403               fix MSVC-only compiler problem in code changed in 1.42
7404       1.42  (2014-07-09)
7405               don't define _CRT_SECURE_NO_WARNINGS (affects user code)
7406               fixes to stbi__cleanup_jpeg path
7407               added STBI_ASSERT to avoid requiring assert.h
7408       1.41  (2014-06-25)
7409               fix search&replace from 1.36 that messed up comments/error messages
7410       1.40  (2014-06-22)
7411               fix gcc struct-initialization warning
7412       1.39  (2014-06-15)
7413               fix to TGA optimization when req_comp != number of components in TGA;
7414               fix to GIF loading because BMP wasn't rewinding (whoops, no GIFs in my test suite)
7415               add support for BMP version 5 (more ignored fields)
7416       1.38  (2014-06-06)
7417               suppress MSVC warnings on integer casts truncating values
7418               fix accidental rename of 'skip' field of I/O
7419       1.37  (2014-06-04)
7420               remove duplicate typedef
7421       1.36  (2014-06-03)
7422               convert to header file single-file library
7423               if de-iphone isn't set, load iphone images color-swapped instead of returning NULL
7424       1.35  (2014-05-27)
7425               various warnings
7426               fix broken STBI_SIMD path
7427               fix bug where stbi_load_from_file no longer left file pointer in correct place
7428               fix broken non-easy path for 32-bit BMP (possibly never used)
7429               TGA optimization by Arseny Kapoulkine
7430       1.34  (unknown)
7431               use STBI_NOTUSED in stbi__resample_row_generic(), fix one more leak in tga failure case
7432       1.33  (2011-07-14)
7433               make stbi_is_hdr work in STBI_NO_HDR (as specified), minor compiler-friendly improvements
7434       1.32  (2011-07-13)
7435               support for "info" function for all supported filetypes (SpartanJ)
7436       1.31  (2011-06-20)
7437               a few more leak fixes, bug in PNG handling (SpartanJ)
7438       1.30  (2011-06-11)
              added ability to load files via callbacks to accommodate custom input streams (Ben Wenger)
7440               removed deprecated format-specific test/load functions
7441               removed support for installable file formats (stbi_loader) -- would have been broken for IO callbacks anyway
7442               error cases in bmp and tga give messages and don't leak (Raymond Barbiero, grisha)
7443               fix inefficiency in decoding 32-bit BMP (David Woo)
7444       1.29  (2010-08-16)
7445               various warning fixes from Aurelien Pocheville
7446       1.28  (2010-08-01)
7447               fix bug in GIF palette transparency (SpartanJ)
7448       1.27  (2010-08-01)
7449               cast-to-stbi_uc to fix warnings
7450       1.26  (2010-07-24)
7451               fix bug in file buffering for PNG reported by SpartanJ
7452       1.25  (2010-07-17)
7453               refix trans_data warning (Won Chun)
7454       1.24  (2010-07-12)
7455               perf improvements reading from files on platforms with lock-heavy fgetc()
7456               minor perf improvements for jpeg
7457               deprecated type-specific functions so we'll get feedback if they're needed
7458               attempt to fix trans_data warning (Won Chun)
7459       1.23    fixed bug in iPhone support
7460       1.22  (2010-07-10)
7461               removed image *writing* support
7462               stbi_info support from Jetro Lauha
7463               GIF support from Jean-Marc Lienher
7464               iPhone PNG-extensions from James Brown
              warning-fixes from Nicolas Schulz and Janez Zemva (i.e. Janez (U+017D)emva)
7466       1.21    fix use of 'stbi_uc' in header (reported by jon blow)
7467       1.20    added support for Softimage PIC, by Tom Seddon
7468       1.19    bug in interlaced PNG corruption check (found by ryg)
7469       1.18  (2008-08-02)
7470               fix a threading bug (local mutable static)
7471       1.17    support interlaced PNG
7472       1.16    major bugfix - stbi__convert_format converted one too many pixels
7473       1.15    initialize some fields for thread safety
7474       1.14    fix threadsafe conversion bug
7475               header-file-only version (#define STBI_HEADER_FILE_ONLY before including)
7476       1.13    threadsafe
7477       1.12    const qualifiers in the API
7478       1.11    Support installable IDCT, colorspace conversion routines
7479       1.10    Fixes for 64-bit (don't use "unsigned long")
7480               optimized upsampling by Fabian "ryg" Giesen
7481       1.09    Fix format-conversion for PSD code (bad global variables!)
7482       1.08    Thatcher Ulrich's PSD code integrated by Nicolas Schulz
7483       1.07    attempt to fix C++ warning/errors again
7484       1.06    attempt to fix C++ warning/errors again
7485       1.05    fix TGA loading to return correct *comp and use good luminance calc
7486       1.04    default float alpha is 1, not 255; use 'void *' for stbi_image_free
7487       1.03    bugfixes to STBI_NO_STDIO, STBI_NO_HDR
7488       1.02    support for (subset of) HDR files, float interface for preferred access to them
7489       1.01    fix bug: possible bug in handling right-side up bmps... not sure
7490               fix bug: the stbi__bmp_load() and stbi__tga_load() functions didn't work at all
7491       1.00    interface to zlib that skips zlib header
7492       0.99    correct handling of alpha in palette
7493       0.98    TGA loader by lonesock; dynamically add loaders (untested)
7494       0.97    jpeg errors on too large a file; also catch another malloc failure
7495       0.96    fix detection of invalid v value - particleman@mollyrocket forum
7496       0.95    during header scan, seek to markers in case of padding
7497       0.94    STBI_NO_STDIO to disable stdio usage; rename all #defines the same
7498       0.93    handle jpegtran output; verbose errors
7499       0.92    read 4,8,16,24,32-bit BMP files of several formats
7500       0.91    output 24-bit Windows 3.0 BMP files
7501       0.90    fix a few more warnings; bump version number to approach 1.0
7502       0.61    bugfixes due to Marc LeBlanc, Christopher Lloyd
7503       0.60    fix compiling as c++
7504       0.59    fix warnings: merge Dave Moore's -Wall fixes
7505       0.58    fix bug: zlib uncompressed mode len/nlen was wrong endian
7506       0.57    fix bug: jpg last huffman symbol before marker was >9 bits but less than 16 available
7507       0.56    fix bug: zlib uncompressed mode len vs. nlen
7508       0.55    fix bug: restart_interval not initialized to 0
7509       0.54    allow NULL for 'int *comp'
7510       0.53    fix bug in png 3->4; speedup png decoding
7511       0.52    png handles req_comp=3,4 directly; minor cleanup; jpeg comments
7512       0.51    obey req_comp requests, 1-component jpegs return as 1-component,
7513               on 'test' only check type, not whether we support this variant
7514       0.50  (2006-11-19)
7515               first released version
7516 */
7517 
7518 
7519 /*
7520 ------------------------------------------------------------------------------
7521 This software is available under 2 licenses -- choose whichever you prefer.
7522 ------------------------------------------------------------------------------
7523 ALTERNATIVE A - MIT License
7524 Copyright (c) 2017 Sean Barrett
7525 Permission is hereby granted, free of charge, to any person obtaining a copy of
7526 this software and associated documentation files (the "Software"), to deal in
7527 the Software without restriction, including without limitation the rights to
7528 use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
7529 of the Software, and to permit persons to whom the Software is furnished to do
7530 so, subject to the following conditions:
7531 The above copyright notice and this permission notice shall be included in all
7532 copies or substantial portions of the Software.
7533 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
7534 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
7535 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
7536 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
7537 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
7538 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
7539 SOFTWARE.
7540 ------------------------------------------------------------------------------
7541 ALTERNATIVE B - Public Domain (www.unlicense.org)
7542 This is free and unencumbered software released into the public domain.
7543 Anyone is free to copy, modify, publish, use, compile, sell, or distribute this
7544 software, either in source code form or as a compiled binary, for any purpose,
7545 commercial or non-commercial, and by any means.
7546 In jurisdictions that recognize copyright laws, the author or authors of this
7547 software dedicate any and all copyright interest in the software to the public
7548 domain. We make this dedication for the benefit of the public at large and to
7549 the detriment of our heirs and successors. We intend this dedication to be an
7550 overt act of relinquishment in perpetuity of all present and future rights to
7551 this software under copyright law.
7552 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
7553 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
7554 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
7555 AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
7556 ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
7557 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
7558 ------------------------------------------------------------------------------
7559 */
7560