1 /* stb_image - v2.22 - public domain image loader - http://nothings.org/stb
2                                   no warranty implied; use at your own risk
3 
4    Do this:
5       #define STB_IMAGE_IMPLEMENTATION
6    before you include this file in *one* C or C++ file to create the implementation.
7 
8    // i.e. it should look like this:
9    #include ...
10    #include ...
11    #include ...
12    #define STB_IMAGE_IMPLEMENTATION
13    #include "stb_image.h"
14 
15    You can #define STBI_ASSERT(x) before the #include to avoid using assert.h.
16    And #define STBI_MALLOC, STBI_REALLOC, and STBI_FREE to avoid using malloc,realloc,free
17 
18 
19    QUICK NOTES:
20       Primarily of interest to game developers and other people who can
21           avoid problematic images and only need the trivial interface
22 
23       JPEG baseline & progressive (12 bpc/arithmetic not supported, same as stock IJG lib)
24       PNG 1/2/4/8/16-bit-per-channel
25 
26       TGA (not sure what subset, if a subset)
27       BMP non-1bpp, non-RLE
28       PSD (composited view only, no extra channels, 8/16 bit-per-channel)
29 
30       GIF (*comp always reports as 4-channel)
31       HDR (radiance rgbE format)
32       PIC (Softimage PIC)
33       PNM (PPM and PGM binary only)
34 
35       Animated GIF still needs a proper API, but here's one way to do it:
36           http://gist.github.com/urraka/685d9a6340b26b830d49
37 
38       - decode from memory or through FILE (define STBI_NO_STDIO to remove code)
39       - decode from arbitrary I/O callbacks
40       - SIMD acceleration on x86/x64 (SSE2) and ARM (NEON)
41 
42    Full documentation under "DOCUMENTATION" below.
43 
44 
45 LICENSE
46 
47   See end of file for license information.
48 
49 RECENT REVISION HISTORY:
50 
51       2.22  (2019-03-04) gif fixes, fix warnings
52       2.21  (2019-02-25) fix typo in comment
53       2.20  (2019-02-07) support utf8 filenames in Windows; fix warnings and platform ifdefs
54       2.19  (2018-02-11) fix warning
55       2.18  (2018-01-30) fix warnings
56       2.17  (2018-01-29) bugfix, 1-bit BMP, 16-bitness query, fix warnings
57       2.16  (2017-07-23) all functions have 16-bit variants; optimizations; bugfixes
58       2.15  (2017-03-18) fix png-1,2,4; all Imagenet JPGs; no runtime SSE detection on GCC
59       2.14  (2017-03-03) remove deprecated STBI_JPEG_OLD; fixes for Imagenet JPGs
60       2.13  (2016-12-04) experimental 16-bit API, only for PNG so far; fixes
61       2.12  (2016-04-02) fix typo in 2.11 PSD fix that caused crashes
62       2.11  (2016-04-02) 16-bit PNGS; enable SSE2 in non-gcc x64
63                          RGB-format JPEG; remove white matting in PSD;
64                          allocate large structures on the stack;
65                          correct channel count for PNG & BMP
66       2.10  (2016-01-22) avoid warning introduced in 2.09
67       2.09  (2016-01-16) 16-bit TGA; comments in PNM files; STBI_REALLOC_SIZED
68 
69    See end of file for full revision history.
70 
71 
72  ============================    Contributors    =========================
73 
74  Image formats                          Extensions, features
75     Sean Barrett (jpeg, png, bmp)          Jetro Lauha (stbi_info)
76     Nicolas Schulz (hdr, psd)              Martin "SpartanJ" Golini (stbi_info)
77     Jonathan Dummer (tga)                  James "moose2000" Brown (iPhone PNG)
78     Jean-Marc Lienher (gif)                Ben "Disch" Wenger (io callbacks)
79     Tom Seddon (pic)                       Omar Cornut (1/2/4-bit PNG)
80     Thatcher Ulrich (psd)                  Nicolas Guillemot (vertical flip)
81     Ken Miller (pgm, ppm)                  Richard Mitton (16-bit PSD)
82     github:urraka (animated gif)           Junggon Kim (PNM comments)
83     Christopher Forseth (animated gif)     Daniel Gibson (16-bit TGA)
84                                            socks-the-fox (16-bit PNG)
85                                            Jeremy Sawicki (handle all ImageNet JPGs)
86  Optimizations & bugfixes                  Mikhail Morozov (1-bit BMP)
87     Fabian "ryg" Giesen                    Anael Seghezzi (is-16-bit query)
88     Arseny Kapoulkine
89     John-Mark Allen
90     Carmelo J Fdez-Aguera
91 
92  Bug & warning fixes
93     Marc LeBlanc            David Woo          Guillaume George   Martins Mozeiko
94     Christpher Lloyd        Jerry Jansson      Joseph Thomson     Phil Jordan
95     Dave Moore              Roy Eltham         Hayaki Saito       Nathan Reed
96     Won Chun                Luke Graham        Johan Duparc       Nick Verigakis
97     the Horde3D community   Thomas Ruf         Ronny Chevalier    github:rlyeh
98     Janez Zemva             John Bartholomew   Michal Cichon      github:romigrou
99     Jonathan Blow           Ken Hamada         Tero Hanninen      github:svdijk
100     Laurent Gomila          Cort Stratton      Sergio Gonzalez    github:snagar
101     Aruelien Pocheville     Thibault Reuille   Cass Everitt       github:Zelex
102     Ryamond Barbiero        Paul Du Bois       Engin Manap        github:grim210
103     Aldo Culquicondor       Philipp Wiesemann  Dale Weiler        github:sammyhw
104     Oriol Ferrer Mesia      Josh Tobin         Matthew Gregan     github:phprus
105     Julian Raschke          Gregory Mullen     Baldur Karlsson    github:poppolopoppo
106     Christian Floisand      Kevin Schmidt      JR Smith           github:darealshinji
107     Blazej Dariusz Roszkowski                                     github:Michaelangel007
108 */
109 
110 #ifndef STBI_INCLUDE_STB_IMAGE_H
111 #define STBI_INCLUDE_STB_IMAGE_H
112 
113 // DOCUMENTATION
114 //
115 // Limitations:
116 //    - no 12-bit-per-channel JPEG
117 //    - no JPEGs with arithmetic coding
118 //    - GIF always returns *comp=4
119 //
120 // Basic usage (see HDR discussion below for HDR usage):
121 //    int x,y,n;
122 //    unsigned char *data = stbi_load(filename, &x, &y, &n, 0);
123 //    // ... process data if not NULL ...
124 //    // ... x = width, y = height, n = # 8-bit components per pixel ...
125 //    // ... replace '0' with '1'..'4' to force that many components per pixel
126 //    // ... but 'n' will always be the number that it would have been if you said 0
//    stbi_image_free(data);
128 //
129 // Standard parameters:
130 //    int *x                 -- outputs image width in pixels
131 //    int *y                 -- outputs image height in pixels
132 //    int *channels_in_file  -- outputs # of image components in image file
133 //    int desired_channels   -- if non-zero, # of image components requested in result
134 //
135 // The return value from an image loader is an 'unsigned char *' which points
136 // to the pixel data, or NULL on an allocation failure or if the image is
137 // corrupt or invalid. The pixel data consists of *y scanlines of *x pixels,
138 // with each pixel consisting of N interleaved 8-bit components; the first
139 // pixel pointed to is top-left-most in the image. There is no padding between
140 // image scanlines or between pixels, regardless of format. The number of
141 // components N is 'desired_channels' if desired_channels is non-zero, or
142 // *channels_in_file otherwise. If desired_channels is non-zero,
143 // *channels_in_file has the number of components that _would_ have been
144 // output otherwise. E.g. if you set desired_channels to 4, you will always
145 // get RGBA output, but you can check *channels_in_file to see if it's trivially
146 // opaque because e.g. there were only 3 channels in the source image.
147 //
148 // An output image with N components has the following components interleaved
149 // in this order in each pixel:
150 //
151 //     N=#comp     components
152 //       1           grey
153 //       2           grey, alpha
154 //       3           red, green, blue
155 //       4           red, green, blue, alpha
156 //
157 // If image loading fails for any reason, the return value will be NULL,
158 // and *x, *y, *channels_in_file will be unchanged. The function
159 // stbi_failure_reason() can be queried for an extremely brief, end-user
160 // unfriendly explanation of why the load failed. Define STBI_NO_FAILURE_STRINGS
161 // to avoid compiling these strings at all, and STBI_FAILURE_USERMSG to get slightly
162 // more user-friendly ones.
163 //
164 // Paletted PNG, BMP, GIF, and PIC images are automatically depalettized.
165 //
166 // ===========================================================================
167 //
168 // UNICODE:
169 //
170 //   If compiling for Windows and you wish to use Unicode filenames, compile
171 //   with
172 //       #define STBI_WINDOWS_UTF8
173 //   and pass utf8-encoded filenames. Call stbi_convert_wchar_to_utf8 to convert
174 //   Windows wchar_t filenames to utf8.
175 //
176 // ===========================================================================
177 //
178 // Philosophy
179 //
180 // stb libraries are designed with the following priorities:
181 //
182 //    1. easy to use
183 //    2. easy to maintain
184 //    3. good performance
185 //
186 // Sometimes I let "good performance" creep up in priority over "easy to maintain",
187 // and for best performance I may provide less-easy-to-use APIs that give higher
188 // performance, in addition to the easy-to-use ones. Nevertheless, it's important
189 // to keep in mind that from the standpoint of you, a client of this library,
190 // all you care about is #1 and #3, and stb libraries DO NOT emphasize #3 above all.
191 //
192 // Some secondary priorities arise directly from the first two, some of which
193 // provide more explicit reasons why performance can't be emphasized.
194 //
195 //    - Portable ("ease of use")
196 //    - Small source code footprint ("easy to maintain")
197 //    - No dependencies ("ease of use")
198 //
199 // ===========================================================================
200 //
201 // I/O callbacks
202 //
203 // I/O callbacks allow you to read from arbitrary sources, like packaged
204 // files or some other source. Data read from callbacks are processed
205 // through a small internal buffer (currently 128 bytes) to try to reduce
206 // overhead.
207 //
208 // The three functions you must define are "read" (reads some bytes of data),
209 // "skip" (skips some bytes of data), "eof" (reports if the stream is at the end).
210 //
211 // ===========================================================================
212 //
213 // SIMD support
214 //
215 // The JPEG decoder will try to automatically use SIMD kernels on x86 when
216 // supported by the compiler. For ARM Neon support, you must explicitly
217 // request it.
218 //
219 // (The old do-it-yourself SIMD API is no longer supported in the current
220 // code.)
221 //
// On x86 with Visual C++, SSE2 will automatically be used when available based
// on a run-time test; if not, the generic C versions are used as a fall-back.
// With GCC/Clang there is no run-time test: SSE2 is used when the compiler is
// allowed to generate it (e.g. -msse2) and is not used at all otherwise.
// On ARM targets, the typical path is to have separate builds for NEON and
// non-NEON devices (at least this is true for iOS and Android). Therefore, the
// NEON support is toggled by a build flag: define STBI_NEON to get NEON loops.
227 //
228 // If for some reason you do not want to use any of SIMD code, or if
229 // you have issues compiling it, you can disable it entirely by
230 // defining STBI_NO_SIMD.
231 //
232 // ===========================================================================
233 //
234 // HDR image support   (disable by defining STBI_NO_HDR)
235 //
236 // stb_image supports loading HDR images in general, and currently the Radiance
237 // .HDR file format specifically. You can still load any file through the existing
238 // interface; if you attempt to load an HDR file, it will be automatically remapped
239 // to LDR, assuming gamma 2.2 and an arbitrary scale factor defaulting to 1;
240 // both of these constants can be reconfigured through this interface:
241 //
242 //     stbi_hdr_to_ldr_gamma(2.2f);
243 //     stbi_hdr_to_ldr_scale(1.0f);
244 //
// (note, do not use _inverse_ constants; stb_image will invert them
// appropriately).
247 //
248 // Additionally, there is a new, parallel interface for loading files as
249 // (linear) floats to preserve the full dynamic range:
250 //
251 //    float *data = stbi_loadf(filename, &x, &y, &n, 0);
252 //
253 // If you load LDR images through this interface, those images will
254 // be promoted to floating point values, run through the inverse of
255 // constants corresponding to the above:
256 //
257 //     stbi_ldr_to_hdr_scale(1.0f);
258 //     stbi_ldr_to_hdr_gamma(2.2f);
259 //
260 // Finally, given a filename (or an open file or memory block--see header
261 // file for details) containing image data, you can query for the "most
262 // appropriate" interface to use (that is, whether the image is HDR or
263 // not), using:
264 //
//     stbi_is_hdr(char const *filename);
266 //
267 // ===========================================================================
268 //
269 // iPhone PNG support:
270 //
271 // By default we convert iphone-formatted PNGs back to RGB, even though
272 // they are internally encoded differently. You can disable this conversion
273 // by calling stbi_convert_iphone_png_to_rgb(0), in which case
274 // you will always just get the native iphone "format" through (which
275 // is BGR stored in RGB).
276 //
277 // Call stbi_set_unpremultiply_on_load(1) as well to force a divide per
278 // pixel to remove any premultiplied alpha *only* if the image file explicitly
279 // says there's premultiplied data (currently only happens in iPhone images,
280 // and only if iPhone convert-to-rgb processing is on).
281 //
282 // ===========================================================================
283 //
284 // ADDITIONAL CONFIGURATION
285 //
286 //  - You can suppress implementation of any of the decoders to reduce
287 //    your code footprint by #defining one or more of the following
288 //    symbols before creating the implementation.
289 //
290 //        STBI_NO_JPEG
291 //        STBI_NO_PNG
292 //        STBI_NO_BMP
293 //        STBI_NO_PSD
294 //        STBI_NO_TGA
295 //        STBI_NO_GIF
296 //        STBI_NO_HDR
297 //        STBI_NO_PIC
298 //        STBI_NO_PNM   (.ppm and .pgm)
299 //
300 //  - You can request *only* certain decoders and suppress all other ones
301 //    (this will be more forward-compatible, as addition of new decoders
302 //    doesn't require you to disable them explicitly):
303 //
304 //        STBI_ONLY_JPEG
305 //        STBI_ONLY_PNG
306 //        STBI_ONLY_BMP
307 //        STBI_ONLY_PSD
308 //        STBI_ONLY_TGA
309 //        STBI_ONLY_GIF
310 //        STBI_ONLY_HDR
311 //        STBI_ONLY_PIC
312 //        STBI_ONLY_PNM   (.ppm and .pgm)
313 //
314 //   - If you use STBI_NO_PNG (or _ONLY_ without PNG), and you still
315 //     want the zlib decoder to be available, #define STBI_SUPPORT_ZLIB
316 //
317 
318 
319 #ifndef STBI_NO_STDIO
320 #include <stdio.h>
321 #endif // STBI_NO_STDIO
322 
323 #define STBI_VERSION 1
324 
// Channel counts accepted as the 'desired_channels' argument of the loaders.
enum
{
   STBI_default    = 0, // only used for desired_channels

   STBI_grey       = 1, // grey
   STBI_grey_alpha = 2, // grey, alpha
   STBI_rgb        = 3, // red, green, blue
   STBI_rgb_alpha  = 4  // red, green, blue, alpha
};
334 
335 #include <stdlib.h>
336 typedef unsigned char stbi_uc;
337 typedef unsigned short stbi_us;
338 
339 #ifdef __cplusplus
340 extern "C" {
341 #endif
342 
343 #ifndef STBIDEF
344 #ifdef STB_IMAGE_STATIC
345 #define STBIDEF static
346 #else
347 #define STBIDEF extern
348 #endif
349 #endif
350 
351 //////////////////////////////////////////////////////////////////////////////
352 //
353 // PRIMARY API - works on images of any type
354 //
355 
356 //
357 // load image by filename, open file, or memory buffer
358 //
359 
// Table of user-supplied I/O callbacks. 'user' is the opaque pointer that is
// supplied together with the table (e.g. to stbi_load_from_callbacks).
typedef struct
{
   // fill 'data' with 'size' bytes.  return number of bytes actually read
   int      (*read)  (void *user,char *data,int size);

   // skip the next 'n' bytes, or 'unget' the last -n bytes if negative
   void     (*skip)  (void *user,int n);

   // returns nonzero if we are at end of file/data
   int      (*eof)   (void *user);
} stbi_io_callbacks;
366 
367 ////////////////////////////////////
368 //
369 // 8-bits-per-channel interface
370 //
371 
372 STBIDEF stbi_uc *stbi_load_from_memory   (stbi_uc           const *buffer, int len   , int *x, int *y, int *channels_in_file, int desired_channels);
373 STBIDEF stbi_uc *stbi_load_from_callbacks(stbi_io_callbacks const *clbk  , void *user, int *x, int *y, int *channels_in_file, int desired_channels);
374 
375 #ifndef STBI_NO_STDIO
376 STBIDEF stbi_uc *stbi_load            (char const *filename, int *x, int *y, int *channels_in_file, int desired_channels);
377 STBIDEF stbi_uc *stbi_load_from_file  (FILE *f, int *x, int *y, int *channels_in_file, int desired_channels);
378 // for stbi_load_from_file, file pointer is left pointing immediately after image
379 #endif
380 
381 #ifndef STBI_NO_GIF
382 STBIDEF stbi_uc *stbi_load_gif_from_memory(stbi_uc const *buffer, int len, int **delays, int *x, int *y, int *z, int *comp, int req_comp);
383 #endif
384 
385 #ifdef STBI_WINDOWS_UTF8
386 STBIDEF int stbi_convert_wchar_to_utf8(char *buffer, size_t bufferlen, const wchar_t* input);
387 #endif
388 
389 ////////////////////////////////////
390 //
391 // 16-bits-per-channel interface
392 //
393 
394 STBIDEF stbi_us *stbi_load_16_from_memory   (stbi_uc const *buffer, int len, int *x, int *y, int *channels_in_file, int desired_channels);
395 STBIDEF stbi_us *stbi_load_16_from_callbacks(stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *channels_in_file, int desired_channels);
396 
397 #ifndef STBI_NO_STDIO
398 STBIDEF stbi_us *stbi_load_16          (char const *filename, int *x, int *y, int *channels_in_file, int desired_channels);
399 STBIDEF stbi_us *stbi_load_from_file_16(FILE *f, int *x, int *y, int *channels_in_file, int desired_channels);
400 #endif
401 
402 ////////////////////////////////////
403 //
404 // float-per-channel interface
405 //
406 #ifndef STBI_NO_LINEAR
407    STBIDEF float *stbi_loadf_from_memory     (stbi_uc const *buffer, int len, int *x, int *y, int *channels_in_file, int desired_channels);
408    STBIDEF float *stbi_loadf_from_callbacks  (stbi_io_callbacks const *clbk, void *user, int *x, int *y,  int *channels_in_file, int desired_channels);
409 
410    #ifndef STBI_NO_STDIO
411    STBIDEF float *stbi_loadf            (char const *filename, int *x, int *y, int *channels_in_file, int desired_channels);
412    STBIDEF float *stbi_loadf_from_file  (FILE *f, int *x, int *y, int *channels_in_file, int desired_channels);
413    #endif
414 #endif
415 
416 #ifndef STBI_NO_HDR
417    STBIDEF void   stbi_hdr_to_ldr_gamma(float gamma);
418    STBIDEF void   stbi_hdr_to_ldr_scale(float scale);
419 #endif // STBI_NO_HDR
420 
421 #ifndef STBI_NO_LINEAR
422    STBIDEF void   stbi_ldr_to_hdr_gamma(float gamma);
423    STBIDEF void   stbi_ldr_to_hdr_scale(float scale);
424 #endif // STBI_NO_LINEAR
425 
426 // stbi_is_hdr is always defined, but always returns false if STBI_NO_HDR
427 STBIDEF int    stbi_is_hdr_from_callbacks(stbi_io_callbacks const *clbk, void *user);
428 STBIDEF int    stbi_is_hdr_from_memory(stbi_uc const *buffer, int len);
429 #ifndef STBI_NO_STDIO
430 STBIDEF int      stbi_is_hdr          (char const *filename);
431 STBIDEF int      stbi_is_hdr_from_file(FILE *f);
432 #endif // STBI_NO_STDIO
433 
434 
435 // get a VERY brief reason for failure
436 // NOT THREADSAFE
437 STBIDEF const char *stbi_failure_reason  (void);
438 
439 // free the loaded image -- this is just free()
440 STBIDEF void     stbi_image_free      (void *retval_from_stbi_load);
441 
442 // get image dimensions & components without fully decoding
443 STBIDEF int      stbi_info_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp);
444 STBIDEF int      stbi_info_from_callbacks(stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *comp);
445 STBIDEF int      stbi_is_16_bit_from_memory(stbi_uc const *buffer, int len);
446 STBIDEF int      stbi_is_16_bit_from_callbacks(stbi_io_callbacks const *clbk, void *user);
447 
448 #ifndef STBI_NO_STDIO
449 STBIDEF int      stbi_info               (char const *filename,     int *x, int *y, int *comp);
450 STBIDEF int      stbi_info_from_file     (FILE *f,                  int *x, int *y, int *comp);
451 STBIDEF int      stbi_is_16_bit          (char const *filename);
452 STBIDEF int      stbi_is_16_bit_from_file(FILE *f);
453 #endif
454 
455 
456 
// for image formats that explicitly notate that they have premultiplied alpha,
// we just return the colors as stored in the file. set this flag to force
// unpremultiplication. results are undefined if the unpremultiply overflows.
460 STBIDEF void stbi_set_unpremultiply_on_load(int flag_true_if_should_unpremultiply);
461 
462 // indicate whether we should process iphone images back to canonical format,
463 // or just pass them through "as-is"
464 STBIDEF void stbi_convert_iphone_png_to_rgb(int flag_true_if_should_convert);
465 
466 // flip the image vertically, so the first pixel in the output array is the bottom left
467 STBIDEF void stbi_set_flip_vertically_on_load(int flag_true_if_should_flip);
468 
469 // ZLIB client - used by PNG, available for other purposes
470 
471 STBIDEF char *stbi_zlib_decode_malloc_guesssize(const char *buffer, int len, int initial_size, int *outlen);
472 STBIDEF char *stbi_zlib_decode_malloc_guesssize_headerflag(const char *buffer, int len, int initial_size, int *outlen, int parse_header);
473 STBIDEF char *stbi_zlib_decode_malloc(const char *buffer, int len, int *outlen);
474 STBIDEF int   stbi_zlib_decode_buffer(char *obuffer, int olen, const char *ibuffer, int ilen);
475 
476 STBIDEF char *stbi_zlib_decode_noheader_malloc(const char *buffer, int len, int *outlen);
477 STBIDEF int   stbi_zlib_decode_noheader_buffer(char *obuffer, int olen, const char *ibuffer, int ilen);
478 
479 
480 #ifdef __cplusplus
481 }
482 #endif
483 
484 //
485 //
486 ////   end header file   /////////////////////////////////////////////////////
487 #endif // STBI_INCLUDE_STB_IMAGE_H
488 
489 #ifdef STB_IMAGE_IMPLEMENTATION
490 
491 #if defined(STBI_ONLY_JPEG) || defined(STBI_ONLY_PNG) || defined(STBI_ONLY_BMP) \
492   || defined(STBI_ONLY_TGA) || defined(STBI_ONLY_GIF) || defined(STBI_ONLY_PSD) \
493   || defined(STBI_ONLY_HDR) || defined(STBI_ONLY_PIC) || defined(STBI_ONLY_PNM) \
494   || defined(STBI_ONLY_ZLIB)
495    #ifndef STBI_ONLY_JPEG
496    #define STBI_NO_JPEG
497    #endif
498    #ifndef STBI_ONLY_PNG
499    #define STBI_NO_PNG
500    #endif
501    #ifndef STBI_ONLY_BMP
502    #define STBI_NO_BMP
503    #endif
504    #ifndef STBI_ONLY_PSD
505    #define STBI_NO_PSD
506    #endif
507    #ifndef STBI_ONLY_TGA
508    #define STBI_NO_TGA
509    #endif
510    #ifndef STBI_ONLY_GIF
511    #define STBI_NO_GIF
512    #endif
513    #ifndef STBI_ONLY_HDR
514    #define STBI_NO_HDR
515    #endif
516    #ifndef STBI_ONLY_PIC
517    #define STBI_NO_PIC
518    #endif
519    #ifndef STBI_ONLY_PNM
520    #define STBI_NO_PNM
521    #endif
522 #endif
523 
524 #if defined(STBI_NO_PNG) && !defined(STBI_SUPPORT_ZLIB) && !defined(STBI_NO_ZLIB)
525 #define STBI_NO_ZLIB
526 #endif
527 
528 
529 #include <stdarg.h>
530 #include <stddef.h> // ptrdiff_t on osx
531 #include <stdlib.h>
532 #include <string.h>
533 #include <limits.h>
534 
535 #if !defined(STBI_NO_LINEAR) || !defined(STBI_NO_HDR)
536 #include <math.h>  // ldexp, pow
537 #endif
538 
539 #ifndef STBI_NO_STDIO
540 #include <stdio.h>
541 #endif
542 
543 #ifndef STBI_ASSERT
544 #include <assert.h>
545 #define STBI_ASSERT(x) assert(x)
546 #endif
547 
548 #ifdef __cplusplus
549 #define STBI_EXTERN extern "C"
550 #else
551 #define STBI_EXTERN extern
552 #endif
553 
554 
555 #ifndef _MSC_VER
556    #ifdef __cplusplus
557    #define stbi_inline inline
558    #else
559    #define stbi_inline
560    #endif
561 #else
562    #define stbi_inline __forceinline
563 #endif
564 
565 
566 #ifdef _MSC_VER
567 typedef unsigned short stbi__uint16;
568 typedef   signed short stbi__int16;
569 typedef unsigned int   stbi__uint32;
570 typedef   signed int   stbi__int32;
571 #else
572 #include <stdint.h>
573 typedef uint16_t stbi__uint16;
574 typedef int16_t  stbi__int16;
575 typedef uint32_t stbi__uint32;
576 typedef int32_t  stbi__int32;
577 #endif
578 
579 // should produce compiler error if size is wrong
580 typedef unsigned char validate_uint32[sizeof(stbi__uint32)==4 ? 1 : -1];
581 
582 #ifdef _MSC_VER
583 #define STBI_NOTUSED(v)  (void)(v)
584 #else
585 #define STBI_NOTUSED(v)  (void)sizeof(v)
586 #endif
587 
588 #ifdef _MSC_VER
589 #define STBI_HAS_LROTL
590 #endif
591 
// stbi_lrot(x,y): rotate x left by y bits. x is expected to be a 32-bit
// unsigned value and y to be in the range 0..31.
#ifdef STBI_HAS_LROTL
   #define stbi_lrot(x,y)  _lrotl(x,y)
#else
   // (-(y) & 31) equals (32 - y) for y in 1..31 and 0 for y == 0, so the
   // right shift count never reaches 32; shifting a 32-bit value by its full
   // width is undefined behavior in C.
   #define stbi_lrot(x,y)  (((x) << (y)) | ((x) >> (-(y) & 31)))
#endif
597 
598 #if defined(STBI_MALLOC) && defined(STBI_FREE) && (defined(STBI_REALLOC) || defined(STBI_REALLOC_SIZED))
599 // ok
600 #elif !defined(STBI_MALLOC) && !defined(STBI_FREE) && !defined(STBI_REALLOC) && !defined(STBI_REALLOC_SIZED)
601 // ok
602 #else
603 #error "Must define all or none of STBI_MALLOC, STBI_FREE, and STBI_REALLOC (or STBI_REALLOC_SIZED)."
604 #endif
605 
606 #ifndef STBI_MALLOC
607 #define STBI_MALLOC(sz)           malloc(sz)
608 #define STBI_REALLOC(p,newsz)     realloc(p,newsz)
609 #define STBI_FREE(p)              free(p)
610 #endif
611 
612 #ifndef STBI_REALLOC_SIZED
613 #define STBI_REALLOC_SIZED(p,oldsz,newsz) STBI_REALLOC(p,newsz)
614 #endif
615 
616 // x86/x64 detection
617 #if defined(__x86_64__) || defined(_M_X64)
618 #define STBI__X64_TARGET
619 #elif defined(__i386) || defined(_M_IX86)
620 #define STBI__X86_TARGET
621 #endif
622 
623 #if defined(__GNUC__) && defined(STBI__X86_TARGET) && !defined(__SSE2__) && !defined(STBI_NO_SIMD)
624 // gcc doesn't support sse2 intrinsics unless you compile with -msse2,
625 // which in turn means it gets to use SSE2 everywhere. This is unfortunate,
626 // but previous attempts to provide the SSE2 functions with runtime
627 // detection caused numerous issues. The way architecture extensions are
628 // exposed in GCC/Clang is, sadly, not really suited for one-file libs.
629 // New behavior: if compiled with -msse2, we use SSE2 without any
630 // detection; if not, we don't use it at all.
631 #define STBI_NO_SIMD
632 #endif
633 
634 #if defined(__MINGW32__) && defined(STBI__X86_TARGET) && !defined(STBI_MINGW_ENABLE_SSE2) && !defined(STBI_NO_SIMD)
635 // Note that __MINGW32__ doesn't actually mean 32-bit, so we have to avoid STBI__X64_TARGET
636 //
637 // 32-bit MinGW wants ESP to be 16-byte aligned, but this is not in the
638 // Windows ABI and VC++ as well as Windows DLLs don't maintain that invariant.
639 // As a result, enabling SSE2 on 32-bit MinGW is dangerous when not
640 // simultaneously enabling "-mstackrealign".
641 //
642 // See https://github.com/nothings/stb/issues/81 for more information.
643 //
644 // So default to no SSE2 on 32-bit MinGW. If you've read this far and added
645 // -mstackrealign to your build settings, feel free to #define STBI_MINGW_ENABLE_SSE2.
646 #define STBI_NO_SIMD
647 #endif
648 
649 #if !defined(STBI_NO_SIMD) && (defined(STBI__X86_TARGET) || defined(STBI__X64_TARGET))
650 #define STBI_SSE2
651 #include <emmintrin.h>
652 
653 #ifdef _MSC_VER
654 
655 #if _MSC_VER >= 1400  // not VC6
656 #include <intrin.h> // __cpuid
// Query CPUID function 1 through the compiler intrinsic and return the
// fourth output word (index 3), which the caller tests for feature bits.
static int stbi__cpuid3(void)
{
   int regs[4];
   __cpuid(regs, 1);
   return regs[3];
}
663 #else
// Variant used when _MSC_VER is below 1400 (no <intrin.h>): execute CPUID
// with EAX=1 via inline assembly and return the value left in EDX.
static int stbi__cpuid3(void)
{
   int res;
   __asm {
      mov  eax,1
      cpuid
      mov  res,edx
   }
   return res;
}
674 #endif
675 
676 #define STBI_SIMD_ALIGN(type, name) __declspec(align(16)) type name
677 
678 #if !defined(STBI_NO_JPEG) && defined(STBI_SSE2)
// Run-time SSE2 check: returns 1 if bit 26 of the word reported by
// stbi__cpuid3() is set, 0 otherwise.
static int stbi__sse2_available(void)
{
   int feature_word = stbi__cpuid3();
   int sse2_bit = (feature_word >> 26) & 1;
   return sse2_bit != 0;
}
684 #endif
685 
686 #else // assume GCC-style if not VC++
687 #define STBI_SIMD_ALIGN(type, name) type name __attribute__((aligned(16)))
688 
689 #if !defined(STBI_NO_JPEG) && defined(STBI_SSE2)
// No run-time probe on GCC/Clang: if this code is being compiled at all,
// -msse2 is in effect, so the compiler is already free to emit SSE2
// instructions anywhere, and so are we.
static int stbi__sse2_available(void)
{
   return 1;
}
697 #endif
698 
699 #endif
700 #endif
701 
702 // ARM NEON
703 #if defined(STBI_NO_SIMD) && defined(STBI_NEON)
704 #undef STBI_NEON
705 #endif
706 
707 #ifdef STBI_NEON
708 #include <arm_neon.h>
709 // assume GCC or Clang on ARM targets
710 #define STBI_SIMD_ALIGN(type, name) type name __attribute__((aligned(16)))
711 #endif
712 
713 #ifndef STBI_SIMD_ALIGN
714 #define STBI_SIMD_ALIGN(type, name) type name
715 #endif
716 
717 ///////////////////////////////////////////////
718 //
719 //  stbi__context struct and start_xxx functions
720 
721 // stbi__context structure is our basic context used by all images, so it
722 // contains all the IO context, plus some basic image information
723 typedef struct
724 {
725    stbi__uint32 img_x, img_y;
726    int img_n, img_out_n;
727 
728    stbi_io_callbacks io;
729    void *io_user_data;
730 
731    int read_from_callbacks;
732    int buflen;
733    stbi_uc buffer_start[128];
734 
735    stbi_uc *img_buffer, *img_buffer_end;
736    stbi_uc *img_buffer_original, *img_buffer_original_end;
737 } stbi__context;
738 
739 
740 static void stbi__refill_buffer(stbi__context *s);
741 
742 // initialize a memory-decode context
stbi__start_mem(stbi__context * s,stbi_uc const * buffer,int len)743 static void stbi__start_mem(stbi__context *s, stbi_uc const *buffer, int len)
744 {
745    s->io.read = NULL;
746    s->read_from_callbacks = 0;
747    s->img_buffer = s->img_buffer_original = (stbi_uc *) buffer;
748    s->img_buffer_end = s->img_buffer_original_end = (stbi_uc *) buffer+len;
749 }
750 
751 // initialize a callback-based context
stbi__start_callbacks(stbi__context * s,stbi_io_callbacks * c,void * user)752 static void stbi__start_callbacks(stbi__context *s, stbi_io_callbacks *c, void *user)
753 {
754    s->io = *c;
755    s->io_user_data = user;
756    s->buflen = sizeof(s->buffer_start);
757    s->read_from_callbacks = 1;
758    s->img_buffer_original = s->buffer_start;
759    stbi__refill_buffer(s);
760    s->img_buffer_original_end = s->img_buffer_end;
761 }
762 
763 #ifndef STBI_NO_STDIO
764 
// 'read' callback for stdio: 'user' is the FILE* to read from. Reads up to
// 'size' bytes into 'data' and returns the number of bytes actually read.
static int stbi__stdio_read(void *user, char *data, int size)
{
   FILE *stream = (FILE *) user;
   size_t got = fread(data, 1, size, stream);
   return (int) got;
}
769 
// 'skip' callback for stdio: 'user' is the FILE*. Moves the stream position
// by 'n' bytes relative to the current position ('n' may be negative).
static void stbi__stdio_skip(void *user, int n)
{
   FILE *f = (FILE *) user;
   int ch;

   // The callback returns void, so a failed seek cannot be reported; the
   // peek below still leaves the stream's indicators in a meaningful state.
   fseek(f, n, SEEK_CUR);

   // A successful fseek() clears the end-of-file indicator, even when the new
   // position lies at or beyond the end of the file. Peek one byte so that
   // feof() reflects the real state again, then push the byte back if there
   // was one so the stream position is unchanged.
   ch = fgetc(f);
   if (ch != EOF) {
      ungetc(ch, f);
   }
}
774 
// 'eof' callback for stdio: 'user' is the FILE*. Returns nonzero once no
// more data can be read from the stream.
static int stbi__stdio_eof(void *user)
{
   FILE *f = (FILE *) user;

   // A stream whose error indicator is set does not report feof(), so a
   // failed read must count as end-of-data as well; otherwise a caller that
   // keeps reading until eof could loop indefinitely.
   return feof(f) || ferror(f);
}
779 
780 static stbi_io_callbacks stbi__stdio_callbacks =
781 {
782    stbi__stdio_read,
783    stbi__stdio_skip,
784    stbi__stdio_eof,
785 };
786 
// Set up 's' to decode from the stdio stream 'f' via the stdio callback table.
static void stbi__start_file(stbi__context *s, FILE *f)
{
   void *stream_as_user = (void *) f;   // becomes the 'user' argument of the stdio callbacks
   stbi__start_callbacks(s, &stbi__stdio_callbacks, stream_as_user);
}
791 
792 //static void stop_file(stbi__context *s) { }
793 
794 #endif // !STBI_NO_STDIO
795 
// Reset the buffer pointers of 's' to the values recorded when it was started.
static void stbi__rewind(stbi__context *s)
{
   // Conceptually a rewind SHOULD go back to the beginning of the stream, but
   // this only goes back to the beginning of the initial buffer. That is
   // enough because rewind is only used after doing a 'test', which never
   // looks at more than 92 bytes.
   s->img_buffer_end = s->img_buffer_original_end;
   s->img_buffer = s->img_buffer_original;
}
804 
// Channel orderings that can be recorded in stbi__result_info.channel_order.
enum
{
   STBI_ORDER_RGB = 0,
   STBI_ORDER_BGR = 1
};
810 
// Extra facts about the pixel data a format loader hands back;
// stbi__load_main() fills in defaults before any loader runs.
typedef struct
{
   int bits_per_channel;   // bits per sample; the post-processing code in this file expects 8 or 16
   int num_channels;       // reset to 0 by stbi__load_main()
   int channel_order;      // one of the STBI_ORDER_* values
} stbi__result_info;
817 
// Forward declarations of the per-format entry points. Every format can be
// compiled out by defining its STBI_NO_<FORMAT> macro. stbi__load_main()
// calls a format's *_test function to decide whether that format's *_load
// function should decode the stream.
#ifndef STBI_NO_JPEG
static int      stbi__jpeg_test(stbi__context *s);
static void    *stbi__jpeg_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri);
static int      stbi__jpeg_info(stbi__context *s, int *x, int *y, int *comp);
#endif

#ifndef STBI_NO_PNG
static int      stbi__png_test(stbi__context *s);
static void    *stbi__png_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri);
static int      stbi__png_info(stbi__context *s, int *x, int *y, int *comp);
static int      stbi__png_is16(stbi__context *s);
#endif

#ifndef STBI_NO_BMP
static int      stbi__bmp_test(stbi__context *s);
static void    *stbi__bmp_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri);
static int      stbi__bmp_info(stbi__context *s, int *x, int *y, int *comp);
#endif

#ifndef STBI_NO_TGA
static int      stbi__tga_test(stbi__context *s);
static void    *stbi__tga_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri);
static int      stbi__tga_info(stbi__context *s, int *x, int *y, int *comp);
#endif

// the PSD loader is the only one that is also told the requested bits per channel
#ifndef STBI_NO_PSD
static int      stbi__psd_test(stbi__context *s);
static void    *stbi__psd_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri, int bpc);
static int      stbi__psd_info(stbi__context *s, int *x, int *y, int *comp);
static int      stbi__psd_is16(stbi__context *s);
#endif

// the HDR loader returns float data rather than bytes
#ifndef STBI_NO_HDR
static int      stbi__hdr_test(stbi__context *s);
static float   *stbi__hdr_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri);
static int      stbi__hdr_info(stbi__context *s, int *x, int *y, int *comp);
#endif

#ifndef STBI_NO_PIC
static int      stbi__pic_test(stbi__context *s);
static void    *stbi__pic_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri);
static int      stbi__pic_info(stbi__context *s, int *x, int *y, int *comp);
#endif

// stbi__load_gif_main is the multi-frame entry point used by stbi_load_gif_from_memory()
#ifndef STBI_NO_GIF
static int      stbi__gif_test(stbi__context *s);
static void    *stbi__gif_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri);
static void    *stbi__load_gif_main(stbi__context *s, int **delays, int *x, int *y, int *z, int *comp, int req_comp);
static int      stbi__gif_info(stbi__context *s, int *x, int *y, int *comp);
#endif

#ifndef STBI_NO_PNM
static int      stbi__pnm_test(stbi__context *s);
static void    *stbi__pnm_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri);
static int      stbi__pnm_info(stbi__context *s, int *x, int *y, int *comp);
#endif
874 
// Most recent failure message recorded by stbi__err() and handed out by
// stbi_failure_reason(). It is a single global shared by every caller, so
// this is not threadsafe.
static const char *stbi__g_failure_reason;
877 
stbi_failure_reason(void)878 STBIDEF const char *stbi_failure_reason(void)
879 {
880    return stbi__g_failure_reason;
881 }
882 
stbi__err(const char * str)883 static int stbi__err(const char *str)
884 {
885    stbi__g_failure_reason = str;
886    return 0;
887 }
888 
// Single funnel for allocations: forwards 'size' to the STBI_MALLOC hook and
// returns whatever that hook returns.
static void *stbi__malloc(size_t size)
{
   void *mem = STBI_MALLOC(size);
   return mem;
}
893 
894 // stb_image uses ints pervasively, including for offset calculations.
895 // therefore the largest decoded image size we can support with the
896 // current code, even on 64-bit targets, is INT_MAX. this is not a
897 // significant limitation for the intended use case.
898 //
899 // we do, however, need to make sure our size calculations don't
900 // overflow. hence a few helper functions for size calculations that
901 // multiply integers together, making sure that they're non-negative
902 // and no overflow occurs.
903 
// returns 1 if "a + b" does not exceed INT_MAX, 0 otherwise.
// a negative 'b' is rejected; 'a' itself is not sign-checked here.
static int stbi__addsizes_valid(int a, int b)
{
   // Once b is known to be in 0..INT_MAX, "INT_MAX - b" is also in
   // 0..INT_MAX and cannot overflow, so "a <= INT_MAX - b" is a safe way
   // of asking "a + b <= INT_MAX" (which itself might overflow).
   return (b >= 0) && (a <= INT_MAX - b);
}
915 
// returns 1 if "a*b" fits in an int, 0 if it would overflow.
// a negative factor makes the product invalid.
static int stbi__mul2sizes_valid(int a, int b)
{
   int fits;

   if (a < 0 || b < 0)
      fits = 0;
   else if (b == 0)
      fits = 1;                  // anything times zero is fine
   else
      fits = (a <= INT_MAX/b);   // overflow test that never computes a*b
   return fits;
}
925 
// returns 1 if "a*b + add" can be computed as an int: no negative factor,
// no negative 'add', and no overflow in either step; 0 otherwise
static int stbi__mad2sizes_valid(int a, int b, int add)
{
   if (!stbi__mul2sizes_valid(a, b))
      return 0;
   // a*b is known not to overflow at this point
   return stbi__addsizes_valid(a*b, add) ? 1 : 0;
}
931 
// returns 1 if "a*b*c + add" can be computed as an int: no negative factor,
// no negative 'add', and no overflow at any step; 0 otherwise
static int stbi__mad3sizes_valid(int a, int b, int c, int add)
{
   // each partial product is validated before it is formed
   if (!stbi__mul2sizes_valid(a, b))
      return 0;
   if (!stbi__mul2sizes_valid(a*b, c))
      return 0;
   return stbi__addsizes_valid(a*b*c, add) ? 1 : 0;
}
938 
// returns 1 if "a*b*c*d + add" can be computed as an int: no negative
// factor, no negative 'add', and no overflow at any step; 0 otherwise.
// Only compiled when the linear or HDR code paths are enabled.
#if !defined(STBI_NO_LINEAR) || !defined(STBI_NO_HDR)
static int stbi__mad4sizes_valid(int a, int b, int c, int d, int add)
{
   // each partial product is validated before it is formed
   if (!stbi__mul2sizes_valid(a, b))
      return 0;
   if (!stbi__mul2sizes_valid(a*b, c))
      return 0;
   if (!stbi__mul2sizes_valid(a*b*c, d))
      return 0;
   return stbi__addsizes_valid(a*b*c*d, add) ? 1 : 0;
}
#endif
947 
// mallocs with size overflow checking:
// allocates "a*b + add" bytes, or returns NULL without allocating when that
// size is rejected by stbi__mad2sizes_valid()
static void *stbi__malloc_mad2(int a, int b, int add)
{
   if (stbi__mad2sizes_valid(a, b, add))
      return stbi__malloc(a*b + add);
   return NULL;
}
954 
// allocates "a*b*c + add" bytes, or returns NULL without allocating when
// that size is rejected by stbi__mad3sizes_valid()
static void *stbi__malloc_mad3(int a, int b, int c, int add)
{
   if (stbi__mad3sizes_valid(a, b, c, add))
      return stbi__malloc(a*b*c + add);
   return NULL;
}
960 
#if !defined(STBI_NO_LINEAR) || !defined(STBI_NO_HDR)
// allocates "a*b*c*d + add" bytes, or returns NULL without allocating when
// that size is rejected by stbi__mad4sizes_valid()
static void *stbi__malloc_mad4(int a, int b, int c, int d, int add)
{
   if (stbi__mad4sizes_valid(a, b, c, d, add))
      return stbi__malloc(a*b*c*d + add);
   return NULL;
}
#endif
968 
// stbi__err - error
// stbi__errpf - error returning pointer to float
// stbi__errpuc - error returning pointer to unsigned char
//
// From here on stbi__err is a two-argument macro that shadows the
// one-argument function of the same name; inside the macro body the name
// refers to the function again. 'x' is the short message and 'y' the more
// descriptive one; the configuration decides which one is recorded.

#ifdef STBI_NO_FAILURE_STRINGS
   // no message is recorded at all; the "call" is just the constant 0
   #define stbi__err(x,y)  0
#elif defined(STBI_FAILURE_USERMSG)
   // record the descriptive message
   #define stbi__err(x,y)  stbi__err(y)
#else
   // record the short message
   #define stbi__err(x,y)  stbi__err(x)
#endif

// Pointer-returning variants: stbi__err is evaluated for its side effect and
// both arms of the conditional are NULL, so the result is always a null
// pointer of the named type.
#define stbi__errpf(x,y)   ((float *)(size_t) (stbi__err(x,y)?NULL:NULL))
#define stbi__errpuc(x,y)  ((unsigned char *)(size_t) (stbi__err(x,y)?NULL:NULL))
983 
stbi_image_free(void * retval_from_stbi_load)984 STBIDEF void stbi_image_free(void *retval_from_stbi_load)
985 {
986    STBI_FREE(retval_from_stbi_load);
987 }
988 
// Forward declarations of the 8-bit <-> float converters, each available
// only when its feature is compiled in. Both take the pixel data, the
// image width and height, and the number of channels per pixel.
#ifndef STBI_NO_LINEAR
static float   *stbi__ldr_to_hdr(stbi_uc *data, int x, int y, int comp);
#endif

#ifndef STBI_NO_HDR
static stbi_uc *stbi__hdr_to_ldr(float   *data, int x, int y, int comp);
#endif
996 
997 static int stbi__vertically_flip_on_load = 0;
998 
stbi_set_flip_vertically_on_load(int flag_true_if_should_flip)999 STBIDEF void stbi_set_flip_vertically_on_load(int flag_true_if_should_flip)
1000 {
1001     stbi__vertically_flip_on_load = flag_true_if_should_flip;
1002 }
1003 
// Identify the image format of the stream in 's' and decode it with the
// matching loader.
//
// Each compiled-in format is probed in the order written below and the
// first one whose *_test function accepts the stream decodes it. 'ri' is
// reset to defaults (8 bits per channel, RGB order, 0 channels) before any
// loader runs. 'bpc' is only forwarded to the PSD loader. HDR data is
// converted to 8-bit via stbi__hdr_to_ldr before being returned.
//
// Returns the loader's result, or NULL with the failure reason set to
// "unknown image type" when no probe matches.
static void *stbi__load_main(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri, int bpc)
{
   memset(ri, 0, sizeof(*ri)); // make sure it's initialized if we add new fields
   ri->bits_per_channel = 8; // default is 8 so most paths don't have to be changed
   ri->channel_order = STBI_ORDER_RGB; // all current input & output are this, but this is here so we can add BGR order
   ri->num_channels = 0;

   #ifndef STBI_NO_JPEG
   if (stbi__jpeg_test(s)) return stbi__jpeg_load(s,x,y,comp,req_comp, ri);
   #endif
   #ifndef STBI_NO_PNG
   if (stbi__png_test(s))  return stbi__png_load(s,x,y,comp,req_comp, ri);
   #endif
   #ifndef STBI_NO_BMP
   if (stbi__bmp_test(s))  return stbi__bmp_load(s,x,y,comp,req_comp, ri);
   #endif
   #ifndef STBI_NO_GIF
   if (stbi__gif_test(s))  return stbi__gif_load(s,x,y,comp,req_comp, ri);
   #endif
   #ifndef STBI_NO_PSD
   if (stbi__psd_test(s))  return stbi__psd_load(s,x,y,comp,req_comp, ri, bpc);
   #endif
   #ifndef STBI_NO_PIC
   if (stbi__pic_test(s))  return stbi__pic_load(s,x,y,comp,req_comp, ri);
   #endif
   #ifndef STBI_NO_PNM
   if (stbi__pnm_test(s))  return stbi__pnm_load(s,x,y,comp,req_comp, ri);
   #endif

   #ifndef STBI_NO_HDR
   if (stbi__hdr_test(s)) {
      // NOTE(review): 'hdr' is passed on without a NULL check, and *x, *y
      // (and *comp when req_comp is 0) are read even if the load failed --
      // confirm stbi__hdr_to_ldr tolerates a NULL input.
      float *hdr = stbi__hdr_load(s, x,y,comp,req_comp, ri);
      return stbi__hdr_to_ldr(hdr, *x, *y, req_comp ? req_comp : *comp);
   }
   #endif

   #ifndef STBI_NO_TGA
   // test tga last because it's a crappy test!
   if (stbi__tga_test(s))
      return stbi__tga_load(s,x,y,comp,req_comp, ri);
   #endif

   return stbi__errpuc("unknown image type", "Image not of any known type, or corrupt");
}
1048 
stbi__convert_16_to_8(stbi__uint16 * orig,int w,int h,int channels)1049 static stbi_uc *stbi__convert_16_to_8(stbi__uint16 *orig, int w, int h, int channels)
1050 {
1051    int i;
1052    int img_len = w * h * channels;
1053    stbi_uc *reduced;
1054 
1055    reduced = (stbi_uc *) stbi__malloc(img_len);
1056    if (reduced == NULL) return stbi__errpuc("outofmem", "Out of memory");
1057 
1058    for (i = 0; i < img_len; ++i)
1059       reduced[i] = (stbi_uc)((orig[i] >> 8) & 0xFF); // top half of each byte is sufficient approx of 16->8 bit scaling
1060 
1061    STBI_FREE(orig);
1062    return reduced;
1063 }
1064 
stbi__convert_8_to_16(stbi_uc * orig,int w,int h,int channels)1065 static stbi__uint16 *stbi__convert_8_to_16(stbi_uc *orig, int w, int h, int channels)
1066 {
1067    int i;
1068    int img_len = w * h * channels;
1069    stbi__uint16 *enlarged;
1070 
1071    enlarged = (stbi__uint16 *) stbi__malloc(img_len*2);
1072    if (enlarged == NULL) return (stbi__uint16 *) stbi__errpuc("outofmem", "Out of memory");
1073 
1074    for (i = 0; i < img_len; ++i)
1075       enlarged[i] = (stbi__uint16)((orig[i] << 8) + orig[i]); // replicate to high and low byte, maps 0->0, 255->0xffff
1076 
1077    STBI_FREE(orig);
1078    return enlarged;
1079 }
1080 
stbi__vertical_flip(void * image,int w,int h,int bytes_per_pixel)1081 static void stbi__vertical_flip(void *image, int w, int h, int bytes_per_pixel)
1082 {
1083    int row;
1084    size_t bytes_per_row = (size_t)w * bytes_per_pixel;
1085    stbi_uc temp[2048];
1086    stbi_uc *bytes = (stbi_uc *)image;
1087 
1088    for (row = 0; row < (h>>1); row++) {
1089       stbi_uc *row0 = bytes + row*bytes_per_row;
1090       stbi_uc *row1 = bytes + (h - row - 1)*bytes_per_row;
1091       // swap row0 with row1
1092       size_t bytes_left = bytes_per_row;
1093       while (bytes_left) {
1094          size_t bytes_copy = (bytes_left < sizeof(temp)) ? bytes_left : sizeof(temp);
1095          memcpy(temp, row0, bytes_copy);
1096          memcpy(row0, row1, bytes_copy);
1097          memcpy(row1, temp, bytes_copy);
1098          row0 += bytes_copy;
1099          row1 += bytes_copy;
1100          bytes_left -= bytes_copy;
1101       }
1102    }
1103 }
1104 
1105 #ifndef STBI_NO_GIF
stbi__vertical_flip_slices(void * image,int w,int h,int z,int bytes_per_pixel)1106 static void stbi__vertical_flip_slices(void *image, int w, int h, int z, int bytes_per_pixel)
1107 {
1108    int slice;
1109    int slice_size = w * h * bytes_per_pixel;
1110 
1111    stbi_uc *bytes = (stbi_uc *)image;
1112    for (slice = 0; slice < z; ++slice) {
1113       stbi__vertical_flip(bytes, w, h, bytes_per_pixel);
1114       bytes += slice_size;
1115    }
1116 }
1117 #endif
1118 
// Decode the image in 's' and deliver it as 8 bits per channel.
//
// The format is detected and decoded by stbi__load_main(). A 16-bit result
// is reduced to 8 bits, and the image is flipped vertically when the global
// flip-on-load setting is enabled. *x and *y receive the image size; *comp
// is read for the channel count when req_comp is 0.
// Returns the pixel buffer, or NULL when decoding or the bit-depth
// conversion fails.
static unsigned char *stbi__load_and_postprocess_8bit(stbi__context *s, int *x, int *y, int *comp, int req_comp)
{
   stbi__result_info ri;
   void *result = stbi__load_main(s, x, y, comp, req_comp, &ri, 8);

   if (result == NULL)
      return NULL;

   if (ri.bits_per_channel != 8) {
      STBI_ASSERT(ri.bits_per_channel == 16);
      result = stbi__convert_16_to_8((stbi__uint16 *) result, *x, *y, req_comp == 0 ? *comp : req_comp);
      // the conversion allocates and can fail; stop here rather than handing
      // a NULL image to the flip below
      if (result == NULL)
         return NULL;
      ri.bits_per_channel = 8;
   }

   // @TODO: move stbi__convert_format to here

   if (stbi__vertically_flip_on_load) {
      int channels = req_comp ? req_comp : *comp;
      stbi__vertical_flip(result, *x, *y, channels * sizeof(stbi_uc));
   }

   return (unsigned char *) result;
}
1142 
stbi__load_and_postprocess_16bit(stbi__context * s,int * x,int * y,int * comp,int req_comp)1143 static stbi__uint16 *stbi__load_and_postprocess_16bit(stbi__context *s, int *x, int *y, int *comp, int req_comp)
1144 {
1145    stbi__result_info ri;
1146    void *result = stbi__load_main(s, x, y, comp, req_comp, &ri, 16);
1147 
1148    if (result == NULL)
1149       return NULL;
1150 
1151    if (ri.bits_per_channel != 16) {
1152       STBI_ASSERT(ri.bits_per_channel == 8);
1153       result = stbi__convert_8_to_16((stbi_uc *) result, *x, *y, req_comp == 0 ? *comp : req_comp);
1154       ri.bits_per_channel = 16;
1155    }
1156 
1157    // @TODO: move stbi__convert_format16 to here
1158    // @TODO: special case RGB-to-Y (and RGBA-to-YA) for 8-bit-to-16-bit case to keep more precision
1159 
1160    if (stbi__vertically_flip_on_load) {
1161       int channels = req_comp ? req_comp : *comp;
1162       stbi__vertical_flip(result, *x, *y, channels * sizeof(stbi__uint16));
1163    }
1164 
1165    return (stbi__uint16 *) result;
1166 }
1167 
1168 #if !defined(STBI_NO_HDR) && !defined(STBI_NO_LINEAR)
stbi__float_postprocess(float * result,int * x,int * y,int * comp,int req_comp)1169 static void stbi__float_postprocess(float *result, int *x, int *y, int *comp, int req_comp)
1170 {
1171    if (stbi__vertically_flip_on_load && result != NULL) {
1172       int channels = req_comp ? req_comp : *comp;
1173       stbi__vertical_flip(result, *x, *y, channels * sizeof(float));
1174    }
1175 }
1176 #endif
1177 
1178 #ifndef STBI_NO_STDIO
1179 
// Hand-written declarations of the two Win32 conversion routines used for
// UTF-8 filename support, declared here instead of pulling in a Windows
// header. Only needed for MSVC builds with STBI_WINDOWS_UTF8 defined.
#if defined(_MSC_VER) && defined(STBI_WINDOWS_UTF8)
STBI_EXTERN __declspec(dllimport) int __stdcall MultiByteToWideChar(unsigned int cp, unsigned long flags, const char *str, int cbmb, wchar_t *widestr, int cchwide);
STBI_EXTERN __declspec(dllimport) int __stdcall WideCharToMultiByte(unsigned int cp, unsigned long flags, const wchar_t *widestr, int cchwide, char *str, int cbmb, const char *defchar, int *used_default);
#endif
1184 
#if defined(_MSC_VER) && defined(STBI_WINDOWS_UTF8)
// Convert the NUL-terminated wide string 'input' to UTF-8, writing at most
// 'bufferlen' bytes into 'buffer'. Returns whatever WideCharToMultiByte
// returns for the conversion.
STBIDEF int stbi_convert_wchar_to_utf8(char *buffer, size_t bufferlen, const wchar_t* input)
{
   const unsigned int utf8_codepage = 65001;
   int capacity = (int) bufferlen;

   // an input length of -1 means "input is NUL-terminated"
   return WideCharToMultiByte(utf8_codepage, 0, input, -1, buffer, capacity, NULL, NULL);
}
#endif
1191 
// Open 'filename' with the given fopen-style 'mode' and return the stream,
// or a null pointer on failure.
//
// On MSVC with STBI_WINDOWS_UTF8 both strings are treated as UTF-8 and
// converted to wide strings first, so non-ASCII paths work; if either
// conversion fails, nothing is opened. Other MSVC builds (2005 and later)
// use fopen_s, everything else plain fopen.
static FILE *stbi__fopen(char const *filename, char const *mode)
{
   FILE *f;
#if defined(_MSC_VER) && defined(STBI_WINDOWS_UTF8)
   wchar_t wMode[64];
   wchar_t wFilename[1024];
   // MultiByteToWideChar expects the destination capacity in wide
   // characters, not bytes; passing sizeof(array) would let it write past
   // the end of the array, so divide by the element size.
   if (0 == MultiByteToWideChar(65001 /* UTF8 */, 0, filename, -1, wFilename, sizeof(wFilename)/sizeof(*wFilename)))
      return 0;

   if (0 == MultiByteToWideChar(65001 /* UTF8 */, 0, mode, -1, wMode, sizeof(wMode)/sizeof(*wMode)))
      return 0;

#if _MSC_VER >= 1400
   if (0 != _wfopen_s(&f, wFilename, wMode))
      f = 0;
#else
   f = _wfopen(wFilename, wMode);
#endif

#elif defined(_MSC_VER) && _MSC_VER >= 1400
   if (0 != fopen_s(&f, filename, mode))
      f=0;
#else
   f = fopen(filename, mode);
#endif
   return f;
}
1219 
1220 
stbi_load(char const * filename,int * x,int * y,int * comp,int req_comp)1221 STBIDEF stbi_uc *stbi_load(char const *filename, int *x, int *y, int *comp, int req_comp)
1222 {
1223    FILE *f = stbi__fopen(filename, "rb");
1224    unsigned char *result;
1225    if (!f) return stbi__errpuc("can't fopen", "Unable to open file");
1226    result = stbi_load_from_file(f,x,y,comp,req_comp);
1227    fclose(f);
1228    return result;
1229 }
1230 
stbi_load_from_file(FILE * f,int * x,int * y,int * comp,int req_comp)1231 STBIDEF stbi_uc *stbi_load_from_file(FILE *f, int *x, int *y, int *comp, int req_comp)
1232 {
1233    unsigned char *result;
1234    stbi__context s;
1235    stbi__start_file(&s,f);
1236    result = stbi__load_and_postprocess_8bit(&s,x,y,comp,req_comp);
1237    if (result) {
1238       // need to 'unget' all the characters in the IO buffer
1239       fseek(f, - (int) (s.img_buffer_end - s.img_buffer), SEEK_CUR);
1240    }
1241    return result;
1242 }
1243 
stbi_load_from_file_16(FILE * f,int * x,int * y,int * comp,int req_comp)1244 STBIDEF stbi__uint16 *stbi_load_from_file_16(FILE *f, int *x, int *y, int *comp, int req_comp)
1245 {
1246    stbi__uint16 *result;
1247    stbi__context s;
1248    stbi__start_file(&s,f);
1249    result = stbi__load_and_postprocess_16bit(&s,x,y,comp,req_comp);
1250    if (result) {
1251       // need to 'unget' all the characters in the IO buffer
1252       fseek(f, - (int) (s.img_buffer_end - s.img_buffer), SEEK_CUR);
1253    }
1254    return result;
1255 }
1256 
stbi_load_16(char const * filename,int * x,int * y,int * comp,int req_comp)1257 STBIDEF stbi_us *stbi_load_16(char const *filename, int *x, int *y, int *comp, int req_comp)
1258 {
1259    FILE *f = stbi__fopen(filename, "rb");
1260    stbi__uint16 *result;
1261    if (!f) return (stbi_us *) stbi__errpuc("can't fopen", "Unable to open file");
1262    result = stbi_load_from_file_16(f,x,y,comp,req_comp);
1263    fclose(f);
1264    return result;
1265 }
1266 
1267 
1268 #endif //!STBI_NO_STDIO
1269 
stbi_load_16_from_memory(stbi_uc const * buffer,int len,int * x,int * y,int * channels_in_file,int desired_channels)1270 STBIDEF stbi_us *stbi_load_16_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *channels_in_file, int desired_channels)
1271 {
1272    stbi__context s;
1273    stbi__start_mem(&s,buffer,len);
1274    return stbi__load_and_postprocess_16bit(&s,x,y,channels_in_file,desired_channels);
1275 }
1276 
stbi_load_16_from_callbacks(stbi_io_callbacks const * clbk,void * user,int * x,int * y,int * channels_in_file,int desired_channels)1277 STBIDEF stbi_us *stbi_load_16_from_callbacks(stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *channels_in_file, int desired_channels)
1278 {
1279    stbi__context s;
1280    stbi__start_callbacks(&s, (stbi_io_callbacks *)clbk, user);
1281    return stbi__load_and_postprocess_16bit(&s,x,y,channels_in_file,desired_channels);
1282 }
1283 
stbi_load_from_memory(stbi_uc const * buffer,int len,int * x,int * y,int * comp,int req_comp)1284 STBIDEF stbi_uc *stbi_load_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp)
1285 {
1286    stbi__context s;
1287    stbi__start_mem(&s,buffer,len);
1288    return stbi__load_and_postprocess_8bit(&s,x,y,comp,req_comp);
1289 }
1290 
stbi_load_from_callbacks(stbi_io_callbacks const * clbk,void * user,int * x,int * y,int * comp,int req_comp)1291 STBIDEF stbi_uc *stbi_load_from_callbacks(stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *comp, int req_comp)
1292 {
1293    stbi__context s;
1294    stbi__start_callbacks(&s, (stbi_io_callbacks *) clbk, user);
1295    return stbi__load_and_postprocess_8bit(&s,x,y,comp,req_comp);
1296 }
1297 
1298 #ifndef STBI_NO_GIF
stbi_load_gif_from_memory(stbi_uc const * buffer,int len,int ** delays,int * x,int * y,int * z,int * comp,int req_comp)1299 STBIDEF stbi_uc *stbi_load_gif_from_memory(stbi_uc const *buffer, int len, int **delays, int *x, int *y, int *z, int *comp, int req_comp)
1300 {
1301    unsigned char *result;
1302    stbi__context s;
1303    stbi__start_mem(&s,buffer,len);
1304 
1305    result = (unsigned char*) stbi__load_gif_main(&s, delays, x, y, z, comp, req_comp);
1306    if (stbi__vertically_flip_on_load) {
1307       stbi__vertical_flip_slices( result, *x, *y, *z, *comp );
1308    }
1309 
1310    return result;
1311 }
1312 #endif
1313 
1314 #ifndef STBI_NO_LINEAR
// Decode the image in 's' as floating-point data.
//
// When HDR support is compiled in and the stream passes the HDR test, it is
// decoded directly to float (and flipped if flip-on-load is enabled).
// Anything else is decoded as an 8-bit image and converted with
// stbi__ldr_to_hdr(). Returns the float pixel data or NULL on failure; when
// the 8-bit path fails, the failure reason is set to "unknown image type".
static float *stbi__loadf_main(stbi__context *s, int *x, int *y, int *comp, int req_comp)
{
   unsigned char *ldr;
   int channels;

   #ifndef STBI_NO_HDR
   if (stbi__hdr_test(s)) {
      stbi__result_info info;
      float *pixels = stbi__hdr_load(s, x, y, comp, req_comp, &info);
      if (pixels != NULL)
         stbi__float_postprocess(pixels, x, y, comp, req_comp);
      return pixels;
   }
   #endif

   ldr = stbi__load_and_postprocess_8bit(s, x, y, comp, req_comp);
   if (ldr == NULL)
      return stbi__errpf("unknown image type", "Image not of any known type, or corrupt");

   channels = req_comp ? req_comp : *comp;
   return stbi__ldr_to_hdr(ldr, *x, *y, channels);
}
1332 
stbi_loadf_from_memory(stbi_uc const * buffer,int len,int * x,int * y,int * comp,int req_comp)1333 STBIDEF float *stbi_loadf_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp)
1334 {
1335    stbi__context s;
1336    stbi__start_mem(&s,buffer,len);
1337    return stbi__loadf_main(&s,x,y,comp,req_comp);
1338 }
1339 
stbi_loadf_from_callbacks(stbi_io_callbacks const * clbk,void * user,int * x,int * y,int * comp,int req_comp)1340 STBIDEF float *stbi_loadf_from_callbacks(stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *comp, int req_comp)
1341 {
1342    stbi__context s;
1343    stbi__start_callbacks(&s, (stbi_io_callbacks *) clbk, user);
1344    return stbi__loadf_main(&s,x,y,comp,req_comp);
1345 }
1346 
1347 #ifndef STBI_NO_STDIO
stbi_loadf(char const * filename,int * x,int * y,int * comp,int req_comp)1348 STBIDEF float *stbi_loadf(char const *filename, int *x, int *y, int *comp, int req_comp)
1349 {
1350    float *result;
1351    FILE *f = stbi__fopen(filename, "rb");
1352    if (!f) return stbi__errpf("can't fopen", "Unable to open file");
1353    result = stbi_loadf_from_file(f,x,y,comp,req_comp);
1354    fclose(f);
1355    return result;
1356 }
1357 
stbi_loadf_from_file(FILE * f,int * x,int * y,int * comp,int req_comp)1358 STBIDEF float *stbi_loadf_from_file(FILE *f, int *x, int *y, int *comp, int req_comp)
1359 {
1360    stbi__context s;
1361    stbi__start_file(&s,f);
1362    return stbi__loadf_main(&s,x,y,comp,req_comp);
1363 }
1364 #endif // !STBI_NO_STDIO
1365 
1366 #endif // !STBI_NO_LINEAR
1367 
// these is-hdr-or-not functions are defined independently of whether
// STBI_NO_LINEAR is defined, for API simplicity; if STBI_NO_HDR is defined,
// they always report false!
1371 
stbi_is_hdr_from_memory(stbi_uc const * buffer,int len)1372 STBIDEF int stbi_is_hdr_from_memory(stbi_uc const *buffer, int len)
1373 {
1374    #ifndef STBI_NO_HDR
1375    stbi__context s;
1376    stbi__start_mem(&s,buffer,len);
1377    return stbi__hdr_test(&s);
1378    #else
1379    STBI_NOTUSED(buffer);
1380    STBI_NOTUSED(len);
1381    return 0;
1382    #endif
1383 }
1384 
1385 #ifndef STBI_NO_STDIO
stbi_is_hdr(char const * filename)1386 STBIDEF int      stbi_is_hdr          (char const *filename)
1387 {
1388    FILE *f = stbi__fopen(filename, "rb");
1389    int result=0;
1390    if (f) {
1391       result = stbi_is_hdr_from_file(f);
1392       fclose(f);
1393    }
1394    return result;
1395 }
1396 
stbi_is_hdr_from_file(FILE * f)1397 STBIDEF int stbi_is_hdr_from_file(FILE *f)
1398 {
1399    #ifndef STBI_NO_HDR
1400    long pos = ftell(f);
1401    int res;
1402    stbi__context s;
1403    stbi__start_file(&s,f);
1404    res = stbi__hdr_test(&s);
1405    fseek(f, pos, SEEK_SET);
1406    return res;
1407    #else
1408    STBI_NOTUSED(f);
1409    return 0;
1410    #endif
1411 }
1412 #endif // !STBI_NO_STDIO
1413 
stbi_is_hdr_from_callbacks(stbi_io_callbacks const * clbk,void * user)1414 STBIDEF int      stbi_is_hdr_from_callbacks(stbi_io_callbacks const *clbk, void *user)
1415 {
1416    #ifndef STBI_NO_HDR
1417    stbi__context s;
1418    stbi__start_callbacks(&s, (stbi_io_callbacks *) clbk, user);
1419    return stbi__hdr_test(&s);
1420    #else
1421    STBI_NOTUSED(clbk);
1422    STBI_NOTUSED(user);
1423    return 0;
1424    #endif
1425 }
1426 
1427 #ifndef STBI_NO_LINEAR
1428 static float stbi__l2h_gamma=2.2f, stbi__l2h_scale=1.0f;
1429 
stbi_ldr_to_hdr_gamma(float gamma)1430 STBIDEF void   stbi_ldr_to_hdr_gamma(float gamma) { stbi__l2h_gamma = gamma; }
stbi_ldr_to_hdr_scale(float scale)1431 STBIDEF void   stbi_ldr_to_hdr_scale(float scale) { stbi__l2h_scale = scale; }
1432 #endif
1433 
1434 static float stbi__h2l_gamma_i=1.0f/2.2f, stbi__h2l_scale_i=1.0f;
1435 
stbi_hdr_to_ldr_gamma(float gamma)1436 STBIDEF void   stbi_hdr_to_ldr_gamma(float gamma) { stbi__h2l_gamma_i = 1/gamma; }
stbi_hdr_to_ldr_scale(float scale)1437 STBIDEF void   stbi_hdr_to_ldr_scale(float scale) { stbi__h2l_scale_i = 1/scale; }
1438 
1439 
1440 //////////////////////////////////////////////////////////////////////////////
1441 //
1442 // Common code used by all image loaders
1443 //
1444 
// Scan modes shared by the image loaders.
enum
{
   STBI__SCAN_load   = 0,
   STBI__SCAN_type   = 1,
   STBI__SCAN_header = 2
};
1451 
// Refill the context's internal buffer through the read callback and point
// the read cursor at the fresh data.
//
// When the callback delivers nothing, the context stops using callbacks and
// is left holding a single zero byte, so later reads behave like reads from
// an exhausted memory buffer.
static void stbi__refill_buffer(stbi__context *s)
{
   int n = (s->io.read)(s->io_user_data,(char*)s->buffer_start,s->buflen);
   // a negative count (a callback reporting an error) is treated like end of
   // data; accepting it would place img_buffer_end before img_buffer and
   // make later "bytes left" computations negative
   if (n <= 0) {
      // at end of file, treat same as if from memory, but need to handle case
      // where s->img_buffer isn't pointing to safe memory, e.g. 0-byte file
      s->read_from_callbacks = 0;
      s->img_buffer = s->buffer_start;
      s->img_buffer_end = s->buffer_start+1;
      *s->img_buffer = 0;
   } else {
      s->img_buffer = s->buffer_start;
      s->img_buffer_end = s->buffer_start + n;
   }
}
1467 
stbi__get8(stbi__context * s)1468 stbi_inline static stbi_uc stbi__get8(stbi__context *s)
1469 {
1470    if (s->img_buffer < s->img_buffer_end)
1471       return *s->img_buffer++;
1472    if (s->read_from_callbacks) {
1473       stbi__refill_buffer(s);
1474       return *s->img_buffer++;
1475    }
1476    return 0;
1477 }
1478 
stbi__at_eof(stbi__context * s)1479 stbi_inline static int stbi__at_eof(stbi__context *s)
1480 {
1481    if (s->io.read) {
1482       if (!(s->io.eof)(s->io_user_data)) return 0;
1483       // if feof() is true, check if buffer = end
1484       // special case: we've only got the special 0 character at the end
1485       if (s->read_from_callbacks == 0) return 1;
1486    }
1487 
1488    return s->img_buffer >= s->img_buffer_end;
1489 }
1490 
// Advance the read position by 'n' bytes. A negative 'n' simply moves the
// cursor to the end of the current buffer. For contexts with callbacks,
// whatever part of the skip is not covered by buffered data is forwarded to
// the skip callback.
static void stbi__skip(stbi__context *s, int n)
{
   int buffered;

   if (n < 0) {
      s->img_buffer = s->img_buffer_end;
      return;
   }

   buffered = (int) (s->img_buffer_end - s->img_buffer);
   if (s->io.read && buffered < n) {
      // drop what is buffered and let the source skip the remainder
      s->img_buffer = s->img_buffer_end;
      (s->io.skip)(s->io_user_data, n - buffered);
   } else {
      s->img_buffer += n;
   }
}
1507 
// Read exactly 'n' bytes from the context into 'buffer'.
//
// Returns 1 when all 'n' bytes were delivered and 0 otherwise (not enough
// data, or a negative 'n'). For contexts with callbacks, buffered bytes are
// copied first and the remainder is requested from the read callback
// directly; the internal buffer is marked as used up afterwards.
static int stbi__getn(stbi__context *s, stbi_uc *buffer, int n)
{
   // a negative count would turn into a huge size once passed to memcpy
   if (n < 0)
      return 0;

   if (s->io.read) {
      int blen = (int) (s->img_buffer_end - s->img_buffer);
      if (blen < n) {
         int res, count;

         memcpy(buffer, s->img_buffer, blen);

         count = (s->io.read)(s->io_user_data, (char*) buffer + blen, n - blen);
         res = (count == (n-blen));
         s->img_buffer = s->img_buffer_end;
         return res;
      }
   }

   // compare against the number of bytes left rather than forming
   // "img_buffer + n", which can point far outside the buffer (undefined
   // behavior, and able to wrap around) for a large 'n'
   if (n <= s->img_buffer_end - s->img_buffer) {
      memcpy(buffer, s->img_buffer, n);
      s->img_buffer += n;
      return 1;
   } else
      return 0;
}
1531 
// Read a 16-bit big-endian value: the first byte read is the high byte.
static int stbi__get16be(stbi__context *s)
{
   int hi = stbi__get8(s);
   int lo = stbi__get8(s);
   return (hi << 8) + lo;
}
1537 
stbi__get32be(stbi__context * s)1538 static stbi__uint32 stbi__get32be(stbi__context *s)
1539 {
1540    stbi__uint32 z = stbi__get16be(s);
1541    return (z << 16) + stbi__get16be(s);
1542 }
1543 
1544 #if defined(STBI_NO_BMP) && defined(STBI_NO_TGA) && defined(STBI_NO_GIF)
1545 // nothing
1546 #else
stbi__get16le(stbi__context * s)1547 static int stbi__get16le(stbi__context *s)
1548 {
1549    int z = stbi__get8(s);
1550    return z + (stbi__get8(s) << 8);
1551 }
1552 #endif
1553 
1554 #ifndef STBI_NO_BMP
stbi__get32le(stbi__context * s)1555 static stbi__uint32 stbi__get32le(stbi__context *s)
1556 {
1557    stbi__uint32 z = stbi__get16le(s);
1558    return z + (stbi__get16le(s) << 16);
1559 }
1560 #endif
1561 
// truncate an int to a byte without warnings: keep the low 8 bits, then cast
#define STBI__BYTECAST(x)  ((stbi_uc) ((x) & 0xFF))
1563 
1564 
1565 //////////////////////////////////////////////////////////////////////////////
1566 //
1567 //  generic converter from built-in img_n to req_comp
1568 //    individual types do this automatically as much as possible (e.g. jpeg
1569 //    does all cases internally since it needs to colorspace convert anyway,
1570 //    and it never has alpha, so very few cases ). png can automatically
1571 //    interleave an alpha=255 channel, but falls back to this for other cases
1572 //
1573 //  assume data buffer is malloced, so malloc a new one and free that one
1574 //  only failure mode is malloc failing
1575 
stbi__compute_y(int r,int g,int b)1576 static stbi_uc stbi__compute_y(int r, int g, int b)
1577 {
1578    return (stbi_uc) (((r*77) + (g*150) +  (29*b)) >> 8);
1579 }
1580 
stbi__convert_format(unsigned char * data,int img_n,int req_comp,unsigned int x,unsigned int y)1581 static unsigned char *stbi__convert_format(unsigned char *data, int img_n, int req_comp, unsigned int x, unsigned int y)
1582 {
1583    int i,j;
1584    unsigned char *good;
1585 
1586    if (req_comp == img_n) return data;
1587    STBI_ASSERT(req_comp >= 1 && req_comp <= 4);
1588 
1589    good = (unsigned char *) stbi__malloc_mad3(req_comp, x, y, 0);
1590    if (good == NULL) {
1591       STBI_FREE(data);
1592       return stbi__errpuc("outofmem", "Out of memory");
1593    }
1594 
1595    for (j=0; j < (int) y; ++j) {
1596       unsigned char *src  = data + j * x * img_n   ;
1597       unsigned char *dest = good + j * x * req_comp;
1598 
1599       #define STBI__COMBO(a,b)  ((a)*8+(b))
1600       #define STBI__CASE(a,b)   case STBI__COMBO(a,b): for(i=x-1; i >= 0; --i, src += a, dest += b)
1601       // convert source image with img_n components to one with req_comp components;
1602       // avoid switch per pixel, so use switch per scanline and massive macros
1603       switch (STBI__COMBO(img_n, req_comp)) {
1604          STBI__CASE(1,2) { dest[0]=src[0]; dest[1]=255;                                     } break;
1605          STBI__CASE(1,3) { dest[0]=dest[1]=dest[2]=src[0];                                  } break;
1606          STBI__CASE(1,4) { dest[0]=dest[1]=dest[2]=src[0]; dest[3]=255;                     } break;
1607          STBI__CASE(2,1) { dest[0]=src[0];                                                  } break;
1608          STBI__CASE(2,3) { dest[0]=dest[1]=dest[2]=src[0];                                  } break;
1609          STBI__CASE(2,4) { dest[0]=dest[1]=dest[2]=src[0]; dest[3]=src[1];                  } break;
1610          STBI__CASE(3,4) { dest[0]=src[0];dest[1]=src[1];dest[2]=src[2];dest[3]=255;        } break;
1611          STBI__CASE(3,1) { dest[0]=stbi__compute_y(src[0],src[1],src[2]);                   } break;
1612          STBI__CASE(3,2) { dest[0]=stbi__compute_y(src[0],src[1],src[2]); dest[1] = 255;    } break;
1613          STBI__CASE(4,1) { dest[0]=stbi__compute_y(src[0],src[1],src[2]);                   } break;
1614          STBI__CASE(4,2) { dest[0]=stbi__compute_y(src[0],src[1],src[2]); dest[1] = src[3]; } break;
1615          STBI__CASE(4,3) { dest[0]=src[0];dest[1]=src[1];dest[2]=src[2];                    } break;
1616          default: STBI_ASSERT(0);
1617       }
1618       #undef STBI__CASE
1619    }
1620 
1621    STBI_FREE(data);
1622    return good;
1623 }
1624 
stbi__compute_y_16(int r,int g,int b)1625 static stbi__uint16 stbi__compute_y_16(int r, int g, int b)
1626 {
1627    return (stbi__uint16) (((r*77) + (g*150) +  (29*b)) >> 8);
1628 }
1629 
// Convert a 16-bit-per-channel interleaved image from img_n channels to
// req_comp channels.
//   data      source pixels; this function takes ownership of the buffer
//   img_n     number of channels in data
//   req_comp  number of channels wanted (asserted to be 1..4)
//   x, y      width and height in pixels
// Returns data unchanged when img_n == req_comp. Otherwise returns a newly
// allocated buffer and frees data. If the allocation fails, or the
// img_n/req_comp pair has no conversion, every buffer is freed and the
// value of stbi__errpuc() (cast to stbi__uint16 *) is returned.
static stbi__uint16 *stbi__convert_format16(stbi__uint16 *data, int img_n, int req_comp, unsigned int x, unsigned int y)
{
   int i,j;
   stbi__uint16 *good;

   if (req_comp == img_n) return data;
   STBI_ASSERT(req_comp >= 1 && req_comp <= 4);

   // two bytes per sample; go through the overflow-checked allocator (as the
   // 8-bit converter does) rather than multiplying the sizes unchecked
   good = (stbi__uint16 *) stbi__malloc_mad3(req_comp * 2, x, y, 0);
   if (good == NULL) {
      STBI_FREE(data);
      return (stbi__uint16 *) stbi__errpuc("outofmem", "Out of memory");
   }

   for (j=0; j < (int) y; ++j) {
      stbi__uint16 *src  = data + j * x * img_n   ;
      stbi__uint16 *dest = good + j * x * req_comp;

      #define STBI__COMBO(a,b)  ((a)*8+(b))
      #define STBI__CASE(a,b)   case STBI__COMBO(a,b): for(i=x-1; i >= 0; --i, src += a, dest += b)
      // convert source image with img_n components to one with req_comp components;
      // avoid switch per pixel, so use switch per scanline and massive macros
      switch (STBI__COMBO(img_n, req_comp)) {
         STBI__CASE(1,2) { dest[0]=src[0]; dest[1]=0xffff;                                     } break;
         STBI__CASE(1,3) { dest[0]=dest[1]=dest[2]=src[0];                                     } break;
         STBI__CASE(1,4) { dest[0]=dest[1]=dest[2]=src[0]; dest[3]=0xffff;                     } break;
         STBI__CASE(2,1) { dest[0]=src[0];                                                     } break;
         STBI__CASE(2,3) { dest[0]=dest[1]=dest[2]=src[0];                                     } break;
         STBI__CASE(2,4) { dest[0]=dest[1]=dest[2]=src[0]; dest[3]=src[1];                     } break;
         STBI__CASE(3,4) { dest[0]=src[0];dest[1]=src[1];dest[2]=src[2];dest[3]=0xffff;        } break;
         STBI__CASE(3,1) { dest[0]=stbi__compute_y_16(src[0],src[1],src[2]);                   } break;
         STBI__CASE(3,2) { dest[0]=stbi__compute_y_16(src[0],src[1],src[2]); dest[1] = 0xffff; } break;
         STBI__CASE(4,1) { dest[0]=stbi__compute_y_16(src[0],src[1],src[2]);                   } break;
         STBI__CASE(4,2) { dest[0]=stbi__compute_y_16(src[0],src[1],src[2]); dest[1] = src[3]; } break;
         STBI__CASE(4,3) { dest[0]=src[0];dest[1]=src[1];dest[2]=src[2];                       } break;
         default:
            // no conversion exists for this pair; when asserts are compiled
            // out we must still fail instead of handing back an unfilled buffer
            STBI_ASSERT(0);
            STBI_FREE(data);
            STBI_FREE(good);
            return (stbi__uint16 *) stbi__errpuc("unsupported", "Unsupported format conversion");
      }
      #undef STBI__CASE
   }

   STBI_FREE(data);
   return good;
}
1673 
1674 #ifndef STBI_NO_LINEAR
// Expand an 8-bit image into a float image.
//   data     source bytes, x*y pixels of comp channels each; freed before returning
//   x, y     dimensions in pixels
//   comp     channels per pixel
// Non-alpha channels are divided by 255, raised to stbi__l2h_gamma and
// multiplied by stbi__l2h_scale. When comp is even the last channel is
// treated as alpha and is only divided by 255.
// Returns NULL if data is NULL; on allocation failure frees data and returns
// the value of stbi__errpf().
static float   *stbi__ldr_to_hdr(stbi_uc *data, int x, int y, int comp)
{
   int pixel, chan, color_chans, pixel_count;
   const stbi_uc *src;
   float *dst;
   float *output;

   if (data == NULL) return NULL;

   output = (float *) stbi__malloc_mad4(x, y, comp, sizeof(float), 0);
   if (output == NULL) {
      STBI_FREE(data);
      return stbi__errpf("outofmem", "Out of memory");
   }

   // odd channel counts carry no alpha; even ones keep alpha in the last slot
   color_chans = (comp & 1) ? comp : comp-1;
   pixel_count = x*y;

   src = data;
   dst = output;
   for (pixel=0; pixel < pixel_count; ++pixel, src += comp, dst += comp) {
      for (chan=0; chan < color_chans; ++chan)
         dst[chan] = (float) (pow(src[chan]/255.0f, stbi__l2h_gamma) * stbi__l2h_scale);
      if (color_chans < comp)
         dst[color_chans] = src[color_chans]/255.0f;
   }

   STBI_FREE(data);
   return output;
}
1697 #endif
1698 
1699 #ifndef STBI_NO_HDR
1700 #define stbi__float2int(x)   ((int) (x))
stbi__hdr_to_ldr(float * data,int x,int y,int comp)1701 static stbi_uc *stbi__hdr_to_ldr(float   *data, int x, int y, int comp)
1702 {
1703    int i,k,n;
1704    stbi_uc *output;
1705    if (!data) return NULL;
1706    output = (stbi_uc *) stbi__malloc_mad3(x, y, comp, 0);
1707    if (output == NULL) { STBI_FREE(data); return stbi__errpuc("outofmem", "Out of memory"); }
1708    // compute number of non-alpha components
1709    if (comp & 1) n = comp; else n = comp-1;
1710    for (i=0; i < x*y; ++i) {
1711       for (k=0; k < n; ++k) {
1712          float z = (float) pow(data[i*comp+k]*stbi__h2l_scale_i, stbi__h2l_gamma_i) * 255 + 0.5f;
1713          if (z < 0) z = 0;
1714          if (z > 255) z = 255;
1715          output[i*comp + k] = (stbi_uc) stbi__float2int(z);
1716       }
1717       if (k < comp) {
1718          float z = data[i*comp+k] * 255 + 0.5f;
1719          if (z < 0) z = 0;
1720          if (z > 255) z = 255;
1721          output[i*comp + k] = (stbi_uc) stbi__float2int(z);
1722       }
1723    }
1724    STBI_FREE(data);
1725    return output;
1726 }
1727 #endif
1728 
1729 //////////////////////////////////////////////////////////////////////////////
1730 //
1731 //  "baseline" JPEG/JFIF decoder
1732 //
1733 //    simple implementation
1734 //      - doesn't support delayed output of y-dimension
1735 //      - simple interface (only one output format: 8-bit interleaved RGB)
1736 //      - doesn't try to recover corrupt jpegs
1737 //      - doesn't allow partial loading, loading multiple at once
1738 //      - still fast on x86 (copying globals into locals doesn't help x86)
1739 //      - allocates lots of intermediate memory (full size of all components)
1740 //        - non-interleaved case requires this anyway
1741 //        - allows good upsampling (see next)
1742 //    high-quality
1743 //      - upsampled channels are bilinearly interpolated, even across blocks
1744 //      - quality integer IDCT derived from IJG's 'slow'
1745 //    performance
1746 //      - fast huffman; reasonable integer IDCT
1747 //      - some SIMD kernels for common paths on targets with SSE2/NEON
1748 //      - uses a lot of intermediate memory, could cache poorly
1749 
1750 #ifndef STBI_NO_JPEG
1751 
1752 // huffman decoding acceleration
1753 #define FAST_BITS   9  // larger handles more cases; smaller stomps less cache
1754 
1755 typedef struct
1756 {
1757    stbi_uc  fast[1 << FAST_BITS];
1758    // weirdly, repacking this into AoS is a 10% speed loss, instead of a win
1759    stbi__uint16 code[256];
1760    stbi_uc  values[256];
1761    stbi_uc  size[257];
1762    unsigned int maxcode[18];
1763    int    delta[17];   // old 'firstsymbol' - old 'firstcode'
1764 } stbi__huffman;
1765 
1766 typedef struct
1767 {
1768    stbi__context *s;
1769    stbi__huffman huff_dc[4];
1770    stbi__huffman huff_ac[4];
1771    stbi__uint16 dequant[4][64];
1772    stbi__int16 fast_ac[4][1 << FAST_BITS];
1773 
1774 // sizes for components, interleaved MCUs
1775    int img_h_max, img_v_max;
1776    int img_mcu_x, img_mcu_y;
1777    int img_mcu_w, img_mcu_h;
1778 
1779 // definition of jpeg image component
1780    struct
1781    {
1782       int id;
1783       int h,v;
1784       int tq;
1785       int hd,ha;
1786       int dc_pred;
1787 
1788       int x,y,w2,h2;
1789       stbi_uc *data;
1790       void *raw_data, *raw_coeff;
1791       stbi_uc *linebuf;
1792       short   *coeff;   // progressive only
1793       int      coeff_w, coeff_h; // number of 8x8 coefficient blocks
1794    } img_comp[4];
1795 
1796    stbi__uint32   code_buffer; // jpeg entropy-coded buffer
1797    int            code_bits;   // number of valid bits
1798    unsigned char  marker;      // marker seen while filling entropy buffer
1799    int            nomore;      // flag if we saw a marker so must stop
1800 
1801    int            progressive;
1802    int            spec_start;
1803    int            spec_end;
1804    int            succ_high;
1805    int            succ_low;
1806    int            eob_run;
1807    int            jfif;
1808    int            app14_color_transform; // Adobe APP14 tag
1809    int            rgb;
1810 
1811    int scan_n, order[4];
1812    int restart_interval, todo;
1813 
1814 // kernels
1815    void (*idct_block_kernel)(stbi_uc *out, int out_stride, short data[64]);
1816    void (*YCbCr_to_RGB_kernel)(stbi_uc *out, const stbi_uc *y, const stbi_uc *pcb, const stbi_uc *pcr, int count, int step);
1817    stbi_uc *(*resample_row_hv_2_kernel)(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs);
1818 } stbi__jpeg;
1819 
// Fill in the decoding tables of h from a list of code-length counts.
//   h      table to build (size, code, delta, maxcode and fast are written)
//   count  16 entries; count[i] is the number of codes of length i+1
// Returns 1 on success. Returns the value of stbi__err() when the counts
// describe more than 256 symbols or codes that do not fit their length.
static int stbi__build_huffman(stbi__huffman *h, int *count)
{
   int i,j,k=0;
   unsigned int code;
   // build size list for each symbol (from JPEG spec)
   for (i=0; i < 16; ++i) {
      for (j=0; j < count[i]; ++j) {
         // size[] holds 256 symbols plus the 0 terminator, and code[] holds
         // 256 entries; a corrupt table must not be allowed to write past them
         if (k >= 256) return stbi__err("bad size list","Corrupt JPEG");
         h->size[k++] = (stbi_uc) (i+1);
      }
   }
   h->size[k] = 0;

   // compute actual symbols (from jpeg spec)
   code = 0;
   k = 0;
   for(j=1; j <= 16; ++j) {
      // compute delta to add to code to compute symbol id
      h->delta[j] = k - code;
      if (h->size[k] == j) {
         while (h->size[k] == j)
            h->code[k++] = (stbi__uint16) (code++);
         if (code-1 >= (1u << j)) return stbi__err("bad code lengths","Corrupt JPEG");
      }
      // compute largest code + 1 for this size, preshifted as needed later
      h->maxcode[j] = code << (16-j);
      code <<= 1;
   }
   // j is 17 here: sentinel that always stops the length search in the decoder
   h->maxcode[j] = 0xffffffff;

   // build non-spec acceleration table; 255 is flag for not-accelerated
   memset(h->fast, 255, 1 << FAST_BITS);
   for (i=0; i < k; ++i) {
      int s = h->size[i];
      if (s <= FAST_BITS) {
         int c = h->code[i] << (FAST_BITS-s);
         int m = 1 << (FAST_BITS-s);
         for (j=0; j < m; ++j) {
            h->fast[c+j] = (stbi_uc) i;
         }
      }
   }
   return 1;
}
1861 
1862 // build a table that decodes both magnitude and value of small ACs in
1863 // one go.
stbi__build_fast_ac(stbi__int16 * fast_ac,stbi__huffman * h)1864 static void stbi__build_fast_ac(stbi__int16 *fast_ac, stbi__huffman *h)
1865 {
1866    int i;
1867    for (i=0; i < (1 << FAST_BITS); ++i) {
1868       stbi_uc fast = h->fast[i];
1869       fast_ac[i] = 0;
1870       if (fast < 255) {
1871          int rs = h->values[fast];
1872          int run = (rs >> 4) & 15;
1873          int magbits = rs & 15;
1874          int len = h->size[fast];
1875 
1876          if (magbits && len + magbits <= FAST_BITS) {
1877             // magnitude code followed by receive_extend code
1878             int k = ((i << len) & ((1 << FAST_BITS) - 1)) >> (FAST_BITS - magbits);
1879             int m = 1 << (magbits - 1);
1880             if (k < m) k += (~0U << magbits) + 1;
1881             // if the result is small enough, we can fit it in fast_ac table
1882             if (k >= -128 && k <= 127)
1883                fast_ac[i] = (stbi__int16) ((k * 256) + (run * 16) + (len + magbits));
1884          }
1885       }
1886    }
1887 }
1888 
// Top up j->code_buffer a byte at a time until it holds more than 24 valid
// bits. Once j->nomore is set, zero bytes are fed instead of reading input.
// An 0xff byte followed (after any 0xff fill bytes) by a nonzero byte is a
// marker: it is stored in j->marker, j->nomore is set, and the function
// returns without adding bits. 0xff followed by 0 is an ordinary 0xff byte.
static void stbi__grow_buffer_unsafe(stbi__jpeg *j)
{
   do {
      unsigned int byte = 0;
      if (!j->nomore) byte = stbi__get8(j->s);

      if (byte == 0xff) {
         int next;
         do {
            next = stbi__get8(j->s);   // consume fill bytes
         } while (next == 0xff);
         if (next != 0) {
            j->marker = (unsigned char) next;
            j->nomore = 1;
            return;
         }
      }

      j->code_buffer |= byte << (24 - j->code_bits);
      j->code_bits += 8;
   } while (j->code_bits <= 24);
}
1906 
1907 // (1 << n) - 1
1908 static const stbi__uint32 stbi__bmask[17]={0,1,3,7,15,31,63,127,255,511,1023,2047,4095,8191,16383,32767,65535};
1909 
1910 // decode a jpeg huffman value from the bitstream
stbi__jpeg_huff_decode(stbi__jpeg * j,stbi__huffman * h)1911 stbi_inline static int stbi__jpeg_huff_decode(stbi__jpeg *j, stbi__huffman *h)
1912 {
1913    unsigned int temp;
1914    int c,k;
1915 
1916    if (j->code_bits < 16) stbi__grow_buffer_unsafe(j);
1917 
1918    // look at the top FAST_BITS and determine what symbol ID it is,
1919    // if the code is <= FAST_BITS
1920    c = (j->code_buffer >> (32 - FAST_BITS)) & ((1 << FAST_BITS)-1);
1921    k = h->fast[c];
1922    if (k < 255) {
1923       int s = h->size[k];
1924       if (s > j->code_bits)
1925          return -1;
1926       j->code_buffer <<= s;
1927       j->code_bits -= s;
1928       return h->values[k];
1929    }
1930 
1931    // naive test is to shift the code_buffer down so k bits are
1932    // valid, then test against maxcode. To speed this up, we've
1933    // preshifted maxcode left so that it has (16-k) 0s at the
1934    // end; in other words, regardless of the number of bits, it
1935    // wants to be compared against something shifted to have 16;
1936    // that way we don't need to shift inside the loop.
1937    temp = j->code_buffer >> 16;
1938    for (k=FAST_BITS+1 ; ; ++k)
1939       if (temp < h->maxcode[k])
1940          break;
1941    if (k == 17) {
1942       // error! code not found
1943       j->code_bits -= 16;
1944       return -1;
1945    }
1946 
1947    if (k > j->code_bits)
1948       return -1;
1949 
1950    // convert the huffman code to the symbol id
1951    c = ((j->code_buffer >> (32 - k)) & stbi__bmask[k]) + h->delta[k];
1952    STBI_ASSERT((((j->code_buffer) >> (32 - h->size[c])) & stbi__bmask[h->size[c]]) == h->code[c]);
1953 
1954    // convert the id to a symbol
1955    j->code_bits -= k;
1956    j->code_buffer <<= k;
1957    return h->values[c];
1958 }
1959 
// stbi__jbias[n] = (-1 << n) + 1, for n = 0..15
static const int stbi__jbias[16] = {
        0,     -1,     -3,     -7,
      -15,    -31,    -63,   -127,
     -255,   -511,  -1023,  -2047,
    -4095,  -8191, -16383, -32767
};
1962 
1963 // combined JPEG 'receive' and JPEG 'extend', since baseline
1964 // always extends everything it receives.
stbi__extend_receive(stbi__jpeg * j,int n)1965 stbi_inline static int stbi__extend_receive(stbi__jpeg *j, int n)
1966 {
1967    unsigned int k;
1968    int sgn;
1969    if (j->code_bits < n) stbi__grow_buffer_unsafe(j);
1970 
1971    sgn = (stbi__int32)j->code_buffer >> 31; // sign bit is always in MSB
1972    k = stbi_lrot(j->code_buffer, n);
1973    STBI_ASSERT(n >= 0 && n < (int) (sizeof(stbi__bmask)/sizeof(*stbi__bmask)));
1974    j->code_buffer = k & ~stbi__bmask[n];
1975    k &= stbi__bmask[n];
1976    j->code_bits -= n;
1977    return k + (stbi__jbias[n] & ~sgn);
1978 }
1979 
1980 // get some unsigned bits
stbi__jpeg_get_bits(stbi__jpeg * j,int n)1981 stbi_inline static int stbi__jpeg_get_bits(stbi__jpeg *j, int n)
1982 {
1983    unsigned int k;
1984    if (j->code_bits < n) stbi__grow_buffer_unsafe(j);
1985    k = stbi_lrot(j->code_buffer, n);
1986    j->code_buffer = k & ~stbi__bmask[n];
1987    k &= stbi__bmask[n];
1988    j->code_bits -= n;
1989    return k;
1990 }
1991 
stbi__jpeg_get_bit(stbi__jpeg * j)1992 stbi_inline static int stbi__jpeg_get_bit(stbi__jpeg *j)
1993 {
1994    unsigned int k;
1995    if (j->code_bits < 1) stbi__grow_buffer_unsafe(j);
1996    k = j->code_buffer;
1997    j->code_buffer <<= 1;
1998    --j->code_bits;
1999    return k & 0x80000000;
2000 }
2001 
2002 // given a value that's at position X in the zigzag stream,
2003 // where does it appear in the 8x8 matrix coded as row-major?
2004 static const stbi_uc stbi__jpeg_dezigzag[64+15] =
2005 {
2006     0,  1,  8, 16,  9,  2,  3, 10,
2007    17, 24, 32, 25, 18, 11,  4,  5,
2008    12, 19, 26, 33, 40, 48, 41, 34,
2009    27, 20, 13,  6,  7, 14, 21, 28,
2010    35, 42, 49, 56, 57, 50, 43, 36,
2011    29, 22, 15, 23, 30, 37, 44, 51,
2012    58, 59, 52, 45, 38, 31, 39, 46,
2013    53, 60, 61, 54, 47, 55, 62, 63,
2014    // let corrupt input sample past end
2015    63, 63, 63, 63, 63, 63, 63, 63,
2016    63, 63, 63, 63, 63, 63, 63
2017 };
2018 
2019 // decode one 64-entry block--
stbi__jpeg_decode_block(stbi__jpeg * j,short data[64],stbi__huffman * hdc,stbi__huffman * hac,stbi__int16 * fac,int b,stbi__uint16 * dequant)2020 static int stbi__jpeg_decode_block(stbi__jpeg *j, short data[64], stbi__huffman *hdc, stbi__huffman *hac, stbi__int16 *fac, int b, stbi__uint16 *dequant)
2021 {
2022    int diff,dc,k;
2023    int t;
2024 
2025    if (j->code_bits < 16) stbi__grow_buffer_unsafe(j);
2026    t = stbi__jpeg_huff_decode(j, hdc);
2027    if (t < 0) return stbi__err("bad huffman code","Corrupt JPEG");
2028 
2029    // 0 all the ac values now so we can do it 32-bits at a time
2030    memset(data,0,64*sizeof(data[0]));
2031 
2032    diff = t ? stbi__extend_receive(j, t) : 0;
2033    dc = j->img_comp[b].dc_pred + diff;
2034    j->img_comp[b].dc_pred = dc;
2035    data[0] = (short) (dc * dequant[0]);
2036 
2037    // decode AC components, see JPEG spec
2038    k = 1;
2039    do {
2040       unsigned int zig;
2041       int c,r,s;
2042       if (j->code_bits < 16) stbi__grow_buffer_unsafe(j);
2043       c = (j->code_buffer >> (32 - FAST_BITS)) & ((1 << FAST_BITS)-1);
2044       r = fac[c];
2045       if (r) { // fast-AC path
2046          k += (r >> 4) & 15; // run
2047          s = r & 15; // combined length
2048          j->code_buffer <<= s;
2049          j->code_bits -= s;
2050          // decode into unzigzag'd location
2051          zig = stbi__jpeg_dezigzag[k++];
2052          data[zig] = (short) ((r >> 8) * dequant[zig]);
2053       } else {
2054          int rs = stbi__jpeg_huff_decode(j, hac);
2055          if (rs < 0) return stbi__err("bad huffman code","Corrupt JPEG");
2056          s = rs & 15;
2057          r = rs >> 4;
2058          if (s == 0) {
2059             if (rs != 0xf0) break; // end block
2060             k += 16;
2061          } else {
2062             k += r;
2063             // decode into unzigzag'd location
2064             zig = stbi__jpeg_dezigzag[k++];
2065             data[zig] = (short) (stbi__extend_receive(j,s) * dequant[zig]);
2066          }
2067       }
2068    } while (k < 64);
2069    return 1;
2070 }
2071 
// Decode the DC coefficient of one block in a progressive scan.
//   j     decoder state (spec_end, succ_high, succ_low select the scan type)
//   data  the block's 64 coefficients; data[0] is written or refined
//   hdc   huffman table for the DC coefficient
//   b     index of the component in j->img_comp (its dc_pred is updated)
// Returns 1 on success, or the value of stbi__err() when the scan also
// covers AC coefficients or the huffman code is invalid.
static int stbi__jpeg_decode_block_prog_dc(stbi__jpeg *j, short data[64], stbi__huffman *hdc, int b)
{
   int diff,dc;
   int t;
   if (j->spec_end != 0) return stbi__err("can't merge dc and ac", "Corrupt JPEG");

   if (j->code_bits < 16) stbi__grow_buffer_unsafe(j);

   if (j->succ_high == 0) {
      // first scan for DC coefficient, must be first
      memset(data,0,64*sizeof(data[0])); // 0 all the ac values now
      t = stbi__jpeg_huff_decode(j, hdc);
      // -1 signals a decode failure, and values above 15 exceed the lookup
      // tables used by stbi__extend_receive; neither may be passed on
      if (t < 0 || t > 15) return stbi__err("bad huffman code","Corrupt JPEG");
      diff = t ? stbi__extend_receive(j, t) : 0;

      dc = j->img_comp[b].dc_pred + diff;
      j->img_comp[b].dc_pred = dc;
      // multiply rather than shift: dc may be negative, and left-shifting a
      // negative value is undefined
      data[0] = (short) (dc * (1 << j->succ_low));
   } else {
      // refinement scan for DC coefficient
      if (stbi__jpeg_get_bit(j))
         data[0] += (short) (1 << j->succ_low);
   }
   return 1;
}
2096 
2097 // @OPTIMIZE: store non-zigzagged during the decode passes,
2098 // and only de-zigzag when dequantizing
stbi__jpeg_decode_block_prog_ac(stbi__jpeg * j,short data[64],stbi__huffman * hac,stbi__int16 * fac)2099 static int stbi__jpeg_decode_block_prog_ac(stbi__jpeg *j, short data[64], stbi__huffman *hac, stbi__int16 *fac)
2100 {
2101    int k;
2102    if (j->spec_start == 0) return stbi__err("can't merge dc and ac", "Corrupt JPEG");
2103 
2104    if (j->succ_high == 0) {
2105       int shift = j->succ_low;
2106 
2107       if (j->eob_run) {
2108          --j->eob_run;
2109          return 1;
2110       }
2111 
2112       k = j->spec_start;
2113       do {
2114          unsigned int zig;
2115          int c,r,s;
2116          if (j->code_bits < 16) stbi__grow_buffer_unsafe(j);
2117          c = (j->code_buffer >> (32 - FAST_BITS)) & ((1 << FAST_BITS)-1);
2118          r = fac[c];
2119          if (r) { // fast-AC path
2120             k += (r >> 4) & 15; // run
2121             s = r & 15; // combined length
2122             j->code_buffer <<= s;
2123             j->code_bits -= s;
2124             zig = stbi__jpeg_dezigzag[k++];
2125             data[zig] = (short) ((r >> 8) << shift);
2126          } else {
2127             int rs = stbi__jpeg_huff_decode(j, hac);
2128             if (rs < 0) return stbi__err("bad huffman code","Corrupt JPEG");
2129             s = rs & 15;
2130             r = rs >> 4;
2131             if (s == 0) {
2132                if (r < 15) {
2133                   j->eob_run = (1 << r);
2134                   if (r)
2135                      j->eob_run += stbi__jpeg_get_bits(j, r);
2136                   --j->eob_run;
2137                   break;
2138                }
2139                k += 16;
2140             } else {
2141                k += r;
2142                zig = stbi__jpeg_dezigzag[k++];
2143                data[zig] = (short) (stbi__extend_receive(j,s) << shift);
2144             }
2145          }
2146       } while (k <= j->spec_end);
2147    } else {
2148       // refinement scan for these AC coefficients
2149 
2150       short bit = (short) (1 << j->succ_low);
2151 
2152       if (j->eob_run) {
2153          --j->eob_run;
2154          for (k = j->spec_start; k <= j->spec_end; ++k) {
2155             short *p = &data[stbi__jpeg_dezigzag[k]];
2156             if (*p != 0)
2157                if (stbi__jpeg_get_bit(j))
2158                   if ((*p & bit)==0) {
2159                      if (*p > 0)
2160                         *p += bit;
2161                      else
2162                         *p -= bit;
2163                   }
2164          }
2165       } else {
2166          k = j->spec_start;
2167          do {
2168             int r,s;
2169             int rs = stbi__jpeg_huff_decode(j, hac); // @OPTIMIZE see if we can use the fast path here, advance-by-r is so slow, eh
2170             if (rs < 0) return stbi__err("bad huffman code","Corrupt JPEG");
2171             s = rs & 15;
2172             r = rs >> 4;
2173             if (s == 0) {
2174                if (r < 15) {
2175                   j->eob_run = (1 << r) - 1;
2176                   if (r)
2177                      j->eob_run += stbi__jpeg_get_bits(j, r);
2178                   r = 64; // force end of block
2179                } else {
2180                   // r=15 s=0 should write 16 0s, so we just do
2181                   // a run of 15 0s and then write s (which is 0),
2182                   // so we don't have to do anything special here
2183                }
2184             } else {
2185                if (s != 1) return stbi__err("bad huffman code", "Corrupt JPEG");
2186                // sign bit
2187                if (stbi__jpeg_get_bit(j))
2188                   s = bit;
2189                else
2190                   s = -bit;
2191             }
2192 
2193             // advance by r
2194             while (k <= j->spec_end) {
2195                short *p = &data[stbi__jpeg_dezigzag[k++]];
2196                if (*p != 0) {
2197                   if (stbi__jpeg_get_bit(j))
2198                      if ((*p & bit)==0) {
2199                         if (*p > 0)
2200                            *p += bit;
2201                         else
2202                            *p -= bit;
2203                      }
2204                } else {
2205                   if (r == 0) {
2206                      *p = (short) s;
2207                      break;
2208                   }
2209                   --r;
2210                }
2211             }
2212          } while (k <= j->spec_end);
2213       }
2214    }
2215    return 1;
2216 }
2217 
2218 // take a -128..127 value and stbi__clamp it and convert to 0..255
stbi__clamp(int x)2219 stbi_inline static stbi_uc stbi__clamp(int x)
2220 {
2221    // trick to use a single test to catch both cases
2222    if ((unsigned int) x > 255) {
2223       if (x < 0) return 0;
2224       if (x > 255) return 255;
2225    }
2226    return (stbi_uc) x;
2227 }
2228 
// Fixed-point helpers for the integer IDCT (12 fractional bits, i.e. a scale of 4096).
// stbi__f2f(x): multiply the constant x by 4096, add 0.5 and truncate to int.
2229 #define stbi__f2f(x)  ((int) (((x) * 4096 + 0.5)))
// stbi__fsh(x): multiply an integer by 4096 so it is on the same scale as a
// product with a stbi__f2f constant.
2230 #define stbi__fsh(x)  ((x) * 4096)
2231 
// STBI__IDCT_1D(s0..s7): one 8-point 1-D inverse DCT over the inputs s0..s7.
// The macro declares its own int locals (t0..t3, p1..p5, x0..x3), so it has
// to be the first thing in a block. s0,s2,s4,s6 are combined into x0..x3 and
// s1,s3,s5,s7 into t0..t3; the caller forms the eight outputs as
// x0+-t3, x1+-t2, x2+-t1 and x3+-t0, after adding its rounding bias to
// x0..x3 and before shifting the 4096 scale back out.
2232 // derived from jidctint -- DCT_ISLOW
2233 #define STBI__IDCT_1D(s0,s1,s2,s3,s4,s5,s6,s7) \
2234    int t0,t1,t2,t3,p1,p2,p3,p4,p5,x0,x1,x2,x3; \
2235    p2 = s2;                                    \
2236    p3 = s6;                                    \
2237    p1 = (p2+p3) * stbi__f2f(0.5411961f);       \
2238    t2 = p1 + p3*stbi__f2f(-1.847759065f);      \
2239    t3 = p1 + p2*stbi__f2f( 0.765366865f);      \
2240    p2 = s0;                                    \
2241    p3 = s4;                                    \
2242    t0 = stbi__fsh(p2+p3);                      \
2243    t1 = stbi__fsh(p2-p3);                      \
2244    x0 = t0+t3;                                 \
2245    x3 = t0-t3;                                 \
2246    x1 = t1+t2;                                 \
2247    x2 = t1-t2;                                 \
2248    t0 = s7;                                    \
2249    t1 = s5;                                    \
2250    t2 = s3;                                    \
2251    t3 = s1;                                    \
2252    p3 = t0+t2;                                 \
2253    p4 = t1+t3;                                 \
2254    p1 = t0+t3;                                 \
2255    p2 = t1+t2;                                 \
2256    p5 = (p3+p4)*stbi__f2f( 1.175875602f);      \
2257    t0 = t0*stbi__f2f( 0.298631336f);           \
2258    t1 = t1*stbi__f2f( 2.053119869f);           \
2259    t2 = t2*stbi__f2f( 3.072711026f);           \
2260    t3 = t3*stbi__f2f( 1.501321110f);           \
2261    p1 = p5 + p1*stbi__f2f(-0.899976223f);      \
2262    p2 = p5 + p2*stbi__f2f(-2.562915447f);      \
2263    p3 = p3*stbi__f2f(-1.961570560f);           \
2264    p4 = p4*stbi__f2f(-0.390180644f);           \
2265    t3 += p1+p4;                                \
2266    t2 += p2+p3;                                \
2267    t1 += p2+p4;                                \
2268    t0 += p1+p3;
2269 
// Inverse DCT of one 8x8 coefficient block, written out as 8-bit samples.
//   out         destination of the top-left sample
//   out_stride  distance in bytes between consecutive output rows
//   data        64 coefficients in row-major order
static void stbi__idct_block(stbi_uc *out, int out_stride, short data[64])
{
   int col, row;
   int val[64];

   // first pass: 1-D IDCT down each column, results kept in val[]
   for (col=0; col < 8; ++col) {
      const short *d = data + col;
      int *v = val + col;
      // if all the AC terms of the column are zero, shortcut -- this avoids
      // dequantizing 0s and IDCTing
      int ac_all_zero = d[ 8]==0 && d[16]==0 && d[24]==0 && d[32]==0
                     && d[40]==0 && d[48]==0 && d[56]==0;

      if (ac_all_zero) {
         // timings of the alternatives for this test:
         //    no shortcut                 0     seconds
         //    (1|2|3|4|5|6|7)==0          0     seconds
         //    all separate               -0.047 seconds
         //    1 && 2|3 && 4|5 && 6|7:    -0.047 seconds
         int dcterm = d[0]*4;
         v[0] = v[8] = v[16] = v[24] = v[32] = v[40] = v[48] = v[56] = dcterm;
      } else {
         STBI__IDCT_1D(d[ 0],d[ 8],d[16],d[24],d[32],d[40],d[48],d[56])
         // constants scaled things up by 1<<12; let's bring them back
         // down, but keep 2 extra bits of precision
         x0 += 512; x1 += 512; x2 += 512; x3 += 512;
         v[ 0] = (x0+t3) >> 10;
         v[56] = (x0-t3) >> 10;
         v[ 8] = (x1+t2) >> 10;
         v[48] = (x1-t2) >> 10;
         v[16] = (x2+t1) >> 10;
         v[40] = (x2-t1) >> 10;
         v[24] = (x3+t0) >> 10;
         v[32] = (x3-t0) >> 10;
      }
   }

   // second pass: 1-D IDCT along each row of val[], clamped into out
   for (row=0; row < 8; ++row) {
      const int *v = val + row*8;
      stbi_uc *o = out + row*out_stride;
      // constants scaled things up by 1<<12, plus we had 1<<2 from first
      // loop, plus horizontal and vertical each scale by sqrt(8) so together
      // we've got an extra 1<<3, so 1<<17 total we need to remove.
      // so we want to round that, which means adding 0.5 * 1<<17,
      // aka 65536. Also, we'll end up with -128 to 127 that we want
      // to encode as 0..255 by adding 128, so we'll add that before the shift
      const int round_and_bias = 65536 + (128<<17);
      // no fast case since the first 1D IDCT spread components out
      STBI__IDCT_1D(v[0],v[1],v[2],v[3],v[4],v[5],v[6],v[7])
      x0 += round_and_bias;
      x1 += round_and_bias;
      x2 += round_and_bias;
      x3 += round_and_bias;
      // tried computing the shifts into temps, or'ing the temps to see
      // if any were out of range, but that was slower
      o[0] = stbi__clamp((x0+t3) >> 17);
      o[7] = stbi__clamp((x0-t3) >> 17);
      o[1] = stbi__clamp((x1+t2) >> 17);
      o[6] = stbi__clamp((x1-t2) >> 17);
      o[2] = stbi__clamp((x2+t1) >> 17);
      o[5] = stbi__clamp((x2-t1) >> 17);
      o[3] = stbi__clamp((x3+t0) >> 17);
      o[4] = stbi__clamp((x3-t0) >> 17);
   }
}
2328 
2329 #ifdef STBI_SSE2
2330 // sse2 integer IDCT. not the fastest possible implementation but it
2331 // produces bit-identical results to the generic C version so it's
2332 // fully "transparent".
stbi__idct_simd(stbi_uc * out,int out_stride,short data[64])2333 static void stbi__idct_simd(stbi_uc *out, int out_stride, short data[64])
2334 {
2335    // This is constructed to match our regular (generic) integer IDCT exactly.
2336    __m128i row0, row1, row2, row3, row4, row5, row6, row7;
2337    __m128i tmp;
2338 
2339    // dot product constant: even elems=x, odd elems=y
2340    #define dct_const(x,y)  _mm_setr_epi16((x),(y),(x),(y),(x),(y),(x),(y))
2341 
2342    // out(0) = c0[even]*x + c0[odd]*y   (c0, x, y 16-bit, out 32-bit)
2343    // out(1) = c1[even]*x + c1[odd]*y
2344    #define dct_rot(out0,out1, x,y,c0,c1) \
2345       __m128i c0##lo = _mm_unpacklo_epi16((x),(y)); \
2346       __m128i c0##hi = _mm_unpackhi_epi16((x),(y)); \
2347       __m128i out0##_l = _mm_madd_epi16(c0##lo, c0); \
2348       __m128i out0##_h = _mm_madd_epi16(c0##hi, c0); \
2349       __m128i out1##_l = _mm_madd_epi16(c0##lo, c1); \
2350       __m128i out1##_h = _mm_madd_epi16(c0##hi, c1)
2351 
2352    // out = in << 12  (in 16-bit, out 32-bit)
2353    #define dct_widen(out, in) \
2354       __m128i out##_l = _mm_srai_epi32(_mm_unpacklo_epi16(_mm_setzero_si128(), (in)), 4); \
2355       __m128i out##_h = _mm_srai_epi32(_mm_unpackhi_epi16(_mm_setzero_si128(), (in)), 4)
2356 
2357    // wide add
2358    #define dct_wadd(out, a, b) \
2359       __m128i out##_l = _mm_add_epi32(a##_l, b##_l); \
2360       __m128i out##_h = _mm_add_epi32(a##_h, b##_h)
2361 
2362    // wide sub
2363    #define dct_wsub(out, a, b) \
2364       __m128i out##_l = _mm_sub_epi32(a##_l, b##_l); \
2365       __m128i out##_h = _mm_sub_epi32(a##_h, b##_h)
2366 
2367    // butterfly a/b, add bias, then shift by "s" and pack
2368    #define dct_bfly32o(out0, out1, a,b,bias,s) \
2369       { \
2370          __m128i abiased_l = _mm_add_epi32(a##_l, bias); \
2371          __m128i abiased_h = _mm_add_epi32(a##_h, bias); \
2372          dct_wadd(sum, abiased, b); \
2373          dct_wsub(dif, abiased, b); \
2374          out0 = _mm_packs_epi32(_mm_srai_epi32(sum_l, s), _mm_srai_epi32(sum_h, s)); \
2375          out1 = _mm_packs_epi32(_mm_srai_epi32(dif_l, s), _mm_srai_epi32(dif_h, s)); \
2376       }
2377 
2378    // 8-bit interleave step (for transposes)
2379    #define dct_interleave8(a, b) \
2380       tmp = a; \
2381       a = _mm_unpacklo_epi8(a, b); \
2382       b = _mm_unpackhi_epi8(tmp, b)
2383 
2384    // 16-bit interleave step (for transposes)
2385    #define dct_interleave16(a, b) \
2386       tmp = a; \
2387       a = _mm_unpacklo_epi16(a, b); \
2388       b = _mm_unpackhi_epi16(tmp, b)
2389 
2390    #define dct_pass(bias,shift) \
2391       { \
2392          /* even part */ \
2393          dct_rot(t2e,t3e, row2,row6, rot0_0,rot0_1); \
2394          __m128i sum04 = _mm_add_epi16(row0, row4); \
2395          __m128i dif04 = _mm_sub_epi16(row0, row4); \
2396          dct_widen(t0e, sum04); \
2397          dct_widen(t1e, dif04); \
2398          dct_wadd(x0, t0e, t3e); \
2399          dct_wsub(x3, t0e, t3e); \
2400          dct_wadd(x1, t1e, t2e); \
2401          dct_wsub(x2, t1e, t2e); \
2402          /* odd part */ \
2403          dct_rot(y0o,y2o, row7,row3, rot2_0,rot2_1); \
2404          dct_rot(y1o,y3o, row5,row1, rot3_0,rot3_1); \
2405          __m128i sum17 = _mm_add_epi16(row1, row7); \
2406          __m128i sum35 = _mm_add_epi16(row3, row5); \
2407          dct_rot(y4o,y5o, sum17,sum35, rot1_0,rot1_1); \
2408          dct_wadd(x4, y0o, y4o); \
2409          dct_wadd(x5, y1o, y5o); \
2410          dct_wadd(x6, y2o, y5o); \
2411          dct_wadd(x7, y3o, y4o); \
2412          dct_bfly32o(row0,row7, x0,x7,bias,shift); \
2413          dct_bfly32o(row1,row6, x1,x6,bias,shift); \
2414          dct_bfly32o(row2,row5, x2,x5,bias,shift); \
2415          dct_bfly32o(row3,row4, x3,x4,bias,shift); \
2416       }
2417 
2418    __m128i rot0_0 = dct_const(stbi__f2f(0.5411961f), stbi__f2f(0.5411961f) + stbi__f2f(-1.847759065f));
2419    __m128i rot0_1 = dct_const(stbi__f2f(0.5411961f) + stbi__f2f( 0.765366865f), stbi__f2f(0.5411961f));
2420    __m128i rot1_0 = dct_const(stbi__f2f(1.175875602f) + stbi__f2f(-0.899976223f), stbi__f2f(1.175875602f));
2421    __m128i rot1_1 = dct_const(stbi__f2f(1.175875602f), stbi__f2f(1.175875602f) + stbi__f2f(-2.562915447f));
2422    __m128i rot2_0 = dct_const(stbi__f2f(-1.961570560f) + stbi__f2f( 0.298631336f), stbi__f2f(-1.961570560f));
2423    __m128i rot2_1 = dct_const(stbi__f2f(-1.961570560f), stbi__f2f(-1.961570560f) + stbi__f2f( 3.072711026f));
2424    __m128i rot3_0 = dct_const(stbi__f2f(-0.390180644f) + stbi__f2f( 2.053119869f), stbi__f2f(-0.390180644f));
2425    __m128i rot3_1 = dct_const(stbi__f2f(-0.390180644f), stbi__f2f(-0.390180644f) + stbi__f2f( 1.501321110f));
2426 
2427    // rounding biases in column/row passes, see stbi__idct_block for explanation.
2428    __m128i bias_0 = _mm_set1_epi32(512);
2429    __m128i bias_1 = _mm_set1_epi32(65536 + (128<<17));
2430 
2431    // load
2432    row0 = _mm_load_si128((const __m128i *) (data + 0*8));
2433    row1 = _mm_load_si128((const __m128i *) (data + 1*8));
2434    row2 = _mm_load_si128((const __m128i *) (data + 2*8));
2435    row3 = _mm_load_si128((const __m128i *) (data + 3*8));
2436    row4 = _mm_load_si128((const __m128i *) (data + 4*8));
2437    row5 = _mm_load_si128((const __m128i *) (data + 5*8));
2438    row6 = _mm_load_si128((const __m128i *) (data + 6*8));
2439    row7 = _mm_load_si128((const __m128i *) (data + 7*8));
2440 
2441    // column pass
2442    dct_pass(bias_0, 10);
2443 
2444    {
2445       // 16bit 8x8 transpose pass 1
2446       dct_interleave16(row0, row4);
2447       dct_interleave16(row1, row5);
2448       dct_interleave16(row2, row6);
2449       dct_interleave16(row3, row7);
2450 
2451       // transpose pass 2
2452       dct_interleave16(row0, row2);
2453       dct_interleave16(row1, row3);
2454       dct_interleave16(row4, row6);
2455       dct_interleave16(row5, row7);
2456 
2457       // transpose pass 3
2458       dct_interleave16(row0, row1);
2459       dct_interleave16(row2, row3);
2460       dct_interleave16(row4, row5);
2461       dct_interleave16(row6, row7);
2462    }
2463 
2464    // row pass
2465    dct_pass(bias_1, 17);
2466 
2467    {
2468       // pack
2469       __m128i p0 = _mm_packus_epi16(row0, row1); // a0a1a2a3...a7b0b1b2b3...b7
2470       __m128i p1 = _mm_packus_epi16(row2, row3);
2471       __m128i p2 = _mm_packus_epi16(row4, row5);
2472       __m128i p3 = _mm_packus_epi16(row6, row7);
2473 
2474       // 8bit 8x8 transpose pass 1
2475       dct_interleave8(p0, p2); // a0e0a1e1...
2476       dct_interleave8(p1, p3); // c0g0c1g1...
2477 
2478       // transpose pass 2
2479       dct_interleave8(p0, p1); // a0c0e0g0...
2480       dct_interleave8(p2, p3); // b0d0f0h0...
2481 
2482       // transpose pass 3
2483       dct_interleave8(p0, p2); // a0b0c0d0...
2484       dct_interleave8(p1, p3); // a4b4c4d4...
2485 
2486       // store
2487       _mm_storel_epi64((__m128i *) out, p0); out += out_stride;
2488       _mm_storel_epi64((__m128i *) out, _mm_shuffle_epi32(p0, 0x4e)); out += out_stride;
2489       _mm_storel_epi64((__m128i *) out, p2); out += out_stride;
2490       _mm_storel_epi64((__m128i *) out, _mm_shuffle_epi32(p2, 0x4e)); out += out_stride;
2491       _mm_storel_epi64((__m128i *) out, p1); out += out_stride;
2492       _mm_storel_epi64((__m128i *) out, _mm_shuffle_epi32(p1, 0x4e)); out += out_stride;
2493       _mm_storel_epi64((__m128i *) out, p3); out += out_stride;
2494       _mm_storel_epi64((__m128i *) out, _mm_shuffle_epi32(p3, 0x4e));
2495    }
2496 
2497 #undef dct_const
2498 #undef dct_rot
2499 #undef dct_widen
2500 #undef dct_wadd
2501 #undef dct_wsub
2502 #undef dct_bfly32o
2503 #undef dct_interleave8
2504 #undef dct_interleave16
2505 #undef dct_pass
2506 }
2507 
2508 #endif // STBI_SSE2
2509 
2510 #ifdef STBI_NEON
2511 
2512 // NEON integer IDCT. should produce bit-identical
2513 // results to the generic C version.
stbi__idct_simd(stbi_uc * out,int out_stride,short data[64])2514 static void stbi__idct_simd(stbi_uc *out, int out_stride, short data[64])
2515 {
2516    int16x8_t row0, row1, row2, row3, row4, row5, row6, row7;
2517 
2518    int16x4_t rot0_0 = vdup_n_s16(stbi__f2f(0.5411961f));
2519    int16x4_t rot0_1 = vdup_n_s16(stbi__f2f(-1.847759065f));
2520    int16x4_t rot0_2 = vdup_n_s16(stbi__f2f( 0.765366865f));
2521    int16x4_t rot1_0 = vdup_n_s16(stbi__f2f( 1.175875602f));
2522    int16x4_t rot1_1 = vdup_n_s16(stbi__f2f(-0.899976223f));
2523    int16x4_t rot1_2 = vdup_n_s16(stbi__f2f(-2.562915447f));
2524    int16x4_t rot2_0 = vdup_n_s16(stbi__f2f(-1.961570560f));
2525    int16x4_t rot2_1 = vdup_n_s16(stbi__f2f(-0.390180644f));
2526    int16x4_t rot3_0 = vdup_n_s16(stbi__f2f( 0.298631336f));
2527    int16x4_t rot3_1 = vdup_n_s16(stbi__f2f( 2.053119869f));
2528    int16x4_t rot3_2 = vdup_n_s16(stbi__f2f( 3.072711026f));
2529    int16x4_t rot3_3 = vdup_n_s16(stbi__f2f( 1.501321110f));
2530 
2531 #define dct_long_mul(out, inq, coeff) \
2532    int32x4_t out##_l = vmull_s16(vget_low_s16(inq), coeff); \
2533    int32x4_t out##_h = vmull_s16(vget_high_s16(inq), coeff)
2534 
2535 #define dct_long_mac(out, acc, inq, coeff) \
2536    int32x4_t out##_l = vmlal_s16(acc##_l, vget_low_s16(inq), coeff); \
2537    int32x4_t out##_h = vmlal_s16(acc##_h, vget_high_s16(inq), coeff)
2538 
2539 #define dct_widen(out, inq) \
2540    int32x4_t out##_l = vshll_n_s16(vget_low_s16(inq), 12); \
2541    int32x4_t out##_h = vshll_n_s16(vget_high_s16(inq), 12)
2542 
2543 // wide add
2544 #define dct_wadd(out, a, b) \
2545    int32x4_t out##_l = vaddq_s32(a##_l, b##_l); \
2546    int32x4_t out##_h = vaddq_s32(a##_h, b##_h)
2547 
2548 // wide sub
2549 #define dct_wsub(out, a, b) \
2550    int32x4_t out##_l = vsubq_s32(a##_l, b##_l); \
2551    int32x4_t out##_h = vsubq_s32(a##_h, b##_h)
2552 
2553 // butterfly a/b, then shift using "shiftop" by "s" and pack
2554 #define dct_bfly32o(out0,out1, a,b,shiftop,s) \
2555    { \
2556       dct_wadd(sum, a, b); \
2557       dct_wsub(dif, a, b); \
2558       out0 = vcombine_s16(shiftop(sum_l, s), shiftop(sum_h, s)); \
2559       out1 = vcombine_s16(shiftop(dif_l, s), shiftop(dif_h, s)); \
2560    }
2561 
2562 #define dct_pass(shiftop, shift) \
2563    { \
2564       /* even part */ \
2565       int16x8_t sum26 = vaddq_s16(row2, row6); \
2566       dct_long_mul(p1e, sum26, rot0_0); \
2567       dct_long_mac(t2e, p1e, row6, rot0_1); \
2568       dct_long_mac(t3e, p1e, row2, rot0_2); \
2569       int16x8_t sum04 = vaddq_s16(row0, row4); \
2570       int16x8_t dif04 = vsubq_s16(row0, row4); \
2571       dct_widen(t0e, sum04); \
2572       dct_widen(t1e, dif04); \
2573       dct_wadd(x0, t0e, t3e); \
2574       dct_wsub(x3, t0e, t3e); \
2575       dct_wadd(x1, t1e, t2e); \
2576       dct_wsub(x2, t1e, t2e); \
2577       /* odd part */ \
2578       int16x8_t sum15 = vaddq_s16(row1, row5); \
2579       int16x8_t sum17 = vaddq_s16(row1, row7); \
2580       int16x8_t sum35 = vaddq_s16(row3, row5); \
2581       int16x8_t sum37 = vaddq_s16(row3, row7); \
2582       int16x8_t sumodd = vaddq_s16(sum17, sum35); \
2583       dct_long_mul(p5o, sumodd, rot1_0); \
2584       dct_long_mac(p1o, p5o, sum17, rot1_1); \
2585       dct_long_mac(p2o, p5o, sum35, rot1_2); \
2586       dct_long_mul(p3o, sum37, rot2_0); \
2587       dct_long_mul(p4o, sum15, rot2_1); \
2588       dct_wadd(sump13o, p1o, p3o); \
2589       dct_wadd(sump24o, p2o, p4o); \
2590       dct_wadd(sump23o, p2o, p3o); \
2591       dct_wadd(sump14o, p1o, p4o); \
2592       dct_long_mac(x4, sump13o, row7, rot3_0); \
2593       dct_long_mac(x5, sump24o, row5, rot3_1); \
2594       dct_long_mac(x6, sump23o, row3, rot3_2); \
2595       dct_long_mac(x7, sump14o, row1, rot3_3); \
2596       dct_bfly32o(row0,row7, x0,x7,shiftop,shift); \
2597       dct_bfly32o(row1,row6, x1,x6,shiftop,shift); \
2598       dct_bfly32o(row2,row5, x2,x5,shiftop,shift); \
2599       dct_bfly32o(row3,row4, x3,x4,shiftop,shift); \
2600    }
2601 
2602    // load
2603    row0 = vld1q_s16(data + 0*8);
2604    row1 = vld1q_s16(data + 1*8);
2605    row2 = vld1q_s16(data + 2*8);
2606    row3 = vld1q_s16(data + 3*8);
2607    row4 = vld1q_s16(data + 4*8);
2608    row5 = vld1q_s16(data + 5*8);
2609    row6 = vld1q_s16(data + 6*8);
2610    row7 = vld1q_s16(data + 7*8);
2611 
2612    // add DC bias
2613    row0 = vaddq_s16(row0, vsetq_lane_s16(1024, vdupq_n_s16(0), 0));
2614 
2615    // column pass
2616    dct_pass(vrshrn_n_s32, 10);
2617 
2618    // 16bit 8x8 transpose
2619    {
2620 // these three map to a single VTRN.16, VTRN.32, and VSWP, respectively.
2621 // whether compilers actually get this is another story, sadly.
2622 #define dct_trn16(x, y) { int16x8x2_t t = vtrnq_s16(x, y); x = t.val[0]; y = t.val[1]; }
2623 #define dct_trn32(x, y) { int32x4x2_t t = vtrnq_s32(vreinterpretq_s32_s16(x), vreinterpretq_s32_s16(y)); x = vreinterpretq_s16_s32(t.val[0]); y = vreinterpretq_s16_s32(t.val[1]); }
2624 #define dct_trn64(x, y) { int16x8_t x0 = x; int16x8_t y0 = y; x = vcombine_s16(vget_low_s16(x0), vget_low_s16(y0)); y = vcombine_s16(vget_high_s16(x0), vget_high_s16(y0)); }
2625 
2626       // pass 1
2627       dct_trn16(row0, row1); // a0b0a2b2a4b4a6b6
2628       dct_trn16(row2, row3);
2629       dct_trn16(row4, row5);
2630       dct_trn16(row6, row7);
2631 
2632       // pass 2
2633       dct_trn32(row0, row2); // a0b0c0d0a4b4c4d4
2634       dct_trn32(row1, row3);
2635       dct_trn32(row4, row6);
2636       dct_trn32(row5, row7);
2637 
2638       // pass 3
2639       dct_trn64(row0, row4); // a0b0c0d0e0f0g0h0
2640       dct_trn64(row1, row5);
2641       dct_trn64(row2, row6);
2642       dct_trn64(row3, row7);
2643 
2644 #undef dct_trn16
2645 #undef dct_trn32
2646 #undef dct_trn64
2647    }
2648 
2649    // row pass
2650    // vrshrn_n_s32 only supports shifts up to 16, we need
2651    // 17. so do a non-rounding shift of 16 first then follow
2652    // up with a rounding shift by 1.
2653    dct_pass(vshrn_n_s32, 16);
2654 
2655    {
2656       // pack and round
2657       uint8x8_t p0 = vqrshrun_n_s16(row0, 1);
2658       uint8x8_t p1 = vqrshrun_n_s16(row1, 1);
2659       uint8x8_t p2 = vqrshrun_n_s16(row2, 1);
2660       uint8x8_t p3 = vqrshrun_n_s16(row3, 1);
2661       uint8x8_t p4 = vqrshrun_n_s16(row4, 1);
2662       uint8x8_t p5 = vqrshrun_n_s16(row5, 1);
2663       uint8x8_t p6 = vqrshrun_n_s16(row6, 1);
2664       uint8x8_t p7 = vqrshrun_n_s16(row7, 1);
2665 
2666       // again, these can translate into one instruction, but often don't.
2667 #define dct_trn8_8(x, y) { uint8x8x2_t t = vtrn_u8(x, y); x = t.val[0]; y = t.val[1]; }
2668 #define dct_trn8_16(x, y) { uint16x4x2_t t = vtrn_u16(vreinterpret_u16_u8(x), vreinterpret_u16_u8(y)); x = vreinterpret_u8_u16(t.val[0]); y = vreinterpret_u8_u16(t.val[1]); }
2669 #define dct_trn8_32(x, y) { uint32x2x2_t t = vtrn_u32(vreinterpret_u32_u8(x), vreinterpret_u32_u8(y)); x = vreinterpret_u8_u32(t.val[0]); y = vreinterpret_u8_u32(t.val[1]); }
2670 
2671       // sadly can't use interleaved stores here since we only write
2672       // 8 bytes to each scan line!
2673 
2674       // 8x8 8-bit transpose pass 1
2675       dct_trn8_8(p0, p1);
2676       dct_trn8_8(p2, p3);
2677       dct_trn8_8(p4, p5);
2678       dct_trn8_8(p6, p7);
2679 
2680       // pass 2
2681       dct_trn8_16(p0, p2);
2682       dct_trn8_16(p1, p3);
2683       dct_trn8_16(p4, p6);
2684       dct_trn8_16(p5, p7);
2685 
2686       // pass 3
2687       dct_trn8_32(p0, p4);
2688       dct_trn8_32(p1, p5);
2689       dct_trn8_32(p2, p6);
2690       dct_trn8_32(p3, p7);
2691 
2692       // store
2693       vst1_u8(out, p0); out += out_stride;
2694       vst1_u8(out, p1); out += out_stride;
2695       vst1_u8(out, p2); out += out_stride;
2696       vst1_u8(out, p3); out += out_stride;
2697       vst1_u8(out, p4); out += out_stride;
2698       vst1_u8(out, p5); out += out_stride;
2699       vst1_u8(out, p6); out += out_stride;
2700       vst1_u8(out, p7);
2701 
2702 #undef dct_trn8_8
2703 #undef dct_trn8_16
2704 #undef dct_trn8_32
2705    }
2706 
2707 #undef dct_long_mul
2708 #undef dct_long_mac
2709 #undef dct_widen
2710 #undef dct_wadd
2711 #undef dct_wsub
2712 #undef dct_bfly32o
2713 #undef dct_pass
2714 }
2715 
2716 #endif // STBI_NEON
2717 
2718 #define STBI__MARKER_none  0xff
2719 // if there's a pending marker from the entropy stream, return that
2720 // otherwise, fetch from the stream and get a marker. if there's no
2721 // marker, return 0xff, which is never a valid marker value
stbi__get_marker(stbi__jpeg * j)2722 static stbi_uc stbi__get_marker(stbi__jpeg *j)
2723 {
2724    stbi_uc x;
2725    if (j->marker != STBI__MARKER_none) { x = j->marker; j->marker = STBI__MARKER_none; return x; }
2726    x = stbi__get8(j->s);
2727    if (x != 0xff) return STBI__MARKER_none;
2728    while (x == 0xff)
2729       x = stbi__get8(j->s); // consume repeated 0xff fill bytes
2730    return x;
2731 }
2732 
2733 // in each scan, we'll have scan_n components, and the order
2734 // of the components is specified by order[]
2735 #define STBI__RESTART(x)     ((x) >= 0xd0 && (x) <= 0xd7)
2736 
2737 // after a restart interval, stbi__jpeg_reset the entropy decoder and
2738 // the dc prediction
stbi__jpeg_reset(stbi__jpeg * j)2739 static void stbi__jpeg_reset(stbi__jpeg *j)
2740 {
2741    j->code_bits = 0;
2742    j->code_buffer = 0;
2743    j->nomore = 0;
2744    j->img_comp[0].dc_pred = j->img_comp[1].dc_pred = j->img_comp[2].dc_pred = j->img_comp[3].dc_pred = 0;
2745    j->marker = STBI__MARKER_none;
2746    j->todo = j->restart_interval ? j->restart_interval : 0x7fffffff;
2747    j->eob_run = 0;
2748    // no more than 1<<31 MCUs if no restart_interal? that's plenty safe,
2749    // since we don't even allow 1<<30 pixels
2750 }
2751 
// Bookkeeping done after every MCU: count down the restart interval and,
// when it runs out, look for a restart marker. Returns 1 if decoding should
// carry on (the decoder has been reset if a restart marker was found), or 0
// if the interval ran out and the pending marker is not a restart marker.
static int stbi__jpeg_end_of_mcu(stbi__jpeg *z)
{
   if (--z->todo > 0)
      return 1;
   if (z->code_bits < 24) stbi__grow_buffer_unsafe(z);
   if (!STBI__RESTART(z->marker))
      return 0;
   stbi__jpeg_reset(z);
   return 1;
}

// Decode the entropy-coded data of one scan. There are four cases, chosen by
// z->progressive and by whether the scan carries one component or several:
//   - baseline scans decode each block and run the IDCT on it right away,
//     writing samples into the component's data plane;
//   - progressive scans only update the stored coefficients in
//     img_comp[].coeff; no IDCT happens here.
// Returns 0 if a block fails to decode, 1 otherwise. Note that 1 is also
// returned when a restart interval ends without a restart marker: bailing
// out there gives corrupt data rather than no data.
static int stbi__parse_entropy_coded_data(stbi__jpeg *z)
{
   stbi__jpeg_reset(z);
   if (z->progressive) {
      if (z->scan_n != 1) {
         // interleaved: walk the MCU grid, and inside each MCU visit the
         // scan's components in order[] sequence
         int mx,my,k,bx,by;
         for (my=0; my < z->img_mcu_y; ++my) {
            for (mx=0; mx < z->img_mcu_x; ++mx) {
               for (k=0; k < z->scan_n; ++k) {
                  int c = z->order[k];
                  // an MCU holds h x v blocks of this component, as given by
                  // the component's basic H and V
                  for (by=0; by < z->img_comp[c].v; ++by) {
                     for (bx=0; bx < z->img_comp[c].h; ++bx) {
                        int col = (mx*z->img_comp[c].h + bx);
                        int row = (my*z->img_comp[c].v + by);
                        short *coef = z->img_comp[c].coeff + 64 * (col + row * z->img_comp[c].coeff_w);
                        if (!stbi__jpeg_decode_block_prog_dc(z, coef, &z->huff_dc[z->img_comp[c].hd], c))
                           return 0;
                     }
                  }
               }
               // all components done: that completes one interleaved MCU
               if (!stbi__jpeg_end_of_mcu(z)) return 1;
            }
         }
         return 1;
      } else {
         // non-interleaved: one block at a time in plain scanline order. The
         // block count depends only on the component's own pixel size, not
         // on the interleaved MCU layout.
         int bx,by;
         int c = z->order[0];
         int blocks_w = (z->img_comp[c].x+7) >> 3;
         int blocks_h = (z->img_comp[c].y+7) >> 3;
         for (by=0; by < blocks_h; ++by) {
            for (bx=0; bx < blocks_w; ++bx) {
               short *coef = z->img_comp[c].coeff + 64 * (bx + by * z->img_comp[c].coeff_w);
               if (z->spec_start != 0) {
                  int ac = z->img_comp[c].ha;
                  if (!stbi__jpeg_decode_block_prog_ac(z, coef, &z->huff_ac[ac], z->fast_ac[ac]))
                     return 0;
               } else {
                  if (!stbi__jpeg_decode_block_prog_dc(z, coef, &z->huff_dc[z->img_comp[c].hd], c))
                     return 0;
               }
               // here every block is an MCU of its own
               if (!stbi__jpeg_end_of_mcu(z)) return 1;
            }
         }
         return 1;
      }
   } else {
      if (z->scan_n != 1) {
         // interleaved: walk the MCU grid, and inside each MCU visit the
         // scan's components in order[] sequence
         int mx,my,k,bx,by;
         STBI_SIMD_ALIGN(short, block[64]);
         for (my=0; my < z->img_mcu_y; ++my) {
            for (mx=0; mx < z->img_mcu_x; ++mx) {
               for (k=0; k < z->scan_n; ++k) {
                  int c = z->order[k];
                  // an MCU holds h x v blocks of this component, as given by
                  // the component's basic H and V
                  for (by=0; by < z->img_comp[c].v; ++by) {
                     for (bx=0; bx < z->img_comp[c].h; ++bx) {
                        // pixel position of this block inside the plane
                        int px = (mx*z->img_comp[c].h + bx)*8;
                        int py = (my*z->img_comp[c].v + by)*8;
                        int ac = z->img_comp[c].ha;
                        if (!stbi__jpeg_decode_block(z, block, z->huff_dc+z->img_comp[c].hd, z->huff_ac+ac, z->fast_ac[ac], c, z->dequant[z->img_comp[c].tq])) return 0;
                        z->idct_block_kernel(z->img_comp[c].data+z->img_comp[c].w2*py+px, z->img_comp[c].w2, block);
                     }
                  }
               }
               // all components done: that completes one interleaved MCU
               if (!stbi__jpeg_end_of_mcu(z)) return 1;
            }
         }
         return 1;
      } else {
         // non-interleaved: one block at a time in plain scanline order. The
         // block count depends only on the component's own pixel size, not
         // on the interleaved MCU layout.
         int bx,by;
         STBI_SIMD_ALIGN(short, block[64]);
         int c = z->order[0];
         int blocks_w = (z->img_comp[c].x+7) >> 3;
         int blocks_h = (z->img_comp[c].y+7) >> 3;
         for (by=0; by < blocks_h; ++by) {
            for (bx=0; bx < blocks_w; ++bx) {
               int ac = z->img_comp[c].ha;
               if (!stbi__jpeg_decode_block(z, block, z->huff_dc+z->img_comp[c].hd, z->huff_ac+ac, z->fast_ac[ac], c, z->dequant[z->img_comp[c].tq])) return 0;
               z->idct_block_kernel(z->img_comp[c].data+z->img_comp[c].w2*by*8+bx*8, z->img_comp[c].w2, block);
               // here every block is an MCU of its own
               if (!stbi__jpeg_end_of_mcu(z)) return 1;
            }
         }
         return 1;
      }
   }
}
2875 
// Scale a block of 64 coefficients in place: every entry of data[] is
// multiplied by the entry of dequant[] at the same index.
static void stbi__jpeg_dequantize(short *data, stbi__uint16 *dequant)
{
   short *coef = data;
   short *stop = data + 64;
   stbi__uint16 *scale = dequant;

   while (coef != stop) {
      *coef *= *scale;
      ++coef;
      ++scale;
   }
}
2882 
// Final step for progressive images: for every component, dequantize each
// stored coefficient block in place and run the IDCT on it, writing the
// samples into the component's data plane. Does nothing unless
// z->progressive is set.
static void stbi__jpeg_finish(stbi__jpeg *z)
{
   int bx,by,c;

   if (!z->progressive)
      return;

   for (c=0; c < z->s->img_n; ++c) {
      // number of 8x8 blocks needed to cover the component's own pixel size
      int blocks_w = (z->img_comp[c].x+7) >> 3;
      int blocks_h = (z->img_comp[c].y+7) >> 3;
      for (by=0; by < blocks_h; ++by) {
         for (bx=0; bx < blocks_w; ++bx) {
            short *coef = z->img_comp[c].coeff + 64 * (bx + by * z->img_comp[c].coeff_w);
            stbi__jpeg_dequantize(coef, z->dequant[z->img_comp[c].tq]);
            z->idct_block_kernel(z->img_comp[c].data+z->img_comp[c].w2*by*8+bx*8, z->img_comp[c].w2, coef);
         }
      }
   }
}
2901 
// Parse the segment that follows marker code m. Handled here:
//   0xDD        DRI - restart interval
//   0xDB        DQT - quantization table(s)
//   0xC4        DHT - huffman table(s)
//   0xE0..0xEF  APPn, and 0xFE COM - skipped, except that a JFIF APP0 tag
//               sets z->jfif and an Adobe APP14 segment supplies
//               z->app14_color_transform
// Returns 1 on success and 0 on failure. Any other marker, including
// STBI__MARKER_none, is reported as an error.
static int stbi__process_marker(stbi__jpeg *z, int m)
{
   int L;
   switch (m) {
      case STBI__MARKER_none: // no marker found
         return stbi__err("expected marker","Corrupt JPEG");

      case 0xDD: // DRI - specify restart interval
         if (stbi__get16be(z->s) != 4) return stbi__err("bad DRI len","Corrupt JPEG");
         z->restart_interval = stbi__get16be(z->s);
         return 1;

      case 0xDB: // DQT - define quantization table
         // L counts the payload bytes still to be consumed; one segment may
         // carry several tables back to back
         L = stbi__get16be(z->s)-2;
         while (L > 0) {
            int q = stbi__get8(z->s);
            // high nibble: 0 = 8-bit entries, 1 = 16-bit entries
            int p = q >> 4, sixteen = (p != 0);
            // low nibble: which of the four tables is being defined
            int t = q & 15,i;
            if (p != 0 && p != 1) return stbi__err("bad DQT type","Corrupt JPEG");
            if (t > 3) return stbi__err("bad DQT table","Corrupt JPEG");

            // entries arrive in zigzag order; store them de-zigzagged
            for (i=0; i < 64; ++i)
               z->dequant[t][stbi__jpeg_dezigzag[i]] = (stbi__uint16)(sixteen ? stbi__get16be(z->s) : stbi__get8(z->s));
            // 1 header byte + 64 entries of 1 or 2 bytes each
            L -= (sixteen ? 129 : 65);
         }
         // a length that does not match the tables exactly is a failure
         return L==0;

      case 0xC4: // DHT - define huffman table
         L = stbi__get16be(z->s)-2;
         while (L > 0) {
            stbi_uc *v;
            int sizes[16],i,n=0;
            int q = stbi__get8(z->s);
            int tc = q >> 4;   // table class: 0 selects huff_dc, 1 huff_ac
            int th = q & 15;   // table slot, 0..3
            if (tc > 1 || th > 3) return stbi__err("bad DHT header","Corrupt JPEG");
            // 16 count bytes; their sum n is the number of symbol values
            // that follow in the stream
            for (i=0; i < 16; ++i) {
               sizes[i] = stbi__get8(z->s);
               n += sizes[i];
            }
            // The counts come straight from the file and can add up to as
            // much as 16*255, but a JPEG huffman table holds at most 256
            // symbols. Reject larger totals here, before stbi__build_huffman
            // and the fill loop below index the table with them.
            // NOTE(review): assumes the huffman 'values' array has room for
            // 256 entries -- confirm against the stbi__huffman definition.
            if (n > 256) return stbi__err("bad DHT header","Corrupt JPEG");
            L -= 17;
            if (tc == 0) {
               if (!stbi__build_huffman(z->huff_dc+th, sizes)) return 0;
               v = z->huff_dc[th].values;
            } else {
               if (!stbi__build_huffman(z->huff_ac+th, sizes)) return 0;
               v = z->huff_ac[th].values;
            }
            for (i=0; i < n; ++i)
               v[i] = stbi__get8(z->s);
            // AC tables additionally get a fast lookup table built
            if (tc != 0)
               stbi__build_fast_ac(z->fast_ac[th], z->huff_ac + th);
            L -= n;
         }
         // a length that does not match the tables exactly is a failure
         return L==0;
   }

   // check for comment block or APP blocks
   if ((m >= 0xE0 && m <= 0xEF) || m == 0xFE) {
      L = stbi__get16be(z->s);
      // the length field counts its own two bytes
      if (L < 2) {
         if (m == 0xFE)
            return stbi__err("bad COM len","Corrupt JPEG");
         else
            return stbi__err("bad APP len","Corrupt JPEG");
      }
      L -= 2;

      if (m == 0xE0 && L >= 5) { // JFIF APP0 segment
         static const unsigned char tag[5] = {'J','F','I','F','\0'};
         int ok = 1;
         int i;
         // all 5 bytes are consumed even after a mismatch, so that L stays
         // in step with the stream position
         for (i=0; i < 5; ++i)
            if (stbi__get8(z->s) != tag[i])
               ok = 0;
         L -= 5;
         if (ok)
            z->jfif = 1;
      } else if (m == 0xEE && L >= 12) { // Adobe APP14 segment
         static const unsigned char tag[6] = {'A','d','o','b','e','\0'};
         int ok = 1;
         int i;
         for (i=0; i < 6; ++i)
            if (stbi__get8(z->s) != tag[i])
               ok = 0;
         L -= 6;
         if (ok) {
            // 6 more bytes: version(1), flags0(2), flags1(2), transform(1)
            stbi__get8(z->s); // version
            stbi__get16be(z->s); // flags0
            stbi__get16be(z->s); // flags1
            z->app14_color_transform = stbi__get8(z->s); // color transform
            L -= 6;
         }
      }

      // whatever is left of the segment is not interpreted
      stbi__skip(z->s, L);
      return 1;
   }

   return stbi__err("unknown marker","Corrupt JPEG");
}
3003 
3004 // after we see SOS
stbi__process_scan_header(stbi__jpeg * z)3005 static int stbi__process_scan_header(stbi__jpeg *z)
3006 {
3007    int i;
3008    int Ls = stbi__get16be(z->s);
3009    z->scan_n = stbi__get8(z->s);
3010    if (z->scan_n < 1 || z->scan_n > 4 || z->scan_n > (int) z->s->img_n) return stbi__err("bad SOS component count","Corrupt JPEG");
3011    if (Ls != 6+2*z->scan_n) return stbi__err("bad SOS len","Corrupt JPEG");
3012    for (i=0; i < z->scan_n; ++i) {
3013       int id = stbi__get8(z->s), which;
3014       int q = stbi__get8(z->s);
3015       for (which = 0; which < z->s->img_n; ++which)
3016          if (z->img_comp[which].id == id)
3017             break;
3018       if (which == z->s->img_n) return 0; // no match
3019       z->img_comp[which].hd = q >> 4;   if (z->img_comp[which].hd > 3) return stbi__err("bad DC huff","Corrupt JPEG");
3020       z->img_comp[which].ha = q & 15;   if (z->img_comp[which].ha > 3) return stbi__err("bad AC huff","Corrupt JPEG");
3021       z->order[i] = which;
3022    }
3023 
3024    {
3025       int aa;
3026       z->spec_start = stbi__get8(z->s);
3027       z->spec_end   = stbi__get8(z->s); // should be 63, but might be 0
3028       aa = stbi__get8(z->s);
3029       z->succ_high = (aa >> 4);
3030       z->succ_low  = (aa & 15);
3031       if (z->progressive) {
3032          if (z->spec_start > 63 || z->spec_end > 63  || z->spec_start > z->spec_end || z->succ_high > 13 || z->succ_low > 13)
3033             return stbi__err("bad SOS", "Corrupt JPEG");
3034       } else {
3035          if (z->spec_start != 0) return stbi__err("bad SOS","Corrupt JPEG");
3036          if (z->succ_high != 0 || z->succ_low != 0) return stbi__err("bad SOS","Corrupt JPEG");
3037          z->spec_end = 63;
3038       }
3039    }
3040 
3041    return 1;
3042 }
3043 
// Free the buffers owned by the first ncomp components: raw_data, raw_coeff
// and linebuf. Each pointer that is freed is cleared afterwards, together
// with the data / coeff pointers that go with raw_data / raw_coeff.
// Returns why unchanged, so a caller can free and return a status in one
// expression.
static int stbi__free_jpeg_components(stbi__jpeg *z, int ncomp, int why)
{
   int c = 0;
   while (c < ncomp) {
      if (z->img_comp[c].raw_data != NULL) {
         STBI_FREE(z->img_comp[c].raw_data);
         z->img_comp[c].raw_data = NULL;
         z->img_comp[c].data = NULL;
      }
      if (z->img_comp[c].raw_coeff != NULL) {
         STBI_FREE(z->img_comp[c].raw_coeff);
         z->img_comp[c].raw_coeff = NULL;
         z->img_comp[c].coeff = NULL;
      }
      if (z->img_comp[c].linebuf != NULL) {
         STBI_FREE(z->img_comp[c].linebuf);
         z->img_comp[c].linebuf = NULL;
      }
      ++c;
   }
   return why;
}
3065 
// Parse a start-of-frame (SOF) segment and, when actually loading, set up the
// per-component buffers.
//
//   z     decoder state; z->s is the input stream and receives img_x, img_y
//         and img_n, z->img_comp[] receives id/h/v/tq for every component
//   scan  STBI__SCAN_load to also compute the MCU layout and allocate sample
//         (and, for progressive files, coefficient) buffers; for any other
//         scan mode only the header fields are parsed and validated
//
// Returns 1 on success. On a malformed or unsupported header, or when an
// allocation fails, returns the (false) result of stbi__err(); buffers that
// were already allocated are released through stbi__free_jpeg_components().
static int stbi__process_frame_header(stbi__jpeg *z, int scan)
{
   stbi__context *s = z->s;
   int Lf,p,i,q, h_max=1,v_max=1,c;
   Lf = stbi__get16be(s);         if (Lf < 11) return stbi__err("bad SOF len","Corrupt JPEG"); // JPEG
   p  = stbi__get8(s);            if (p != 8) return stbi__err("only 8-bit","JPEG format not supported: 8-bit only"); // JPEG baseline
   s->img_y = stbi__get16be(s);   if (s->img_y == 0) return stbi__err("no header height", "JPEG format not supported: delayed height"); // Legal, but we don't handle it--but neither does IJG
   s->img_x = stbi__get16be(s);   if (s->img_x == 0) return stbi__err("0 width","Corrupt JPEG"); // JPEG requires
   c = stbi__get8(s);
   if (c != 3 && c != 1 && c != 4) return stbi__err("bad component count","Corrupt JPEG");
   s->img_n = c;
   // clear the pointers first so cleanup is safe no matter where we bail out
   for (i=0; i < c; ++i) {
      z->img_comp[i].data = NULL;
      z->img_comp[i].linebuf = NULL;
   }

   // segment length must match the component count exactly
   if (Lf != 8+3*s->img_n) return stbi__err("bad SOF len","Corrupt JPEG");

   z->rgb = 0;
   for (i=0; i < s->img_n; ++i) {
      static const unsigned char rgb[3] = { 'R', 'G', 'B' };
      z->img_comp[i].id = stbi__get8(s);
      // count components whose ids spell "RGB"; the loader treats 3 matches as
      // "data is already RGB"
      if (s->img_n == 3 && z->img_comp[i].id == rgb[i])
         ++z->rgb;
      q = stbi__get8(s);
      z->img_comp[i].h = (q >> 4);  if (!z->img_comp[i].h || z->img_comp[i].h > 4) return stbi__err("bad H","Corrupt JPEG");
      z->img_comp[i].v = q & 15;    if (!z->img_comp[i].v || z->img_comp[i].v > 4) return stbi__err("bad V","Corrupt JPEG");
      z->img_comp[i].tq = stbi__get8(s);  if (z->img_comp[i].tq > 3) return stbi__err("bad TQ","Corrupt JPEG");
   }

   if (scan != STBI__SCAN_load) return 1;

   if (!stbi__mad3sizes_valid(s->img_x, s->img_y, s->img_n, 0)) return stbi__err("too large", "Image too large to decode");

   for (i=0; i < s->img_n; ++i) {
      if (z->img_comp[i].h > h_max) h_max = z->img_comp[i].h;
      if (z->img_comp[i].v > v_max) v_max = z->img_comp[i].v;
   }

   // The resamplers derive their expansion factors as h_max / h and v_max / v
   // using integer division, so a component whose factor does not divide the
   // maximum evenly would be upsampled with the wrong row geometry and read
   // outside its buffer. Reject such files before anything is allocated.
   for (i=0; i < s->img_n; ++i) {
      if (h_max % z->img_comp[i].h != 0) return stbi__err("bad H","Corrupt JPEG");
      if (v_max % z->img_comp[i].v != 0) return stbi__err("bad V","Corrupt JPEG");
   }

   // compute interleaved mcu info
   z->img_h_max = h_max;
   z->img_v_max = v_max;
   z->img_mcu_w = h_max * 8;
   z->img_mcu_h = v_max * 8;
   // these sizes can't be more than 17 bits
   z->img_mcu_x = (s->img_x + z->img_mcu_w-1) / z->img_mcu_w;
   z->img_mcu_y = (s->img_y + z->img_mcu_h-1) / z->img_mcu_h;

   for (i=0; i < s->img_n; ++i) {
      // number of effective pixels (e.g. for non-interleaved MCU)
      z->img_comp[i].x = (s->img_x * z->img_comp[i].h + h_max-1) / h_max;
      z->img_comp[i].y = (s->img_y * z->img_comp[i].v + v_max-1) / v_max;
      // to simplify generation, we'll allocate enough memory to decode
      // the bogus oversized data from using interleaved MCUs and their
      // big blocks (e.g. a 16x16 iMCU on an image of width 33); we won't
      // discard the extra data until colorspace conversion
      //
      // img_mcu_x, img_mcu_y: <=17 bits; comp[i].h and .v are <=4 (checked earlier)
      // so these muls can't overflow with 32-bit ints (which we require)
      z->img_comp[i].w2 = z->img_mcu_x * z->img_comp[i].h * 8;
      z->img_comp[i].h2 = z->img_mcu_y * z->img_comp[i].v * 8;
      z->img_comp[i].coeff = 0;
      z->img_comp[i].raw_coeff = 0;
      z->img_comp[i].linebuf = NULL;
      // 15 bytes of slack so the usable pointer can be rounded up to 16
      z->img_comp[i].raw_data = stbi__malloc_mad2(z->img_comp[i].w2, z->img_comp[i].h2, 15);
      if (z->img_comp[i].raw_data == NULL)
         return stbi__free_jpeg_components(z, i+1, stbi__err("outofmem", "Out of memory"));
      // align blocks for idct using mmx/sse
      z->img_comp[i].data = (stbi_uc*) (((size_t) z->img_comp[i].raw_data + 15) & ~15);
      if (z->progressive) {
         // w2, h2 are multiples of 8 (see above)
         z->img_comp[i].coeff_w = z->img_comp[i].w2 / 8;
         z->img_comp[i].coeff_h = z->img_comp[i].h2 / 8;
         z->img_comp[i].raw_coeff = stbi__malloc_mad3(z->img_comp[i].w2, z->img_comp[i].h2, sizeof(short), 15);
         if (z->img_comp[i].raw_coeff == NULL)
            return stbi__free_jpeg_components(z, i+1, stbi__err("outofmem", "Out of memory"));
         z->img_comp[i].coeff = (short*) (((size_t) z->img_comp[i].raw_coeff + 15) & ~15);
      }
   }

   return 1;
}
3148 
// JPEG marker predicates. They are written as comparisons rather than plain
// constants because one predicate may accept several marker codes (see SOF).
#define stbi__SOI(x)               ((x) == 0xD8)
#define stbi__EOI(x)               ((x) == 0xD9)
#define stbi__SOS(x)               ((x) == 0xDA)
#define stbi__DNL(x)               ((x) == 0xDC)

// any start-of-frame variant this decoder accepts
#define stbi__SOF(x)               ((x) == 0xC0 || (x) == 0xC1 || (x) == 0xC2)
// the one start-of-frame variant that selects progressive decoding
#define stbi__SOF_progressive(x)   ((x) == 0xC2)
3157 
// Read the stream from the SOI marker up to and including the frame header.
//
//   scan == STBI__SCAN_type : only verify that the stream opens with SOI
//   otherwise               : process every marker segment until an SOF is
//                             found, then hand it to stbi__process_frame_header
//
// Returns 1 on success, a false value on any error.
static int stbi__decode_jpeg_header(stbi__jpeg *z, int scan)
{
   int marker;

   // start from a clean header state
   z->jfif = 0;
   z->app14_color_transform = -1;  // outside the valid range of 0,1,2
   z->marker = STBI__MARKER_none;  // no marker cached yet

   marker = stbi__get_marker(z);
   if (!stbi__SOI(marker))
      return stbi__err("no SOI","Corrupt JPEG");
   if (scan == STBI__SCAN_type)
      return 1;

   for (marker = stbi__get_marker(z); !stbi__SOF(marker); ) {
      if (!stbi__process_marker(z, marker))
         return 0;

      // some files pad their segments with extra bytes; keep asking for a
      // marker until one shows up or the data runs out
      for (;;) {
         marker = stbi__get_marker(z);
         if (marker != STBI__MARKER_none)
            break;
         if (stbi__at_eof(z->s))
            return stbi__err("no SOF", "Corrupt JPEG");
      }
   }

   z->progressive = stbi__SOF_progressive(marker);
   return stbi__process_frame_header(z, scan) ? 1 : 0;
}
3181 
3182 // decode image to YCbCr format
stbi__decode_jpeg_image(stbi__jpeg * j)3183 static int stbi__decode_jpeg_image(stbi__jpeg *j)
3184 {
3185    int m;
3186    for (m = 0; m < 4; m++) {
3187       j->img_comp[m].raw_data = NULL;
3188       j->img_comp[m].raw_coeff = NULL;
3189    }
3190    j->restart_interval = 0;
3191    if (!stbi__decode_jpeg_header(j, STBI__SCAN_load)) return 0;
3192    m = stbi__get_marker(j);
3193    while (!stbi__EOI(m)) {
3194       if (stbi__SOS(m)) {
3195          if (!stbi__process_scan_header(j)) return 0;
3196          if (!stbi__parse_entropy_coded_data(j)) return 0;
3197          if (j->marker == STBI__MARKER_none ) {
3198             // handle 0s at the end of image data from IP Kamera 9060
3199             while (!stbi__at_eof(j->s)) {
3200                int x = stbi__get8(j->s);
3201                if (x == 255) {
3202                   j->marker = stbi__get8(j->s);
3203                   break;
3204                }
3205             }
3206             // if we reach eof without hitting a marker, stbi__get_marker() below will fail and we'll eventually return 0
3207          }
3208       } else if (stbi__DNL(m)) {
3209          int Ld = stbi__get16be(j->s);
3210          stbi__uint32 NL = stbi__get16be(j->s);
3211          if (Ld != 4) return stbi__err("bad DNL len", "Corrupt JPEG");
3212          if (NL != j->s->img_y) return stbi__err("bad DNL height", "Corrupt JPEG");
3213       } else {
3214          if (!stbi__process_marker(j, m)) return 0;
3215       }
3216       m = stbi__get_marker(j);
3217    }
3218    if (j->progressive)
3219       stbi__jpeg_finish(j);
3220    return 1;
3221 }
3222 
3223 // static jfif-centered resampling (across block boundaries)
3224 
3225 typedef stbi_uc *(*resample_row_func)(stbi_uc *out, stbi_uc *in0, stbi_uc *in1,
3226                                     int w, int hs);
3227 
3228 #define stbi__div4(x) ((stbi_uc) ((x) >> 2))
3229 
// 1:1 case: nothing to resample, so the nearer input row is handed back as-is
// and the scratch row is left untouched.
static stbi_uc *resample_row_1(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs)
{
   STBI_NOTUSED(hs);
   STBI_NOTUSED(w);
   STBI_NOTUSED(in_far);
   STBI_NOTUSED(out);

   return in_near;
}
3238 
// 2x vertical upsampling: each output sample is a 3:1 blend of the nearer and
// the farther source row, rounded. Writes w samples to out and returns out.
static stbi_uc* stbi__resample_row_v_2(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs)
{
   int col = 0;
   STBI_NOTUSED(hs);

   while (col < w) {
      int blend = 3*in_near[col] + in_far[col] + 2;   // +2 rounds the /4
      out[col] = stbi__div4(blend);
      ++col;
   }
   return out;
}
3248 
// 2x horizontal upsampling of one row: every input sample yields two output
// samples, each a rounded 3:1 blend with the neighbour on that side. The two
// outermost output samples are plain copies of the first and last input.
// Only in_near is read; 2*w samples are written to out, which is returned.
static stbi_uc*  stbi__resample_row_h_2(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs)
{
   const stbi_uc *src = in_near;
   int k;

   STBI_NOTUSED(in_far);
   STBI_NOTUSED(hs);

   if (w == 1) {
      // a single sample has no neighbour to interpolate with
      out[1] = src[0];
      out[0] = src[0];
      return out;
   }

   // left edge
   out[0] = src[0];
   out[1] = stbi__div4(3*src[0] + src[1] + 2);

   // interior samples have a neighbour on both sides
   for (k = 1; k < w-1; ++k) {
      int center = 3*src[k] + 2;
      stbi_uc *pair = out + k*2;
      pair[0] = stbi__div4(center + src[k-1]);
      pair[1] = stbi__div4(center + src[k+1]);
   }

   // right edge; k was left on the last input sample by the loop
   out[k*2]   = stbi__div4(3*src[w-2] + src[w-1] + 2);
   out[k*2+1] = src[w-1];

   return out;
}
3276 
3277 #define stbi__div16(x) ((stbi_uc) ((x) >> 4))
3278 
stbi__resample_row_hv_2(stbi_uc * out,stbi_uc * in_near,stbi_uc * in_far,int w,int hs)3279 static stbi_uc *stbi__resample_row_hv_2(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs)
3280 {
3281    // need to generate 2x2 samples for every one in input
3282    int i,t0,t1;
3283    if (w == 1) {
3284       out[0] = out[1] = stbi__div4(3*in_near[0] + in_far[0] + 2);
3285       return out;
3286    }
3287 
3288    t1 = 3*in_near[0] + in_far[0];
3289    out[0] = stbi__div4(t1+2);
3290    for (i=1; i < w; ++i) {
3291       t0 = t1;
3292       t1 = 3*in_near[i]+in_far[i];
3293       out[i*2-1] = stbi__div16(3*t0 + t1 + 8);
3294       out[i*2  ] = stbi__div16(3*t1 + t0 + 8);
3295    }
3296    out[w*2-1] = stbi__div4(t1+2);
3297 
3298    STBI_NOTUSED(hs);
3299 
3300    return out;
3301 }
3302 
#if defined(STBI_SSE2) || defined(STBI_NEON)
// SSE2/NEON flavour of the 2x2 upsampler. Eight input samples are handled per
// vector iteration; the remaining samples (always including the last one,
// because of the edge condition) go through the scalar loop at the end.
// Writes 2*w samples to out and returns out.
static stbi_uc *stbi__resample_row_hv_2_simd(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs)
{
   int x = 0;
   int left;      // vertical blend of the previous column
   int cur;       // vertical blend of the current column
   int simd_w;    // number of leading samples covered by the vector loop

   if (w == 1) {
      out[0] = out[1] = stbi__div4(3*in_near[0] + in_far[0] + 2);
      return out;
   }

   cur = 3*in_near[0] + in_far[0];

   // whole groups of 8, never touching the final sample of the row: the
   // filter needs the following column, which the last sample does not have
   simd_w = (w-1) & ~7;
   while (x < simd_w) {
#if defined(STBI_SSE2)
      // vertical pass, using 3*n + f = 4*n + (f - n)
      __m128i zero    = _mm_setzero_si128();
      __m128i far8    = _mm_loadl_epi64((__m128i *) (in_far + x));
      __m128i near8   = _mm_loadl_epi64((__m128i *) (in_near + x));
      __m128i far16   = _mm_unpacklo_epi8(far8, zero);
      __m128i near16  = _mm_unpacklo_epi8(near8, zero);
      __m128i vdelta  = _mm_sub_epi16(far16, near16);
      __m128i near_x4 = _mm_slli_epi16(near16, 2);
      __m128i vert    = _mm_add_epi16(near_x4, vdelta); // this group of columns

      // build the left/right neighbour vectors: the group shifted by one lane
      // each way, with the column before the group (kept in `cur`) and the
      // first column of the next group patched into the vacated lanes
      __m128i shl1    = _mm_slli_si128(vert, 2);
      __m128i shr1    = _mm_srli_si128(vert, 2);
      __m128i lhs     = _mm_insert_epi16(shl1, cur, 0);
      __m128i rhs     = _mm_insert_epi16(shr1, 3*in_near[x+8] + in_far[x+8], 7);

      // horizontal pass in polyphase form:
      //   even output = 3*c + l = 4*c + (l - c)
      //   odd  output = 3*c + r = 4*c + (r - c)
      // the 4*c + rounding term is shared by both phases
      __m128i round8  = _mm_set1_epi16(8);
      __m128i vert_x4 = _mm_slli_epi16(vert, 2);
      __m128i dl      = _mm_sub_epi16(lhs, vert);
      __m128i dr      = _mm_sub_epi16(rhs, vert);
      __m128i base    = _mm_add_epi16(vert_x4, round8);
      __m128i even    = _mm_add_epi16(dl, base);
      __m128i odd     = _mm_add_epi16(dr, base);

      // interleave the two phases and drop the x16 scale
      __m128i mix_lo  = _mm_unpacklo_epi16(even, odd);
      __m128i mix_hi  = _mm_unpackhi_epi16(even, odd);
      __m128i px_lo   = _mm_srli_epi16(mix_lo, 4);
      __m128i px_hi   = _mm_srli_epi16(mix_hi, 4);

      // narrow to bytes and store 16 output samples
      __m128i packed  = _mm_packus_epi16(px_lo, px_hi);
      _mm_storeu_si128((__m128i *) (out + x*2), packed);
#elif defined(STBI_NEON)
      // vertical pass, using 3*n + f = 4*n + (f - n)
      uint8x8_t far8    = vld1_u8(in_far + x);
      uint8x8_t near8   = vld1_u8(in_near + x);
      int16x8_t vdelta  = vreinterpretq_s16_u16(vsubl_u8(far8, near8));
      int16x8_t near_x4 = vreinterpretq_s16_u16(vshll_n_u8(near8, 2));
      int16x8_t vert    = vaddq_s16(near_x4, vdelta); // this group of columns

      // build the left/right neighbour vectors: the group rotated by one lane
      // each way, with the column before the group (kept in `cur`) and the
      // first column of the next group patched into the wrapped lanes
      int16x8_t rot_r   = vextq_s16(vert, vert, 7);
      int16x8_t rot_l   = vextq_s16(vert, vert, 1);
      int16x8_t lhs     = vsetq_lane_s16(cur, rot_r, 0);
      int16x8_t rhs     = vsetq_lane_s16(3*in_near[x+8] + in_far[x+8], rot_l, 7);

      // horizontal pass in polyphase form:
      //   even output = 3*c + l = 4*c + (l - c)
      //   odd  output = 3*c + r = 4*c + (r - c)
      int16x8_t vert_x4 = vshlq_n_s16(vert, 2);
      int16x8_t dl      = vsubq_s16(lhs, vert);
      int16x8_t dr      = vsubq_s16(rhs, vert);
      int16x8_t even    = vaddq_s16(vert_x4, dl);
      int16x8_t odd     = vaddq_s16(vert_x4, dr);

      // rounding narrow, then an interleaving store of the two phases
      uint8x8x2_t pair;
      pair.val[0] = vqrshrun_n_s16(even, 4);
      pair.val[1] = vqrshrun_n_s16(odd,  4);
      vst2_u8(out + x*2, pair);
#endif

      // the last column of this group is the left neighbour of the next one
      cur = 3*in_near[x+7] + in_far[x+7];
      x += 8;
   }

   // first scalar column: only its even output is still missing
   left = cur;
   cur  = 3*in_near[x] + in_far[x];
   out[x*2] = stbi__div16(3*cur + left + 8);

   x += 1;
   while (x < w) {
      left = cur;
      cur  = 3*in_near[x] + in_far[x];
      out[x*2-1] = stbi__div16(3*left + cur + 8);
      out[x*2  ] = stbi__div16(3*cur + left + 8);
      x += 1;
   }
   out[w*2-1] = stbi__div4(cur+2);

   STBI_NOTUSED(hs);

   return out;
}
#endif
3419 
// Fallback resampler for any expansion factor: nearest-neighbour, i.e. every
// sample of in_near is repeated hs times. Writes w*hs samples to out and
// returns out.
static stbi_uc *stbi__resample_row_generic(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs)
{
   stbi_uc *dst = out;
   int src_i, rep;

   STBI_NOTUSED(in_far);

   for (src_i = 0; src_i < w; ++src_i) {
      for (rep = 0; rep < hs; ++rep) {
         *dst = in_near[src_i];
         ++dst;
      }
   }
   return out;
}
3430 
// Fixed-point form of a YCbCr-to-RGB coefficient: the value is rounded to 12
// fractional bits and then shifted up by another 8. The precision is reduced
// on purpose so the scalar code produces the same results as the SIMD code.
#define stbi__float2fixed(x) \
   (((int) ((x) * 4096.0f + 0.5f)) << 8)
// Convert `count` YCbCr samples to RGB using 20-bit fixed-point arithmetic.
//   out          destination; advanced by `step` bytes per pixel
//   y, pcb, pcr  one row each of luma, Cb and Cr samples
// Four bytes (R, G, B, 255) are written per pixel regardless of `step`.
static void stbi__YCbCr_to_RGB_row(stbi_uc *out, const stbi_uc *y, const stbi_uc *pcb, const stbi_uc *pcr, int count, int step)
{
   stbi_uc *px = out;
   int i;

   for (i = 0; i < count; ++i, px += step) {
      int cb = pcb[i] - 128;
      int cr = pcr[i] - 128;
      int luma = (y[i] << 20) + (1<<19); // the added half is for rounding
      int r = luma +  cr* stbi__float2fixed(1.40200f);
      int g = luma + (cr*-stbi__float2fixed(0.71414f)) + ((cb*-stbi__float2fixed(0.34414f)) & 0xffff0000);
      int b = luma +  cb* stbi__float2fixed(1.77200f);

      // drop the fractional bits
      r >>= 20;
      g >>= 20;
      b >>= 20;

      // clamp to 0..255
      if (r < 0) r = 0; else if (r > 255) r = 255;
      if (g < 0) g = 0; else if (g > 255) g = 255;
      if (b < 0) b = 0; else if (b > 255) b = 255;

      px[0] = (stbi_uc)r;
      px[1] = (stbi_uc)g;
      px[2] = (stbi_uc)b;
      px[3] = 255;
   }
}
3458 
#if defined(STBI_SSE2) || defined(STBI_NEON)
// SSE2/NEON flavour of the YCbCr-to-RGB row conversion. Only step == 4 is
// vectorised (8 pixels per iteration); any other step, and whatever pixels
// remain after the vector loop, are converted by the scalar loop at the end.
static void stbi__YCbCr_to_RGB_simd(stbi_uc *out, stbi_uc const *y, stbi_uc const *pcb, stbi_uc const *pcr, int count, int step)
{
   int i = 0;

#ifdef STBI_SSE2
   // step == 3 is pretty ugly on the final interleave, and i'm not convinced
   // it's useful in practice (you wouldn't use it for textures, for example).
   // so just accelerate step == 4 case.
   if (step == 4) {
      // straightforward implementation, not heavily tuned
      __m128i sign_bit  = _mm_set1_epi8(-0x80);
      __m128i k_cr_r    = _mm_set1_epi16(   (short) ( 1.40200f*4096.0f+0.5f));
      __m128i k_cr_g    = _mm_set1_epi16( - (short) ( 0.71414f*4096.0f+0.5f));
      __m128i k_cb_g    = _mm_set1_epi16( - (short) ( 0.34414f*4096.0f+0.5f));
      __m128i k_cb_b    = _mm_set1_epi16(   (short) ( 1.77200f*4096.0f+0.5f));
      __m128i luma_bias = _mm_set1_epi8((char) (unsigned char) 128);
      __m128i alpha16   = _mm_set1_epi16(255); // alpha channel

      for (; i+7 < count; i += 8) {
         // load 8 samples of each plane
         __m128i y8   = _mm_loadl_epi64((__m128i *) (y+i));
         __m128i cr8  = _mm_loadl_epi64((__m128i *) (pcr+i));
         __m128i cb8  = _mm_loadl_epi64((__m128i *) (pcb+i));
         __m128i cr_s = _mm_xor_si128(cr8, sign_bit); // -128
         __m128i cb_s = _mm_xor_si128(cb8, sign_bit); // -128

         // widen to 16 bits (chroma ends up left-shifted by 8)
         __m128i y16  = _mm_unpacklo_epi8(luma_bias, y8);
         __m128i cr16 = _mm_unpacklo_epi8(_mm_setzero_si128(), cr_s);
         __m128i cb16 = _mm_unpacklo_epi8(_mm_setzero_si128(), cb_s);

         // color transform
         __m128i y_scaled = _mm_srli_epi16(y16, 4);
         __m128i r_cr     = _mm_mulhi_epi16(k_cr_r, cr16);
         __m128i g_cb     = _mm_mulhi_epi16(k_cb_g, cb16);
         __m128i b_cb     = _mm_mulhi_epi16(cb16, k_cb_b);
         __m128i g_cr     = _mm_mulhi_epi16(cr16, k_cr_g);
         __m128i r_s      = _mm_add_epi16(r_cr, y_scaled);
         __m128i g_part   = _mm_add_epi16(g_cb, y_scaled);
         __m128i b_s      = _mm_add_epi16(y_scaled, b_cb);
         __m128i g_s      = _mm_add_epi16(g_part, g_cr);

         // descale
         __m128i r16 = _mm_srai_epi16(r_s, 4);
         __m128i b16 = _mm_srai_epi16(b_s, 4);
         __m128i g16 = _mm_srai_epi16(g_s, 4);

         // saturate to bytes, arranged for the transpose below
         __m128i rb_bytes = _mm_packus_epi16(r16, b16);
         __m128i ga_bytes = _mm_packus_epi16(g16, alpha16);

         // transpose to interleave the channels
         __m128i il_lo = _mm_unpacklo_epi8(rb_bytes, ga_bytes);
         __m128i il_hi = _mm_unpackhi_epi8(rb_bytes, ga_bytes);
         __m128i px_a  = _mm_unpacklo_epi16(il_lo, il_hi);
         __m128i px_b  = _mm_unpackhi_epi16(il_lo, il_hi);

         // store 8 pixels of 4 bytes each
         _mm_storeu_si128((__m128i *) (out + 0), px_a);
         _mm_storeu_si128((__m128i *) (out + 16), px_b);
         out += 32;
      }
   }
#endif

#ifdef STBI_NEON
   // in this version, step=3 support would be easy to add. but is there demand?
   if (step == 4) {
      // straightforward implementation, not heavily tuned
      uint8x8_t sign_bit = vdup_n_u8(0x80);
      int16x8_t k_cr_r   = vdupq_n_s16(   (short) ( 1.40200f*4096.0f+0.5f));
      int16x8_t k_cr_g   = vdupq_n_s16( - (short) ( 0.71414f*4096.0f+0.5f));
      int16x8_t k_cb_g   = vdupq_n_s16( - (short) ( 0.34414f*4096.0f+0.5f));
      int16x8_t k_cb_b   = vdupq_n_s16(   (short) ( 1.77200f*4096.0f+0.5f));

      for (; i+7 < count; i += 8) {
         // load 8 samples of each plane
         uint8x8_t y8   = vld1_u8(y + i);
         uint8x8_t cr8  = vld1_u8(pcr + i);
         uint8x8_t cb8  = vld1_u8(pcb + i);
         int8x8_t  cr_s = vreinterpret_s8_u8(vsub_u8(cr8, sign_bit));
         int8x8_t  cb_s = vreinterpret_s8_u8(vsub_u8(cb8, sign_bit));

         // expand to s16
         int16x8_t y_scaled = vreinterpretq_s16_u16(vshll_n_u8(y8, 4));
         int16x8_t cr16     = vshll_n_s8(cr_s, 7);
         int16x8_t cb16     = vshll_n_s8(cb_s, 7);

         // color transform
         int16x8_t r_cr = vqdmulhq_s16(cr16, k_cr_r);
         int16x8_t g_cb = vqdmulhq_s16(cb16, k_cb_g);
         int16x8_t g_cr = vqdmulhq_s16(cr16, k_cr_g);
         int16x8_t b_cb = vqdmulhq_s16(cb16, k_cb_b);
         int16x8_t r_s  = vaddq_s16(y_scaled, r_cr);
         int16x8_t g_s  = vaddq_s16(vaddq_s16(y_scaled, g_cb), g_cr);
         int16x8_t b_s  = vaddq_s16(y_scaled, b_cb);

         // undo scaling, round, convert to byte
         uint8x8x4_t rgba;
         rgba.val[0] = vqrshrun_n_s16(r_s, 4);
         rgba.val[1] = vqrshrun_n_s16(g_s, 4);
         rgba.val[2] = vqrshrun_n_s16(b_s, 4);
         rgba.val[3] = vdup_n_u8(255);

         // store, interleaving r/g/b/a
         vst4_u8(out, rgba);
         out += 8*4;
      }
   }
#endif

   // scalar path for whatever the vector loops did not cover
   for (; i < count; ++i, out += step) {
      int cb = pcb[i] - 128;
      int cr = pcr[i] - 128;
      int luma = (y[i] << 20) + (1<<19); // the added half is for rounding
      int r = luma + cr* stbi__float2fixed(1.40200f);
      int g = luma + cr*-stbi__float2fixed(0.71414f) + ((cb*-stbi__float2fixed(0.34414f)) & 0xffff0000);
      int b = luma + cb* stbi__float2fixed(1.77200f);

      r >>= 20;
      g >>= 20;
      b >>= 20;

      // clamp to 0..255
      if (r < 0) r = 0; else if (r > 255) r = 255;
      if (g < 0) g = 0; else if (g > 255) g = 255;
      if (b < 0) b = 0; else if (b > 255) b = 255;

      out[0] = (stbi_uc)r;
      out[1] = (stbi_uc)g;
      out[2] = (stbi_uc)b;
      out[3] = 255;
   }
}
#endif
3593 
3594 // set up the kernels
stbi__setup_jpeg(stbi__jpeg * j)3595 static void stbi__setup_jpeg(stbi__jpeg *j)
3596 {
3597    j->idct_block_kernel = stbi__idct_block;
3598    j->YCbCr_to_RGB_kernel = stbi__YCbCr_to_RGB_row;
3599    j->resample_row_hv_2_kernel = stbi__resample_row_hv_2;
3600 
3601 #ifdef STBI_SSE2
3602    if (stbi__sse2_available()) {
3603       j->idct_block_kernel = stbi__idct_simd;
3604       j->YCbCr_to_RGB_kernel = stbi__YCbCr_to_RGB_simd;
3605       j->resample_row_hv_2_kernel = stbi__resample_row_hv_2_simd;
3606    }
3607 #endif
3608 
3609 #ifdef STBI_NEON
3610    j->idct_block_kernel = stbi__idct_simd;
3611    j->YCbCr_to_RGB_kernel = stbi__YCbCr_to_RGB_simd;
3612    j->resample_row_hv_2_kernel = stbi__resample_row_hv_2_simd;
3613 #endif
3614 }
3615 
3616 // clean up the temporary component buffers
stbi__cleanup_jpeg(stbi__jpeg * j)3617 static void stbi__cleanup_jpeg(stbi__jpeg *j)
3618 {
3619    stbi__free_jpeg_components(j, j->s->img_n, 0);
3620 }
3621 
3622 typedef struct
3623 {
3624    resample_row_func resample;
3625    stbi_uc *line0,*line1;
3626    int hs,vs;   // expansion factor in each axis
3627    int w_lores; // horizontal pixels pre-expansion
3628    int ystep;   // how far through vertical expansion we are
3629    int ypos;    // which pre-expansion row we're on
3630 } stbi__resample;
3631 
3632 // fast 0..255 * 0..255 => 0..255 rounded multiplication
stbi__blinn_8x8(stbi_uc x,stbi_uc y)3633 static stbi_uc stbi__blinn_8x8(stbi_uc x, stbi_uc y)
3634 {
3635    unsigned int t = x*y + 128;
3636    return (stbi_uc) ((t + (t >>8)) >> 8);
3637 }
3638 
load_jpeg_image(stbi__jpeg * z,int * out_x,int * out_y,int * comp,int req_comp)3639 static stbi_uc *load_jpeg_image(stbi__jpeg *z, int *out_x, int *out_y, int *comp, int req_comp)
3640 {
3641    int n, decode_n, is_rgb;
3642    z->s->img_n = 0; // make stbi__cleanup_jpeg safe
3643 
3644    // validate req_comp
3645    if (req_comp < 0 || req_comp > 4) return stbi__errpuc("bad req_comp", "Internal error");
3646 
3647    // load a jpeg image from whichever source, but leave in YCbCr format
3648    if (!stbi__decode_jpeg_image(z)) { stbi__cleanup_jpeg(z); return NULL; }
3649 
3650    // determine actual number of components to generate
3651    n = req_comp ? req_comp : z->s->img_n >= 3 ? 3 : 1;
3652 
3653    is_rgb = z->s->img_n == 3 && (z->rgb == 3 || (z->app14_color_transform == 0 && !z->jfif));
3654 
3655    if (z->s->img_n == 3 && n < 3 && !is_rgb)
3656       decode_n = 1;
3657    else
3658       decode_n = z->s->img_n;
3659 
3660    // resample and color-convert
3661    {
3662       int k;
3663       unsigned int i,j;
3664       stbi_uc *output;
3665       stbi_uc *coutput[4] = { NULL, NULL, NULL, NULL };
3666 
3667       stbi__resample res_comp[4];
3668 
3669       for (k=0; k < decode_n; ++k) {
3670          stbi__resample *r = &res_comp[k];
3671 
3672          // allocate line buffer big enough for upsampling off the edges
3673          // with upsample factor of 4
3674          z->img_comp[k].linebuf = (stbi_uc *) stbi__malloc(z->s->img_x + 3);
3675          if (!z->img_comp[k].linebuf) { stbi__cleanup_jpeg(z); return stbi__errpuc("outofmem", "Out of memory"); }
3676 
3677          r->hs      = z->img_h_max / z->img_comp[k].h;
3678          r->vs      = z->img_v_max / z->img_comp[k].v;
3679          r->ystep   = r->vs >> 1;
3680          r->w_lores = (z->s->img_x + r->hs-1) / r->hs;
3681          r->ypos    = 0;
3682          r->line0   = r->line1 = z->img_comp[k].data;
3683 
3684          if      (r->hs == 1 && r->vs == 1) r->resample = resample_row_1;
3685          else if (r->hs == 1 && r->vs == 2) r->resample = stbi__resample_row_v_2;
3686          else if (r->hs == 2 && r->vs == 1) r->resample = stbi__resample_row_h_2;
3687          else if (r->hs == 2 && r->vs == 2) r->resample = z->resample_row_hv_2_kernel;
3688          else                               r->resample = stbi__resample_row_generic;
3689       }
3690 
3691       // can't error after this so, this is safe
3692       output = (stbi_uc *) stbi__malloc_mad3(n, z->s->img_x, z->s->img_y, 1);
3693       if (!output) { stbi__cleanup_jpeg(z); return stbi__errpuc("outofmem", "Out of memory"); }
3694 
3695       // now go ahead and resample
3696       for (j=0; j < z->s->img_y; ++j) {
3697          stbi_uc *out = output + n * z->s->img_x * j;
3698          for (k=0; k < decode_n; ++k) {
3699             stbi__resample *r = &res_comp[k];
3700             int y_bot = r->ystep >= (r->vs >> 1);
3701             coutput[k] = r->resample(z->img_comp[k].linebuf,
3702                                      y_bot ? r->line1 : r->line0,
3703                                      y_bot ? r->line0 : r->line1,
3704                                      r->w_lores, r->hs);
3705             if (++r->ystep >= r->vs) {
3706                r->ystep = 0;
3707                r->line0 = r->line1;
3708                if (++r->ypos < z->img_comp[k].y)
3709                   r->line1 += z->img_comp[k].w2;
3710             }
3711          }
3712          if (n >= 3) {
3713             stbi_uc *y = coutput[0];
3714             if (z->s->img_n == 3) {
3715                if (is_rgb) {
3716                   for (i=0; i < z->s->img_x; ++i) {
3717                      out[0] = y[i];
3718                      out[1] = coutput[1][i];
3719                      out[2] = coutput[2][i];
3720                      out[3] = 255;
3721                      out += n;
3722                   }
3723                } else {
3724                   z->YCbCr_to_RGB_kernel(out, y, coutput[1], coutput[2], z->s->img_x, n);
3725                }
3726             } else if (z->s->img_n == 4) {
3727                if (z->app14_color_transform == 0) { // CMYK
3728                   for (i=0; i < z->s->img_x; ++i) {
3729                      stbi_uc m = coutput[3][i];
3730                      out[0] = stbi__blinn_8x8(coutput[0][i], m);
3731                      out[1] = stbi__blinn_8x8(coutput[1][i], m);
3732                      out[2] = stbi__blinn_8x8(coutput[2][i], m);
3733                      out[3] = 255;
3734                      out += n;
3735                   }
3736                } else if (z->app14_color_transform == 2) { // YCCK
3737                   z->YCbCr_to_RGB_kernel(out, y, coutput[1], coutput[2], z->s->img_x, n);
3738                   for (i=0; i < z->s->img_x; ++i) {
3739                      stbi_uc m = coutput[3][i];
3740                      out[0] = stbi__blinn_8x8(255 - out[0], m);
3741                      out[1] = stbi__blinn_8x8(255 - out[1], m);
3742                      out[2] = stbi__blinn_8x8(255 - out[2], m);
3743                      out += n;
3744                   }
3745                } else { // YCbCr + alpha?  Ignore the fourth channel for now
3746                   z->YCbCr_to_RGB_kernel(out, y, coutput[1], coutput[2], z->s->img_x, n);
3747                }
3748             } else
3749                for (i=0; i < z->s->img_x; ++i) {
3750                   out[0] = out[1] = out[2] = y[i];
3751                   out[3] = 255; // not used if n==3
3752                   out += n;
3753                }
3754          } else {
3755             if (is_rgb) {
3756                if (n == 1)
3757                   for (i=0; i < z->s->img_x; ++i)
3758                      *out++ = stbi__compute_y(coutput[0][i], coutput[1][i], coutput[2][i]);
3759                else {
3760                   for (i=0; i < z->s->img_x; ++i, out += 2) {
3761                      out[0] = stbi__compute_y(coutput[0][i], coutput[1][i], coutput[2][i]);
3762                      out[1] = 255;
3763                   }
3764                }
3765             } else if (z->s->img_n == 4 && z->app14_color_transform == 0) {
3766                for (i=0; i < z->s->img_x; ++i) {
3767                   stbi_uc m = coutput[3][i];
3768                   stbi_uc r = stbi__blinn_8x8(coutput[0][i], m);
3769                   stbi_uc g = stbi__blinn_8x8(coutput[1][i], m);
3770                   stbi_uc b = stbi__blinn_8x8(coutput[2][i], m);
3771                   out[0] = stbi__compute_y(r, g, b);
3772                   out[1] = 255;
3773                   out += n;
3774                }
3775             } else if (z->s->img_n == 4 && z->app14_color_transform == 2) {
3776                for (i=0; i < z->s->img_x; ++i) {
3777                   out[0] = stbi__blinn_8x8(255 - coutput[0][i], coutput[3][i]);
3778                   out[1] = 255;
3779                   out += n;
3780                }
3781             } else {
3782                stbi_uc *y = coutput[0];
3783                if (n == 1)
3784                   for (i=0; i < z->s->img_x; ++i) out[i] = y[i];
3785                else
3786                   for (i=0; i < z->s->img_x; ++i) { *out++ = y[i]; *out++ = 255; }
3787             }
3788          }
3789       }
3790       stbi__cleanup_jpeg(z);
3791       *out_x = z->s->img_x;
3792       *out_y = z->s->img_y;
3793       if (comp) *comp = z->s->img_n >= 3 ? 3 : 1; // report original components, not output
3794       return output;
3795    }
3796 }
3797 
// Loader entry point for JPEG: allocates a temporary decoder, decodes the
// image from `s` and frees the decoder again.
//   x, y, comp, req_comp  passed straight through to load_jpeg_image
//   ri                    unused by this loader
// Returns the pixel buffer produced by load_jpeg_image, or NULL on error
// (including failure to allocate the decoder state).
static void *stbi__jpeg_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri)
{
   unsigned char* result;
   stbi__jpeg* j = (stbi__jpeg*) stbi__malloc(sizeof(stbi__jpeg));
   STBI_NOTUSED(ri);
   // the decoder state is a sizeable allocation; don't dereference a failed one
   if (!j) return stbi__errpuc("outofmem", "Out of memory");
   j->s = s;
   stbi__setup_jpeg(j);
   result = load_jpeg_image(j, x,y,comp,req_comp);
   STBI_FREE(j);
   return result;
}
3809 
// Check whether the stream `s` starts like a JPEG (header decoded in
// STBI__SCAN_type mode). The stream is rewound afterwards so another format
// test or the real loader can read it from the start.
// Returns nonzero if it looks like a JPEG, 0 otherwise or if the temporary
// decoder state cannot be allocated.
static int stbi__jpeg_test(stbi__context *s)
{
   int r;
   stbi__jpeg* j = (stbi__jpeg*)stbi__malloc(sizeof(stbi__jpeg));
   // nothing has been read yet, so there is nothing to rewind on this path
   if (!j) return stbi__err("outofmem", "Out of memory");
   j->s = s;
   stbi__setup_jpeg(j);
   r = stbi__decode_jpeg_header(j, STBI__SCAN_type);
   stbi__rewind(s);
   STBI_FREE(j);
   return r;
}
3821 
// Parse the JPEG header through 'j' and report the image dimensions and
// component count via the non-NULL out-pointers. Returns 1 on success; on a
// header failure the context is rewound and 0 is returned.
static int stbi__jpeg_info_raw(stbi__jpeg *j, int *x, int *y, int *comp)
{
   int header_ok = stbi__decode_jpeg_header(j, STBI__SCAN_header);
   stbi__context *ctx = j->s;

   if (header_ok) {
      if (x != NULL) *x = ctx->img_x;
      if (y != NULL) *y = ctx->img_y;
      // anything with three or more components is reported as 3, otherwise 1
      if (comp != NULL) *comp = (ctx->img_n < 3) ? 1 : 3;
      return 1;
   }

   stbi__rewind(ctx);
   return 0;
}
3833 
// Query JPEG dimensions/components from 's' using a temporary decoder state.
// Returns the result of stbi__jpeg_info_raw(), or the value of stbi__err()
// if the decoder state cannot be allocated.
static int stbi__jpeg_info(stbi__context *s, int *x, int *y, int *comp)
{
   int result;
   stbi__jpeg* j = (stbi__jpeg*) (stbi__malloc(sizeof(stbi__jpeg)));
   // guard against allocation failure before touching the state
   if (j == NULL) return stbi__err("outofmem", "Out of memory");
   j->s = s;
   result = stbi__jpeg_info_raw(j, x, y, comp);
   STBI_FREE(j);
   return result;
}
3843 #endif
3844 
3845 // public domain zlib decode    v0.2  Sean Barrett 2006-11-18
3846 //    simple implementation
3847 //      - all input must be provided in an upfront buffer
3848 //      - all output is written to a single output buffer (can malloc/realloc)
3849 //    performance
3850 //      - fast huffman
3851 
3852 #ifndef STBI_NO_ZLIB
3853 
// fast-way is faster to check than jpeg huffman, but slow way is slower
// STBI__ZFAST_BITS: number of low bits of the bit buffer used to index the
// 'fast' lookup table of stbi__zhuffman.
#define STBI__ZFAST_BITS  9 // accelerate all cases in default tables
// STBI__ZFAST_MASK: mask selecting those low STBI__ZFAST_BITS bits.
#define STBI__ZFAST_MASK  ((1 << STBI__ZFAST_BITS) - 1)
3857 
// zlib-style huffman encoding
// (jpeg packs from left, zlib from right, so can't share code)
typedef struct
{
   // indexed by the low STBI__ZFAST_BITS bits of the bit buffer; an entry is
   // (code length << 9) | symbol, or 0 when the code is longer than the table covers
   stbi__uint16 fast[1 << STBI__ZFAST_BITS];
   // firstcode[n]: first code value assigned to length n
   stbi__uint16 firstcode[16];
   // maxcode[n]: one past the last code of length n, pre-shifted to 16 bits;
   // maxcode[16] is a sentinel that terminates the slow-path search
   int maxcode[17];
   // firstsymbol[n]: index into size[]/value[] of the first entry with length n
   stbi__uint16 firstsymbol[16];
   // size[c] / value[c]: code length and symbol of the c-th entry in code order
   stbi_uc  size[288];
   stbi__uint16 value[288];
} stbi__zhuffman;
3869 
stbi__bitreverse16(int n)3870 stbi_inline static int stbi__bitreverse16(int n)
3871 {
3872   n = ((n & 0xAAAA) >>  1) | ((n & 0x5555) << 1);
3873   n = ((n & 0xCCCC) >>  2) | ((n & 0x3333) << 2);
3874   n = ((n & 0xF0F0) >>  4) | ((n & 0x0F0F) << 4);
3875   n = ((n & 0xFF00) >>  8) | ((n & 0x00FF) << 8);
3876   return n;
3877 }
3878 
stbi__bit_reverse(int v,int bits)3879 stbi_inline static int stbi__bit_reverse(int v, int bits)
3880 {
3881    STBI_ASSERT(bits <= 16);
3882    // to bit reverse n bits, reverse 16 and shift
3883    // e.g. 11 bits, bit reverse and shift away 5
3884    return stbi__bitreverse16(v) >> (16-bits);
3885 }
3886 
// Fill the decoding tables of 'z' from 'num' per-symbol code lengths in
// 'sizelist' (a length of 0 means the symbol has no code).
// Returns 1 on success, or the value of stbi__err() when the length counts
// cannot describe a valid set of codes.
static int stbi__zbuild_huffman(stbi__zhuffman *z, const stbi_uc *sizelist, int num)
{
   int i,k=0;
   int code, next_code[16], sizes[17];

   // DEFLATE spec for generating codes
   memset(sizes, 0, sizeof(sizes));
   memset(z->fast, 0, sizeof(z->fast));
   // histogram: sizes[n] = number of symbols with code length n
   for (i=0; i < num; ++i)
      ++sizes[sizelist[i]];
   sizes[0] = 0; // symbols without a code take no code space
   // there are only 2^i distinct codes of length i
   for (i=1; i < 16; ++i)
      if (sizes[i] > (1 << i))
         return stbi__err("bad sizes", "Corrupt PNG");
   code = 0;
   // assign consecutive code ranges per length; k counts symbols placed so far
   for (i=1; i < 16; ++i) {
      next_code[i] = code;
      z->firstcode[i] = (stbi__uint16) code;
      z->firstsymbol[i] = (stbi__uint16) k;
      code = (code + sizes[i]);
      if (sizes[i])
         if (code-1 >= (1 << i)) return stbi__err("bad codelengths","Corrupt PNG");
      z->maxcode[i] = code << (16-i); // preshift for inner loop
      code <<= 1;
      k += sizes[i];
   }
   z->maxcode[16] = 0x10000; // sentinel
   // second pass: give every used symbol its slot and, for short codes,
   // replicate it into every fast-table entry whose low bits match the code
   for (i=0; i < num; ++i) {
      int s = sizelist[i];
      if (s) {
         int c = next_code[s] - z->firstcode[s] + z->firstsymbol[s];
         stbi__uint16 fastv = (stbi__uint16) ((s << 9) | i);
         z->size [c] = (stbi_uc     ) s;
         z->value[c] = (stbi__uint16) i;
         if (s <= STBI__ZFAST_BITS) {
            // the stream delivers code bits in reverse order, so index by the reversed code
            int j = stbi__bit_reverse(next_code[s],s);
            while (j < (1 << STBI__ZFAST_BITS)) {
               z->fast[j] = fastv;
               j += (1 << s);
            }
         }
         ++next_code[s];
      }
   }
   return 1;
}
3933 
// zlib-from-memory implementation for PNG reading
//    because PNG allows splitting the zlib stream arbitrarily,
//    and it's annoying structurally to have PNG call ZLIB call PNG,
//    we require PNG read all the IDATs and combine them into a single
//    memory buffer

// Decoder state: input cursor, bit buffer, output buffer and the two
// huffman tables of the block currently being decoded.
typedef struct
{
   stbi_uc *zbuffer, *zbuffer_end; // next input byte / one past the last input byte
   int num_bits;                   // number of valid bits held in code_buffer
   stbi__uint32 code_buffer;       // bit buffer; the next bit to consume is the lowest one

   char *zout;        // next output position
   char *zout_start;  // start of the output buffer
   char *zout_end;    // one past the end of the output buffer
   int   z_expandable; // nonzero if stbi__zexpand may reallocate the output buffer

   stbi__zhuffman z_length, z_distance; // literal/length and distance tables
} stbi__zbuf;
3953 
stbi__zget8(stbi__zbuf * z)3954 stbi_inline static stbi_uc stbi__zget8(stbi__zbuf *z)
3955 {
3956    if (z->zbuffer >= z->zbuffer_end) return 0;
3957    return *z->zbuffer++;
3958 }
3959 
// Top up the bit buffer one byte at a time (at least one byte is always
// added) until it holds more than 24 bits.
static void stbi__fill_bits(stbi__zbuf *z)
{
   for (;;) {
      unsigned int incoming;
      // nothing may be stored above the bits currently counted as valid
      STBI_ASSERT(z->code_buffer < (1U << z->num_bits));
      incoming = stbi__zget8(z);
      z->code_buffer |= incoming << z->num_bits; // new bytes go above the existing bits
      z->num_bits += 8;
      if (z->num_bits > 24) break;
   }
}
3968 
stbi__zreceive(stbi__zbuf * z,int n)3969 stbi_inline static unsigned int stbi__zreceive(stbi__zbuf *z, int n)
3970 {
3971    unsigned int k;
3972    if (z->num_bits < n) stbi__fill_bits(z);
3973    k = z->code_buffer & ((1 << n) - 1);
3974    z->code_buffer >>= n;
3975    z->num_bits -= n;
3976    return k;
3977 }
3978 
// Decode one symbol whose code is too long for the fast table.
// Returns the symbol, or -1 if the bits do not form a valid code for 'z'.
// No bits are consumed on failure.
static int stbi__zhuffman_decode_slowpath(stbi__zbuf *a, stbi__zhuffman *z)
{
   int b,s,k;
   // not resolved by fast table, so compute it the slow way
   // use jpeg approach, which requires MSbits at top
   k = stbi__bit_reverse(a->code_buffer, 16);
   // maxcode[16] is a sentinel larger than any 16-bit k, so this terminates
   for (s=STBI__ZFAST_BITS+1; ; ++s)
      if (k < z->maxcode[s])
         break;
   if (s >= 16) return -1; // invalid code!
   // code size is s, so:
   b = (k >> (16-s)) - z->firstcode[s] + z->firstsymbol[s];
   // the tables come from untrusted data: never index outside size[]/value[]
   if (b < 0 || b >= (int) (sizeof(z->size) / sizeof(z->size[0]))) return -1;
   // was an assert; treat an inconsistent table as corrupt input instead of aborting
   if (z->size[b] != s) return -1;
   a->code_buffer >>= s;
   a->num_bits -= s;
   return z->value[b];
}
3996 
stbi__zhuffman_decode(stbi__zbuf * a,stbi__zhuffman * z)3997 stbi_inline static int stbi__zhuffman_decode(stbi__zbuf *a, stbi__zhuffman *z)
3998 {
3999    int b,s;
4000    if (a->num_bits < 16) stbi__fill_bits(a);
4001    b = z->fast[a->code_buffer & STBI__ZFAST_MASK];
4002    if (b) {
4003       s = b >> 9;
4004       a->code_buffer >>= s;
4005       a->num_bits -= s;
4006       return b & 511;
4007    }
4008    return stbi__zhuffman_decode_slowpath(a, z);
4009 }
4010 
// Grow the output buffer so that at least n more bytes fit after 'zout'.
// The capacity is doubled until it is large enough. Returns 1 on success and
// updates zout_start/zout/zout_end; returns the value of stbi__err() if the
// buffer is not expandable, the required size is not representable, or the
// reallocation fails (the old buffer then stays owned by z->zout_start).
static int stbi__zexpand(stbi__zbuf *z, char *zout, int n)  // need to make room for n bytes
{
   char *q;
   int cur, limit, old_limit;
   const int max_limit = (int) (~0u >> 1); // largest positive int; sizes are reported to callers as int
   z->zout = zout;
   if (!z->z_expandable) return stbi__err("output buffer limit","Corrupt PNG");
   cur   = (int) (z->zout     - z->zout_start);
   limit = old_limit = (int) (z->zout_end - z->zout_start);
   // reject requests whose total (cur + n) would overflow int
   if (n < 0 || n > max_limit - cur) return stbi__err("outofmem", "Out of memory");
   // a zero-sized starting buffer would never grow by doubling
   if (limit < 1) limit = 1;
   while (cur + n > limit) {
      if (limit > max_limit / 2) return stbi__err("outofmem", "Out of memory");
      limit *= 2;
   }
   q = (char *) STBI_REALLOC_SIZED(z->zout_start, old_limit, limit);
   STBI_NOTUSED(old_limit);
   if (q == NULL) return stbi__err("outofmem", "Out of memory");
   z->zout_start = q;
   z->zout       = q + cur;
   z->zout_end   = q + limit;
   return 1;
}
4029 
// Match-length decoding tables, indexed by (length symbol - 257):
// base length, and number of extra bits to read and add to it.
static const int stbi__zlength_base[31] = {
   3,4,5,6,7,8,9,10,11,13,
   15,17,19,23,27,31,35,43,51,59,
   67,83,99,115,131,163,195,227,258,0,0 };

static const int stbi__zlength_extra[31]=
{ 0,0,0,0,0,0,0,0,1,1,1,1,2,2,2,2,3,3,3,3,4,4,4,4,5,5,5,5,0,0,0 };

// Match-distance decoding tables, indexed by distance symbol:
// base distance, and number of extra bits to read and add to it.
static const int stbi__zdist_base[32] = { 1,2,3,4,5,7,9,13,17,25,33,49,65,97,129,193,
257,385,513,769,1025,1537,2049,3073,4097,6145,8193,12289,16385,24577,0,0};

// only 30 initializers are given; the last two entries are implicitly 0
static const int stbi__zdist_extra[32] =
{ 0,0,0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,8,8,9,9,10,10,11,11,12,12,13,13};
4043 
// Decode one huffman-compressed block using a->z_length / a->z_distance,
// appending literals and back-reference copies to the output buffer until the
// end-of-block symbol (256) is seen. Returns 1 on success, or 0 / the value
// of stbi__err() on corrupt data or when the output buffer cannot grow.
static int stbi__parse_huffman_block(stbi__zbuf *a)
{
   char *zout = a->zout; // local copy of the write cursor, stored back on exit/expand
   for(;;) {
      int z = stbi__zhuffman_decode(a, &a->z_length);
      if (z < 256) {
         if (z < 0) return stbi__err("bad huffman code","Corrupt PNG"); // error in huffman codes
         if (zout >= a->zout_end) {
            if (!stbi__zexpand(a, zout, 1)) return 0;
            zout = a->zout;
         }
         *zout++ = (char) z;
      } else {
         stbi_uc *p;
         int len,dist;
         if (z == 256) {
            a->zout = zout;
            return 1;
         }
         // per DEFLATE, length codes 286 and 287 must not appear in compressed data
         if (z >= 286) return stbi__err("bad huffman code","Corrupt PNG");
         z -= 257;
         len = stbi__zlength_base[z];
         if (stbi__zlength_extra[z]) len += stbi__zreceive(a, stbi__zlength_extra[z]);
         z = stbi__zhuffman_decode(a, &a->z_distance);
         // per DEFLATE, distance codes 30 and 31 must not appear in compressed data;
         // accepting them would yield dist == 0 and copy bytes that were never written
         if (z < 0 || z >= 30) return stbi__err("bad huffman code","Corrupt PNG");
         dist = stbi__zdist_base[z];
         if (stbi__zdist_extra[z]) dist += stbi__zreceive(a, stbi__zdist_extra[z]);
         if (zout - a->zout_start < dist) return stbi__err("bad dist","Corrupt PNG");
         // compare remaining space instead of forming a pointer past the buffer
         if (len > a->zout_end - zout) {
            if (!stbi__zexpand(a, zout, len)) return 0;
            zout = a->zout;
         }
         p = (stbi_uc *) (zout - dist);
         if (dist == 1) { // run of one byte; common in images.
            stbi_uc v = *p;
            if (len) { do *zout++ = v; while (--len); }
         } else {
            // byte-by-byte on purpose: source and destination may overlap
            if (len) { do *zout++ = *p++; while (--len); }
         }
      }
   }
}
4085 
// Read the header of a dynamic-huffman block and build a->z_length and
// a->z_distance from it. Returns 1 on success, or 0 / the value of
// stbi__err() when the code-length data is invalid.
static int stbi__compute_huffman_codes(stbi__zbuf *a)
{
   // order in which the code-length-code lengths are stored in the stream
   static const stbi_uc length_dezigzag[19] = { 16,17,18,0,8,7,9,6,10,5,11,4,12,3,13,2,14,1,15 };
   stbi__zhuffman z_codelength;
   stbi_uc lencodes[286+32+137];//padding for maximum single op
   stbi_uc codelength_sizes[19];
   int i,n;

   int hlit  = stbi__zreceive(a,5) + 257; // number of literal/length code lengths
   int hdist = stbi__zreceive(a,5) + 1;   // number of distance code lengths
   int hclen = stbi__zreceive(a,4) + 4;   // number of code-length-code lengths that follow
   int ntot  = hlit + hdist;

   // lengths not present in the stream stay 0
   memset(codelength_sizes, 0, sizeof(codelength_sizes));
   for (i=0; i < hclen; ++i) {
      int s = stbi__zreceive(a,3);
      codelength_sizes[length_dezigzag[i]] = (stbi_uc) s;
   }
   if (!stbi__zbuild_huffman(&z_codelength, codelength_sizes, 19)) return 0;

   // decode ntot code lengths: literal/length lengths first, distance lengths after
   n = 0;
   while (n < ntot) {
      int c = stbi__zhuffman_decode(a, &z_codelength);
      if (c < 0 || c >= 19) return stbi__err("bad codelengths", "Corrupt PNG");
      if (c < 16)
         lencodes[n++] = (stbi_uc) c; // a literal code length
      else {
         stbi_uc fill = 0;
         if (c == 16) {
            // repeat the previous length 3..6 times
            c = stbi__zreceive(a,2)+3;
            if (n == 0) return stbi__err("bad codelengths", "Corrupt PNG");
            fill = lencodes[n-1];
         } else if (c == 17)
            // run of 3..10 zero lengths
            c = stbi__zreceive(a,3)+3;
         else {
            STBI_ASSERT(c == 18);
            // run of 11..138 zero lengths
            c = stbi__zreceive(a,7)+11;
         }
         // a run may not extend past the declared number of lengths
         if (ntot - n < c) return stbi__err("bad codelengths", "Corrupt PNG");
         memset(lencodes+n, fill, c);
         n += c;
      }
   }
   if (n != ntot) return stbi__err("bad codelengths","Corrupt PNG");
   if (!stbi__zbuild_huffman(&a->z_length, lencodes, hlit)) return 0;
   if (!stbi__zbuild_huffman(&a->z_distance, lencodes+hlit, hdist)) return 0;
   return 1;
}
4134 
// Copy one stored (uncompressed) block to the output. The block starts at the
// next byte boundary with a 4-byte header: LEN and its one's complement NLEN,
// both little-endian. Returns 1 on success, or 0 / the value of stbi__err()
// on a bad header, truncated input, or when the output buffer cannot grow.
static int stbi__parse_uncompressed_block(stbi__zbuf *a)
{
   stbi_uc header[4];
   int len,nlen,k;
   if (a->num_bits & 7)
      stbi__zreceive(a, a->num_bits & 7); // discard
   // drain the bit-packed data into header
   k = 0;
   while (a->num_bits > 0) {
      header[k++] = (stbi_uc) (a->code_buffer & 255); // suppress MSVC run-time check
      a->code_buffer >>= 8;
      a->num_bits -= 8;
   }
   STBI_ASSERT(a->num_bits == 0);
   // now fill header the normal way
   while (k < 4)
      header[k++] = stbi__zget8(a);
   len  = header[1] * 256 + header[0];
   nlen = header[3] * 256 + header[2];
   if (nlen != (len ^ 0xffff)) return stbi__err("zlib corrupt","Corrupt PNG");
   // compare against the remaining byte counts rather than computing
   // 'pointer + len', which is undefined once it passes the end of the buffer
   if (len > a->zbuffer_end - a->zbuffer) return stbi__err("read past buffer","Corrupt PNG");
   if (len > a->zout_end - a->zout)
      if (!stbi__zexpand(a, a->zout, len)) return 0;
   memcpy(a->zout, a->zbuffer, len);
   a->zbuffer += len;
   a->zout += len;
   return 1;
}
4163 
// Validate the two-byte zlib stream header (CMF, FLG).
// Returns 1 if acceptable, otherwise the value of stbi__err().
static int stbi__parse_zlib_header(stbi__zbuf *a)
{
   int cmf, flg, method;
   cmf = stbi__zget8(a);
   flg = stbi__zget8(a);
   method = cmf & 15;
   // the high nibble of cmf (window size, 1 << (8 + cinfo)) is ignored:
   // output is fully buffered, so the window size does not matter here
   if ((cmf*256+flg) % 31 != 0)
      return stbi__err("bad zlib header","Corrupt PNG"); // zlib spec
   if ((flg & 32) != 0)
      return stbi__err("no preset dict","Corrupt PNG"); // preset dictionary not allowed in png
   if (method != 8)
      return stbi__err("bad compression","Corrupt PNG"); // DEFLATE required for png
   return 1;
}
4176 
// Code lengths of the fixed literal/length code (used for block type 1).
static const stbi_uc stbi__zdefault_length[288] =
{
   8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
   8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
   8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
   8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
   8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,
   9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,
   9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,
   9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,
   7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, 7,7,7,7,7,7,7,7,8,8,8,8,8,8,8,8
};
// Code lengths of the fixed distance code (used for block type 1).
static const stbi_uc stbi__zdefault_distance[32] =
{
   5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5
};
/*
Init algorithm:
{
   int i;   // use <= to match clearly with spec
   for (i=0; i <= 143; ++i)     stbi__zdefault_length[i]   = 8;
   for (   ; i <= 255; ++i)     stbi__zdefault_length[i]   = 9;
   for (   ; i <= 279; ++i)     stbi__zdefault_length[i]   = 7;
   for (   ; i <= 287; ++i)     stbi__zdefault_length[i]   = 8;

   for (i=0; i <=  31; ++i)     stbi__zdefault_distance[i] = 5;
}
*/
4205 
// Decode a whole DEFLATE stream (optionally preceded by a zlib header) block
// by block until the block flagged as final has been processed.
// Returns 1 on success and 0 on any failure.
static int stbi__parse_zlib(stbi__zbuf *a, int parse_header)
{
   int is_last, block_type;
   if (parse_header && !stbi__parse_zlib_header(a)) return 0;
   a->num_bits = 0;
   a->code_buffer = 0;
   for (;;) {
      is_last    = stbi__zreceive(a,1);
      block_type = stbi__zreceive(a,2);
      switch (block_type) {
         case 0: // stored block
            if (!stbi__parse_uncompressed_block(a)) return 0;
            break;
         case 3: // reserved block type
            return 0;
         case 1: // use fixed code lengths
            if (!stbi__zbuild_huffman(&a->z_length  , stbi__zdefault_length  , 288)) return 0;
            if (!stbi__zbuild_huffman(&a->z_distance, stbi__zdefault_distance,  32)) return 0;
            if (!stbi__parse_huffman_block(a)) return 0;
            break;
         default: // 2: code lengths are transmitted in the block
            if (!stbi__compute_huffman_codes(a)) return 0;
            if (!stbi__parse_huffman_block(a)) return 0;
            break;
      }
      if (is_last) break;
   }
   return 1;
}
4233 
// Point the decoder at an output buffer of 'olen' bytes starting at 'obuf',
// record whether it may be grown ('exp'), and run the decoder.
// Returns the result of stbi__parse_zlib().
static int stbi__do_zlib(stbi__zbuf *a, char *obuf, int olen, int exp, int parse_header)
{
   a->z_expandable = exp;
   a->zout_start   = obuf;
   a->zout_end     = obuf + olen;
   a->zout         = a->zout_start; // writing begins at the start of the buffer
   return stbi__parse_zlib(a, parse_header);
}
4243 
stbi_zlib_decode_malloc_guesssize(const char * buffer,int len,int initial_size,int * outlen)4244 STBIDEF char *stbi_zlib_decode_malloc_guesssize(const char *buffer, int len, int initial_size, int *outlen)
4245 {
4246    stbi__zbuf a;
4247    char *p = (char *) stbi__malloc(initial_size);
4248    if (p == NULL) return NULL;
4249    a.zbuffer = (stbi_uc *) buffer;
4250    a.zbuffer_end = (stbi_uc *) buffer + len;
4251    if (stbi__do_zlib(&a, p, initial_size, 1, 1)) {
4252       if (outlen) *outlen = (int) (a.zout - a.zout_start);
4253       return a.zout_start;
4254    } else {
4255       STBI_FREE(a.zout_start);
4256       return NULL;
4257    }
4258 }
4259 
stbi_zlib_decode_malloc(char const * buffer,int len,int * outlen)4260 STBIDEF char *stbi_zlib_decode_malloc(char const *buffer, int len, int *outlen)
4261 {
4262    return stbi_zlib_decode_malloc_guesssize(buffer, len, 16384, outlen);
4263 }
4264 
stbi_zlib_decode_malloc_guesssize_headerflag(const char * buffer,int len,int initial_size,int * outlen,int parse_header)4265 STBIDEF char *stbi_zlib_decode_malloc_guesssize_headerflag(const char *buffer, int len, int initial_size, int *outlen, int parse_header)
4266 {
4267    stbi__zbuf a;
4268    char *p = (char *) stbi__malloc(initial_size);
4269    if (p == NULL) return NULL;
4270    a.zbuffer = (stbi_uc *) buffer;
4271    a.zbuffer_end = (stbi_uc *) buffer + len;
4272    if (stbi__do_zlib(&a, p, initial_size, 1, parse_header)) {
4273       if (outlen) *outlen = (int) (a.zout - a.zout_start);
4274       return a.zout_start;
4275    } else {
4276       STBI_FREE(a.zout_start);
4277       return NULL;
4278    }
4279 }
4280 
stbi_zlib_decode_buffer(char * obuffer,int olen,char const * ibuffer,int ilen)4281 STBIDEF int stbi_zlib_decode_buffer(char *obuffer, int olen, char const *ibuffer, int ilen)
4282 {
4283    stbi__zbuf a;
4284    a.zbuffer = (stbi_uc *) ibuffer;
4285    a.zbuffer_end = (stbi_uc *) ibuffer + ilen;
4286    if (stbi__do_zlib(&a, obuffer, olen, 0, 1))
4287       return (int) (a.zout - a.zout_start);
4288    else
4289       return -1;
4290 }
4291 
stbi_zlib_decode_noheader_malloc(char const * buffer,int len,int * outlen)4292 STBIDEF char *stbi_zlib_decode_noheader_malloc(char const *buffer, int len, int *outlen)
4293 {
4294    stbi__zbuf a;
4295    char *p = (char *) stbi__malloc(16384);
4296    if (p == NULL) return NULL;
4297    a.zbuffer = (stbi_uc *) buffer;
4298    a.zbuffer_end = (stbi_uc *) buffer+len;
4299    if (stbi__do_zlib(&a, p, 16384, 1, 0)) {
4300       if (outlen) *outlen = (int) (a.zout - a.zout_start);
4301       return a.zout_start;
4302    } else {
4303       STBI_FREE(a.zout_start);
4304       return NULL;
4305    }
4306 }
4307 
stbi_zlib_decode_noheader_buffer(char * obuffer,int olen,const char * ibuffer,int ilen)4308 STBIDEF int stbi_zlib_decode_noheader_buffer(char *obuffer, int olen, const char *ibuffer, int ilen)
4309 {
4310    stbi__zbuf a;
4311    a.zbuffer = (stbi_uc *) ibuffer;
4312    a.zbuffer_end = (stbi_uc *) ibuffer + ilen;
4313    if (stbi__do_zlib(&a, obuffer, olen, 0, 0))
4314       return (int) (a.zout - a.zout_start);
4315    else
4316       return -1;
4317 }
4318 #endif
4319 
4320 // public domain "baseline" PNG decoder   v0.10  Sean Barrett 2006-11-18
4321 //    simple implementation
4322 //      - only 8-bit samples
4323 //      - no CRC checking
4324 //      - allocates lots of intermediate memory
4325 //        - avoids problem of streaming data between subsystems
4326 //        - avoids explicit window management
4327 //    performance
4328 //      - uses stb_zlib, a PD zlib implementation with fast huffman decoding
4329 
4330 #ifndef STBI_NO_PNG
// The two header fields of a PNG chunk, in the order they are read from the
// stream by stbi__get_chunk_header().
typedef struct
{
   stbi__uint32 length; // first big-endian 32-bit word of the chunk header
   stbi__uint32 type;   // second big-endian 32-bit word of the chunk header
} stbi__pngchunk;
4336 
stbi__get_chunk_header(stbi__context * s)4337 static stbi__pngchunk stbi__get_chunk_header(stbi__context *s)
4338 {
4339    stbi__pngchunk c;
4340    c.length = stbi__get32be(s);
4341    c.type   = stbi__get32be(s);
4342    return c;
4343 }
4344 
// Consume bytes from 's' and compare them against the 8-byte PNG signature.
// Returns 1 on a full match; stops at the first mismatching byte and returns
// the value of stbi__err().
static int stbi__check_png_header(stbi__context *s)
{
   static const stbi_uc png_sig[8] = { 137,80,78,71,13,10,26,10 };
   const stbi_uc *expected = png_sig;
   const stbi_uc *sig_end  = png_sig + 8;
   while (expected != sig_end) {
      if (stbi__get8(s) != *expected)
         return stbi__err("bad png sig","Not a PNG");
      ++expected;
   }
   return 1;
}
4353 
// Per-image PNG decoder state.
typedef struct
{
   stbi__context *s; // input context; also carries img_x / img_y / img_n
   // 'out' receives the buffer allocated by stbi__create_png_image_raw();
   // 'idata' and 'expanded' are not touched in this part of the file
   // (presumably the combined IDAT data and the palette-expanded image — TODO confirm)
   stbi_uc *idata, *expanded, *out;
   int depth; // presumably bits per sample — TODO confirm against the header parser
} stbi__png;
4360 
4361 
// Scanline filter types. Values 0..4 are the filter bytes that can appear in
// the data (anything above 4 is rejected by stbi__create_png_image_raw).
enum {
   STBI__F_none=0,
   STBI__F_sub=1,
   STBI__F_up=2,
   STBI__F_avg=3,
   STBI__F_paeth=4,
   // synthetic filters used for first scanline to avoid needing a dummy row of 0s
   STBI__F_avg_first,
   STBI__F_paeth_first
};
4372 
// Maps a stream filter byte (0..4) to the filter actually applied on the
// first scanline, where there is no previous row to sample.
static stbi_uc first_row_filter[5] =
{
   STBI__F_none,
   STBI__F_sub,
   STBI__F_none,        // "up" with no row above adds nothing
   STBI__F_avg_first,
   STBI__F_paeth_first
};
4381 
// Paeth predictor: of the three neighbours a, b, c, return the one closest
// to the estimate a + b - c. Ties are resolved in the order a, then b, then c.
static int stbi__paeth(int a, int b, int c)
{
   const int estimate = a + b - c;
   const int dist_a = abs(estimate - a);
   const int dist_b = abs(estimate - b);
   const int dist_c = abs(estimate - c);
   int best = c;
   if (dist_b <= dist_c) best = b;
   if (dist_a <= dist_b && dist_a <= dist_c) best = a; // a wins every tie
   return best;
}
4392 
// Indexed by bit depth: factor that stretches a 1/2/4/8-bit sample to 0..255
// (1-bit: 0xff, 2-bit: 0x55, 4-bit: 0x11, 8-bit: 0x01); other depths are 0.
static const stbi_uc stbi__depth_scale_table[9] = { 0, 0xff, 0x55, 0, 0x11, 0,0,0, 0x01 };
4394 
// create the png data from post-deflated data
//
// Un-filters 'y' scanlines of 'x' pixels from 'raw' (raw_len bytes; every
// scanline starts with a filter-type byte) into a newly allocated a->out.
//   out_n  - channels per output pixel; must be s->img_n or s->img_n+1
//            (the extra channel is filled with opaque alpha)
//   depth  - bits per sample (1/2/4 are expanded to one byte per sample,
//            16-bit samples are converted to native byte order)
//   color  - when 0, samples with depth < 8 are scaled up to the 0..255 range
// Returns 1 on success, or the value of stbi__err() on failure.
static int stbi__create_png_image_raw(stbi__png *a, stbi_uc *raw, stbi__uint32 raw_len, int out_n, stbi__uint32 x, stbi__uint32 y, int depth, int color)
{
   int bytes = (depth == 16? 2 : 1);
   stbi__context *s = a->s;
   stbi__uint32 i,j,stride = x*out_n*bytes; // bytes per output row
   stbi__uint32 img_len, img_width_bytes;
   int k;
   int img_n = s->img_n; // copy it into a local for later

   int output_bytes = out_n*bytes; // bytes per output pixel
   int filter_bytes = img_n*bytes; // bytes per input pixel (the filter's "previous pixel" distance)
   int width = x;

   STBI_ASSERT(out_n == s->img_n || out_n == s->img_n+1);
   a->out = (stbi_uc *) stbi__malloc_mad3(x, y, output_bytes, 0); // extra bytes to write off the end into
   if (!a->out) return stbi__err("outofmem", "Out of memory");

   if (!stbi__mad3sizes_valid(img_n, x, depth, 7)) return stbi__err("too large", "Corrupt PNG");
   img_width_bytes = (((img_n * x * depth) + 7) >> 3); // packed bytes per input scanline, rounded up
   img_len = (img_width_bytes + 1) * y;                // +1 for the filter byte of each scanline

   // we used to check for exact match between raw_len and img_len on non-interlaced PNGs,
   // but issue #276 reported a PNG in the wild that had extra data at the end (all zeros),
   // so just check for raw_len < img_len always.
   if (raw_len < img_len) return stbi__err("not enough pixels","Corrupt PNG");

   for (j=0; j < y; ++j) {
      stbi_uc *cur = a->out + stride*j;
      stbi_uc *prior;
      int filter = *raw++;

      if (filter > 4)
         return stbi__err("invalid filter","Corrupt PNG");

      if (depth < 8) {
         STBI_ASSERT(img_width_bytes <= x);
         cur += x*out_n - img_width_bytes; // store output to the rightmost img_len bytes, so we can decode in place
         filter_bytes = 1;
         width = img_width_bytes;
      }
      prior = cur - stride; // bugfix: need to compute this after 'cur +=' computation above

      // if first row, use special filter that doesn't sample previous row
      if (j == 0) filter = first_row_filter[filter];

      // handle first byte explicitly
      for (k=0; k < filter_bytes; ++k) {
         switch (filter) {
            case STBI__F_none       : cur[k] = raw[k]; break;
            case STBI__F_sub        : cur[k] = raw[k]; break;
            case STBI__F_up         : cur[k] = STBI__BYTECAST(raw[k] + prior[k]); break;
            case STBI__F_avg        : cur[k] = STBI__BYTECAST(raw[k] + (prior[k]>>1)); break;
            case STBI__F_paeth      : cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(0,prior[k],0)); break;
            case STBI__F_avg_first  : cur[k] = raw[k]; break;
            case STBI__F_paeth_first: cur[k] = raw[k]; break;
         }
      }

      // step past the first pixel, filling in its alpha when a channel is being added
      if (depth == 8) {
         if (img_n != out_n)
            cur[img_n] = 255; // first pixel
         raw += img_n;
         cur += out_n;
         prior += out_n;
      } else if (depth == 16) {
         if (img_n != out_n) {
            cur[filter_bytes]   = 255; // first pixel top byte
            cur[filter_bytes+1] = 255; // first pixel bottom byte
         }
         raw += filter_bytes;
         cur += output_bytes;
         prior += output_bytes;
      } else {
         raw += 1;
         cur += 1;
         prior += 1;
      }

      // this is a little gross, so that we don't switch per-pixel or per-component
      if (depth < 8 || img_n == out_n) {
         // input and output pixel sizes match: un-filter the rest of the row as a flat byte run
         int nk = (width - 1)*filter_bytes;
         #define STBI__CASE(f) \
             case f:     \
                for (k=0; k < nk; ++k)
         switch (filter) {
            // "none" filter turns into a memcpy here; make that explicit.
            case STBI__F_none:         memcpy(cur, raw, nk); break;
            STBI__CASE(STBI__F_sub)          { cur[k] = STBI__BYTECAST(raw[k] + cur[k-filter_bytes]); } break;
            STBI__CASE(STBI__F_up)           { cur[k] = STBI__BYTECAST(raw[k] + prior[k]); } break;
            STBI__CASE(STBI__F_avg)          { cur[k] = STBI__BYTECAST(raw[k] + ((prior[k] + cur[k-filter_bytes])>>1)); } break;
            STBI__CASE(STBI__F_paeth)        { cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(cur[k-filter_bytes],prior[k],prior[k-filter_bytes])); } break;
            STBI__CASE(STBI__F_avg_first)    { cur[k] = STBI__BYTECAST(raw[k] + (cur[k-filter_bytes] >> 1)); } break;
            STBI__CASE(STBI__F_paeth_first)  { cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(cur[k-filter_bytes],0,0)); } break;
         }
         #undef STBI__CASE
         raw += nk;
      } else {
         // an alpha channel is being added: go pixel by pixel, reading filter_bytes
         // and advancing the output by output_bytes
         STBI_ASSERT(img_n+1 == out_n);
         #define STBI__CASE(f) \
             case f:     \
                for (i=x-1; i >= 1; --i, cur[filter_bytes]=255,raw+=filter_bytes,cur+=output_bytes,prior+=output_bytes) \
                   for (k=0; k < filter_bytes; ++k)
         switch (filter) {
            STBI__CASE(STBI__F_none)         { cur[k] = raw[k]; } break;
            STBI__CASE(STBI__F_sub)          { cur[k] = STBI__BYTECAST(raw[k] + cur[k- output_bytes]); } break;
            STBI__CASE(STBI__F_up)           { cur[k] = STBI__BYTECAST(raw[k] + prior[k]); } break;
            STBI__CASE(STBI__F_avg)          { cur[k] = STBI__BYTECAST(raw[k] + ((prior[k] + cur[k- output_bytes])>>1)); } break;
            STBI__CASE(STBI__F_paeth)        { cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(cur[k- output_bytes],prior[k],prior[k- output_bytes])); } break;
            STBI__CASE(STBI__F_avg_first)    { cur[k] = STBI__BYTECAST(raw[k] + (cur[k- output_bytes] >> 1)); } break;
            STBI__CASE(STBI__F_paeth_first)  { cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(cur[k- output_bytes],0,0)); } break;
         }
         #undef STBI__CASE

         // the loop above sets the high byte of the pixels' alpha, but for
         // 16 bit png files we also need the low byte set. we'll do that here.
         if (depth == 16) {
            cur = a->out + stride*j; // start at the beginning of the row again
            for (i=0; i < x; ++i,cur+=output_bytes) {
               cur[filter_bytes+1] = 255;
            }
         }
      }
   }

   // we make a separate pass to expand bits to pixels; for performance,
   // this could run two scanlines behind the above code, so it won't
   // interfere with filtering but will still be in the cache.
   if (depth < 8) {
      for (j=0; j < y; ++j) {
         stbi_uc *cur = a->out + stride*j;
         stbi_uc *in  = a->out + stride*j + x*out_n - img_width_bytes; // packed data was stored right-aligned in the row
         // unpack 1/2/4-bit into a 8-bit buffer. allows us to keep the common 8-bit path optimal at minimal cost for 1/2/4-bit
         // png guarantees byte alignment, if width is not multiple of 8/4/2 we'll decode dummy trailing data that will be skipped in the later loop
         stbi_uc scale = (color == 0) ? stbi__depth_scale_table[depth] : 1; // scale grayscale values to 0..255 range

         // note that the final byte might overshoot and write more data than desired.
         // we can allocate enough data that this never writes out of memory, but it
         // could also overwrite the next scanline. can it overwrite non-empty data
         // on the next scanline? yes, consider 1-pixel-wide scanlines with 1-bit-per-pixel.
         // so we need to explicitly clamp the final ones

         if (depth == 4) {
            for (k=x*img_n; k >= 2; k-=2, ++in) {
               *cur++ = scale * ((*in >> 4)       );
               *cur++ = scale * ((*in     ) & 0x0f);
            }
            if (k > 0) *cur++ = scale * ((*in >> 4)       );
         } else if (depth == 2) {
            for (k=x*img_n; k >= 4; k-=4, ++in) {
               *cur++ = scale * ((*in >> 6)       );
               *cur++ = scale * ((*in >> 4) & 0x03);
               *cur++ = scale * ((*in >> 2) & 0x03);
               *cur++ = scale * ((*in     ) & 0x03);
            }
            if (k > 0) *cur++ = scale * ((*in >> 6)       );
            if (k > 1) *cur++ = scale * ((*in >> 4) & 0x03);
            if (k > 2) *cur++ = scale * ((*in >> 2) & 0x03);
         } else if (depth == 1) {
            for (k=x*img_n; k >= 8; k-=8, ++in) {
               *cur++ = scale * ((*in >> 7)       );
               *cur++ = scale * ((*in >> 6) & 0x01);
               *cur++ = scale * ((*in >> 5) & 0x01);
               *cur++ = scale * ((*in >> 4) & 0x01);
               *cur++ = scale * ((*in >> 3) & 0x01);
               *cur++ = scale * ((*in >> 2) & 0x01);
               *cur++ = scale * ((*in >> 1) & 0x01);
               *cur++ = scale * ((*in     ) & 0x01);
            }
            if (k > 0) *cur++ = scale * ((*in >> 7)       );
            if (k > 1) *cur++ = scale * ((*in >> 6) & 0x01);
            if (k > 2) *cur++ = scale * ((*in >> 5) & 0x01);
            if (k > 3) *cur++ = scale * ((*in >> 4) & 0x01);
            if (k > 4) *cur++ = scale * ((*in >> 3) & 0x01);
            if (k > 5) *cur++ = scale * ((*in >> 2) & 0x01);
            if (k > 6) *cur++ = scale * ((*in >> 1) & 0x01);
         }
         if (img_n != out_n) {
            int q;
            // insert alpha = 255
            // (walk backwards so each source pixel is read before its slot is overwritten)
            cur = a->out + stride*j;
            if (img_n == 1) {
               for (q=x-1; q >= 0; --q) {
                  cur[q*2+1] = 255;
                  cur[q*2+0] = cur[q];
               }
            } else {
               STBI_ASSERT(img_n == 3);
               for (q=x-1; q >= 0; --q) {
                  cur[q*4+3] = 255;
                  cur[q*4+2] = cur[q*3+2];
                  cur[q*4+1] = cur[q*3+1];
                  cur[q*4+0] = cur[q*3+0];
               }
            }
         }
      }
   } else if (depth == 16) {
      // force the image data from big-endian to platform-native.
      // this is done in a separate pass due to the decoding relying
      // on the data being untouched, but could probably be done
      // per-line during decode if care is taken.
      stbi_uc *cur = a->out;
      stbi__uint16 *cur16 = (stbi__uint16*)cur;

      for(i=0; i < x*y*out_n; ++i,cur16++,cur+=2) {
         *cur16 = (cur[0] << 8) | cur[1];
      }
   }

   return 1;
}
4607 
// Build the decoded image in a->out from the already-inflated PNG stream.
//
//   a               decoder state; a->s supplies img_x, img_y and img_n
//   image_data      inflated scanline data
//   image_data_len  number of bytes available at image_data
//   out_n           components per pixel to produce
//   depth           bits per component as stored in the file
//   color           PNG colour type, forwarded to the raw decoder
//   interlaced      nonzero if the data is stored as seven interlace passes
//
// Non-interlaced data is handed straight to stbi__create_png_image_raw.
// Interlaced data is decoded one pass at a time into a temporary a->out,
// whose pixels are then scattered into the full-size buffer.
//
// Returns 1 on success, 0 on failure (out of memory or a failed pass).
static int stbi__create_png_image(stbi__png *a, stbi_uc *image_data, stbi__uint32 image_data_len, int out_n, int depth, int color, int interlaced)
{
   // origin and spacing, in pixels, of each of the seven passes
   static const int xorig[] = { 0,4,0,2,0,1,0 };
   static const int yorig[] = { 0,0,4,0,2,0,1 };
   static const int xspc[]  = { 8,8,4,4,2,2,1 };
   static const int yspc[]  = { 8,8,8,4,4,2,2 };
   int bytes = (depth == 16 ? 2 : 1);
   int out_bytes = out_n * bytes;
   stbi_uc *final;
   int p;
   if (!interlaced)
      return stbi__create_png_image_raw(a, image_data, image_data_len, out_n, a->s->img_x, a->s->img_y, depth, color);

   // de-interlacing
   final = (stbi_uc *) stbi__malloc_mad3(a->s->img_x, a->s->img_y, out_bytes, 0);
   // without this check a failed allocation would be written through below
   if (!final) return stbi__err("outofmem", "Out of memory");
   for (p=0; p < 7; ++p) {
      int i,j,x,y;
      // size of this pass, rounding up; e.g. for the second pass
      // (xorig 4, xspc 8) a width of 4 gives 0 columns, 5 gives 1, 12 gives 1
      x = (a->s->img_x - xorig[p] + xspc[p]-1) / xspc[p];
      y = (a->s->img_y - yorig[p] + yspc[p]-1) / yspc[p];
      if (x && y) {
         // bytes this pass occupies in the stream: packed row bytes plus one extra byte per row
         stbi__uint32 img_len = ((((a->s->img_n * x * depth) + 7) >> 3) + 1) * y;
         if (!stbi__create_png_image_raw(a, image_data, image_data_len, out_n, x, y, depth, color)) {
            STBI_FREE(final);
            return 0;
         }
         // scatter the pass's pixels to their positions in the full image
         for (j=0; j < y; ++j) {
            for (i=0; i < x; ++i) {
               int out_y = j*yspc[p]+yorig[p];
               int out_x = i*xspc[p]+xorig[p];
               memcpy(final + out_y*a->s->img_x*out_bytes + out_x*out_bytes,
                      a->out + (j*x+i)*out_bytes, out_bytes);
            }
         }
         STBI_FREE(a->out);
         image_data += img_len;
         image_data_len -= img_len;
      }
   }
   a->out = final;

   return 1;
}
4651 
// Apply colour-key transparency to the 8-bit image in z->out.
//
//   z      decoder state; z->out holds img_x*img_y pixels of out_n bytes each
//   tc     the transparent colour; only tc[0] is used when out_n == 2
//   out_n  2 (grey+alpha) or 4 (rgb+alpha)
//
// For out_n == 2 every alpha byte is rewritten (0 on a match, 255 otherwise).
// For out_n == 4 only matching pixels are touched; the rest are assumed to
// already carry alpha 255. Always returns 1.
static int stbi__compute_transparency(stbi__png *z, stbi_uc tc[3], int out_n)
{
   stbi__uint32 remaining = z->s->img_x * z->s->img_y;
   stbi_uc *px = z->out;

   STBI_ASSERT(out_n == 2 || out_n == 4);

   if (out_n != 2) {
      for (; remaining > 0; --remaining, px += 4) {
         if (px[0] == tc[0] && px[1] == tc[1] && px[2] == tc[2])
            px[3] = 0;
      }
      return 1;
   }

   for (; remaining > 0; --remaining, px += 2) {
      if (px[0] == tc[0])
         px[1] = 0;
      else
         px[1] = 255;
   }
   return 1;
}
4676 
// 16-bit counterpart of stbi__compute_transparency: apply colour-key
// transparency to the image in z->out, viewed as stbi__uint16 samples.
//
//   z      decoder state; z->out holds img_x*img_y pixels of out_n samples each
//   tc     the transparent colour; only tc[0] is used when out_n == 2
//   out_n  2 (grey+alpha) or 4 (rgb+alpha)
//
// For out_n == 2 every alpha sample is rewritten (0 on a match, 65535
// otherwise). For out_n == 4 only matching pixels are touched; the rest are
// assumed to already carry alpha 65535. Always returns 1.
static int stbi__compute_transparency16(stbi__png *z, stbi__uint16 tc[3], int out_n)
{
   stbi__uint32 remaining = z->s->img_x * z->s->img_y;
   stbi__uint16 *px = (stbi__uint16*) z->out;

   STBI_ASSERT(out_n == 2 || out_n == 4);

   if (out_n != 2) {
      for (; remaining > 0; --remaining, px += 4) {
         if (px[0] == tc[0] && px[1] == tc[1] && px[2] == tc[2])
            px[3] = 0;
      }
      return 1;
   }

   for (; remaining > 0; --remaining, px += 2) {
      if (px[0] == tc[0])
         px[1] = 0;
      else
         px[1] = 65535;
   }
   return 1;
}
4701 
// Replace the one-byte-per-pixel index image in a->out with a newly
// allocated image of palette colours.
//
//   a          decoder state; a->out holds img_x*img_y palette indices
//   palette    table of 4-byte entries, addressed as palette[index*4 + channel]
//   len        unused
//   pal_img_n  3 to emit the first three bytes of each entry; any other
//              value emits all four
//
// Returns 1 on success. On allocation failure returns the result of
// stbi__err and leaves a->out untouched.
static int stbi__expand_png_palette(stbi__png *a, stbi_uc *palette, int len, int pal_img_n)
{
   stbi__uint32 count = a->s->img_x * a->s->img_y;
   stbi_uc *src = a->out;
   stbi_uc *end = src + count;
   stbi_uc *expanded, *dst;

   STBI_NOTUSED(len);

   expanded = (stbi_uc *) stbi__malloc_mad2(count, pal_img_n, 0);
   if (expanded == NULL) return stbi__err("outofmem", "Out of memory");

   // from here until a->out is freed below, an early exit would leak 'expanded'
   dst = expanded;

   if (pal_img_n == 3) {
      for (; src != end; ++src, dst += 3) {
         const stbi_uc *entry = palette + (*src)*4;
         dst[0] = entry[0];
         dst[1] = entry[1];
         dst[2] = entry[2];
      }
   } else {
      for (; src != end; ++src, dst += 4) {
         const stbi_uc *entry = palette + (*src)*4;
         dst[0] = entry[0];
         dst[1] = entry[1];
         dst[2] = entry[2];
         dst[3] = entry[3];
      }
   }

   // swap the index buffer for the expanded one
   STBI_FREE(a->out);
   a->out = expanded;

   return 1;
}
4738 
// File-scope load options, both off by default. They are written by
// stbi_set_unpremultiply_on_load() and stbi_convert_iphone_png_to_rgb()
// and read while finishing a PNG that contained a CgBI chunk.
static int stbi__unpremultiply_on_load = 0;
static int stbi__de_iphone_flag = 0;
4741 
// Store the caller's choice in stbi__unpremultiply_on_load. When nonzero,
// stbi__de_iphone divides the colour channels of 4-component pixels by their
// alpha (with rounding) while swapping bgr to rgb.
STBIDEF void stbi_set_unpremultiply_on_load(int flag_true_if_should_unpremultiply)
{
   stbi__unpremultiply_on_load = flag_true_if_should_unpremultiply;
}
4746 
// Store the caller's choice in stbi__de_iphone_flag. When nonzero, a PNG
// that contained a CgBI chunk and decodes to more than 2 components is
// passed through stbi__de_iphone (bgr -> rgb) after decoding.
STBIDEF void stbi_convert_iphone_png_to_rgb(int flag_true_if_should_convert)
{
   stbi__de_iphone_flag = flag_true_if_should_convert;
}
4751 
// Convert the decoded 8-bit image in z->out from bgr(a) to rgb(a) order, in
// place. z->s->img_out_n must be 3 or 4. For 4-component images, if
// stbi__unpremultiply_on_load is set, each colour channel is also divided by
// alpha (rounded to nearest); pixels with alpha 0 are only swapped.
static void stbi__de_iphone(stbi__png *z)
{
   stbi__context *ctx = z->s;
   stbi__uint32 left = ctx->img_x * ctx->img_y;
   stbi_uc *px = z->out;

   if (ctx->img_out_n == 3) {
      // bgr -> rgb
      for (; left > 0; --left, px += 3) {
         stbi_uc b = px[0];
         px[0] = px[2];
         px[2] = b;
      }
      return;
   }

   STBI_ASSERT(ctx->img_out_n == 4);

   if (!stbi__unpremultiply_on_load) {
      // bgra -> rgba, alpha untouched
      for (; left > 0; --left, px += 4) {
         stbi_uc b = px[0];
         px[0] = px[2];
         px[2] = b;
      }
      return;
   }

   // bgra -> rgba and undo premultiplied alpha
   for (; left > 0; --left, px += 4) {
      stbi_uc alpha = px[3];
      stbi_uc b = px[0];
      if (alpha == 0) {
         // nothing to divide by: just reorder
         px[0] = px[2];
         px[2] = b;
      } else {
         stbi_uc half = alpha / 2; // rounding term
         px[0] = (px[2] * 255 + half) / alpha;
         px[1] = (px[1] * 255 + half) / alpha;
         px[2] = ( b    * 255 + half) / alpha;
      }
   }
}
4794 
// Pack four chunk-name characters into one unsigned value, first character
// in the most significant byte, for comparison against a chunk header's type.
#define STBI__PNG_TYPE(a,b,c,d)  (((unsigned) (a) << 24) + ((unsigned) (b) << 16) + ((unsigned) (c) << 8) + (unsigned) (d))
4796 
// Walk the chunks of a PNG stream. How far it goes depends on 'scan':
//   STBI__SCAN_type    only verify the PNG signature
//   STBI__SCAN_header  stop as soon as s->img_x, s->img_y and s->img_n are known
//   STBI__SCAN_load    collect all IDAT data and, at IEND, decode it into z->out
//
//   z         decoder state; z->expanded, z->idata and z->out are reset to
//             NULL on entry. z->depth is filled in from IHDR.
//   req_comp  requested component count; only consulted at IEND to choose
//             the number of output components
//
// Returns 1 on success and 0 on failure. Buffers attached to z when a
// failure occurs are not released here (stbi__do_png frees them afterwards).
static int stbi__parse_png_file(stbi__png *z, int scan, int req_comp)
{
   // palette holds up to 256 entries of 4 bytes (r,g,b,a);
   // pal_img_n is 0 for non-paletted images, else 3, or 4 once a tRNS chunk is seen
   stbi_uc palette[1024], pal_img_n=0;
   // has_trans: a colour-key tRNS was seen; tc / tc16 hold that key for 8-/16-bit output
   stbi_uc has_trans=0, tc[3]={0};
   stbi__uint16 tc16[3];
   // ioff: bytes of IDAT data gathered so far; idata_limit: capacity of z->idata
   stbi__uint32 ioff=0, idata_limit=0, i, pal_len=0;
   // first stays 1 until IHDR has been processed
   int first=1,k,interlace=0, color=0, is_iphone=0;
   stbi__context *s = z->s;

   z->expanded = NULL;
   z->idata = NULL;
   z->out = NULL;

   if (!stbi__check_png_header(s)) return 0;

   if (scan == STBI__SCAN_type) return 1;

   for (;;) {
      stbi__pngchunk c = stbi__get_chunk_header(s);
      switch (c.type) {
         // CgBI marks the file as iPhone-format: the zlib stream is then
         // decoded without its header and pixels may need stbi__de_iphone
         case STBI__PNG_TYPE('C','g','B','I'):
            is_iphone = 1;
            stbi__skip(s, c.length);
            break;
         case STBI__PNG_TYPE('I','H','D','R'): {
            int comp,filter;
            if (!first) return stbi__err("multiple IHDR","Corrupt PNG");
            first = 0;
            if (c.length != 13) return stbi__err("bad IHDR len","Corrupt PNG");
            s->img_x = stbi__get32be(s); if (s->img_x > (1 << 24)) return stbi__err("too large","Very large image (corrupt?)");
            s->img_y = stbi__get32be(s); if (s->img_y > (1 << 24)) return stbi__err("too large","Very large image (corrupt?)");
            z->depth = stbi__get8(s);  if (z->depth != 1 && z->depth != 2 && z->depth != 4 && z->depth != 8 && z->depth != 16)  return stbi__err("1/2/4/8/16-bit only","PNG not supported: 1/2/4/8/16-bit only");
            // colour type bits as used below: 1 = palette (only valid as type 3), 2 = colour, 4 = alpha
            color = stbi__get8(s);  if (color > 6)         return stbi__err("bad ctype","Corrupt PNG");
            if (color == 3 && z->depth == 16)                  return stbi__err("bad ctype","Corrupt PNG");
            if (color == 3) pal_img_n = 3; else if (color & 1) return stbi__err("bad ctype","Corrupt PNG");
            comp  = stbi__get8(s);  if (comp) return stbi__err("bad comp method","Corrupt PNG");
            filter= stbi__get8(s);  if (filter) return stbi__err("bad filter method","Corrupt PNG");
            interlace = stbi__get8(s); if (interlace>1) return stbi__err("bad interlace method","Corrupt PNG");
            if (!s->img_x || !s->img_y) return stbi__err("0-pixel image","Corrupt PNG");
            if (!pal_img_n) {
               s->img_n = (color & 2 ? 3 : 1) + (color & 4 ? 1 : 0);
               // reject images where img_x * img_y * img_n would exceed 1 << 30
               if ((1 << 30) / s->img_x / s->img_n < s->img_y) return stbi__err("too large", "Image too large to decode");
               if (scan == STBI__SCAN_header) return 1;
            } else {
               // if paletted, then pal_n is our final components, and
               // img_n is # components to decompress/filter.
               s->img_n = 1;
               // same size limit, assuming the worst case of 4 output components
               if ((1 << 30) / s->img_x / 4 < s->img_y) return stbi__err("too large","Corrupt PNG");
               // if SCAN_header, have to scan to see if we have a tRNS
            }
            break;
         }

         case STBI__PNG_TYPE('P','L','T','E'):  {
            if (first) return stbi__err("first not IHDR", "Corrupt PNG");
            if (c.length > 256*3) return stbi__err("invalid PLTE","Corrupt PNG");
            pal_len = c.length / 3;
            if (pal_len * 3 != c.length) return stbi__err("invalid PLTE","Corrupt PNG");
            // file stores r,g,b triples; widen to 4-byte entries with opaque alpha
            for (i=0; i < pal_len; ++i) {
               palette[i*4+0] = stbi__get8(s);
               palette[i*4+1] = stbi__get8(s);
               palette[i*4+2] = stbi__get8(s);
               palette[i*4+3] = 255;
            }
            break;
         }

         case STBI__PNG_TYPE('t','R','N','S'): {
            if (first) return stbi__err("first not IHDR", "Corrupt PNG");
            if (z->idata) return stbi__err("tRNS after IDAT","Corrupt PNG");
            if (pal_img_n) {
               // paletted: the chunk supplies one alpha byte per palette entry
               if (scan == STBI__SCAN_header) { s->img_n = 4; return 1; }
               if (pal_len == 0) return stbi__err("tRNS before PLTE","Corrupt PNG");
               if (c.length > pal_len) return stbi__err("bad tRNS len","Corrupt PNG");
               pal_img_n = 4;
               for (i=0; i < c.length; ++i)
                  palette[i*4+3] = stbi__get8(s);
            } else {
               // non-paletted: the chunk supplies one 16-bit colour-key value per component
               if (!(s->img_n & 1)) return stbi__err("tRNS with alpha","Corrupt PNG");
               if (c.length != (stbi__uint32) s->img_n*2) return stbi__err("bad tRNS len","Corrupt PNG");
               has_trans = 1;
               if (z->depth == 16) {
                  for (k = 0; k < s->img_n; ++k) tc16[k] = (stbi__uint16)stbi__get16be(s); // copy the values as-is
               } else {
                  for (k = 0; k < s->img_n; ++k) tc[k] = (stbi_uc)(stbi__get16be(s) & 255) * stbi__depth_scale_table[z->depth]; // non 8-bit images will be larger
               }
            }
            break;
         }

         case STBI__PNG_TYPE('I','D','A','T'): {
            if (first) return stbi__err("first not IHDR", "Corrupt PNG");
            if (pal_img_n && !pal_len) return stbi__err("no PLTE","Corrupt PNG");
            if (scan == STBI__SCAN_header) { s->img_n = pal_img_n; return 1; }
            // guard against ioff + c.length wrapping
            if ((int)(ioff + c.length) < (int)ioff) return 0;
            if (ioff + c.length > idata_limit) {
               // grow z->idata by doubling until this chunk fits
               stbi__uint32 idata_limit_old = idata_limit;
               stbi_uc *p;
               if (idata_limit == 0) idata_limit = c.length > 4096 ? c.length : 4096;
               while (ioff + c.length > idata_limit)
                  idata_limit *= 2;
               STBI_NOTUSED(idata_limit_old);
               p = (stbi_uc *) STBI_REALLOC_SIZED(z->idata, idata_limit_old, idata_limit); if (p == NULL) return stbi__err("outofmem", "Out of memory");
               z->idata = p;
            }
            if (!stbi__getn(s, z->idata+ioff,c.length)) return stbi__err("outofdata","Corrupt PNG");
            ioff += c.length;
            break;
         }

         case STBI__PNG_TYPE('I','E','N','D'): {
            stbi__uint32 raw_len, bpl;
            if (first) return stbi__err("first not IHDR", "Corrupt PNG");
            if (scan != STBI__SCAN_load) return 1;
            if (z->idata == NULL) return stbi__err("no IDAT","Corrupt PNG");
            // initial guess for decoded data size to avoid unnecessary reallocs
            bpl = (s->img_x * z->depth + 7) / 8; // bytes per line, per component
            raw_len = bpl * s->img_y * s->img_n /* pixels */ + s->img_y /* filter mode per row */;
            z->expanded = (stbi_uc *) stbi_zlib_decode_malloc_guesssize_headerflag((char *) z->idata, ioff, raw_len, (int *) &raw_len, !is_iphone);
            if (z->expanded == NULL) return 0; // zlib should set error
            STBI_FREE(z->idata); z->idata = NULL;
            // decode with one extra (alpha) component if the caller asked for
            // exactly that on a non-paletted image, or if a colour key must be applied
            if ((req_comp == s->img_n+1 && req_comp != 3 && !pal_img_n) || has_trans)
               s->img_out_n = s->img_n+1;
            else
               s->img_out_n = s->img_n;
            if (!stbi__create_png_image(z, z->expanded, raw_len, s->img_out_n, z->depth, color, interlace)) return 0;
            if (has_trans) {
               if (z->depth == 16) {
                  if (!stbi__compute_transparency16(z, tc16, s->img_out_n)) return 0;
               } else {
                  if (!stbi__compute_transparency(z, tc, s->img_out_n)) return 0;
               }
            }
            if (is_iphone && stbi__de_iphone_flag && s->img_out_n > 2)
               stbi__de_iphone(z);
            if (pal_img_n) {
               // pal_img_n == 3 or 4
               s->img_n = pal_img_n; // record the actual colors we had
               s->img_out_n = pal_img_n;
               if (req_comp >= 3) s->img_out_n = req_comp;
               if (!stbi__expand_png_palette(z, palette, pal_len, s->img_out_n))
                  return 0;
            } else if (has_trans) {
               // non-paletted image with tRNS -> source image has (constant) alpha
               ++s->img_n;
            }
            STBI_FREE(z->expanded); z->expanded = NULL;
            return 1;
         }

         default:
            // if critical, fail
            if (first) return stbi__err("first not IHDR", "Corrupt PNG");
            // bit 29 of the packed type is bit 5 of the first name character;
            // when it is clear the chunk is treated as critical
            if ((c.type & (1 << 29)) == 0) {
               #ifndef STBI_NO_FAILURE_STRINGS
               // not threadsafe
               static char invalid_chunk[] = "XXXX PNG chunk not known";
               invalid_chunk[0] = STBI__BYTECAST(c.type >> 24);
               invalid_chunk[1] = STBI__BYTECAST(c.type >> 16);
               invalid_chunk[2] = STBI__BYTECAST(c.type >>  8);
               invalid_chunk[3] = STBI__BYTECAST(c.type >>  0);
               #endif
               return stbi__err(invalid_chunk, "PNG not supported: unknown PNG chunk type");
            }
            stbi__skip(s, c.length);
            break;
      }
      // end of PNG chunk, read and skip CRC
      stbi__get32be(s);
   }
}
4968 
// Decode a whole PNG and hand the pixel buffer to the caller.
//
//   p         decoder state with p->s already set
//   x, y      receive the image dimensions (must be non-NULL)
//   n         if non-NULL, receives p->s->img_n as left by the parser
//   req_comp  0 to keep the decoded component count, or 1..4 to convert to
//   ri        receives bits_per_channel: 8 for depths below 8, else the depth
//
// Returns the pixel buffer, or NULL on failure. Whatever is still attached
// to p (out / expanded / idata) is freed before a normal return.
static void *stbi__do_png(stbi__png *p, int *x, int *y, int *n, int req_comp, stbi__result_info *ri)
{
   void *pixels = NULL;

   if (req_comp < 0 || req_comp > 4)
      return stbi__errpuc("bad req_comp", "Internal error");

   if (stbi__parse_png_file(p, STBI__SCAN_load, req_comp)) {
      stbi__context *ctx = p->s;

      ri->bits_per_channel = (p->depth < 8) ? 8 : p->depth;

      // detach the decoded buffer so the cleanup below does not free it
      pixels = p->out;
      p->out = NULL;

      if (req_comp != 0 && req_comp != ctx->img_out_n) {
         if (ri->bits_per_channel != 8)
            pixels = stbi__convert_format16((stbi__uint16 *) pixels, ctx->img_out_n, req_comp, ctx->img_x, ctx->img_y);
         else
            pixels = stbi__convert_format((unsigned char *) pixels, ctx->img_out_n, req_comp, ctx->img_x, ctx->img_y);
         ctx->img_out_n = req_comp;
         if (pixels == NULL) return NULL;
      }

      *x = ctx->img_x;
      *y = ctx->img_y;
      if (n != NULL) *n = ctx->img_n;
   }

   STBI_FREE(p->out);
   p->out = NULL;
   STBI_FREE(p->expanded);
   p->expanded = NULL;
   STBI_FREE(p->idata);
   p->idata = NULL;

   return pixels;
}
4998 
// Loader entry point for PNG: wrap the context in a stack-allocated
// stbi__png and run the full decode via stbi__do_png.
static void *stbi__png_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri)
{
   stbi__png png;

   png.s = s;
   return stbi__do_png(&png, x, y, comp, req_comp, ri);
}
5005 
// Report whether the stream passes stbi__check_png_header. The stream is
// rewound afterwards whatever the answer.
static int stbi__png_test(stbi__context *s)
{
   int is_png = stbi__check_png_header(s);

   stbi__rewind(s);
   return is_png;
}
5013 
// Run a header-only parse and report the image size and component count.
// Each of x, y and comp may be NULL if the caller does not need that value.
// Returns 1 on success. On failure the stream is rewound and 0 is returned.
static int stbi__png_info_raw(stbi__png *p, int *x, int *y, int *comp)
{
   if (stbi__parse_png_file(p, STBI__SCAN_header, 0)) {
      if (x)    *x    = p->s->img_x;
      if (y)    *y    = p->s->img_y;
      if (comp) *comp = p->s->img_n;
      return 1;
   }

   stbi__rewind( p->s );
   return 0;
}
5025 
// Info entry point for PNG: wrap the context in a stack-allocated stbi__png
// and forward to stbi__png_info_raw.
static int stbi__png_info(stbi__context *s, int *x, int *y, int *comp)
{
   stbi__png png;

   png.s = s;
   return stbi__png_info_raw(&png, x, y, comp);
}
5032 
// Return 1 if the stream is a PNG whose header declares 16 bits per
// component, else 0. If the header parse fails, stbi__png_info_raw has
// already rewound the stream; if the depth is not 16 it is rewound here.
static int stbi__png_is16(stbi__context *s)
{
   stbi__png png;

   png.s = s;
   if (!stbi__png_info_raw(&png, NULL, NULL, NULL))
      return 0;
   if (png.depth == 16)
      return 1;

   stbi__rewind(png.s);
   return 0;
}
5045 #endif
5046 
5047 // Microsoft/Windows BMP image
5048 
5049 #ifndef STBI_NO_BMP
// Check for a BMP signature and a recognised info-header size, without
// rewinding. Returns 1 if the stream starts with 'B','M' and the header-size
// field is one of 12, 40, 56, 108 or 124; otherwise 0.
static int stbi__bmp_test_raw(stbi__context *s)
{
   int hsz;

   // the second byte is only read when the first one matched
   if (stbi__get8(s) != 'B' || stbi__get8(s) != 'M')
      return 0;

   stbi__get32le(s); // discard filesize
   stbi__get16le(s); // discard reserved
   stbi__get16le(s); // discard reserved
   stbi__get32le(s); // discard data offset

   hsz = stbi__get32le(s);
   switch (hsz) {
      case 12:
      case 40:
      case 56:
      case 108:
      case 124:
         return 1;
      default:
         return 0;
   }
}
5064 
// Report whether the stream looks like a supported BMP, then rewind it.
static int stbi__bmp_test(stbi__context *s)
{
   int is_bmp = stbi__bmp_test_raw(s);

   stbi__rewind(s);
   return is_bmp;
}
5071 
5072 
// returns the index (0..31) of the highest set bit of z, or -1 if z is 0
static int stbi__high_bit(unsigned int z)
{
   int bit = 0;
   int step;

   if (z == 0) return -1;

   // binary search: whenever anything remains above the lower 'step' bits,
   // the answer lies in the upper part, so drop the lower part
   for (step = 16; step > 0; step >>= 1) {
      if (z >> step) {
         bit += step;
         z >>= step;
      }
   }
   return bit;
}
5085 
// returns the number of set bits among the low 32 bits of a
static int stbi__bitcount(unsigned int a)
{
   int count = 0;

   a &= 0xffffffffu; // only the low 32 bits take part
   while (a != 0) {
      a &= a - 1;    // clear the lowest set bit
      ++count;
   }
   return count;
}
5095 
// extract an arbitrarily-aligned N-bit value (N=bits)
// from v, and then make it 8-bits long and fractionally
// extend it to the full range.
//
//   v      pixel value already masked down to one channel
//   shift  amount to shift right so the channel's highest bit lands at
//          bit 7; negative means shift left
//   bits   number of bits in the channel
//
// Returns the channel scaled to 0..255. Channels wider than 8 bits are
// reduced to their 8 most significant bits; previously such a 'bits' value
// shifted by a negative count and indexed past the end of the tables.
static int stbi__shiftsigned(unsigned int v, int shift, int bits)
{
   // multiplying an N-bit value by mul_table[N] and shifting right by
   // shift_table[N] replicates its bit pattern across 8 bits
   static const unsigned int mul_table[9] = {
      0,
      0xff/*0b11111111*/, 0x55/*0b01010101*/, 0x49/*0b01001001*/, 0x11/*0b00010001*/,
      0x21/*0b00100001*/, 0x41/*0b01000001*/, 0x81/*0b10000001*/, 0x01/*0b00000001*/,
   };
   static const unsigned int shift_table[9] = {
      0, 0,0,1,0,2,4,6,0,
   };
   // validate 'bits' before it is used as a shift count or table index
   STBI_ASSERT(bits >= 0);
   if (bits > 8)
      bits = 8;
   if (shift < 0)
      v <<= -shift;
   else
      v >>= shift;
   STBI_ASSERT(v < 256);
   v >>= (8-bits);
   return (int) ((unsigned) v * mul_table[bits]) >> shift_table[bits];
}
5118 
// Header values gathered by stbi__bmp_parse_header for the BMP loader.
typedef struct
{
   // bpp:    bits per pixel, as read from the header
   // offset: data-offset field of the file header
   // hsz:    info-header size (12, 40, 56, 108 or 124)
   int bpp, offset, hsz;
   // mr/mg/mb/ma: red/green/blue/alpha channel masks (0 when not set)
   // all_a: alpha accumulator; if it is 0 at the end of a load, an alpha
   //        channel was loaded but it was all 0
   unsigned int mr,mg,mb,ma, all_a;
} stbi__bmp_data;
5124 
// Read the BMP file header and info header from s.
//
// On success s->img_x and s->img_y hold the raw width/height fields, and
// info holds the bits per pixel, data offset, header size and channel masks
// (masks stay 0 where the header does not provide or imply them).
//
// Returns a non-NULL dummy pointer on success, or NULL via stbi__errpuc when
// the signature, header size, plane count or compression is not supported.
// Compression values other than 0 (uncompressed) and 3 (bitfields) are
// rejected: 1 and 2 are RLE, and anything else (such as embedded JPEG/PNG)
// would otherwise be decoded as if it were raw pixels.
static void *stbi__bmp_parse_header(stbi__context *s, stbi__bmp_data *info)
{
   int hsz;
   if (stbi__get8(s) != 'B' || stbi__get8(s) != 'M') return stbi__errpuc("not BMP", "Corrupt BMP");
   stbi__get32le(s); // discard filesize
   stbi__get16le(s); // discard reserved
   stbi__get16le(s); // discard reserved
   info->offset = stbi__get32le(s);
   info->hsz = hsz = stbi__get32le(s);
   info->mr = info->mg = info->mb = info->ma = 0;

   if (hsz != 12 && hsz != 40 && hsz != 56 && hsz != 108 && hsz != 124) return stbi__errpuc("unknown BMP", "BMP type not supported: unknown");
   if (hsz == 12) {
      // the 12-byte header stores 16-bit dimensions
      s->img_x = stbi__get16le(s);
      s->img_y = stbi__get16le(s);
   } else {
      s->img_x = stbi__get32le(s);
      s->img_y = stbi__get32le(s);
   }
   if (stbi__get16le(s) != 1) return stbi__errpuc("bad BMP", "bad BMP");
   info->bpp = stbi__get16le(s);
   if (hsz != 12) {
      int compress = stbi__get32le(s);
      if (compress == 1 || compress == 2) return stbi__errpuc("BMP RLE", "BMP type not supported: RLE");
      // only 0 and 3 are handled below; refuse everything else up front
      if (compress < 0 || compress > 3) return stbi__errpuc("BMP JPEG/PNG", "BMP type not supported: unsupported compression");
      stbi__get32le(s); // discard sizeof
      stbi__get32le(s); // discard hres
      stbi__get32le(s); // discard vres
      stbi__get32le(s); // discard colorsused
      stbi__get32le(s); // discard max important
      if (hsz == 40 || hsz == 56) {
         if (hsz == 56) {
            // skip the 16 extra bytes of the 56-byte header
            stbi__get32le(s);
            stbi__get32le(s);
            stbi__get32le(s);
            stbi__get32le(s);
         }
         if (info->bpp == 16 || info->bpp == 32) {
            if (compress == 0) {
               // no masks in the file: use the default layouts
               if (info->bpp == 32) {
                  info->mr = 0xffu << 16;
                  info->mg = 0xffu <<  8;
                  info->mb = 0xffu <<  0;
                  info->ma = 0xffu << 24;
                  info->all_a = 0; // if all_a is 0 at end, then we loaded alpha channel but it was all 0
               } else {
                  info->mr = 31u << 10;
                  info->mg = 31u <<  5;
                  info->mb = 31u <<  0;
               }
            } else if (compress == 3) {
               info->mr = stbi__get32le(s);
               info->mg = stbi__get32le(s);
               info->mb = stbi__get32le(s);
               // not documented, but generated by photoshop and handled by mspaint
               if (info->mr == info->mg && info->mg == info->mb) {
                  // three identical masks cannot describe separate channels
                  return stbi__errpuc("bad BMP", "bad BMP");
               }
            } else
               return stbi__errpuc("bad BMP", "bad BMP");
         }
      } else {
         int i;
         if (hsz != 108 && hsz != 124)
            return stbi__errpuc("bad BMP", "bad BMP");
         // the 108- and 124-byte headers always carry four masks
         info->mr = stbi__get32le(s);
         info->mg = stbi__get32le(s);
         info->mb = stbi__get32le(s);
         info->ma = stbi__get32le(s);
         stbi__get32le(s); // discard color space
         for (i=0; i < 12; ++i)
            stbi__get32le(s); // discard color space parameters
         if (hsz == 124) {
            stbi__get32le(s); // discard rendering intent
            stbi__get32le(s); // discard offset of profile data
            stbi__get32le(s); // discard size of profile data
            stbi__get32le(s); // discard reserved
         }
      }
   }
   return (void *) 1;
}
5207 
5208 
stbi__bmp_load(stbi__context * s,int * x,int * y,int * comp,int req_comp,stbi__result_info * ri)5209 static void *stbi__bmp_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri)
5210 {
5211    stbi_uc *out;
5212    unsigned int mr=0,mg=0,mb=0,ma=0, all_a;
5213    stbi_uc pal[256][4];
5214    int psize=0,i,j,width;
5215    int flip_vertically, pad, target;
5216    stbi__bmp_data info;
5217    STBI_NOTUSED(ri);
5218 
5219    info.all_a = 255;
5220    if (stbi__bmp_parse_header(s, &info) == NULL)
5221       return NULL; // error code already set
5222 
5223    flip_vertically = ((int) s->img_y) > 0;
5224    s->img_y = abs((int) s->img_y);
5225 
5226    mr = info.mr;
5227    mg = info.mg;
5228    mb = info.mb;
5229    ma = info.ma;
5230    all_a = info.all_a;
5231 
5232    if (info.hsz == 12) {
5233       if (info.bpp < 24)
5234          psize = (info.offset - 14 - 24) / 3;
5235    } else {
5236       if (info.bpp < 16)
5237          psize = (info.offset - 14 - info.hsz) >> 2;
5238    }
5239 
5240    s->img_n = ma ? 4 : 3;
5241    if (req_comp && req_comp >= 3) // we can directly decode 3 or 4
5242       target = req_comp;
5243    else
5244       target = s->img_n; // if they want monochrome, we'll post-convert
5245 
5246    // sanity-check size
5247    if (!stbi__mad3sizes_valid(target, s->img_x, s->img_y, 0))
5248       return stbi__errpuc("too large", "Corrupt BMP");
5249 
5250    out = (stbi_uc *) stbi__malloc_mad3(target, s->img_x, s->img_y, 0);
5251    if (!out) return stbi__errpuc("outofmem", "Out of memory");
5252    if (info.bpp < 16) {
5253       int z=0;
5254       if (psize == 0 || psize > 256) { STBI_FREE(out); return stbi__errpuc("invalid", "Corrupt BMP"); }
5255       for (i=0; i < psize; ++i) {
5256          pal[i][2] = stbi__get8(s);
5257          pal[i][1] = stbi__get8(s);
5258          pal[i][0] = stbi__get8(s);
5259          if (info.hsz != 12) stbi__get8(s);
5260          pal[i][3] = 255;
5261       }
5262       stbi__skip(s, info.offset - 14 - info.hsz - psize * (info.hsz == 12 ? 3 : 4));
5263       if (info.bpp == 1) width = (s->img_x + 7) >> 3;
5264       else if (info.bpp == 4) width = (s->img_x + 1) >> 1;
5265       else if (info.bpp == 8) width = s->img_x;
5266       else { STBI_FREE(out); return stbi__errpuc("bad bpp", "Corrupt BMP"); }
5267       pad = (-width)&3;
5268       if (info.bpp == 1) {
5269          for (j=0; j < (int) s->img_y; ++j) {
5270             int bit_offset = 7, v = stbi__get8(s);
5271             for (i=0; i < (int) s->img_x; ++i) {
5272                int color = (v>>bit_offset)&0x1;
5273                out[z++] = pal[color][0];
5274                out[z++] = pal[color][1];
5275                out[z++] = pal[color][2];
5276                if (target == 4) out[z++] = 255;
5277                if (i+1 == (int) s->img_x) break;
5278                if((--bit_offset) < 0) {
5279                   bit_offset = 7;
5280                   v = stbi__get8(s);
5281                }
5282             }
5283             stbi__skip(s, pad);
5284          }
5285       } else {
5286          for (j=0; j < (int) s->img_y; ++j) {
5287             for (i=0; i < (int) s->img_x; i += 2) {
5288                int v=stbi__get8(s),v2=0;
5289                if (info.bpp == 4) {
5290                   v2 = v & 15;
5291                   v >>= 4;
5292                }
5293                out[z++] = pal[v][0];
5294                out[z++] = pal[v][1];
5295                out[z++] = pal[v][2];
5296                if (target == 4) out[z++] = 255;
5297                if (i+1 == (int) s->img_x) break;
5298                v = (info.bpp == 8) ? stbi__get8(s) : v2;
5299                out[z++] = pal[v][0];
5300                out[z++] = pal[v][1];
5301                out[z++] = pal[v][2];
5302                if (target == 4) out[z++] = 255;
5303             }
5304             stbi__skip(s, pad);
5305          }
5306       }
5307    } else {
5308       int rshift=0,gshift=0,bshift=0,ashift=0,rcount=0,gcount=0,bcount=0,acount=0;
5309       int z = 0;
5310       int easy=0;
5311       stbi__skip(s, info.offset - 14 - info.hsz);
5312       if (info.bpp == 24) width = 3 * s->img_x;
5313       else if (info.bpp == 16) width = 2*s->img_x;
5314       else /* bpp = 32 and pad = 0 */ width=0;
5315       pad = (-width) & 3;
5316       if (info.bpp == 24) {
5317          easy = 1;
5318       } else if (info.bpp == 32) {
5319          if (mb == 0xff && mg == 0xff00 && mr == 0x00ff0000 && ma == 0xff000000)
5320             easy = 2;
5321       }
5322       if (!easy) {
5323          if (!mr || !mg || !mb) { STBI_FREE(out); return stbi__errpuc("bad masks", "Corrupt BMP"); }
5324          // right shift amt to put high bit in position #7
5325          rshift = stbi__high_bit(mr)-7; rcount = stbi__bitcount(mr);
5326          gshift = stbi__high_bit(mg)-7; gcount = stbi__bitcount(mg);
5327          bshift = stbi__high_bit(mb)-7; bcount = stbi__bitcount(mb);
5328          ashift = stbi__high_bit(ma)-7; acount = stbi__bitcount(ma);
5329       }
5330       for (j=0; j < (int) s->img_y; ++j) {
5331          if (easy) {
5332             for (i=0; i < (int) s->img_x; ++i) {
5333                unsigned char a;
5334                out[z+2] = stbi__get8(s);
5335                out[z+1] = stbi__get8(s);
5336                out[z+0] = stbi__get8(s);
5337                z += 3;
5338                a = (easy == 2 ? stbi__get8(s) : 255);
5339                all_a |= a;
5340                if (target == 4) out[z++] = a;
5341             }
5342          } else {
5343             int bpp = info.bpp;
5344             for (i=0; i < (int) s->img_x; ++i) {
5345                stbi__uint32 v = (bpp == 16 ? (stbi__uint32) stbi__get16le(s) : stbi__get32le(s));
5346                unsigned int a;
5347                out[z++] = STBI__BYTECAST(stbi__shiftsigned(v & mr, rshift, rcount));
5348                out[z++] = STBI__BYTECAST(stbi__shiftsigned(v & mg, gshift, gcount));
5349                out[z++] = STBI__BYTECAST(stbi__shiftsigned(v & mb, bshift, bcount));
5350                a = (ma ? stbi__shiftsigned(v & ma, ashift, acount) : 255);
5351                all_a |= a;
5352                if (target == 4) out[z++] = STBI__BYTECAST(a);
5353             }
5354          }
5355          stbi__skip(s, pad);
5356       }
5357    }
5358 
5359    // if alpha channel is all 0s, replace with all 255s
5360    if (target == 4 && all_a == 0)
5361       for (i=4*s->img_x*s->img_y-1; i >= 0; i -= 4)
5362          out[i] = 255;
5363 
5364    if (flip_vertically) {
5365       stbi_uc t;
5366       for (j=0; j < (int) s->img_y>>1; ++j) {
5367          stbi_uc *p1 = out +      j     *s->img_x*target;
5368          stbi_uc *p2 = out + (s->img_y-1-j)*s->img_x*target;
5369          for (i=0; i < (int) s->img_x*target; ++i) {
5370             t = p1[i]; p1[i] = p2[i]; p2[i] = t;
5371          }
5372       }
5373    }
5374 
5375    if (req_comp && req_comp != target) {
5376       out = stbi__convert_format(out, target, req_comp, s->img_x, s->img_y);
5377       if (out == NULL) return out; // stbi__convert_format frees input on failure
5378    }
5379 
5380    *x = s->img_x;
5381    *y = s->img_y;
5382    if (comp) *comp = s->img_n;
5383    return out;
5384 }
5385 #endif
5386 
5387 // Targa Truevision - TGA
5388 // by Jonathan Dummer
5389 #ifndef STBI_NO_TGA
5390 // returns STBI_rgb or whatever, 0 on error
stbi__tga_get_comp(int bits_per_pixel,int is_grey,int * is_rgb16)5391 static int stbi__tga_get_comp(int bits_per_pixel, int is_grey, int* is_rgb16)
5392 {
5393    // only RGB or RGBA (incl. 16bit) or grey allowed
5394    if (is_rgb16) *is_rgb16 = 0;
5395    switch(bits_per_pixel) {
5396       case 8:  return STBI_grey;
5397       case 16: if(is_grey) return STBI_grey_alpha;
5398                // fallthrough
5399       case 15: if(is_rgb16) *is_rgb16 = 1;
5400                return STBI_rgb;
5401       case 24: // fallthrough
5402       case 32: return bits_per_pixel/8;
5403       default: return 0;
5404    }
5405 }
5406 
// Reads the TGA header and reports width, height and component count
// without decoding pixel data.  Any of x, y, comp may be NULL.
// Returns 1 on success; on any rejected field the stream is rewound and
// 0 is returned.
static int stbi__tga_info(stbi__context *s, int *x, int *y, int *comp)
{
   int width, height, channels;
   int colormap_type, image_type;
   int entry_bits;   // bits per palette entry; 0 means "no colormap"
   int pixel_bits;

   stbi__get8(s);                          // discard Offset
   colormap_type = stbi__get8(s);
   if (colormap_type > 1) goto not_tga;    // only RGB or indexed allowed

   image_type = stbi__get8(s);
   if (colormap_type == 1) {
      // colormapped (paletted) image: type must be 1 or 9
      if (image_type != 1 && image_type != 9) goto not_tga;
      stbi__skip(s, 4);                    // skip index of first colormap entry and number of entries
      entry_bits = stbi__get8(s);          // bits per palette color entry
      if (entry_bits != 8 && entry_bits != 15 && entry_bits != 16 &&
          entry_bits != 24 && entry_bits != 32)
         goto not_tga;
      stbi__skip(s, 4);                    // skip image x and y origin
   } else {
      // "normal" image w/o colormap - only RGB or grey allowed, +/- RLE
      if (image_type != 2 && image_type != 3 && image_type != 10 && image_type != 11)
         goto not_tga;
      stbi__skip(s, 9);                    // skip colormap specification and image x/y origin
      entry_bits = 0;
   }

   width = stbi__get16le(s);
   if (width < 1) goto not_tga;
   height = stbi__get16le(s);
   if (height < 1) goto not_tga;

   pixel_bits = stbi__get8(s);
   stbi__get8(s);                          // ignore alpha bits

   if (entry_bits != 0) {
      // when using a colormap, pixel_bits is the size of the indexes;
      // only 8 or 16 bit indexes are accepted
      if (pixel_bits != 8 && pixel_bits != 16) goto not_tga;
      channels = stbi__tga_get_comp(entry_bits, 0, NULL);
   } else {
      channels = stbi__tga_get_comp(pixel_bits, (image_type == 3) || (image_type == 11), NULL);
   }
   if (!channels) goto not_tga;

   if (x) *x = width;
   if (y) *y = height;
   if (comp) *comp = channels;
   return 1;                               // seems to have passed everything

not_tga:
   stbi__rewind(s);
   return 0;
}
5471 
// Checks whether the stream plausibly starts with a TGA header.
// The stream is always rewound before returning.
// Returns 1 if every inspected header field is acceptable, 0 otherwise.
static int stbi__tga_test(stbi__context *s)
{
   int accepted = 0;
   int colormap_type, image_type, bits;

   stbi__get8(s);                                   // discard Offset
   colormap_type = stbi__get8(s);
   if (colormap_type > 1) goto done;                // only RGB or indexed allowed

   image_type = stbi__get8(s);
   if (colormap_type == 1) {
      // colormapped (paletted) image demands image type 1 or 9
      if (!(image_type == 1 || image_type == 9)) goto done;
      stbi__skip(s, 4);                             // skip index of first colormap entry and number of entries
      bits = stbi__get8(s);                         // bits per palette color entry
      if (!(bits == 8 || bits == 15 || bits == 16 || bits == 24 || bits == 32)) goto done;
      stbi__skip(s, 4);                             // skip image x and y origin
   } else {
      // "normal" image w/o colormap: only RGB or grey allowed, +/- RLE
      if (!(image_type == 2 || image_type == 3 || image_type == 10 || image_type == 11)) goto done;
      stbi__skip(s, 9);                             // skip colormap specification and image x/y origin
   }

   if (stbi__get16le(s) < 1) goto done;             // width must be positive
   if (stbi__get16le(s) < 1) goto done;             // height must be positive

   bits = stbi__get8(s);                            // bits per pixel
   // for colormapped images, bpp is the size of an index
   if (colormap_type == 1 && bits != 8 && bits != 16) goto done;
   if (!(bits == 8 || bits == 15 || bits == 16 || bits == 24 || bits == 32)) goto done;

   accepted = 1;   // every check passed

done:
   stbi__rewind(s);
   return accepted;
}
5502 
5503 // read 16bit value and convert to 24bit RGB
// Reads one little-endian 16-bit pixel from the stream and expands its three
// 5-bit fields to 8 bits each, writing them to out[0..2].
// The fields are taken from bits 10-14, 5-9 and 0-4 in that order, so the
// result is already in RGB order and needs no later swap.
// The top bit is ignored: all 15 and 16bit TGAs are treated as RGB with no alpha.
static void stbi__tga_read_rgb16(stbi__context *s, stbi_uc* out)
{
   int packed = (stbi__uint16) stbi__get16le(s);
   int shift = 10;
   int c;

   for (c = 0; c < 3; ++c) {
      int five = (packed >> shift) & 31;
      out[c] = (stbi_uc)((five * 255)/31);   // scale 0..31 to 0..255
      shift -= 5;
   }
}
5522 
// Decodes a TGA image (raw or RLE, true-colour / grey / colormapped,
// including 15/16-bit packed RGB) from the stream.
//   x, y:     receive the image width and height
//   comp:     if non-NULL, receives the component count found in the file
//   req_comp: desired component count, or 0 to keep the file's own
//   ri:       unused by this loader
// Returns the pixel buffer (converted with stbi__convert_format when
// req_comp differs from the file's component count), or the result of
// stbi__errpuc on failure.
static void *stbi__tga_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri)
{
   //   read in the TGA header stuff
   //   (the initializers below consume the header in file order - do not reorder)
   int tga_offset = stbi__get8(s);
   int tga_indexed = stbi__get8(s);
   int tga_image_type = stbi__get8(s);
   int tga_is_RLE = 0;
   int tga_palette_start = stbi__get16le(s);
   int tga_palette_len = stbi__get16le(s);
   int tga_palette_bits = stbi__get8(s);
   int tga_x_origin = stbi__get16le(s);
   int tga_y_origin = stbi__get16le(s);
   int tga_width = stbi__get16le(s);
   int tga_height = stbi__get16le(s);
   int tga_bits_per_pixel = stbi__get8(s);
   int tga_comp, tga_rgb16=0;
   int tga_inverted = stbi__get8(s);
   // int tga_alpha_bits = tga_inverted & 15; // the 4 lowest bits - unused (useless?)
   //   image data
   unsigned char *tga_data;
   unsigned char *tga_palette = NULL;
   int i, j;
   unsigned char raw_data[4] = {0};
   int RLE_count = 0;
   int RLE_repeating = 0;
   int read_next_pixel = 1;
   STBI_NOTUSED(ri);

   //   do a tiny bit of preprocessing
   if ( tga_image_type >= 8 )
   {
      // image types 9/10/11 are the RLE variants of 1/2/3
      tga_image_type -= 8;
      tga_is_RLE = 1;
   }
   // bit 5 of the descriptor byte clear => rows must be flipped after decoding
   tga_inverted = 1 - ((tga_inverted >> 5) & 1);

   //   If I'm paletted, then I'll use the number of bits from the palette
   if ( tga_indexed ) tga_comp = stbi__tga_get_comp(tga_palette_bits, 0, &tga_rgb16);
   else tga_comp = stbi__tga_get_comp(tga_bits_per_pixel, (tga_image_type == 3), &tga_rgb16);

   if(!tga_comp) // shouldn't really happen, stbi__tga_test() should have ensured basic consistency
      return stbi__errpuc("bad format", "Can't find out TGA pixelformat");

   //   tga info
   *x = tga_width;
   *y = tga_height;
   if (comp) *comp = tga_comp;

   if (!stbi__mad3sizes_valid(tga_width, tga_height, tga_comp, 0))
      return stbi__errpuc("too large", "Corrupt TGA");

   tga_data = (unsigned char*)stbi__malloc_mad3(tga_width, tga_height, tga_comp, 0);
   if (!tga_data) return stbi__errpuc("outofmem", "Out of memory");

   // skip to the data's starting position (offset usually = 0)
   stbi__skip(s, tga_offset );

   if ( !tga_indexed && !tga_is_RLE && !tga_rgb16 ) {
      // fast path: uncompressed true-colour/grey rows can be read straight
      // into their final (possibly flipped) position
      for (i=0; i < tga_height; ++i) {
         int row = tga_inverted ? tga_height -i - 1 : i;
         stbi_uc *tga_row = tga_data + row*tga_width*tga_comp;
         stbi__getn(s, tga_row, tga_width * tga_comp);
      }
   } else  {
      //   do I need to load a palette?
      if ( tga_indexed)
      {
         // A colormapped image needs at least one palette entry: with an
         // empty palette every index is clamped to 0 below and the lookup
         // would read past the end of a zero-sized allocation.
         if (tga_palette_len == 0) {
            STBI_FREE(tga_data);
            return stbi__errpuc("bad palette", "Corrupt TGA");
         }
         //   any data to skip? (offset usually = 0)
         stbi__skip(s, tga_palette_start );
         //   load the palette
         tga_palette = (unsigned char*)stbi__malloc_mad2(tga_palette_len, tga_comp, 0);
         if (!tga_palette) {
            STBI_FREE(tga_data);
            return stbi__errpuc("outofmem", "Out of memory");
         }
         if (tga_rgb16) {
            // 15/16-bit palette entries are expanded to 3 bytes each
            stbi_uc *pal_entry = tga_palette;
            STBI_ASSERT(tga_comp == STBI_rgb);
            for (i=0; i < tga_palette_len; ++i) {
               stbi__tga_read_rgb16(s, pal_entry);
               pal_entry += tga_comp;
            }
         } else if (!stbi__getn(s, tga_palette, tga_palette_len * tga_comp)) {
               STBI_FREE(tga_data);
               STBI_FREE(tga_palette);
               return stbi__errpuc("bad palette", "Corrupt TGA");
         }
      }
      //   load the data
      for (i=0; i < tga_width * tga_height; ++i)
      {
         //   if I'm in RLE mode, do I need to get a RLE packet header?
         if ( tga_is_RLE )
         {
            if ( RLE_count == 0 )
            {
               //   yep, get the next byte as a RLE command
               int RLE_cmd = stbi__get8(s);
               RLE_count = 1 + (RLE_cmd & 127);
               RLE_repeating = RLE_cmd >> 7;
               read_next_pixel = 1;
            } else if ( !RLE_repeating )
            {
               read_next_pixel = 1;
            }
         } else
         {
            read_next_pixel = 1;
         }
         //   OK, if I need to read a pixel, do it now
         if ( read_next_pixel )
         {
            //   load however much data we did have
            if ( tga_indexed )
            {
               // read in index, then perform the lookup
               int pal_idx = (tga_bits_per_pixel == 8) ? stbi__get8(s) : stbi__get16le(s);
               if ( pal_idx >= tga_palette_len ) {
                  // invalid index
                  pal_idx = 0;
               }
               pal_idx *= tga_comp;
               for (j = 0; j < tga_comp; ++j) {
                  raw_data[j] = tga_palette[pal_idx+j];
               }
            } else if(tga_rgb16) {
               STBI_ASSERT(tga_comp == STBI_rgb);
               stbi__tga_read_rgb16(s, raw_data);
            } else {
               //   read in the data raw
               for (j = 0; j < tga_comp; ++j) {
                  raw_data[j] = stbi__get8(s);
               }
            }
            //   clear the reading flag for the next pixel
            read_next_pixel = 0;
         } // end of reading a pixel

         // copy data (for a repeating RLE packet this re-emits the last pixel read)
         for (j = 0; j < tga_comp; ++j)
           tga_data[i*tga_comp+j] = raw_data[j];

         //   in case we're in RLE mode, keep counting down
         --RLE_count;
      }
      //   do I need to invert the image?
      if ( tga_inverted )
      {
         // swap row j with row (height-1-j), byte by byte
         for (j = 0; j*2 < tga_height; ++j)
         {
            int index1 = j * tga_width * tga_comp;
            int index2 = (tga_height - 1 - j) * tga_width * tga_comp;
            for (i = tga_width * tga_comp; i > 0; --i)
            {
               unsigned char temp = tga_data[index1];
               tga_data[index1] = tga_data[index2];
               tga_data[index2] = temp;
               ++index1;
               ++index2;
            }
         }
      }
      //   clear my palette, if I had one
      if ( tga_palette != NULL )
      {
         STBI_FREE( tga_palette );
      }
   }

   // swap RGB - if the source data was RGB16, it already is in the right order
   if (tga_comp >= 3 && !tga_rgb16)
   {
      unsigned char* tga_pixel = tga_data;
      for (i=0; i < tga_width * tga_height; ++i)
      {
         unsigned char temp = tga_pixel[0];
         tga_pixel[0] = tga_pixel[2];
         tga_pixel[2] = temp;
         tga_pixel += tga_comp;
      }
   }

   // convert to target component count
   if (req_comp && req_comp != tga_comp)
      tga_data = stbi__convert_format(tga_data, tga_comp, req_comp, tga_width, tga_height);

   //   the things I do to get rid of an error message, and yet keep
   //   Microsoft's C compilers happy... [8^(
   tga_palette_start = tga_palette_len = tga_palette_bits =
         tga_x_origin = tga_y_origin = 0;
   //   OK, done
   return tga_data;
}
5716 #endif
5717 
5718 // *************************************************************************************************
5719 // Photoshop PSD loader -- PD by Thatcher Ulrich, integration by Nicolas Schulz, tweaked by STB
5720 
5721 #ifndef STBI_NO_PSD
// Returns 1 if the stream begins with the big-endian 32-bit value
// 0x38425053, 0 otherwise.  The stream is rewound in either case.
static int stbi__psd_test(stbi__context *s)
{
   stbi__uint32 signature = stbi__get32be(s);
   stbi__rewind(s);
   return signature == 0x38425053;
}
5728 
// Decodes one RLE-compressed channel of pixelCount samples from the stream.
// Output samples are written to p with a stride of 4 bytes (p[0], p[4], ...),
// i.e. into one channel of an interleaved 4-component buffer.
// Returns 1 on success, 0 if a run would exceed the remaining sample count.
static int stbi__psd_decode_rle(stbi__context *s, stbi_uc *p, int pixelCount)
{
   int remaining = pixelCount;

   while (remaining > 0) {
      int code = stbi__get8(s);
      int run, k;

      if (code == 128)
         continue;   // No-op.

      if (code < 128) {
         // Copy next code+1 bytes literally.
         run = code + 1;
         if (run > remaining) return 0; // corrupt data
         for (k = 0; k < run; ++k, p += 4)
            *p = stbi__get8(s);
      } else {
         // Next 257-code bytes in the dest are replicated from next source byte.
         // (Equivalent to interpreting code as a negative 8-bit int n and
         // repeating -n+1 times.)
         stbi_uc fill;
         run = 257 - code;
         if (run > remaining) return 0; // corrupt data
         fill = stbi__get8(s);
         for (k = 0; k < run; ++k, p += 4)
            *p = fill;
      }
      remaining -= run;
   }

   return 1;
}
5766 
// Decodes the composited image of an RGB-mode PSD file.
//   x, y:     receive width and height
//   comp:     if non-NULL, receives 4
//   req_comp: desired component count, 0 or 4 to keep the 4-channel result
//   ri:       ri->bits_per_channel is set to 16 when 16-bit output is produced
//   bpc:      requested bits per channel; 16-bit output is produced only for
//             uncompressed 16-bit files when bpc == 16
// The image is assembled as 4 interleaved channels; channels missing from the
// file are filled with 0 (or full-scale for the fourth channel).
// Returns the pixel buffer, or the result of stbi__errpuc on failure.
static void *stbi__psd_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri, int bpc)
{
   int npixels;
   int nchannels, is_rle;
   int ch, k;
   int depth;
   int width, height;
   stbi_uc *image;
   STBI_NOTUSED(ri);

   // Signature must be "8BPS".
   if (stbi__get32be(s) != 0x38425053)
      return stbi__errpuc("not PSD", "Corrupt PSD image");

   // Only file version 1 is handled.
   if (stbi__get16be(s) != 1)
      return stbi__errpuc("wrong version", "Unsupported version of PSD image");

   // Six reserved bytes follow.
   stbi__skip(s, 6 );

   // Number of channels (R, G, B, A, etc).
   nchannels = stbi__get16be(s);
   if (nchannels < 0 || nchannels > 16)
      return stbi__errpuc("wrong channel count", "Unsupported number of channels in PSD image");

   // Rows first, then columns.
   height = stbi__get32be(s);
   width  = stbi__get32be(s);

   // Only 8 and 16 bits per sample are handled.
   depth = stbi__get16be(s);
   if (!(depth == 8 || depth == 16))
      return stbi__errpuc("unsupported bit depth", "PSD bit depth is not 8 or 16 bit");

   // Color mode must be 3 (RGB).  Other known values:
   //   0: Bitmap, 1: Grayscale, 2: Indexed color, 4: CMYK color,
   //   7: Multichannel, 8: Duotone, 9: Lab color
   if (stbi__get16be(s) != 3)
      return stbi__errpuc("wrong color format", "PSD is not in RGB color format");

   // Three length-prefixed sections are skipped in turn:
   // mode data (palette for indexed color; other info for other modes),
   stbi__skip(s,stbi__get32be(s) );
   // image resources (resolution, pen tool paths, etc),
   stbi__skip(s, stbi__get32be(s) );
   // and the reserved data.
   stbi__skip(s, stbi__get32be(s) );

   // Compression: 0 = none, 1 = RLE; anything else is rejected.
   is_rle = stbi__get16be(s);
   if (is_rle > 1)
      return stbi__errpuc("bad compression", "PSD has an unknown compression format");

   // Make sure 4*w*h is representable before allocating.
   if (!stbi__mad3sizes_valid(4, width, height, 0))
      return stbi__errpuc("too large", "Corrupt PSD");

   // Allocate the destination: 8 bytes per pixel for 16-bit output,
   // 4 bytes per pixel otherwise.
   if (!is_rle && depth == 16 && bpc == 16) {
      image = (stbi_uc *) stbi__malloc_mad3(8, width, height, 0);
      ri->bits_per_channel = 16;
   } else {
      image = (stbi_uc *) stbi__malloc(4 * width*height);
   }

   if (!image) return stbi__errpuc("outofmem", "Out of memory");
   npixels = width*height;

   if (is_rle) {
      // RLE as used by .PSD and .TIFF (see stbi__psd_decode_rle).
      // The compressed data is preceded by a 2-byte count for each row of
      // each channel, which is skipped rather than used.
      stbi__skip(s, height * nchannels * 2 );

      for (ch = 0; ch < 4; ++ch) {
         stbi_uc *dst = image + ch;

         if (ch < nchannels) {
            // Channel is present in the file.
            if (!stbi__psd_decode_rle(s, dst, npixels)) {
               STBI_FREE(image);
               return stbi__errpuc("corrupt", "bad RLE data");
            }
         } else {
            // Channel is absent: fill with default data.
            for (k = 0; k < npixels; ++k)
               dst[4*k] = (ch == 3 ? 255 : 0);
         }
      }
   } else {
      // Raw image data: each channel in order (Red, Green, Blue, Alpha, ...),
      // one 8-bit (or 16-bit big-endian) value per pixel.
      for (ch = 0; ch < 4; ++ch) {
         if (ch < nchannels) {
            if (ri->bits_per_channel == 16) {    // output bpc
               stbi__uint16 *dst16 = ((stbi__uint16 *) image) + ch;
               for (k = 0; k < npixels; ++k, dst16 += 4)
                  *dst16 = (stbi__uint16) stbi__get16be(s);
            } else if (depth == 16) {            // input bpc: keep the high byte
               stbi_uc *dst = image + ch;
               for (k = 0; k < npixels; ++k, dst += 4)
                  *dst = (stbi_uc) (stbi__get16be(s) >> 8);
            } else {
               stbi_uc *dst = image + ch;
               for (k = 0; k < npixels; ++k, dst += 4)
                  *dst = stbi__get8(s);
            }
         } else if (depth == 16 && bpc == 16) {
            // Absent channel, 16-bit output.
            stbi__uint16 *dst16 = ((stbi__uint16 *) image) + ch;
            stbi__uint16 fill16 = ch == 3 ? 65535 : 0;
            for (k = 0; k < npixels; ++k, dst16 += 4)
               *dst16 = fill16;
         } else {
            // Absent channel, 8-bit output.
            stbi_uc *dst = image + ch;
            stbi_uc fill = ch == 3 ? 255 : 0;
            for (k = 0; k < npixels; ++k, dst += 4)
               *dst = fill;
         }
      }
   }

   // remove weird white matte from PSD: only pixels whose alpha is neither
   // zero nor full-scale are adjusted
   if (nchannels >= 4) {
      if (ri->bits_per_channel == 16) {
         stbi__uint16 *px16 = (stbi__uint16 *) image;
         for (k = 0; k < width*height; ++k, px16 += 4) {
            if (px16[3] != 0 && px16[3] != 65535) {
               float a = px16[3] / 65535.0f;
               float ra = 1.0f / a;
               float inv_a = 65535.0f * (1 - ra);
               px16[0] = (stbi__uint16) (px16[0]*ra + inv_a);
               px16[1] = (stbi__uint16) (px16[1]*ra + inv_a);
               px16[2] = (stbi__uint16) (px16[2]*ra + inv_a);
            }
         }
      } else {
         unsigned char *px = image;
         for (k = 0; k < width*height; ++k, px += 4) {
            if (px[3] != 0 && px[3] != 255) {
               float a = px[3] / 255.0f;
               float ra = 1.0f / a;
               float inv_a = 255.0f * (1 - ra);
               px[0] = (unsigned char) (px[0]*ra + inv_a);
               px[1] = (unsigned char) (px[1]*ra + inv_a);
               px[2] = (unsigned char) (px[2]*ra + inv_a);
            }
         }
      }
   }

   // convert to desired output format
   if (req_comp && req_comp != 4) {
      if (ri->bits_per_channel == 16)
         image = (stbi_uc *) stbi__convert_format16((stbi__uint16 *) image, 4, req_comp, width, height);
      else
         image = stbi__convert_format(image, 4, req_comp, width, height);
      if (image == NULL) return image; // stbi__convert_format frees input on failure
   }

   if (comp) *comp = 4;
   *y = height;
   *x = width;

   return image;
}
5964 #endif
5965 
5966 // *************************************************************************************************
5967 // Softimage PIC loader
5968 // by Tom Seddon
5969 //
5970 // See http://softimage.wiki.softimage.com/index.php/INFO:_PIC_file_format
5971 // See http://ozviz.wasp.uwa.edu.au/~pbourke/dataformats/softimagepic/
5972 
5973 #ifndef STBI_NO_PIC
// Reads up to four bytes from the stream and compares them with str[0..3].
// Stops at the first mismatch.  Returns 1 if all four match, 0 otherwise.
static int stbi__pic_is4(stbi__context *s,const char *str)
{
   int k = 0;

   while (k < 4) {
      stbi_uc got = stbi__get8(s);
      if (got != (stbi_uc) str[k])
         return 0;
      ++k;
   }

   return 1;
}
5983 
// Checks the two PIC markers: the 4-byte magic 53 80 F6 34 at the start and
// the text "PICT" 84 bytes after it.  Does not rewind the stream.
// Returns 1 if both are present, 0 otherwise.
static int stbi__pic_test_core(stbi__context *s)
{
   int to_skip;

   if (!stbi__pic_is4(s,"\x53\x80\xF6\x34"))
      return 0;

   // 84 bytes sit between the magic number and the "PICT" tag
   for (to_skip = 84; to_skip > 0; --to_skip)
      stbi__get8(s);

   return stbi__pic_is4(s,"PICT") ? 1 : 0;
}
5999 
6000 typedef struct
6001 {
6002    stbi_uc size,type,channel;
6003 } stbi__pic_packet;
6004 
// Reads one byte from the stream into dest[i] for each i in 0..3 whose bit
// (0x80 >> i) is set in channel; other entries of dest are left untouched.
// Returns dest, or the result of stbi__errpuc if the stream ends first.
static stbi_uc *stbi__readval(stbi__context *s, int channel, stbi_uc *dest)
{
   int k;

   for (k = 0; k < 4; ++k) {
      if (!(channel & (0x80 >> k)))
         continue;   // component not selected by the mask
      if (stbi__at_eof(s)) return stbi__errpuc("bad file","PIC file too short");
      dest[k] = stbi__get8(s);
   }

   return dest;
}
6018 
// Copies src[i] to dest[i] for each i in 0..3 whose bit (0x80 >> i) is set
// in channel; unselected entries of dest are left unchanged.
static void stbi__copyval(int channel,stbi_uc *dest,const stbi_uc *src)
{
   int k;

   for (k = 0; k < 4; ++k) {
      if (channel & (0x80 >> k))
         dest[k] = src[k];
   }
}
6027 
// Reads the PIC packet list and then decodes every scanline into result,
// which must hold width*height 4-byte pixels.
//   comp: receives 4 if any packet's channel mask has bit 0x10 set, else 3
// Each scanline is decoded once per packet, so several packets may contribute
// different (or even the same) channels to the same pixels.
// Returns result on success; on failure returns 0 or the result of
// stbi__errpuc.
static stbi_uc *stbi__pic_load_core(stbi__context *s,int width,int height,int *comp, stbi_uc *result)
{
   stbi__pic_packet packets[10];
   int channels_seen = 0, npackets = 0;
   int more, row, p;

   // Read the chained packet descriptors.
   do {
      stbi__pic_packet *pk;

      if (npackets == sizeof(packets)/sizeof(packets[0]))
         return stbi__errpuc("bad format","too many packets");

      pk = packets + npackets;
      ++npackets;

      more        = stbi__get8(s);   // nonzero when another descriptor follows
      pk->size    = stbi__get8(s);
      pk->type    = stbi__get8(s);
      pk->channel = stbi__get8(s);

      channels_seen |= pk->channel;

      if (stbi__at_eof(s))  return stbi__errpuc("bad file","file too short (reading packets)");
      if (pk->size != 8)    return stbi__errpuc("bad format","packet isn't 8bpp");
   } while (more);

   *comp = (channels_seen & 0x10) ? 4 : 3; // has alpha channel?

   for (row = 0; row < height; ++row) {
      for (p = 0; p < npackets; ++p) {
         const stbi__pic_packet *pk = &packets[p];
         stbi_uc *dest = result + row*width*4;
         int left = width;   // pixels of this scanline still to produce
         int k;

         if (pk->type == 0) {
            // uncompressed: one value per pixel
            for (; left > 0; --left, dest += 4)
               if (!stbi__readval(s,pk->channel,dest))
                  return 0;
         } else if (pk->type == 1) {
            // Pure RLE: (count, value) pairs; an over-long run is clamped
            while (left > 0) {
               stbi_uc value[4];
               int run = stbi__get8(s);
               if (stbi__at_eof(s))   return stbi__errpuc("bad file","file too short (pure read count)");

               if (run > left)
                  run = left;

               if (!stbi__readval(s,pk->channel,value))  return 0;

               for (k = 0; k < run; ++k, dest += 4)
                  stbi__copyval(pk->channel,dest,value);
               left -= run;
            }
         } else if (pk->type == 2) {
            // Mixed RLE: a count byte >= 128 introduces a repeated value,
            // a smaller one introduces count+1 raw values
            while (left > 0) {
               int run = stbi__get8(s);
               if (stbi__at_eof(s))  return stbi__errpuc("bad file","file too short (mixed read count)");

               if (run < 128) { // Raw
                  ++run;
                  if (run > left) return stbi__errpuc("bad file","scanline overrun");

                  for (k = 0; k < run; ++k, dest += 4)
                     if (!stbi__readval(s,pk->channel,dest))
                        return 0;
               } else { // Repeated
                  stbi_uc value[4];

                  // 128 means the real count follows as a 16-bit big-endian value
                  if (run == 128)
                     run = stbi__get16be(s);
                  else
                     run -= 127;
                  if (run > left)
                     return stbi__errpuc("bad file","scanline overrun");

                  if (!stbi__readval(s,pk->channel,value))
                     return 0;

                  for (k = 0; k < run; ++k, dest += 4)
                     stbi__copyval(pk->channel,dest,value);
               }
               left -= run;
            }
         } else {
            return stbi__errpuc("bad format","packet has bad compression type");
         }
      }
   }

   return result;
}
6137 
// Decodes a Softimage PIC image.
//   px, py:   receive width and height
//   comp:     if non-NULL, receives the component count reported by
//             stbi__pic_load_core (3 or 4)
//   req_comp: desired component count, or 0 to use the file's own
//   ri:       unused by this loader
// Returns the pixel buffer after stbi__convert_format, or a null/error result
// if the header is truncated, the image is too large, memory cannot be
// allocated, or the pixel data fails to decode.
static void *stbi__pic_load(stbi__context *s,int *px,int *py,int *comp,int req_comp, stbi__result_info *ri)
{
   stbi_uc *result;
   int i, x,y, internal_comp;
   STBI_NOTUSED(ri);

   // stbi__pic_load_core always writes through comp, so give it somewhere to write
   if (!comp) comp = &internal_comp;

   // skip the first 92 header bytes
   for (i=0; i<92; ++i)
      stbi__get8(s);

   x = stbi__get16be(s);
   y = stbi__get16be(s);
   if (stbi__at_eof(s))  return stbi__errpuc("bad file","file too short (pic header)");
   if (!stbi__mad3sizes_valid(x, y, 4, 0)) return stbi__errpuc("too large", "PIC image too large to decode");

   stbi__get32be(s); //skip `ratio'
   stbi__get16be(s); //skip `fields'
   stbi__get16be(s); //skip `pad'

   // intermediate buffer is RGBA
   result = (stbi_uc *) stbi__malloc_mad3(x, y, 4, 0);
   // the allocation must be checked before memset touches it
   if (!result) return stbi__errpuc("outofmem", "Out of memory");
   // pre-fill with 0xff so components no packet writes stay at full scale
   memset(result, 0xff, x*y*4);

   *px = x;
   *py = y;

   if (!stbi__pic_load_core(s,x,y,comp, result)) {
      // Bail out here: on failure *comp may never have been written, and
      // stbi__convert_format must not be handed a null buffer.
      STBI_FREE(result);
      return 0;
   }

   if (req_comp == 0) req_comp = *comp;
   result=stbi__convert_format(result,4,req_comp,x,y);

   return result;
}
6173 
// Probe the stream with stbi__pic_test_core, then rewind it so the caller
// can read from the start again. Returns the probe's result unchanged.
static int stbi__pic_test(stbi__context *s)
{
   const int looks_like_pic = stbi__pic_test_core(s);

   stbi__rewind(s);
   return looks_like_pic;
}
6180 #endif
6181 
6182 // *************************************************************************************************
6183 // GIF loader -- public domain by Jean-Marc Lienher -- simplified/shrunk by stb
6184 
6185 #ifndef STBI_NO_GIF
// One entry of the GIF LZW code table. Entries form backwards-linked chains:
// each code names the code that precedes it plus one extra byte.
typedef struct
{
   stbi__int16 prefix;   // index of the preceding code in the chain, or -1 for a root code
   stbi_uc first;        // first byte of the string this code expands to
   stbi_uc suffix;       // last byte of the string; used as the colour table index when emitted
} stbi__gif_lzw;
6192 
// Decoder state for one GIF stream; it persists across frames so that
// stbi__gif_load_next can apply frame disposal.
typedef struct
{
   int w,h;                      // logical screen size read from the file header
   stbi_uc *out;                 // output buffer (always 4 components)
   stbi_uc *background;          // The current "background" as far as a gif is concerned
   stbi_uc *history;             // one byte per pixel, set to 1 when the current frame wrote that pixel
   int flags, bgindex, ratio, transparent, eflags; // header flags/background index/ratio; transparent index (-1 if none); graphic control flags
   stbi_uc  pal[256][4];         // global colour table
   stbi_uc lpal[256][4];         // local (per-image) colour table
   stbi__gif_lzw codes[8192];    // LZW code table
   stbi_uc *color_table;         // table in use for the current image: pal or lpal
   int parse, step;              // interlace pass counter and byte distance between decoded rows
   int lflags;                   // flags byte of the current image descriptor
   int start_x, start_y;         // top-left of the frame rectangle, as byte offsets into out
   int max_x, max_y;             // exclusive end of the frame rectangle, as byte offsets
   int cur_x, cur_y;             // current write position, as byte offsets
   int line_size;                // bytes per output row (w * 4)
   int delay;                    // frame delay, stored in 1/1000ths of a second
} stbi__gif;
6212 
// Check for a GIF signature ("GIF87a" or "GIF89a") at the current stream
// position. Reading stops at the first mismatching byte; the stream is not
// rewound here. Returns 1 on a match and 0 otherwise.
static int stbi__gif_test_raw(stbi__context *s)
{
   static const char magic[] = "GIF8";
   int k, version;

   for (k = 0; magic[k] != 0; ++k) {
      if (stbi__get8(s) != magic[k])
         return 0;
   }

   version = stbi__get8(s);
   if (!(version == '9' || version == '7'))
      return 0;

   return (stbi__get8(s) == 'a') ? 1 : 0;
}
6222 
// Run the GIF signature check and rewind the stream afterwards, whatever the
// outcome. Returns the result of stbi__gif_test_raw.
static int stbi__gif_test(stbi__context *s)
{
   const int is_gif = stbi__gif_test_raw(s);

   stbi__rewind(s);
   return is_gif;
}
6229 
// Read num_entries three-byte colours from the stream into pal. The three
// bytes of each entry are stored in reverse order (first byte read goes to
// index 2). The fourth byte is 0 for the entry whose index equals transp
// and 255 for every other entry; pass transp = -1 for no transparent entry.
static void stbi__gif_parse_colortable(stbi__context *s, stbi_uc pal[256][4], int num_entries, int transp)
{
   int entry = 0;

   while (entry < num_entries) {
      stbi_uc *color = pal[entry];

      color[2] = stbi__get8(s);
      color[1] = stbi__get8(s);
      color[0] = stbi__get8(s);
      color[3] = (entry == transp) ? 0 : 255;
      ++entry;
   }
}
6240 
// Parse the GIF file header into g.
//
//   comp     if non-NULL, receives 4
//   is_info  when nonzero, stop after the fixed-size fields and do not read
//            the global colour table
//
// Returns 1 on success; on a bad signature returns the value of stbi__err.
static int stbi__gif_header(stbi__context *s, stbi__gif *g, int *comp, int is_info)
{
   static const char magic[] = "GIF8";
   stbi_uc ver;
   int k;

   for (k = 0; magic[k] != 0; ++k) {
      if (stbi__get8(s) != magic[k])
         return stbi__err("not GIF", "Corrupt GIF");
   }

   ver = stbi__get8(s);
   if (!(ver == '7' || ver == '9'))
      return stbi__err("not GIF", "Corrupt GIF");
   if (stbi__get8(s) != 'a')
      return stbi__err("not GIF", "Corrupt GIF");

   stbi__g_failure_reason = "";

   // logical screen descriptor
   g->w       = stbi__get16le(s);
   g->h       = stbi__get16le(s);
   g->flags   = stbi__get8(s);
   g->bgindex = stbi__get8(s);
   g->ratio   = stbi__get8(s);
   g->transparent = -1;

   // can't actually tell whether it's 3 or 4 until we parse the comments
   if (comp != 0)
      *comp = 4;

   // bit 0x80 of the flags announces a global colour table
   if (!is_info && (g->flags & 0x80))
      stbi__gif_parse_colortable(s,g->pal, 2 << (g->flags & 7), -1);

   return 1;
}
6268 
// Read just the GIF header to report image dimensions.
//
//   x, y  if non-NULL, receive the width and height from the header
//   comp  passed through to stbi__gif_header
//
// Returns 1 on success. Returns 0 (after rewinding the stream) when the
// header is not a valid GIF header, and the value of stbi__err when the
// temporary decoder state cannot be allocated.
static int stbi__gif_info_raw(stbi__context *s, int *x, int *y, int *comp)
{
   // stbi__gif is large (it embeds the LZW table), so it lives on the heap
   stbi__gif* g = (stbi__gif*) stbi__malloc(sizeof(stbi__gif));
   if (!g) return stbi__err("outofmem", "Out of memory"); // nothing has been read yet, so no rewind is needed
   if (!stbi__gif_header(s, g, comp, 1)) {
      STBI_FREE(g);
      stbi__rewind( s );
      return 0;
   }
   if (x) *x = g->w;
   if (y) *y = g->h;
   STBI_FREE(g);
   return 1;
}
6282 
// Emit the pixels encoded by one LZW code into g->out, advancing the write
// cursor (g->cur_x / g->cur_y, both byte offsets) and marking every visited
// pixel in g->history.
static void stbi__out_gif_code(stbi__gif *g, stbi__uint16 code)
{
   stbi_uc *p, *c;
   int idx;

   // recurse to decode the prefixes, since the linked-list is backwards,
   // and working backwards through an interleaved image would be nasty
   if (g->codes[code].prefix >= 0)
      stbi__out_gif_code(g, g->codes[code].prefix);

   // cursor is already past the frame rectangle: drop the pixel
   if (g->cur_y >= g->max_y) return;

   idx = g->cur_x + g->cur_y;
   p = &g->out[idx];
   g->history[idx / 4] = 1; // idx is a byte offset with 4 bytes per pixel, so idx/4 is the pixel index

   c = &g->color_table[g->codes[code].suffix * 4];
   if (c[3] > 128) { // don't render transparent pixels;
      // colour table entries are stored byte-reversed, so swap them back here
      p[0] = c[2];
      p[1] = c[1];
      p[2] = c[0];
      p[3] = c[3];
   }
   g->cur_x += 4;

   // end of row: wrap to the left edge and advance by the current row step
   if (g->cur_x >= g->max_x) {
      g->cur_x = g->start_x;
      g->cur_y += g->step;

      // ran off the bottom while interlace passes remain (g->parse > 0):
      // start the next pass with a new row step, beginning half a step down
      while (g->cur_y >= g->max_y && g->parse > 0) {
         g->step = (1 << g->parse) * g->line_size;
         g->cur_y = g->start_y + (g->step >> 1);
         --g->parse;
      }
   }
}
6319 
// Decode the LZW-compressed raster data of one GIF image into g->out.
//
// The data is a sequence of sub-blocks, each a length byte followed by that
// many bytes; a zero length ends the sequence.
//
// Returns g->out when the data ends normally (terminating sub-block or
// end-of-stream code). Returns NULL when the initial code size is above 12,
// and the value of stbi__errpuc when the code stream is corrupt.
static stbi_uc *stbi__process_gif_raster(stbi__context *s, stbi__gif *g)
{
   stbi_uc lzw_cs;
   stbi__int32 len, init_code;
   stbi__uint32 first;
   stbi__int32 codesize, codemask, avail, oldcode, bits, valid_bits, clear;
   stbi__gif_lzw *p;

   lzw_cs = stbi__get8(s);          // initial LZW code size in bits
   if (lzw_cs > 12) return NULL;
   clear = 1 << lzw_cs;             // the "clear" code; clear+1 is the end-of-stream code
   first = 1;                       // stays 1 until a clear code has been seen
   codesize = lzw_cs + 1;
   codemask = (1 << codesize) - 1;
   bits = 0;                        // bit accumulator, filled from the low end
   valid_bits = 0;                  // number of bits currently held in the accumulator
   // root codes: every value below the clear code stands for itself
   for (init_code = 0; init_code < clear; init_code++) {
      g->codes[init_code].prefix = -1;
      g->codes[init_code].first = (stbi_uc) init_code;
      g->codes[init_code].suffix = (stbi_uc) init_code;
   }

   // support no starting clear code
   avail = clear+2;                 // next free slot in the code table
   oldcode = -1;                    // previously decoded code, -1 when there is none

   len = 0;                         // bytes left in the current sub-block
   for(;;) {
      if (valid_bits < codesize) {
         // not enough bits for a whole code: pull in one more byte
         if (len == 0) {
            len = stbi__get8(s); // start new block
            if (len == 0)
               return g->out;
         }
         --len;
         bits |= (stbi__int32) stbi__get8(s) << valid_bits;
         valid_bits += 8;
      } else {
         stbi__int32 code = bits & codemask;
         bits >>= codesize;
         valid_bits -= codesize;
         // @OPTIMIZE: is there some way we can accelerate the non-clear path?
         if (code == clear) {  // clear code
            // reset the table to just the root codes
            codesize = lzw_cs + 1;
            codemask = (1 << codesize) - 1;
            avail = clear + 2;
            oldcode = -1;
            first = 0;
         } else if (code == clear + 1) { // end of stream code
            // skip the rest of this sub-block and any that follow, up to the terminator
            stbi__skip(s, len);
            while ((len = stbi__get8(s)) > 0)
               stbi__skip(s,len);
            return g->out;
         } else if (code <= avail) {
            if (first) {
               return stbi__errpuc("no clear code", "Corrupt GIF");
            }

            if (oldcode >= 0) {
               // add a table entry: the previous string plus one more byte
               p = &g->codes[avail++];
               if (avail > 8192) {
                  return stbi__errpuc("too many codes", "Corrupt GIF");
               }

               p->prefix = (stbi__int16) oldcode;
               p->first = g->codes[oldcode].first;
               // code == avail means the code refers to the entry being defined right now;
               // its last byte is then the first byte of the previous string
               p->suffix = (code == avail) ? p->first : g->codes[code].first;
            } else if (code == avail)
               return stbi__errpuc("illegal code in raster", "Corrupt GIF");

            stbi__out_gif_code(g, (stbi__uint16) code);

            // widen the code size once the table fills the current width (capped at 12 bits)
            if ((avail & codemask) == 0 && avail <= 0x0FFF) {
               codesize++;
               codemask = (1 << codesize) - 1;
            }

            oldcode = code;
         } else {
            return stbi__errpuc("illegal code in raster", "Corrupt GIF");
         }
      }
   }
}
6404 
// Decode the next frame of a GIF into g->out, applying the previous frame's
// disposal method first. Called once per frame; the first call (g->out == 0)
// also parses the file header and allocates the working buffers.
//
//   comp      passed to stbi__gif_header on the first call
//   req_comp  unused here
//   two_back  the image from two frames ago, used only for disposal method 3
//             ("restore to previous"); may be 0, in which case method 3 is
//             treated as method 2
//
// Returns g->out after a frame has been decoded, (stbi_uc *) s when the
// stream terminator is reached, and a null pointer / stbi__errpuc value on
// error. Buffers already stored in g are left for the caller to free.
static stbi_uc *stbi__gif_load_next(stbi__context *s, stbi__gif *g, int *comp, int req_comp, stbi_uc *two_back)
{
   int dispose;
   int first_frame;
   int pi;
   int pcount;
   STBI_NOTUSED(req_comp);

   // on first frame, any non-written pixels get the background colour (non-transparent)
   first_frame = 0;
   if (g->out == 0) {
      if (!stbi__gif_header(s, g, comp,0)) return 0; // stbi__g_failure_reason set by stbi__gif_header
      if (!stbi__mad3sizes_valid(4, g->w, g->h, 0))
         return stbi__errpuc("too large", "GIF image is too large");
      pcount = g->w * g->h;
      g->out = (stbi_uc *) stbi__malloc(4 * pcount);
      g->background = (stbi_uc *) stbi__malloc(4 * pcount);
      g->history = (stbi_uc *) stbi__malloc(pcount);
      if (!g->out || !g->background || !g->history)
         return stbi__errpuc("outofmem", "Out of memory");

      // image is treated as "transparent" at the start - ie, nothing overwrites the current background;
      // background colour is only used for pixels that are not rendered first frame, after that "background"
      // color refers to the color that was there the previous frame.
      memset(g->out, 0x00, 4 * pcount);
      memset(g->background, 0x00, 4 * pcount); // state of the background (starts transparent)
      memset(g->history, 0x00, pcount);        // pixels that were affected previous frame
      first_frame = 1;
   } else {
      // second or later frame - how do we dispose of the previous one?
      dispose = (g->eflags & 0x1C) >> 2;
      pcount = g->w * g->h;

      if ((dispose == 3) && (two_back == 0)) {
         dispose = 2; // if I don't have an image to revert back to, default to the old background
      }

      if (dispose == 3) { // use previous graphic
         for (pi = 0; pi < pcount; ++pi) {
            if (g->history[pi]) {
               memcpy( &g->out[pi * 4], &two_back[pi * 4], 4 );
            }
         }
      } else if (dispose == 2) {
         // restore what was changed last frame to background before that frame;
         for (pi = 0; pi < pcount; ++pi) {
            if (g->history[pi]) {
               memcpy( &g->out[pi * 4], &g->background[pi * 4], 4 );
            }
         }
      } else {
         // This is a non-disposal case either way, so just
         // leave the pixels as is, and they will become the new background
         // 1: do not dispose
         // 0:  not specified.
      }

      // background is what out is after the undoing of the previous frame;
      memcpy( g->background, g->out, 4 * g->w * g->h );
   }

   // clear my history;
   memset( g->history, 0x00, g->w * g->h );        // pixels that were affected previous frame

   // walk the blocks of the stream until a frame is decoded or the stream ends
   for (;;) {
      int tag = stbi__get8(s);
      switch (tag) {
         case 0x2C: /* Image Descriptor */
         {
            stbi__int32 x, y, w, h;
            stbi_uc *o;

            x = stbi__get16le(s);
            y = stbi__get16le(s);
            w = stbi__get16le(s);
            h = stbi__get16le(s);
            // the frame rectangle must lie inside the logical screen
            if (((x + w) > (g->w)) || ((y + h) > (g->h)))
               return stbi__errpuc("bad Image Descriptor", "Corrupt GIF");

            // all cursor fields below are byte offsets into g->out
            g->line_size = g->w * 4;
            g->start_x = x * 4;
            g->start_y = y * g->line_size;
            g->max_x   = g->start_x + w * 4;
            g->max_y   = g->start_y + h * g->line_size;
            g->cur_x   = g->start_x;
            g->cur_y   = g->start_y;

            // if the width of the specified rectangle is 0, that means
            // we may not see *any* pixels or the image is malformed;
            // to make sure this is caught, move the current y down to
            // max_y (which is what out_gif_code checks).
            if (w == 0)
               g->cur_y = g->max_y;

            g->lflags = stbi__get8(s);

            // bit 0x40 of the local flags selects interlaced row order
            if (g->lflags & 0x40) {
               g->step = 8 * g->line_size; // first interlaced spacing
               g->parse = 3;
            } else {
               g->step = g->line_size;
               g->parse = 0;
            }

            // pick the colour table: a local one if present, else the global one
            if (g->lflags & 0x80) {
               stbi__gif_parse_colortable(s,g->lpal, 2 << (g->lflags & 7), g->eflags & 0x01 ? g->transparent : -1);
               g->color_table = (stbi_uc *) g->lpal;
            } else if (g->flags & 0x80) {
               g->color_table = (stbi_uc *) g->pal;
            } else
               return stbi__errpuc("missing color table", "Corrupt GIF");

            o = stbi__process_gif_raster(s, g);
            if (!o) return NULL;

            // if this was the first frame,
            pcount = g->w * g->h;
            if (first_frame && (g->bgindex > 0)) {
               // if first frame, any pixel not drawn to gets the background color
               for (pi = 0; pi < pcount; ++pi) {
                  if (g->history[pi] == 0) {
                     g->pal[g->bgindex][3] = 255; // just in case it was made transparent, undo that; It will be reset next frame if need be;
                     memcpy( &g->out[pi * 4], &g->pal[g->bgindex], 4 );
                  }
               }
            }

            return o;
         }

         case 0x21: // Extension block (only the Graphic Control Extension is interpreted; others are skipped)
         {
            int len;
            int ext = stbi__get8(s);
            if (ext == 0xF9) { // Graphic Control Extension.
               len = stbi__get8(s);
               if (len == 4) {
                  g->eflags = stbi__get8(s);
                  g->delay = 10 * stbi__get16le(s); // delay - 1/100th of a second, saving as 1/1000ths.

                  // unset old transparent
                  if (g->transparent >= 0) {
                     g->pal[g->transparent][3] = 255;
                  }
                  if (g->eflags & 0x01) {
                     g->transparent = stbi__get8(s);
                     if (g->transparent >= 0) {
                        g->pal[g->transparent][3] = 0;
                     }
                  } else {
                     // don't need transparent
                     stbi__skip(s, 1);
                     g->transparent = -1;
                  }
               } else {
                  // unexpected block size: skip the payload and look for the next tag
                  stbi__skip(s, len);
                  break;
               }
            }
            // skip the remaining sub-blocks up to and including the zero-length terminator
            while ((len = stbi__get8(s)) != 0) {
               stbi__skip(s, len);
            }
            break;
         }

         case 0x3B: // gif stream termination code
            return (stbi_uc *) s; // using '1' causes warning on some compilers

         default:
            return stbi__errpuc("unknown code", "Corrupt GIF");
      }
   }
}
6580 
// Load every frame of a GIF into one contiguous buffer, frames stored back
// to back with 4 components per pixel (converted to req_comp at the end
// when req_comp is neither 0 nor 4).
//
//   delays    if non-NULL, *delays receives an allocated array holding one
//             delay value per frame (g.delay, in 1/1000ths of a second);
//             it is set to 0 when no frame is decoded or on allocation failure
//   x, y      receive the frame width and height
//   z         receives the number of frames decoded
//   comp      passed through to stbi__gif_load_next
//
// Returns the frame buffer. Returns the value of stbi__errpuc when the
// stream is not a GIF or when memory runs out; on out-of-memory every
// buffer allocated here is released first.
static void *stbi__load_gif_main(stbi__context *s, int **delays, int *x, int *y, int *z, int *comp, int req_comp)
{
   if (stbi__gif_test(s)) {
      int layers = 0;
      stbi_uc *u = 0;
      stbi_uc *out = 0;
      stbi_uc *two_back = 0;
      stbi__gif g;
      int stride;
      int oom = 0;
      memset(&g, 0, sizeof(g));
      if (delays) {
         *delays = 0;
      }

      do {
         u = stbi__gif_load_next(s, &g, comp, req_comp, two_back);
         if (u == (stbi_uc *) s) u = 0;  // end of animated gif marker

         if (u) {
            stbi_uc *grown;

            *x = g.w;
            *y = g.h;
            ++layers;
            stride = g.w * g.h * 4;

            // grow through a temporary so the old buffer is not lost when allocation fails
            if (out) {
               grown = (stbi_uc*) STBI_REALLOC( out, layers * stride );
            } else {
               grown = (stbi_uc*) stbi__malloc( layers * stride );
            }
            if (!grown) {
               oom = 1;
               break;
            }
            out = grown;

            if (delays) {
               int *grown_delays;
               if (*delays) {
                  grown_delays = (int*) STBI_REALLOC( *delays, sizeof(int) * layers );
               } else {
                  grown_delays = (int*) stbi__malloc( layers * sizeof(int) );
               }
               if (!grown_delays) {
                  oom = 1;
                  break;
               }
               *delays = grown_delays;
            }

            memcpy( out + ((layers - 1) * stride), u, stride );
            if (layers >= 2) {
               // the frame stored just before the one appended above; recomputed
               // every iteration because the buffer may have moved
               two_back = out + (layers - 2) * stride;
            }

            if (delays) {
               (*delays)[layers - 1U] = g.delay;
            }
         }
      } while (u != 0);

      // free temp buffer;
      STBI_FREE(g.out);
      STBI_FREE(g.history);
      STBI_FREE(g.background);

      if (oom) {
         STBI_FREE(out);
         if (delays) {
            STBI_FREE(*delays);
            *delays = 0;
         }
         return stbi__errpuc("outofmem", "Out of memory");
      }

      // do the final conversion after loading everything;
      if (out && req_comp && req_comp != 4)
         out = stbi__convert_format(out, 4, req_comp, layers * g.w, g.h);

      *z = layers;
      return out;
   } else {
      return stbi__errpuc("not GIF", "Image was not as a gif type.");
   }
}
6642 
// Load the first frame of a GIF as a still image.
//
//   x, y      receive the image size when a frame was decoded
//   comp      passed through to stbi__gif_load_next
//   req_comp  wanted component count; 0 or 4 leaves the 4-component data as is
//   ri        unused
//
// Returns the pixel buffer, or 0 when no frame could be decoded.
static void *stbi__gif_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri)
{
   stbi__gif g;
   stbi_uc *frame;

   memset(&g, 0, sizeof(g));
   STBI_NOTUSED(ri);

   frame = stbi__gif_load_next(s, &g, comp, req_comp, 0);
   if (frame == (stbi_uc *) s)
      frame = 0;  // end of animated gif marker

   if (!frame) {
      // if there was an error and we allocated an image buffer, free it!
      if (g.out)
         STBI_FREE(g.out);
   } else {
      *x = g.w;
      *y = g.h;

      // conversion happens only after a successful load, so the same
      // can be done for multiple frames.
      if (req_comp != 0 && req_comp != 4)
         frame = stbi__convert_format(frame, 4, req_comp, g.w, g.h);
   }

   // free buffers needed for multiple frame loading;
   STBI_FREE(g.history);
   STBI_FREE(g.background);

   return frame;
}
6671 
// Report GIF dimensions and component count; forwards to stbi__gif_info_raw.
static int stbi__gif_info(stbi__context *s, int *x, int *y, int *comp)
{
   const int found = stbi__gif_info_raw(s,x,y,comp);

   return found;
}
6676 #endif
6677 
6678 // *************************************************************************************************
6679 // Radiance RGBE HDR loader
6680 // originally by Nicolas Schulz
6681 #ifndef STBI_NO_HDR
// Compare the bytes at the current stream position against signature.
// Returns 0 at the first mismatch (without rewinding); on a full match the
// stream is rewound and 1 is returned.
static int stbi__hdr_test_core(stbi__context *s, const char *signature)
{
   const char *expected = signature;

   while (*expected) {
      if (stbi__get8(s) != *expected)
         return 0;
      ++expected;
   }

   stbi__rewind(s);
   return 1;
}
6691 
// Check whether the stream starts with either accepted HDR signature line
// ("#?RADIANCE" or "#?RGBE"). The stream is rewound after each attempt.
static int stbi__hdr_test(stbi__context* s)
{
   int matched = stbi__hdr_test_core(s, "#?RADIANCE\n");
   stbi__rewind(s);
   if (matched)
      return matched;

   // first signature failed: try the alternative one
   matched = stbi__hdr_test_core(s, "#?RGBE\n");
   stbi__rewind(s);
   return matched;
}
6702 
6703 #define STBI__HDR_BUFLEN  1024
// Read one line of the HDR header into buffer (capacity STBI__HDR_BUFLEN).
// The newline is consumed but not stored. A line that would not fit is
// truncated to STBI__HDR_BUFLEN-1 characters and the rest of it is
// discarded. The result is always NUL-terminated. Returns buffer.
static char *stbi__hdr_gettoken(stbi__context *z, char *buffer)
{
   int used = 0;
   char ch;

   for (;;) {
      ch = (char) stbi__get8(z);
      if (stbi__at_eof(z) || ch == '\n')
         break;

      buffer[used++] = ch;
      if (used == STBI__HDR_BUFLEN-1) {
         // flush to end of line
         while (!stbi__at_eof(z) && stbi__get8(z) != '\n') {
            /* discard */
         }
         break;
      }
   }

   buffer[used] = 0;
   return buffer;
}
6725 
stbi__hdr_convert(float * output,stbi_uc * input,int req_comp)6726 static void stbi__hdr_convert(float *output, stbi_uc *input, int req_comp)
6727 {
6728    if ( input[3] != 0 ) {
6729       float f1;
6730       // Exponent
6731       f1 = (float) ldexp(1.0f, input[3] - (int)(128 + 8));
6732       if (req_comp <= 2)
6733          output[0] = (input[0] + input[1] + input[2]) * f1 / 3;
6734       else {
6735          output[0] = input[0] * f1;
6736          output[1] = input[1] * f1;
6737          output[2] = input[2] * f1;
6738       }
6739       if (req_comp == 2) output[1] = 1;
6740       if (req_comp == 4) output[3] = 1;
6741    } else {
6742       switch (req_comp) {
6743          case 4: output[3] = 1; /* fallthrough */
6744          case 3: output[0] = output[1] = output[2] = 0;
6745                  break;
6746          case 2: output[1] = 1; /* fallthrough */
6747          case 1: output[0] = 0;
6748                  break;
6749       }
6750    }
6751 }
6752 
// Load a Radiance RGBE (.hdr) image as floating-point pixel data.
//
//   x, y      receive the width and height parsed from the resolution line
//   comp      if non-NULL, receives 3
//   req_comp  number of float components per output pixel; 0 means 3
//   ri        unused
//
// Only the "FORMAT=32-bit_rle_rgbe" format with a "-Y <h> +X <w>" resolution
// line is accepted. Returns a buffer of width*height*req_comp floats, or
// the value of stbi__errpf on failure.
static float *stbi__hdr_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri)
{
   char buffer[STBI__HDR_BUFLEN];
   char *token;
   int valid = 0;
   int width, height;
   stbi_uc *scanline;
   float *hdr_data;
   int len;
   unsigned char count, value;
   int i, j, k, c1,c2, z;
   const char *headerToken;
   STBI_NOTUSED(ri);

   // Check identifier
   headerToken = stbi__hdr_gettoken(s,buffer);
   if (strcmp(headerToken, "#?RADIANCE") != 0 && strcmp(headerToken, "#?RGBE") != 0)
      return stbi__errpf("not HDR", "Corrupt HDR image");

   // Parse header: lines up to the first empty one
   for(;;) {
      token = stbi__hdr_gettoken(s,buffer);
      if (token[0] == 0) break;
      if (strcmp(token, "FORMAT=32-bit_rle_rgbe") == 0) valid = 1;
   }

   if (!valid)    return stbi__errpf("unsupported format", "Unsupported HDR format");

   // Parse width and height
   // can't use sscanf() if we're not using stdio!
   token = stbi__hdr_gettoken(s,buffer);
   if (strncmp(token, "-Y ", 3))  return stbi__errpf("unsupported data layout", "Unsupported HDR format");
   token += 3;
   height = (int) strtol(token, &token, 10);
   while (*token == ' ') ++token;
   if (strncmp(token, "+X ", 3))  return stbi__errpf("unsupported data layout", "Unsupported HDR format");
   token += 3;
   width = (int) strtol(token, NULL, 10);

   *x = width;
   *y = height;

   if (comp) *comp = 3;
   if (req_comp == 0) req_comp = 3;

   if (!stbi__mad4sizes_valid(width, height, req_comp, sizeof(float), 0))
      return stbi__errpf("too large", "HDR image is too large");

   // Read data
   hdr_data = (float *) stbi__malloc_mad4(width, height, req_comp, sizeof(float), 0);
   if (!hdr_data)
      return stbi__errpf("outofmem", "Out of memory");

   // Load image data
   // image data is stored as some number of scanlines
   if ( width < 8 || width >= 32768) {
      // Read flat data
      for (j=0; j < height; ++j) {
         for (i=0; i < width; ++i) {
            stbi_uc rgbe[4];
           main_decode_loop:
            stbi__getn(s, rgbe, 4);
            stbi__hdr_convert(hdr_data + j * width * req_comp + i * req_comp, rgbe, req_comp);
         }
      }
   } else {
      // Read RLE-encoded data
      scanline = NULL;

      for (j = 0; j < height; ++j) {
         c1 = stbi__get8(s);
         c2 = stbi__get8(s);
         len = stbi__get8(s);
         if (c1 != 2 || c2 != 2 || (len & 0x80)) {
            // not run-length encoded, so we have to actually use THIS data as a decoded
            // pixel (note this can't be a valid pixel--one of RGB must be >= 128)
            stbi_uc rgbe[4];
            rgbe[0] = (stbi_uc) c1;
            rgbe[1] = (stbi_uc) c2;
            rgbe[2] = (stbi_uc) len;
            rgbe[3] = (stbi_uc) stbi__get8(s);
            stbi__hdr_convert(hdr_data, rgbe, req_comp);
            // continue in the flat-data loop above, starting at the second pixel
            i = 1;
            j = 0;
            STBI_FREE(scanline);
            goto main_decode_loop; // yes, this makes no sense
         }
         // the scanline header carries the row length as a 16-bit big-endian value
         len <<= 8;
         len |= stbi__get8(s);
         if (len != width) { STBI_FREE(hdr_data); STBI_FREE(scanline); return stbi__errpf("invalid decoded scanline length", "corrupt HDR"); }
         if (scanline == NULL) {
            scanline = (stbi_uc *) stbi__malloc_mad2(width, 4, 0);
            if (!scanline) {
               STBI_FREE(hdr_data);
               return stbi__errpf("outofmem", "Out of memory");
            }
         }

         // each of the four byte planes (R, G, B, E) is run-length coded separately
         for (k = 0; k < 4; ++k) {
            int nleft;
            i = 0;
            while ((nleft = width - i) > 0) {
               count = stbi__get8(s);
               if (count > 128) {
                  // Run
                  value = stbi__get8(s);
                  count -= 128;
                  if (count > nleft) { STBI_FREE(hdr_data); STBI_FREE(scanline); return stbi__errpf("corrupt", "bad RLE data in HDR"); }
                  for (z = 0; z < count; ++z)
                     scanline[i++ * 4 + k] = value;
               } else {
                  // Dump
                  // a zero count makes no progress, so the loop could spin
                  // without ever filling the row; reject it as corrupt
                  if (count == 0 || count > nleft) { STBI_FREE(hdr_data); STBI_FREE(scanline); return stbi__errpf("corrupt", "bad RLE data in HDR"); }
                  for (z = 0; z < count; ++z)
                     scanline[i++ * 4 + k] = stbi__get8(s);
               }
            }
         }
         for (i=0; i < width; ++i)
            stbi__hdr_convert(hdr_data+(j*width + i)*req_comp, scanline + i*4, req_comp);
      }
      if (scanline)
         STBI_FREE(scanline);
   }

   return hdr_data;
}
6880 
// Read just enough of an HDR header to report its dimensions.
//
//   x, y, comp  each may be NULL; otherwise they receive width, height and 3
//
// Returns 1 on success. On any failure the stream is rewound and 0 is
// returned (*y may already have been written if the height parsed but the
// width did not).
static int stbi__hdr_info(stbi__context *s, int *x, int *y, int *comp)
{
   char line[STBI__HDR_BUFLEN];
   char *cursor;
   int saw_format = 0;
   int scratch;

   if (x == NULL)    x = &scratch;
   if (y == NULL)    y = &scratch;
   if (comp == NULL) comp = &scratch;

   if (stbi__hdr_test(s) == 0)
      goto reject;

   // scan header lines up to the first empty one, looking for the format tag
   while ((cursor = stbi__hdr_gettoken(s,line))[0] != 0) {
      if (strcmp(cursor, "FORMAT=32-bit_rle_rgbe") == 0)
         saw_format = 1;
   }
   if (!saw_format)
      goto reject;

   // resolution line: "-Y <height> +X <width>"
   cursor = stbi__hdr_gettoken(s,line);
   if (strncmp(cursor, "-Y ", 3) != 0)
      goto reject;
   cursor += 3;
   *y = (int) strtol(cursor, &cursor, 10);

   while (*cursor == ' ')
      ++cursor;
   if (strncmp(cursor, "+X ", 3) != 0)
      goto reject;
   cursor += 3;
   *x = (int) strtol(cursor, NULL, 10);

   *comp = 3;
   return 1;

reject:
   stbi__rewind( s );
   return 0;
}
6924 #endif // STBI_NO_HDR
6925 
6926 #ifndef STBI_NO_BMP
// Parse a BMP header to report dimensions and component count. The stream
// is always rewound afterwards.
//
//   x, y, comp  each may be NULL; comp receives 4 when info.ma is nonzero
//               after parsing, otherwise 3
//
// Returns 1 when the header parsed, 0 otherwise.
static int stbi__bmp_info(stbi__context *s, int *x, int *y, int *comp)
{
   stbi__bmp_data info;
   void *header;

   info.all_a = 255;
   header = stbi__bmp_parse_header(s, &info);
   stbi__rewind( s );

   if (header != NULL) {
      if (x)    *x = s->img_x;
      if (y)    *y = s->img_y;
      if (comp) *comp = info.ma ? 4 : 3;
      return 1;
   }
   return 0;
}
6942 #endif
6943 
6944 #ifndef STBI_NO_PSD
// Read a PSD header to report dimensions.
//
//   x, y, comp  each may be NULL; on success they receive width, height and 4
//
// Returns 1 on success. On any failed check the stream is rewound and 0 is
// returned; *x and *y are written as soon as they are read, so they may be
// set even when a later check fails.
static int stbi__psd_info(stbi__context *s, int *x, int *y, int *comp)
{
   int channels, bit_depth, scratch;

   if (x == 0)    x = &scratch;
   if (y == 0)    y = &scratch;
   if (comp == 0) comp = &scratch;

   // signature "8BPS"
   if (stbi__get32be(s) != 0x38425053)
      goto reject;
   // this 16-bit field must be 1
   if (stbi__get16be(s) != 1)
      goto reject;

   stbi__skip(s, 6);

   channels = stbi__get16be(s);
   if (channels < 0 || channels > 16)
      goto reject;

   *y = stbi__get32be(s);
   *x = stbi__get32be(s);

   bit_depth = stbi__get16be(s);
   if (!(bit_depth == 8 || bit_depth == 16))
      goto reject;

   // the final 16-bit field must be 3
   if (stbi__get16be(s) != 3)
      goto reject;

   *comp = 4;
   return 1;

reject:
   stbi__rewind( s );
   return 0;
}
6979 
// Report whether the stream is a PSD file whose depth field is 16.
// Returns 1 if so (stream left positioned after the depth field); otherwise
// rewinds the stream and returns 0.
static int stbi__psd_is16(stbi__context *s)
{
   int channels, bit_depth;

   // signature "8BPS"
   if (stbi__get32be(s) != 0x38425053)
      goto reject;
   // this 16-bit field must be 1
   if (stbi__get16be(s) != 1)
      goto reject;

   stbi__skip(s, 6);

   channels = stbi__get16be(s);
   if (channels < 0 || channels > 16)
      goto reject;

   // height and width are not needed here
   (void) stbi__get32be(s);
   (void) stbi__get32be(s);

   bit_depth = stbi__get16be(s);
   if (bit_depth != 16)
      goto reject;

   return 1;

reject:
   stbi__rewind( s );
   return 0;
}
7006 #endif
7007 
7008 #ifndef STBI_NO_PIC
// Read a Softimage PIC header and its packet list to report dimensions and
// component count.
//
//   x, y, comp  each may be NULL; otherwise they receive width, height and
//               the component count (4 if any packet's channel mask has bit
//               0x10 set, else 3)
//
// Returns 1 on success. On every failure the stream is rewound and 0 is
// returned; *x and *y may already have been written by then.
static int stbi__pic_info(stbi__context *s, int *x, int *y, int *comp)
{
   int act_comp=0,num_packets=0,chained,dummy;
   stbi__pic_packet packets[10];

   if (!x) x = &dummy;
   if (!y) y = &dummy;
   if (!comp) comp = &dummy;

   // 4-byte magic number
   if (!stbi__pic_is4(s,"\x53\x80\xF6\x34")) {
      stbi__rewind(s);
      return 0;
   }

   stbi__skip(s, 88);

   *x = stbi__get16be(s);
   *y = stbi__get16be(s);
   if (stbi__at_eof(s)) {
      stbi__rewind( s);
      return 0;
   }
   // reject images with more than 2^28 pixels
   if ( (*x) != 0 && (1 << 28) / (*x) < (*y)) {
      stbi__rewind( s );
      return 0;
   }

   stbi__skip(s, 8);

   // packet list: each packet says whether another one follows
   do {
      stbi__pic_packet *packet;

      if (num_packets==sizeof(packets)/sizeof(packets[0])) {
         // too many packets: rewind like every other failure path
         stbi__rewind( s );
         return 0;
      }

      packet = &packets[num_packets++];
      chained = stbi__get8(s);
      packet->size    = stbi__get8(s);
      packet->type    = stbi__get8(s);
      packet->channel = stbi__get8(s);
      act_comp |= packet->channel;

      if (stbi__at_eof(s)) {
          stbi__rewind( s );
          return 0;
      }
      if (packet->size != 8) {
          stbi__rewind( s );
          return 0;
      }
   } while (chained);

   *comp = (act_comp & 0x10 ? 4 : 3);

   return 1;
}
7065 #endif
7066 
7067 // *************************************************************************************************
7068 // Portable Gray Map and Portable Pixel Map loader
7069 // by Ken Miller
7070 //
7071 // PGM: http://netpbm.sourceforge.net/doc/pgm.html
7072 // PPM: http://netpbm.sourceforge.net/doc/ppm.html
7073 //
7074 // Known limitations:
//    Comments in the header section are skipped (their text is not returned)
7076 //    Does not support ASCII image data (formats P2 and P3)
7077 //    Does not support 16-bit-per-channel
7078 
7079 #ifndef STBI_NO_PNM
7080 
// Returns 1 if the next two bytes are "P5" or "P6" (the stream is left just
// past them); otherwise rewinds the stream and returns 0.
static int      stbi__pnm_test(stbi__context *s)
{
   char magic = (char) stbi__get8(s);
   char kind  = (char) stbi__get8(s);

   if (magic == 'P' && (kind == '5' || kind == '6'))
      return 1;

   stbi__rewind( s );
   return 0;
}
7092 
// Loads a binary PGM/PPM image.
//
//   s         input stream; its img_x/img_y/img_n fields are filled from the header
//   x, y      receive width and height
//   comp      receives the component count found in the file; may be NULL
//   req_comp  if nonzero and different from the file's count, the pixels are
//             converted to this many components
//   ri        unused
//
// Returns the pixel buffer, or NULL on failure (bad header, size overflow,
// allocation failure, truncated pixel data, or failed conversion).
static void *stbi__pnm_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri)
{
   stbi_uc *out;
   STBI_NOTUSED(ri);

   if (!stbi__pnm_info(s, (int *)&s->img_x, (int *)&s->img_y, (int *)&s->img_n))
      return 0;

   *x = s->img_x;
   *y = s->img_y;
   if (comp) *comp = s->img_n;

   if (!stbi__mad3sizes_valid(s->img_n, s->img_x, s->img_y, 0))
      return stbi__errpuc("too large", "PNM too large");

   out = (stbi_uc *) stbi__malloc_mad3(s->img_n, s->img_x, s->img_y, 0);
   if (!out) return stbi__errpuc("outofmem", "Out of memory");

   // A short read would leave part of 'out' uninitialized; treat it as an error
   // instead of handing back garbage pixels.
   // NOTE(review): relies on stbi__getn (defined elsewhere in this file) returning 0 on a short read -- confirm.
   if (!stbi__getn(s, out, s->img_n * s->img_x * s->img_y)) {
      STBI_FREE(out);
      return stbi__errpuc("bad PNM", "PNM file truncated");
   }

   if (req_comp && req_comp != s->img_n) {
      out = stbi__convert_format(out, s->img_n, req_comp, s->img_x, s->img_y);
      if (out == NULL) return out; // stbi__convert_format frees input on failure
   }
   return out;
}
7118 
// Returns 1 if c is one of the six whitespace characters recognized in a PNM
// header (space, tab, newline, vertical tab, form feed, carriage return), else 0.
static int      stbi__pnm_isspace(char c)
{
   switch (c) {
      case ' ':
      case '\t':
      case '\n':
      case '\v':
      case '\f':
      case '\r':
         return 1;
      default:
         return 0;
   }
}
7123 
// Advances past whitespace and '#' comments in a PNM header.
// *c holds the current look-ahead character on entry and is updated to the
// first character that is neither whitespace nor part of a comment (or to
// whatever was last read when the stream hits EOF).
static void     stbi__pnm_skip_whitespace(stbi__context *s, char *c)
{
   while (1) {
      // eat a run of whitespace
      while (!stbi__at_eof(s) && stbi__pnm_isspace(*c))
         *c = (char) stbi__get8(s);

      // anything other than the start of a comment ends the skip
      if (stbi__at_eof(s) || *c != '#')
         return;

      // a comment runs up to (not including) the next line terminator
      while (!stbi__at_eof(s) && *c != '\n' && *c != '\r' )
         *c = (char) stbi__get8(s);
   }
}
7137 
// Returns 1 if c is an ASCII decimal digit '0'..'9', else 0.
static int      stbi__pnm_isdigit(char c)
{
   if (c < '0' || c > '9')
      return 0;
   return 1;
}
7142 
// Parses an unsigned decimal integer from a PNM header.
// *c holds the current look-ahead character on entry; on return it holds the
// first character after the digits. Returns 0 if *c is not a digit.
//
// The header is untrusted input, so an overlong digit string must not be
// allowed to overflow 'int' (undefined behaviour). Values that do not fit
// saturate at the largest int; all digits are still consumed so the stream
// position is the same as for an in-range number.
static int      stbi__pnm_getinteger(stbi__context *s, char *c)
{
   const int int_max = (int) (~0u >> 1);   // largest positive int, without needing <limits.h>
   int value = 0;

   while (!stbi__at_eof(s) && stbi__pnm_isdigit(*c)) {
      int digit = *c - '0';
      // value*10 + digit <= int_max  <=>  value <= (int_max - digit) / 10
      if (value > (int_max - digit) / 10)
         value = int_max;
      else
         value = value*10 + digit;
      *c = (char) stbi__get8(s);
   }

   return value;
}
7154 
// Parses a binary PGM ("P5") or PPM ("P6") header.
//
//   s     input stream; it is rewound first, so parsing always starts at the
//         beginning regardless of where earlier probes left it
//   x,y   receive width and height; either may be NULL
//   comp  receives 1 for P5 or 3 for P6; may be NULL
//
// Returns 1 on success (stream left after the max-value field). Returns 0 if
// the magic is wrong or the max value exceeds 255; in both cases the stream is
// rewound so a following format probe starts from the beginning.
static int      stbi__pnm_info(stbi__context *s, int *x, int *y, int *comp)
{
   int maxv, dummy;
   char c, p, t;

   // allow callers to pass NULL for outputs they do not care about
   if (!x) x = &dummy;
   if (!y) y = &dummy;
   if (!comp) comp = &dummy;

   stbi__rewind(s);

   // Get identifier
   p = (char) stbi__get8(s);
   t = (char) stbi__get8(s);
   if (p != 'P' || (t != '5' && t != '6')) {
       stbi__rewind(s);
       return 0;
   }

   *comp = (t == '6') ? 3 : 1;  // '5' is 1-component .pgm; '6' is 3-component .ppm

   c = (char) stbi__get8(s);
   stbi__pnm_skip_whitespace(s, &c);

   *x = stbi__pnm_getinteger(s, &c); // read width
   stbi__pnm_skip_whitespace(s, &c);

   *y = stbi__pnm_getinteger(s, &c); // read height
   stbi__pnm_skip_whitespace(s, &c);

   maxv = stbi__pnm_getinteger(s, &c);  // read max value

   if (maxv > 255) {
      // only 8-bit samples are handled; rewind like the bad-magic path above
      stbi__rewind(s);
      return stbi__err("max value > 255", "PPM image not 8-bit");
   }
   return 1;
}
7192 #endif
7193 
// Tries each compiled-in format's info probe in a fixed order and returns 1 as
// soon as one of them recognizes the stream (that probe has then filled in
// x, y and comp). If none does, records an "unknown image type" error and
// returns its result.
static int stbi__info_main(stbi__context *s, int *x, int *y, int *comp)
{
#if !defined(STBI_NO_JPEG)
   if (stbi__jpeg_info(s, x, y, comp)) { return 1; }
#endif

#if !defined(STBI_NO_PNG)
   if (stbi__png_info(s, x, y, comp)) { return 1; }
#endif

#if !defined(STBI_NO_GIF)
   if (stbi__gif_info(s, x, y, comp)) { return 1; }
#endif

#if !defined(STBI_NO_BMP)
   if (stbi__bmp_info(s, x, y, comp)) { return 1; }
#endif

#if !defined(STBI_NO_PSD)
   if (stbi__psd_info(s, x, y, comp)) { return 1; }
#endif

#if !defined(STBI_NO_PIC)
   if (stbi__pic_info(s, x, y, comp)) { return 1; }
#endif

#if !defined(STBI_NO_PNM)
   if (stbi__pnm_info(s, x, y, comp)) { return 1; }
#endif

#if !defined(STBI_NO_HDR)
   if (stbi__hdr_info(s, x, y, comp)) { return 1; }
#endif

   // TGA goes last: its test is weak, so every other format gets first refusal
#if !defined(STBI_NO_TGA)
   if (stbi__tga_info(s, x, y, comp)) { return 1; }
#endif

   return stbi__err("unknown image type", "Image not of any known type, or corrupt");
}
7235 
// Returns 1 if a compiled-in PNG or PSD probe reports the stream as 16-bit,
// otherwise 0. PNG is tried before PSD.
static int stbi__is_16_main(stbi__context *s)
{
#if !defined(STBI_NO_PNG)
   if (stbi__png_is16(s)) { return 1; }
#endif

#if !defined(STBI_NO_PSD)
   if (stbi__psd_is16(s)) { return 1; }
#endif

   return 0;
}
7248 
7249 #ifndef STBI_NO_STDIO
stbi_info(char const * filename,int * x,int * y,int * comp)7250 STBIDEF int stbi_info(char const *filename, int *x, int *y, int *comp)
7251 {
7252     FILE *f = stbi__fopen(filename, "rb");
7253     int result;
7254     if (!f) return stbi__err("can't fopen", "Unable to open file");
7255     result = stbi_info_from_file(f, x, y, comp);
7256     fclose(f);
7257     return result;
7258 }
7259 
stbi_info_from_file(FILE * f,int * x,int * y,int * comp)7260 STBIDEF int stbi_info_from_file(FILE *f, int *x, int *y, int *comp)
7261 {
7262    int r;
7263    stbi__context s;
7264    long pos = ftell(f);
7265    stbi__start_file(&s, f);
7266    r = stbi__info_main(&s,x,y,comp);
7267    fseek(f,pos,SEEK_SET);
7268    return r;
7269 }
7270 
stbi_is_16_bit(char const * filename)7271 STBIDEF int stbi_is_16_bit(char const *filename)
7272 {
7273     FILE *f = stbi__fopen(filename, "rb");
7274     int result;
7275     if (!f) return stbi__err("can't fopen", "Unable to open file");
7276     result = stbi_is_16_bit_from_file(f);
7277     fclose(f);
7278     return result;
7279 }
7280 
stbi_is_16_bit_from_file(FILE * f)7281 STBIDEF int stbi_is_16_bit_from_file(FILE *f)
7282 {
7283    int r;
7284    stbi__context s;
7285    long pos = ftell(f);
7286    stbi__start_file(&s, f);
7287    r = stbi__is_16_main(&s);
7288    fseek(f,pos,SEEK_SET);
7289    return r;
7290 }
7291 #endif // !STBI_NO_STDIO
7292 
stbi_info_from_memory(stbi_uc const * buffer,int len,int * x,int * y,int * comp)7293 STBIDEF int stbi_info_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp)
7294 {
7295    stbi__context s;
7296    stbi__start_mem(&s,buffer,len);
7297    return stbi__info_main(&s,x,y,comp);
7298 }
7299 
stbi_info_from_callbacks(stbi_io_callbacks const * c,void * user,int * x,int * y,int * comp)7300 STBIDEF int stbi_info_from_callbacks(stbi_io_callbacks const *c, void *user, int *x, int *y, int *comp)
7301 {
7302    stbi__context s;
7303    stbi__start_callbacks(&s, (stbi_io_callbacks *) c, user);
7304    return stbi__info_main(&s,x,y,comp);
7305 }
7306 
stbi_is_16_bit_from_memory(stbi_uc const * buffer,int len)7307 STBIDEF int stbi_is_16_bit_from_memory(stbi_uc const *buffer, int len)
7308 {
7309    stbi__context s;
7310    stbi__start_mem(&s,buffer,len);
7311    return stbi__is_16_main(&s);
7312 }
7313 
stbi_is_16_bit_from_callbacks(stbi_io_callbacks const * c,void * user)7314 STBIDEF int stbi_is_16_bit_from_callbacks(stbi_io_callbacks const *c, void *user)
7315 {
7316    stbi__context s;
7317    stbi__start_callbacks(&s, (stbi_io_callbacks *) c, user);
7318    return stbi__is_16_main(&s);
7319 }
7320 
7321 #endif // STB_IMAGE_IMPLEMENTATION
7322 
7323 /*
7324    revision history:
7325       2.20  (2019-02-07) support utf8 filenames in Windows; fix warnings and platform ifdefs
7326       2.19  (2018-02-11) fix warning
7327       2.18  (2018-01-30) fix warnings
      2.17  (2018-01-29) change stbi__shiftsigned to avoid clang -O2 bug
7329                          1-bit BMP
7330                          *_is_16_bit api
7331                          avoid warnings
7332       2.16  (2017-07-23) all functions have 16-bit variants;
7333                          STBI_NO_STDIO works again;
7334                          compilation fixes;
7335                          fix rounding in unpremultiply;
7336                          optimize vertical flip;
7337                          disable raw_len validation;
7338                          documentation fixes
7339       2.15  (2017-03-18) fix png-1,2,4 bug; now all Imagenet JPGs decode;
7340                          warning fixes; disable run-time SSE detection on gcc;
7341                          uniform handling of optional "return" values;
7342                          thread-safe initialization of zlib tables
7343       2.14  (2017-03-03) remove deprecated STBI_JPEG_OLD; fixes for Imagenet JPGs
7344       2.13  (2016-11-29) add 16-bit API, only supported for PNG right now
7345       2.12  (2016-04-02) fix typo in 2.11 PSD fix that caused crashes
7346       2.11  (2016-04-02) allocate large structures on the stack
7347                          remove white matting for transparent PSD
7348                          fix reported channel count for PNG & BMP
7349                          re-enable SSE2 in non-gcc 64-bit
7350                          support RGB-formatted JPEG
7351                          read 16-bit PNGs (only as 8-bit)
7352       2.10  (2016-01-22) avoid warning introduced in 2.09 by STBI_REALLOC_SIZED
7353       2.09  (2016-01-16) allow comments in PNM files
7354                          16-bit-per-pixel TGA (not bit-per-component)
7355                          info() for TGA could break due to .hdr handling
                         info() for BMP now shares code instead of sloppy parse
7357                          can use STBI_REALLOC_SIZED if allocator doesn't support realloc
7358                          code cleanup
7359       2.08  (2015-09-13) fix to 2.07 cleanup, reading RGB PSD as RGBA
7360       2.07  (2015-09-13) fix compiler warnings
7361                          partial animated GIF support
7362                          limited 16-bpc PSD support
7363                          #ifdef unused functions
7364                          bug with < 92 byte PIC,PNM,HDR,TGA
7365       2.06  (2015-04-19) fix bug where PSD returns wrong '*comp' value
7366       2.05  (2015-04-19) fix bug in progressive JPEG handling, fix warning
7367       2.04  (2015-04-15) try to re-enable SIMD on MinGW 64-bit
7368       2.03  (2015-04-12) extra corruption checking (mmozeiko)
7369                          stbi_set_flip_vertically_on_load (nguillemot)
7370                          fix NEON support; fix mingw support
7371       2.02  (2015-01-19) fix incorrect assert, fix warning
7372       2.01  (2015-01-17) fix various warnings; suppress SIMD on gcc 32-bit without -msse2
7373       2.00b (2014-12-25) fix STBI_MALLOC in progressive JPEG
7374       2.00  (2014-12-25) optimize JPG, including x86 SSE2 & NEON SIMD (ryg)
7375                          progressive JPEG (stb)
7376                          PGM/PPM support (Ken Miller)
7377                          STBI_MALLOC,STBI_REALLOC,STBI_FREE
7378                          GIF bugfix -- seemingly never worked
7379                          STBI_NO_*, STBI_ONLY_*
7380       1.48  (2014-12-14) fix incorrectly-named assert()
7381       1.47  (2014-12-14) 1/2/4-bit PNG support, both direct and paletted (Omar Cornut & stb)
7382                          optimize PNG (ryg)
7383                          fix bug in interlaced PNG with user-specified channel count (stb)
7384       1.46  (2014-08-26)
7385               fix broken tRNS chunk (colorkey-style transparency) in non-paletted PNG
7386       1.45  (2014-08-16)
7387               fix MSVC-ARM internal compiler error by wrapping malloc
7388       1.44  (2014-08-07)
7389               various warning fixes from Ronny Chevalier
7390       1.43  (2014-07-15)
7391               fix MSVC-only compiler problem in code changed in 1.42
7392       1.42  (2014-07-09)
7393               don't define _CRT_SECURE_NO_WARNINGS (affects user code)
7394               fixes to stbi__cleanup_jpeg path
7395               added STBI_ASSERT to avoid requiring assert.h
7396       1.41  (2014-06-25)
7397               fix search&replace from 1.36 that messed up comments/error messages
7398       1.40  (2014-06-22)
7399               fix gcc struct-initialization warning
7400       1.39  (2014-06-15)
7401               fix to TGA optimization when req_comp != number of components in TGA;
7402               fix to GIF loading because BMP wasn't rewinding (whoops, no GIFs in my test suite)
7403               add support for BMP version 5 (more ignored fields)
7404       1.38  (2014-06-06)
7405               suppress MSVC warnings on integer casts truncating values
7406               fix accidental rename of 'skip' field of I/O
7407       1.37  (2014-06-04)
7408               remove duplicate typedef
7409       1.36  (2014-06-03)
7410               convert to header file single-file library
7411               if de-iphone isn't set, load iphone images color-swapped instead of returning NULL
7412       1.35  (2014-05-27)
7413               various warnings
7414               fix broken STBI_SIMD path
7415               fix bug where stbi_load_from_file no longer left file pointer in correct place
7416               fix broken non-easy path for 32-bit BMP (possibly never used)
7417               TGA optimization by Arseny Kapoulkine
7418       1.34  (unknown)
7419               use STBI_NOTUSED in stbi__resample_row_generic(), fix one more leak in tga failure case
7420       1.33  (2011-07-14)
7421               make stbi_is_hdr work in STBI_NO_HDR (as specified), minor compiler-friendly improvements
7422       1.32  (2011-07-13)
7423               support for "info" function for all supported filetypes (SpartanJ)
7424       1.31  (2011-06-20)
7425               a few more leak fixes, bug in PNG handling (SpartanJ)
7426       1.30  (2011-06-11)
              added ability to load files via callbacks to accommodate custom input streams (Ben Wenger)
7428               removed deprecated format-specific test/load functions
7429               removed support for installable file formats (stbi_loader) -- would have been broken for IO callbacks anyway
7430               error cases in bmp and tga give messages and don't leak (Raymond Barbiero, grisha)
7431               fix inefficiency in decoding 32-bit BMP (David Woo)
7432       1.29  (2010-08-16)
7433               various warning fixes from Aurelien Pocheville
7434       1.28  (2010-08-01)
7435               fix bug in GIF palette transparency (SpartanJ)
7436       1.27  (2010-08-01)
7437               cast-to-stbi_uc to fix warnings
7438       1.26  (2010-07-24)
7439               fix bug in file buffering for PNG reported by SpartanJ
7440       1.25  (2010-07-17)
7441               refix trans_data warning (Won Chun)
7442       1.24  (2010-07-12)
7443               perf improvements reading from files on platforms with lock-heavy fgetc()
7444               minor perf improvements for jpeg
7445               deprecated type-specific functions so we'll get feedback if they're needed
7446               attempt to fix trans_data warning (Won Chun)
7447       1.23    fixed bug in iPhone support
7448       1.22  (2010-07-10)
7449               removed image *writing* support
7450               stbi_info support from Jetro Lauha
7451               GIF support from Jean-Marc Lienher
7452               iPhone PNG-extensions from James Brown
              warning-fixes from Nicolas Schulz and Janez Zemva (i.e. Janez (U+017D)emva)
7454       1.21    fix use of 'stbi_uc' in header (reported by jon blow)
7455       1.20    added support for Softimage PIC, by Tom Seddon
7456       1.19    bug in interlaced PNG corruption check (found by ryg)
7457       1.18  (2008-08-02)
7458               fix a threading bug (local mutable static)
7459       1.17    support interlaced PNG
7460       1.16    major bugfix - stbi__convert_format converted one too many pixels
7461       1.15    initialize some fields for thread safety
7462       1.14    fix threadsafe conversion bug
7463               header-file-only version (#define STBI_HEADER_FILE_ONLY before including)
7464       1.13    threadsafe
7465       1.12    const qualifiers in the API
7466       1.11    Support installable IDCT, colorspace conversion routines
7467       1.10    Fixes for 64-bit (don't use "unsigned long")
7468               optimized upsampling by Fabian "ryg" Giesen
7469       1.09    Fix format-conversion for PSD code (bad global variables!)
7470       1.08    Thatcher Ulrich's PSD code integrated by Nicolas Schulz
7471       1.07    attempt to fix C++ warning/errors again
7472       1.06    attempt to fix C++ warning/errors again
7473       1.05    fix TGA loading to return correct *comp and use good luminance calc
7474       1.04    default float alpha is 1, not 255; use 'void *' for stbi_image_free
7475       1.03    bugfixes to STBI_NO_STDIO, STBI_NO_HDR
7476       1.02    support for (subset of) HDR files, float interface for preferred access to them
7477       1.01    fix bug: possible bug in handling right-side up bmps... not sure
7478               fix bug: the stbi__bmp_load() and stbi__tga_load() functions didn't work at all
7479       1.00    interface to zlib that skips zlib header
7480       0.99    correct handling of alpha in palette
7481       0.98    TGA loader by lonesock; dynamically add loaders (untested)
7482       0.97    jpeg errors on too large a file; also catch another malloc failure
7483       0.96    fix detection of invalid v value - particleman@mollyrocket forum
7484       0.95    during header scan, seek to markers in case of padding
7485       0.94    STBI_NO_STDIO to disable stdio usage; rename all #defines the same
7486       0.93    handle jpegtran output; verbose errors
7487       0.92    read 4,8,16,24,32-bit BMP files of several formats
7488       0.91    output 24-bit Windows 3.0 BMP files
7489       0.90    fix a few more warnings; bump version number to approach 1.0
7490       0.61    bugfixes due to Marc LeBlanc, Christopher Lloyd
7491       0.60    fix compiling as c++
7492       0.59    fix warnings: merge Dave Moore's -Wall fixes
7493       0.58    fix bug: zlib uncompressed mode len/nlen was wrong endian
7494       0.57    fix bug: jpg last huffman symbol before marker was >9 bits but less than 16 available
7495       0.56    fix bug: zlib uncompressed mode len vs. nlen
7496       0.55    fix bug: restart_interval not initialized to 0
7497       0.54    allow NULL for 'int *comp'
7498       0.53    fix bug in png 3->4; speedup png decoding
7499       0.52    png handles req_comp=3,4 directly; minor cleanup; jpeg comments
7500       0.51    obey req_comp requests, 1-component jpegs return as 1-component,
7501               on 'test' only check type, not whether we support this variant
7502       0.50  (2006-11-19)
7503               first released version
7504 */
7505 
7506 
7507 /*
7508 ------------------------------------------------------------------------------
7509 This software is available under 2 licenses -- choose whichever you prefer.
7510 ------------------------------------------------------------------------------
7511 ALTERNATIVE A - MIT License
7512 Copyright (c) 2017 Sean Barrett
7513 Permission is hereby granted, free of charge, to any person obtaining a copy of
7514 this software and associated documentation files (the "Software"), to deal in
7515 the Software without restriction, including without limitation the rights to
7516 use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
7517 of the Software, and to permit persons to whom the Software is furnished to do
7518 so, subject to the following conditions:
7519 The above copyright notice and this permission notice shall be included in all
7520 copies or substantial portions of the Software.
7521 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
7522 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
7523 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
7524 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
7525 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
7526 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
7527 SOFTWARE.
7528 ------------------------------------------------------------------------------
7529 ALTERNATIVE B - Public Domain (www.unlicense.org)
7530 This is free and unencumbered software released into the public domain.
7531 Anyone is free to copy, modify, publish, use, compile, sell, or distribute this
7532 software, either in source code form or as a compiled binary, for any purpose,
7533 commercial or non-commercial, and by any means.
7534 In jurisdictions that recognize copyright laws, the author or authors of this
7535 software dedicate any and all copyright interest in the software to the public
7536 domain. We make this dedication for the benefit of the public at large and to
7537 the detriment of our heirs and successors. We intend this dedication to be an
7538 overt act of relinquishment in perpetuity of all present and future rights to
7539 this software under copyright law.
7540 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
7541 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
7542 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
7543 AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
7544 ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
7545 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
7546 ------------------------------------------------------------------------------
7547 */
7548