1 #if defined(__has_warning)
2 #	if __has_warning("-Wcomma")
3 #		pragma GCC diagnostic ignored "-Wcomma" // Possible misuse of comma operator here
4 #	endif
5 #endif
6 
7 #if defined(_MSC_VER)
8 #	pragma warning(disable:4244) // warning C4244: '=': conversion from 'int' to 'stbi__uint16', possible loss of data
9 #	pragma warning(disable:4245) // warning C4245: 'argument': conversion from 'int' to 'char', signed/unsigned mismatch
10 #	pragma warning(disable:4456) // warning C4456: declaration of 'k' hides previous local declaration
11 #endif
12 
13 /* stb_image - v2.15 - public domain image loader - http://nothings.org/stb_image.h
14                                      no warranty implied; use at your own risk
15 
16    Do this:
17       #define STB_IMAGE_IMPLEMENTATION
18    before you include this file in *one* C or C++ file to create the implementation.
19 
20    // i.e. it should look like this:
21    #include ...
22    #include ...
23    #include ...
24    #define STB_IMAGE_IMPLEMENTATION
25    #include "stb_image.h"
26 
27    You can #define STBI_ASSERT(x) before the #include to avoid using assert.h.
28    And #define STBI_MALLOC, STBI_REALLOC, and STBI_FREE to avoid using malloc,realloc,free
29 
30 
31    QUICK NOTES:
32       Primarily of interest to game developers and other people who can
33           avoid problematic images and only need the trivial interface
34 
35       JPEG baseline & progressive (12 bpc/arithmetic not supported, same as stock IJG lib)
36       PNG 1/2/4/8/16-bit-per-channel
37 
38       TGA (not sure what subset, if a subset)
39       BMP non-1bpp, non-RLE
40       PSD (composited view only, no extra channels, 8/16 bit-per-channel)
41 
42       GIF (*comp always reports as 4-channel)
43       HDR (radiance rgbE format)
44       PIC (Softimage PIC)
45       PNM (PPM and PGM binary only)
46 
47       Animated GIF still needs a proper API, but here's one way to do it:
48           http://gist.github.com/urraka/685d9a6340b26b830d49
49 
50       - decode from memory or through FILE (define STBI_NO_STDIO to remove code)
51       - decode from arbitrary I/O callbacks
52       - SIMD acceleration on x86/x64 (SSE2) and ARM (NEON)
53 
54    Full documentation under "DOCUMENTATION" below.
55 
56 
57 LICENSE
58 
59   See end of file for license information.
60 
61 RECENT REVISION HISTORY:
62 
63       2.15  (2017-03-18) fix png-1,2,4; all Imagenet JPGs; no runtime SSE detection on GCC
64       2.14  (2017-03-03) remove deprecated STBI_JPEG_OLD; fixes for Imagenet JPGs
65       2.13  (2016-12-04) experimental 16-bit API, only for PNG so far; fixes
66       2.12  (2016-04-02) fix typo in 2.11 PSD fix that caused crashes
67       2.11  (2016-04-02) 16-bit PNGS; enable SSE2 in non-gcc x64
68                          RGB-format JPEG; remove white matting in PSD;
69                          allocate large structures on the stack;
70                          correct channel count for PNG & BMP
71       2.10  (2016-01-22) avoid warning introduced in 2.09
72       2.09  (2016-01-16) 16-bit TGA; comments in PNM files; STBI_REALLOC_SIZED
73       2.08  (2015-09-13) fix to 2.07 cleanup, reading RGB PSD as RGBA
74       2.07  (2015-09-13) partial animated GIF support
75                          limited 16-bit PSD support
76                          minor bugs, code cleanup, and compiler warnings
77 
78    See end of file for full revision history.
79 
80 
81  ============================    Contributors    =========================
82 
83  Image formats                          Extensions, features
84     Sean Barrett (jpeg, png, bmp)          Jetro Lauha (stbi_info)
85     Nicolas Schulz (hdr, psd)              Martin "SpartanJ" Golini (stbi_info)
86     Jonathan Dummer (tga)                  James "moose2000" Brown (iPhone PNG)
87     Jean-Marc Lienher (gif)                Ben "Disch" Wenger (io callbacks)
88     Tom Seddon (pic)                       Omar Cornut (1/2/4-bit PNG)
89     Thatcher Ulrich (psd)                  Nicolas Guillemot (vertical flip)
90     Ken Miller (pgm, ppm)                  Richard Mitton (16-bit PSD)
91     github:urraka (animated gif)           Junggon Kim (PNM comments)
92                                            Daniel Gibson (16-bit TGA)
93                                            socks-the-fox (16-bit PNG)
94                                            Jeremy Sawicki (handle all ImageNet JPGs)
95  Optimizations & bugfixes
96     Fabian "ryg" Giesen
97     Arseny Kapoulkine
98 
99  Bug & warning fixes
100     Marc LeBlanc            David Woo          Guillaume George   Martins Mozeiko
101     Christpher Lloyd        Jerry Jansson      Joseph Thomson     Phil Jordan
102     Dave Moore              Roy Eltham         Hayaki Saito       Nathan Reed
103     Won Chun                Luke Graham        Johan Duparc       Nick Verigakis
104     the Horde3D community   Thomas Ruf         Ronny Chevalier    Baldur Karlsson
105     Janez Zemva             John Bartholomew   Michal Cichon      github:rlyeh
106     Jonathan Blow           Ken Hamada         Tero Hanninen      github:romigrou
107     Laurent Gomila          Cort Stratton      Sergio Gonzalez    github:svdijk
108     Aruelien Pocheville     Thibault Reuille   Cass Everitt       github:snagar
109     Ryamond Barbiero        Paul Du Bois       Engin Manap        github:Zelex
110     Michaelangel007@github  Philipp Wiesemann  Dale Weiler        github:grim210
111     Oriol Ferrer Mesia      Josh Tobin         Matthew Gregan     github:sammyhw
112     Blazej Dariusz Roszkowski                  Gregory Mullen     github:phprus
113 
114 */
115 
116 #ifndef STBI_INCLUDE_STB_IMAGE_H
117 #define STBI_INCLUDE_STB_IMAGE_H
118 
119 // DOCUMENTATION
120 //
121 // Limitations:
//    - 16-bit-per-channel output only through the experimental 16-bit interface (PNG only so far)
123 //    - no 12-bit-per-channel JPEG
124 //    - no JPEGs with arithmetic coding
125 //    - no 1-bit BMP
126 //    - GIF always returns *comp=4
127 //
128 // Basic usage (see HDR discussion below for HDR usage):
129 //    int x,y,n;
130 //    unsigned char *data = stbi_load(filename, &x, &y, &n, 0);
131 //    // ... process data if not NULL ...
132 //    // ... x = width, y = height, n = # 8-bit components per pixel ...
133 //    // ... replace '0' with '1'..'4' to force that many components per pixel
134 //    // ... but 'n' will always be the number that it would have been if you said 0
135 //    stbi_image_free(data)
136 //
137 // Standard parameters:
138 //    int *x                 -- outputs image width in pixels
139 //    int *y                 -- outputs image height in pixels
140 //    int *channels_in_file  -- outputs # of image components in image file
141 //    int desired_channels   -- if non-zero, # of image components requested in result
142 //
143 // The return value from an image loader is an 'unsigned char *' which points
144 // to the pixel data, or NULL on an allocation failure or if the image is
145 // corrupt or invalid. The pixel data consists of *y scanlines of *x pixels,
146 // with each pixel consisting of N interleaved 8-bit components; the first
147 // pixel pointed to is top-left-most in the image. There is no padding between
148 // image scanlines or between pixels, regardless of format. The number of
// components N is 'desired_channels' if desired_channels is non-zero, or
// *channels_in_file otherwise. If desired_channels is non-zero,
// *channels_in_file has the number of components that _would_ have been
// output otherwise. E.g. if you set desired_channels to 4, you will always
// get RGBA output, but you can check *channels_in_file to see if it's
// trivially opaque because e.g. there were only 3 channels in the source image.
154 //
155 // An output image with N components has the following components interleaved
156 // in this order in each pixel:
157 //
158 //     N=#comp     components
159 //       1           grey
160 //       2           grey, alpha
161 //       3           red, green, blue
162 //       4           red, green, blue, alpha
163 //
164 // If image loading fails for any reason, the return value will be NULL,
// and *x, *y, *channels_in_file will be unchanged. The function stbi_failure_reason()
166 // can be queried for an extremely brief, end-user unfriendly explanation
167 // of why the load failed. Define STBI_NO_FAILURE_STRINGS to avoid
168 // compiling these strings at all, and STBI_FAILURE_USERMSG to get slightly
169 // more user-friendly ones.
170 //
171 // Paletted PNG, BMP, GIF, and PIC images are automatically depalettized.
172 //
173 // ===========================================================================
174 //
175 // Philosophy
176 //
177 // stb libraries are designed with the following priorities:
178 //
179 //    1. easy to use
180 //    2. easy to maintain
181 //    3. good performance
182 //
183 // Sometimes I let "good performance" creep up in priority over "easy to maintain",
184 // and for best performance I may provide less-easy-to-use APIs that give higher
185 // performance, in addition to the easy to use ones. Nevertheless, it's important
186 // to keep in mind that from the standpoint of you, a client of this library,
187 // all you care about is #1 and #3, and stb libraries DO NOT emphasize #3 above all.
188 //
189 // Some secondary priorities arise directly from the first two, some of which
190 // make more explicit reasons why performance can't be emphasized.
191 //
192 //    - Portable ("ease of use")
193 //    - Small source code footprint ("easy to maintain")
194 //    - No dependencies ("ease of use")
195 //
196 // ===========================================================================
197 //
198 // I/O callbacks
199 //
200 // I/O callbacks allow you to read from arbitrary sources, like packaged
201 // files or some other source. Data read from callbacks are processed
202 // through a small internal buffer (currently 128 bytes) to try to reduce
203 // overhead.
204 //
205 // The three functions you must define are "read" (reads some bytes of data),
206 // "skip" (skips some bytes of data), "eof" (reports if the stream is at the end).
207 //
208 // ===========================================================================
209 //
210 // SIMD support
211 //
212 // The JPEG decoder will try to automatically use SIMD kernels on x86 when
213 // supported by the compiler. For ARM Neon support, you must explicitly
214 // request it.
215 //
216 // (The old do-it-yourself SIMD API is no longer supported in the current
217 // code.)
218 //
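// On x86 with Visual C++, SSE2 will automatically be used when available based
// on a run-time test; if not, the generic C versions are used as a fall-back.
// With GCC/Clang there is no run-time test: SSE2 is used only when the compiler
// is already targeting it (e.g. built with -msse2). On ARM targets,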
221 // the typical path is to have separate builds for NEON and non-NEON devices
222 // (at least this is true for iOS and Android). Therefore, the NEON support is
223 // toggled by a build flag: define STBI_NEON to get NEON loops.
224 //
225 // If for some reason you do not want to use any of SIMD code, or if
226 // you have issues compiling it, you can disable it entirely by
227 // defining STBI_NO_SIMD.
228 //
229 // ===========================================================================
230 //
231 // HDR image support   (disable by defining STBI_NO_HDR)
232 //
233 // stb_image now supports loading HDR images in general, and currently
234 // the Radiance .HDR file format, although the support is provided
235 // generically. You can still load any file through the existing interface;
236 // if you attempt to load an HDR file, it will be automatically remapped to
237 // LDR, assuming gamma 2.2 and an arbitrary scale factor defaulting to 1;
238 // both of these constants can be reconfigured through this interface:
239 //
240 //     stbi_hdr_to_ldr_gamma(2.2f);
241 //     stbi_hdr_to_ldr_scale(1.0f);
242 //
243 // (note, do not use _inverse_ constants; stbi_image will invert them
244 // appropriately).
245 //
246 // Additionally, there is a new, parallel interface for loading files as
247 // (linear) floats to preserve the full dynamic range:
248 //
249 //    float *data = stbi_loadf(filename, &x, &y, &n, 0);
250 //
251 // If you load LDR images through this interface, those images will
252 // be promoted to floating point values, run through the inverse of
253 // constants corresponding to the above:
254 //
255 //     stbi_ldr_to_hdr_scale(1.0f);
256 //     stbi_ldr_to_hdr_gamma(2.2f);
257 //
258 // Finally, given a filename (or an open file or memory block--see header
259 // file for details) containing image data, you can query for the "most
260 // appropriate" interface to use (that is, whether the image is HDR or
261 // not), using:
262 //
263 //     stbi_is_hdr(char *filename);
264 //
265 // ===========================================================================
266 //
267 // iPhone PNG support:
268 //
269 // By default we convert iphone-formatted PNGs back to RGB, even though
270 // they are internally encoded differently. You can disable this conversion
// by calling stbi_convert_iphone_png_to_rgb(0), in which case
272 // you will always just get the native iphone "format" through (which
273 // is BGR stored in RGB).
274 //
275 // Call stbi_set_unpremultiply_on_load(1) as well to force a divide per
276 // pixel to remove any premultiplied alpha *only* if the image file explicitly
277 // says there's premultiplied data (currently only happens in iPhone images,
278 // and only if iPhone convert-to-rgb processing is on).
279 //
280 // ===========================================================================
281 //
282 // ADDITIONAL CONFIGURATION
283 //
284 //  - You can suppress implementation of any of the decoders to reduce
285 //    your code footprint by #defining one or more of the following
286 //    symbols before creating the implementation.
287 //
288 //        STBI_NO_JPEG
289 //        STBI_NO_PNG
290 //        STBI_NO_BMP
291 //        STBI_NO_PSD
292 //        STBI_NO_TGA
293 //        STBI_NO_GIF
294 //        STBI_NO_HDR
295 //        STBI_NO_PIC
296 //        STBI_NO_PNM   (.ppm and .pgm)
297 //
298 //  - You can request *only* certain decoders and suppress all other ones
299 //    (this will be more forward-compatible, as addition of new decoders
300 //    doesn't require you to disable them explicitly):
301 //
302 //        STBI_ONLY_JPEG
303 //        STBI_ONLY_PNG
304 //        STBI_ONLY_BMP
305 //        STBI_ONLY_PSD
306 //        STBI_ONLY_TGA
307 //        STBI_ONLY_GIF
308 //        STBI_ONLY_HDR
309 //        STBI_ONLY_PIC
310 //        STBI_ONLY_PNM   (.ppm and .pgm)
311 //
312 //   - If you use STBI_NO_PNG (or _ONLY_ without PNG), and you still
313 //     want the zlib decoder to be available, #define STBI_SUPPORT_ZLIB
314 //
315 
316 
317 #ifndef STBI_NO_STDIO
318 #include <stdio.h>
319 #endif // STBI_NO_STDIO
320 
321 #define STBI_VERSION 1
322 
// Channel-count constants; each value equals the number of interleaved
// components per pixel listed in the "N=#comp" table in the documentation.
enum
{
   STBI_default = 0, // only used for req_comp

   STBI_grey       = 1, // grey
   STBI_grey_alpha = 2, // grey, alpha
   STBI_rgb        = 3, // red, green, blue
   STBI_rgb_alpha  = 4  // red, green, blue, alpha
};
332 
333 typedef unsigned char stbi_uc;
334 typedef unsigned short stbi_us;
335 
336 #ifdef __cplusplus
337 extern "C" {
338 #endif
339 
340 #ifdef STB_IMAGE_STATIC
341 #define STBIDEF static
342 #else
343 #define STBIDEF extern
344 #endif
345 
346 //////////////////////////////////////////////////////////////////////////////
347 //
348 // PRIMARY API - works on images of any type
349 //
350 
351 //
352 // load image by filename, open file, or memory buffer
353 //
354 
// User-supplied I/O hooks for decoding from an arbitrary data source.
// Every hook receives the opaque 'user' pointer that is passed alongside
// this struct (see stbi_load_from_callbacks).
typedef struct
{
   int      (*read)  (void *user,char *data,int size);   // fill 'data' with 'size' bytes.  return number of bytes actually read
   void     (*skip)  (void *user,int n);                 // skip the next 'n' bytes, or 'unget' the last -n bytes if negative
   int      (*eof)   (void *user);                       // returns nonzero if we are at end of file/data
} stbi_io_callbacks;
361 
362 ////////////////////////////////////
363 //
364 // 8-bits-per-channel interface
365 //
366 
367 STBIDEF stbi_uc *stbi_load               (char              const *filename,           int *x, int *y, int *channels_in_file, int desired_channels);
368 STBIDEF stbi_uc *stbi_load_from_memory   (stbi_uc           const *buffer, int len   , int *x, int *y, int *channels_in_file, int desired_channels);
369 STBIDEF stbi_uc *stbi_load_from_callbacks(stbi_io_callbacks const *clbk  , void *user, int *x, int *y, int *channels_in_file, int desired_channels);
370 
371 #ifndef STBI_NO_STDIO
372 STBIDEF stbi_uc *stbi_load_from_file   (FILE *f, int *x, int *y, int *channels_in_file, int desired_channels);
373 // for stbi_load_from_file, file pointer is left pointing immediately after image
374 #endif
375 
376 ////////////////////////////////////
377 //
378 // 16-bits-per-channel interface
379 //
380 
381 STBIDEF stbi_us *stbi_load_16(char const *filename, int *x, int *y, int *channels_in_file, int desired_channels);
382 #ifndef STBI_NO_STDIO
383 STBIDEF stbi_us *stbi_load_from_file_16(FILE *f, int *x, int *y, int *channels_in_file, int desired_channels);
384 #endif
385 // @TODO the other variants
386 
387 ////////////////////////////////////
388 //
389 // float-per-channel interface
390 //
391 #ifndef STBI_NO_LINEAR
392    STBIDEF float *stbi_loadf                 (char const *filename,           int *x, int *y, int *channels_in_file, int desired_channels);
393    STBIDEF float *stbi_loadf_from_memory     (stbi_uc const *buffer, int len, int *x, int *y, int *channels_in_file, int desired_channels);
394    STBIDEF float *stbi_loadf_from_callbacks  (stbi_io_callbacks const *clbk, void *user, int *x, int *y,  int *channels_in_file, int desired_channels);
395 
396    #ifndef STBI_NO_STDIO
397    STBIDEF float *stbi_loadf_from_file  (FILE *f, int *x, int *y, int *channels_in_file, int desired_channels);
398    #endif
399 #endif
400 
401 #ifndef STBI_NO_HDR
402    STBIDEF void   stbi_hdr_to_ldr_gamma(float gamma);
403    STBIDEF void   stbi_hdr_to_ldr_scale(float scale);
404 #endif // STBI_NO_HDR
405 
406 #ifndef STBI_NO_LINEAR
407    STBIDEF void   stbi_ldr_to_hdr_gamma(float gamma);
408    STBIDEF void   stbi_ldr_to_hdr_scale(float scale);
409 #endif // STBI_NO_LINEAR
410 
411 // stbi_is_hdr is always defined, but always returns false if STBI_NO_HDR
412 STBIDEF int    stbi_is_hdr_from_callbacks(stbi_io_callbacks const *clbk, void *user);
413 STBIDEF int    stbi_is_hdr_from_memory(stbi_uc const *buffer, int len);
414 #ifndef STBI_NO_STDIO
415 STBIDEF int      stbi_is_hdr          (char const *filename);
416 STBIDEF int      stbi_is_hdr_from_file(FILE *f);
417 #endif // STBI_NO_STDIO
418 
419 
420 // get a VERY brief reason for failure
421 // NOT THREADSAFE
422 STBIDEF const char *stbi_failure_reason  (void);
423 
424 // free the loaded image -- this is just free()
425 STBIDEF void     stbi_image_free      (void *retval_from_stbi_load);
426 
427 // get image dimensions & components without fully decoding
428 STBIDEF int      stbi_info_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp);
429 STBIDEF int      stbi_info_from_callbacks(stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *comp);
430 
431 #ifndef STBI_NO_STDIO
432 STBIDEF int      stbi_info            (char const *filename,     int *x, int *y, int *comp);
433 STBIDEF int      stbi_info_from_file  (FILE *f,                  int *x, int *y, int *comp);
434 
435 #endif
436 
437 
438 
439 // for image formats that explicitly notate that they have premultiplied alpha,
440 // we just return the colors as stored in the file. set this flag to force
// unpremultiplication. results are undefined if the unpremultiply overflows.
442 STBIDEF void stbi_set_unpremultiply_on_load(int flag_true_if_should_unpremultiply);
443 
444 // indicate whether we should process iphone images back to canonical format,
445 // or just pass them through "as-is"
446 STBIDEF void stbi_convert_iphone_png_to_rgb(int flag_true_if_should_convert);
447 
448 // flip the image vertically, so the first pixel in the output array is the bottom left
449 STBIDEF void stbi_set_flip_vertically_on_load(int flag_true_if_should_flip);
450 
451 // ZLIB client - used by PNG, available for other purposes
452 
453 STBIDEF char *stbi_zlib_decode_malloc_guesssize(const char *buffer, int len, int initial_size, int *outlen);
454 STBIDEF char *stbi_zlib_decode_malloc_guesssize_headerflag(const char *buffer, int len, int initial_size, int *outlen, int parse_header);
455 STBIDEF char *stbi_zlib_decode_malloc(const char *buffer, int len, int *outlen);
456 STBIDEF int   stbi_zlib_decode_buffer(char *obuffer, int olen, const char *ibuffer, int ilen);
457 
458 STBIDEF char *stbi_zlib_decode_noheader_malloc(const char *buffer, int len, int *outlen);
459 STBIDEF int   stbi_zlib_decode_noheader_buffer(char *obuffer, int olen, const char *ibuffer, int ilen);
460 
461 
462 #ifdef __cplusplus
463 }
464 #endif
465 
466 //
467 //
468 ////   end header file   /////////////////////////////////////////////////////
469 #endif // STBI_INCLUDE_STB_IMAGE_H
470 
471 #ifdef STB_IMAGE_IMPLEMENTATION
472 
473 #if defined(STBI_ONLY_JPEG) || defined(STBI_ONLY_PNG) || defined(STBI_ONLY_BMP) \
474   || defined(STBI_ONLY_TGA) || defined(STBI_ONLY_GIF) || defined(STBI_ONLY_PSD) \
475   || defined(STBI_ONLY_HDR) || defined(STBI_ONLY_PIC) || defined(STBI_ONLY_PNM) \
476   || defined(STBI_ONLY_ZLIB)
477    #ifndef STBI_ONLY_JPEG
478    #define STBI_NO_JPEG
479    #endif
480    #ifndef STBI_ONLY_PNG
481    #define STBI_NO_PNG
482    #endif
483    #ifndef STBI_ONLY_BMP
484    #define STBI_NO_BMP
485    #endif
486    #ifndef STBI_ONLY_PSD
487    #define STBI_NO_PSD
488    #endif
489    #ifndef STBI_ONLY_TGA
490    #define STBI_NO_TGA
491    #endif
492    #ifndef STBI_ONLY_GIF
493    #define STBI_NO_GIF
494    #endif
495    #ifndef STBI_ONLY_HDR
496    #define STBI_NO_HDR
497    #endif
498    #ifndef STBI_ONLY_PIC
499    #define STBI_NO_PIC
500    #endif
501    #ifndef STBI_ONLY_PNM
502    #define STBI_NO_PNM
503    #endif
504 #endif
505 
// zlib is used by the PNG decoder, so when PNG is compiled out the zlib
// decoder is dropped too -- unless the client asked to keep it, either with
// STBI_SUPPORT_ZLIB or with STBI_ONLY_ZLIB. (STBI_ONLY_ZLIB implies
// STBI_NO_PNG above, so without this exception it would disable the very
// decoder it requests.)
#if defined(STBI_NO_PNG) && !defined(STBI_SUPPORT_ZLIB) && !defined(STBI_ONLY_ZLIB) && !defined(STBI_NO_ZLIB)
#define STBI_NO_ZLIB
#endif
509 
510 
511 #include <stdarg.h>
512 #include <stddef.h> // ptrdiff_t on osx
513 #include <stdlib.h>
514 #include <string.h>
515 #include <limits.h>
516 
517 #if !defined(STBI_NO_LINEAR) || !defined(STBI_NO_HDR)
518 #include <math.h>  // ldexp
519 #endif
520 
521 #ifndef STBI_NO_STDIO
522 #include <stdio.h>
523 #endif
524 
525 #ifndef STBI_ASSERT
526 #include <assert.h>
527 #define STBI_ASSERT(x) assert(x)
528 #endif
529 
530 
531 #ifndef _MSC_VER
532    #ifdef __cplusplus
533    #define stbi_inline inline
534    #else
535    #define stbi_inline
536    #endif
537 #else
538    #define stbi_inline __forceinline
539 #endif
540 
541 
542 #ifdef _MSC_VER
543 typedef unsigned short stbi__uint16;
544 typedef   signed short stbi__int16;
545 typedef unsigned int   stbi__uint32;
546 typedef   signed int   stbi__int32;
547 #else
548 #include <stdint.h>
549 typedef uint16_t stbi__uint16;
550 typedef int16_t  stbi__int16;
551 typedef uint32_t stbi__uint32;
552 typedef int32_t  stbi__int32;
553 #endif
554 
555 // should produce compiler error if size is wrong
556 typedef unsigned char validate_uint32[sizeof(stbi__uint32)==4 ? 1 : -1];
557 
558 #ifdef _MSC_VER
559 #define STBI_NOTUSED(v)  (void)(v)
560 #else
561 #define STBI_NOTUSED(v)  (void)sizeof(v)
562 #endif
563 
564 #ifdef _MSC_VER
565 #define STBI_HAS_LROTL
566 #endif
567 
// stbi_lrot(x,y): rotate the 32-bit unsigned value x left by y bits.
#ifdef STBI_HAS_LROTL
   #define stbi_lrot(x,y)  _lrotl(x,y)
#else
   // Portable fallback. Both shift counts are masked to 0..31 so that a
   // rotate by 0 (or 32) never shifts by the full width of the type, which
   // would be undefined behaviour; results for counts 1..31 are unchanged.
   // x must be a 32-bit unsigned value.
   #define stbi_lrot(x,y)  (((x) << ((y) & 31)) | ((x) >> ((32 - (y)) & 31)))
#endif
573 
574 #if defined(STBI_MALLOC) && defined(STBI_FREE) && (defined(STBI_REALLOC) || defined(STBI_REALLOC_SIZED))
575 // ok
576 #elif !defined(STBI_MALLOC) && !defined(STBI_FREE) && !defined(STBI_REALLOC) && !defined(STBI_REALLOC_SIZED)
577 // ok
578 #else
579 #error "Must define all or none of STBI_MALLOC, STBI_FREE, and STBI_REALLOC (or STBI_REALLOC_SIZED)."
580 #endif
581 
582 #ifndef STBI_MALLOC
583 #define STBI_MALLOC(sz)           malloc(sz)
584 #define STBI_REALLOC(p,newsz)     realloc(p,newsz)
585 #define STBI_FREE(p)              free(p)
586 #endif
587 
588 #ifndef STBI_REALLOC_SIZED
589 #define STBI_REALLOC_SIZED(p,oldsz,newsz) STBI_REALLOC(p,newsz)
590 #endif
591 
592 // x86/x64 detection
593 #if defined(__x86_64__) || defined(_M_X64)
594 #define STBI__X64_TARGET
595 #elif defined(__i386) || defined(_M_IX86)
596 #define STBI__X86_TARGET
597 #endif
598 
599 #if defined(__GNUC__) && defined(STBI__X86_TARGET) && !defined(__SSE2__) && !defined(STBI_NO_SIMD)
600 // gcc doesn't support sse2 intrinsics unless you compile with -msse2,
601 // which in turn means it gets to use SSE2 everywhere. This is unfortunate,
602 // but previous attempts to provide the SSE2 functions with runtime
603 // detection caused numerous issues. The way architecture extensions are
604 // exposed in GCC/Clang is, sadly, not really suited for one-file libs.
605 // New behavior: if compiled with -msse2, we use SSE2 without any
606 // detection; if not, we don't use it at all.
607 #define STBI_NO_SIMD
608 #endif
609 
610 #if defined(__MINGW32__) && defined(STBI__X86_TARGET) && !defined(STBI_MINGW_ENABLE_SSE2) && !defined(STBI_NO_SIMD)
611 // Note that __MINGW32__ doesn't actually mean 32-bit, so we have to avoid STBI__X64_TARGET
612 //
613 // 32-bit MinGW wants ESP to be 16-byte aligned, but this is not in the
614 // Windows ABI and VC++ as well as Windows DLLs don't maintain that invariant.
615 // As a result, enabling SSE2 on 32-bit MinGW is dangerous when not
616 // simultaneously enabling "-mstackrealign".
617 //
618 // See https://github.com/nothings/stb/issues/81 for more information.
619 //
620 // So default to no SSE2 on 32-bit MinGW. If you've read this far and added
621 // -mstackrealign to your build settings, feel free to #define STBI_MINGW_ENABLE_SSE2.
622 #define STBI_NO_SIMD
623 #endif
624 
625 #if !defined(STBI_NO_SIMD) && (defined(STBI__X86_TARGET) || defined(STBI__X64_TARGET))
626 #define STBI_SSE2
627 #include <emmintrin.h>
628 
629 #ifdef _MSC_VER
630 
631 #if _MSC_VER >= 1400  // not VC6
632 #include <intrin.h> // __cpuid
// Return the EDX feature flags of CPUID leaf 1, using the MSVC __cpuid
// intrinsic from <intrin.h> (this branch is taken for _MSC_VER >= 1400).
static int stbi__cpuid3(void)
{
   int info[4];
   __cpuid(info,1);
   return info[3]; // fourth output slot is EDX
}
639 #else
stbi__cpuid3(void)640 static int stbi__cpuid3(void)
641 {
642    int res;
643    __asm {
644       mov  eax,1
645       cpuid
646       mov  res,edx
647    }
648    return res;
649 }
650 #endif
651 
652 #define STBI_SIMD_ALIGN(type, name) __declspec(align(16)) type name
653 
// Run-time SSE2 check for MSVC builds.
// Returns 1 if bit 26 (the SSE2 feature bit) of the CPUID leaf-1 EDX flags
// is set, 0 otherwise.
static int stbi__sse2_available(void)
{
   int info3 = stbi__cpuid3();
   return ((info3 >> 26) & 1) != 0;
}
659 #else // assume GCC-style if not VC++
660 #define STBI_SIMD_ALIGN(type, name) type name __attribute__((aligned(16)))
661 
// SSE2 check for GCC-style compilers: there is no run-time detection here,
// the function always returns 1.
static int stbi__sse2_available(void)
{
   // If we're even attempting to compile this on GCC/Clang, that means
   // -msse2 is on, which means the compiler is allowed to use SSE2
   // instructions at will, and so are we.
   return 1;
}
669 #endif
670 #endif
671 
672 // ARM NEON
673 #if defined(STBI_NO_SIMD) && defined(STBI_NEON)
674 #undef STBI_NEON
675 #endif
676 
677 #ifdef STBI_NEON
678 #include <arm_neon.h>
679 // assume GCC or Clang on ARM targets
680 #define STBI_SIMD_ALIGN(type, name) type name __attribute__((aligned(16)))
681 #endif
682 
683 #ifndef STBI_SIMD_ALIGN
684 #define STBI_SIMD_ALIGN(type, name) type name
685 #endif
686 
687 ///////////////////////////////////////////////
688 //
689 //  stbi__context struct and start_xxx functions
690 
691 // stbi__context structure is our basic context used by all images, so it
692 // contains all the IO context, plus some basic image information
typedef struct
{
   stbi__uint32 img_x, img_y;   // basic image information; presumably width/height in pixels (set by the decoders -- confirm there)
   int img_n, img_out_n;        // presumably channel counts of the source / of the output (set by the decoders -- confirm there)

   stbi_io_callbacks io;        // copy of the caller's callbacks; io.read is set to NULL for memory sources
   void *io_user_data;          // opaque 'user' pointer stored by stbi__start_callbacks

   int read_from_callbacks;     // 1 on the callback path, 0 when decoding from memory
   int buflen;                  // sizeof(buffer_start); only assigned on the callback path
   stbi_uc buffer_start[128];   // internal buffer that the callback path reads into

   stbi_uc *img_buffer, *img_buffer_end;                    // presumably the current read position and the end of available data
   stbi_uc *img_buffer_original, *img_buffer_original_end;  // initial window; stbi__rewind() copies these back into img_buffer/img_buffer_end
} stbi__context;
708 
709 
710 static void stbi__refill_buffer(stbi__context *s);
711 
712 // initialize a memory-decode context
stbi__start_mem(stbi__context * s,stbi_uc const * buffer,int len)713 static void stbi__start_mem(stbi__context *s, stbi_uc const *buffer, int len)
714 {
715    s->io.read = NULL;
716    s->read_from_callbacks = 0;
717    s->img_buffer = s->img_buffer_original = (stbi_uc *) buffer;
718    s->img_buffer_end = s->img_buffer_original_end = (stbi_uc *) buffer+len;
719 }
720 
721 // initialize a callback-based context
stbi__start_callbacks(stbi__context * s,stbi_io_callbacks * c,void * user)722 static void stbi__start_callbacks(stbi__context *s, stbi_io_callbacks *c, void *user)
723 {
724    s->io = *c;
725    s->io_user_data = user;
726    s->buflen = sizeof(s->buffer_start);
727    s->read_from_callbacks = 1;
728    s->img_buffer_original = s->buffer_start;
729    stbi__refill_buffer(s);
730    s->img_buffer_original_end = s->img_buffer_end;
731 }
732 
733 #ifndef STBI_NO_STDIO
734 
// stbi_io_callbacks 'read' hook for stdio streams.
//   user - the FILE * to read from
//   data - destination buffer
//   size - number of bytes requested
// Returns the number of bytes fread() actually delivered.
static int stbi__stdio_read(void *user, char *data, int size)
{
   return (int) fread(data,1,size,(FILE*) user);
}
739 
// stbi_io_callbacks 'skip' hook for stdio streams.
//   user - the FILE * to reposition
//   n    - number of bytes to move relative to the current position
//          (negative values move backwards)
static void stbi__stdio_skip(void *user, int n)
{
   FILE *f = (FILE *) user;
   int ch;
   fseek(f, n, SEEK_CUR);
   // fseek() clears the end-of-file indicator and does not set it even when
   // the new position is past the end of the file, so feof() would keep
   // reporting "not at end". Read one byte to refresh the indicator and
   // push it back if there really was data.
   ch = fgetc(f);
   if (ch != EOF) {
      ungetc(ch, f);
   }
}
744 
// stbi_io_callbacks 'eof' hook for stdio streams.
//   user - the FILE * to query
// Returns feof() for that stream: nonzero once its end-of-file indicator is set.
static int stbi__stdio_eof(void *user)
{
   return feof((FILE*) user);
}
749 
// Callback table that routes the generic stbi_io_callbacks interface to
// stdio; the FILE * is passed as the 'user' pointer (see stbi__start_file).
static stbi_io_callbacks stbi__stdio_callbacks =
{
   stbi__stdio_read,
   stbi__stdio_skip,
   stbi__stdio_eof,
};
756 
// initialize a context that reads from an already-open stdio stream by
// plugging the stdio callback table into the generic callback path
//   s - context to set up
//   f - open FILE *; it becomes the 'user' pointer of the callbacks
static void stbi__start_file(stbi__context *s, FILE *f)
{
   stbi__start_callbacks(s, &stbi__stdio_callbacks, (void *) f);
}
761 
762 //static void stop_file(stbi__context *s) { }
763 
764 #endif // !STBI_NO_STDIO
765 
// Reset the read window of 's' to the one recorded when the context was
// started (img_buffer_original / img_buffer_original_end).
static void stbi__rewind(stbi__context *s)
{
   // conceptually rewind SHOULD rewind to the beginning of the stream,
   // but we just rewind to the beginning of the initial buffer, because
   // we only use it after doing 'test', which only ever looks at at most 92 bytes
   s->img_buffer = s->img_buffer_original;
   s->img_buffer_end = s->img_buffer_original_end;
}
774 
// Channel-order tags; values are implicit (STBI_ORDER_RGB == 0, STBI_ORDER_BGR == 1).
enum
{
   STBI_ORDER_RGB,
   STBI_ORDER_BGR
};
780 
// Per-load result description; a pointer to one of these is passed to every
// stbi__*_load function declared below.
typedef struct
{
   int bits_per_channel; // presumably 8 or 16 -- confirm against the loaders
   int num_channels;
   int channel_order;    // presumably STBI_ORDER_RGB or STBI_ORDER_BGR -- confirm against the loaders
} stbi__result_info;
787 
788 #ifndef STBI_NO_JPEG
789 static int      stbi__jpeg_test(stbi__context *s);
790 static void    *stbi__jpeg_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri);
791 static int      stbi__jpeg_info(stbi__context *s, int *x, int *y, int *comp);
792 #endif
793 
794 #ifndef STBI_NO_PNG
795 static int      stbi__png_test(stbi__context *s);
796 static void    *stbi__png_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri);
797 static int      stbi__png_info(stbi__context *s, int *x, int *y, int *comp);
798 #endif
799 
800 #ifndef STBI_NO_BMP
801 static int      stbi__bmp_test(stbi__context *s);
802 static void    *stbi__bmp_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri);
803 static int      stbi__bmp_info(stbi__context *s, int *x, int *y, int *comp);
804 #endif
805 
806 #ifndef STBI_NO_TGA
807 static int      stbi__tga_test(stbi__context *s);
808 static void    *stbi__tga_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri);
809 static int      stbi__tga_info(stbi__context *s, int *x, int *y, int *comp);
810 #endif
811 
812 #ifndef STBI_NO_PSD
813 static int      stbi__psd_test(stbi__context *s);
814 static void    *stbi__psd_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri, int bpc);
815 static int      stbi__psd_info(stbi__context *s, int *x, int *y, int *comp);
816 #endif
817 
818 #ifndef STBI_NO_HDR
819 static int      stbi__hdr_test(stbi__context *s);
820 static float   *stbi__hdr_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri);
821 static int      stbi__hdr_info(stbi__context *s, int *x, int *y, int *comp);
822 #endif
823 
824 #ifndef STBI_NO_PIC
825 static int      stbi__pic_test(stbi__context *s);
826 static void    *stbi__pic_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri);
827 static int      stbi__pic_info(stbi__context *s, int *x, int *y, int *comp);
828 #endif
829 
830 #ifndef STBI_NO_GIF
831 static int      stbi__gif_test(stbi__context *s);
832 static void    *stbi__gif_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri);
833 static int      stbi__gif_info(stbi__context *s, int *x, int *y, int *comp);
834 #endif
835 
836 #ifndef STBI_NO_PNM
837 static int      stbi__pnm_test(stbi__context *s);
838 static void    *stbi__pnm_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri);
839 static int      stbi__pnm_info(stbi__context *s, int *x, int *y, int *comp);
840 #endif
841 
842 // this is not threadsafe
843 static const char *stbi__g_failure_reason;
844 
stbi_failure_reason(void)845 STBIDEF const char *stbi_failure_reason(void)
846 {
847    return stbi__g_failure_reason;
848 }
849 
stbi__err(const char * str)850 static int stbi__err(const char *str)
851 {
852    stbi__g_failure_reason = str;
853    return 0;
854 }
855 
// Single allocation entry point: forwards to the configurable STBI_MALLOC.
static void *stbi__malloc(size_t size)
{
   void *mem = STBI_MALLOC(size);
   return mem;
}
860 
861 // stb_image uses ints pervasively, including for offset calculations.
862 // therefore the largest decoded image size we can support with the
863 // current code, even on 64-bit targets, is INT_MAX. this is not a
864 // significant limitation for the intended use case.
865 //
866 // we do, however, need to make sure our size calculations don't
867 // overflow. hence a few helper functions for size calculations that
868 // multiply integers together, making sure that they're non-negative
869 // and no overflow occurs.
870 
// return 1 if the sum is valid, 0 on overflow.
// negative terms are considered invalid.
static int stbi__addsizes_valid(int a, int b)
{
   // reject a negative value in either term, as the contract above states
   if (a < 0 || b < 0) return 0;
   // now 0 <= b <= INT_MAX, hence also
   // 0 <= INT_MAX - b <= INT_MAX.
   // And "a + b <= INT_MAX" (which might overflow) is the
   // same as a <= INT_MAX - b (no overflow)
   return a <= INT_MAX - b;
}
882 
// returns 1 if the product is valid, 0 on overflow.
// negative factors are considered invalid.
static int stbi__mul2sizes_valid(int a, int b)
{
   if (a < 0) return 0;
   if (b < 0) return 0;
   // mul-by-0 is always safe (and must be ruled out before dividing by b)
   if (b == 0) return 1;
   // portable way to check for no overflows in a*b
   return (INT_MAX / b) >= a;
}
892 
// returns 1 if "a*b + add" has no negative terms/factors and doesn't overflow
static int stbi__mad2sizes_valid(int a, int b, int add)
{
   // a*b is only formed once the product is known not to overflow
   if (!stbi__mul2sizes_valid(a, b)) return 0;
   return stbi__addsizes_valid(a*b, add);
}
898 
// returns 1 if "a*b*c + add" has no negative terms/factors and doesn't overflow
static int stbi__mad3sizes_valid(int a, int b, int c, int add)
{
   int ab;

   // each partial product is only formed after it has been validated
   if (!stbi__mul2sizes_valid(a, b)) return 0;
   ab = a*b;
   if (!stbi__mul2sizes_valid(ab, c)) return 0;
   return stbi__addsizes_valid(ab*c, add);
}
905 
// returns 1 if "a*b*c*d + add" has no negative terms/factors and doesn't overflow
static int stbi__mad4sizes_valid(int a, int b, int c, int d, int add)
{
   int ab, abc;

   // each partial product is only formed after it has been validated
   if (!stbi__mul2sizes_valid(a, b)) return 0;
   ab = a*b;
   if (!stbi__mul2sizes_valid(ab, c)) return 0;
   abc = ab*c;
   if (!stbi__mul2sizes_valid(abc, d)) return 0;
   return stbi__addsizes_valid(abc*d, add);
}
912 
// mallocs with size overflow checking
// allocates a*b + add bytes; returns NULL if the size is invalid
static void *stbi__malloc_mad2(int a, int b, int add)
{
   if (stbi__mad2sizes_valid(a, b, add))
      return stbi__malloc(a*b + add);
   return NULL;
}
919 
// allocates a*b*c + add bytes; returns NULL if the size is invalid
static void *stbi__malloc_mad3(int a, int b, int c, int add)
{
   if (stbi__mad3sizes_valid(a, b, c, add))
      return stbi__malloc(a*b*c + add);
   return NULL;
}
925 
// allocates a*b*c*d + add bytes; returns NULL if the size is invalid
static void *stbi__malloc_mad4(int a, int b, int c, int d, int add)
{
   if (stbi__mad4sizes_valid(a, b, c, d, add))
      return stbi__malloc(a*b*c*d + add);
   return NULL;
}
931 
// stbi__err - error
// stbi__errpf - error returning pointer to float
// stbi__errpuc - error returning pointer to unsigned char

// stbi__err(x,y) is given two messages; the build configuration picks which
// one (if either) is handed to the stbi__err function defined above.
#if defined(STBI_NO_FAILURE_STRINGS)
   #define stbi__err(x,y)  0
#elif defined(STBI_FAILURE_USERMSG)
   #define stbi__err(x,y)  stbi__err(y)
#else
   #define stbi__err(x,y)  stbi__err(x)
#endif

// "?NULL:NULL" evaluates stbi__err(x,y) and yields a null pointer either way
#define stbi__errpf(x,y)   ((float *)(size_t) (stbi__err(x,y)?NULL:NULL))
#define stbi__errpuc(x,y)  ((unsigned char *)(size_t) (stbi__err(x,y)?NULL:NULL))
946 
stbi_image_free(void * retval_from_stbi_load)947 STBIDEF void stbi_image_free(void *retval_from_stbi_load)
948 {
949    STBI_FREE(retval_from_stbi_load);
950 }
951 
952 #ifndef STBI_NO_LINEAR
953 static float   *stbi__ldr_to_hdr(stbi_uc *data, int x, int y, int comp);
954 #endif
955 
956 #ifndef STBI_NO_HDR
957 static stbi_uc *stbi__hdr_to_ldr(float   *data, int x, int y, int comp);
958 #endif
959 
960 static int stbi__vertically_flip_on_load = 0;
961 
stbi_set_flip_vertically_on_load(int flag_true_if_should_flip)962 STBIDEF void stbi_set_flip_vertically_on_load(int flag_true_if_should_flip)
963 {
964     stbi__vertically_flip_on_load = flag_true_if_should_flip;
965 }
966 
// Runs each enabled format's test on 's' in a fixed order and decodes with
// the first loader whose test succeeds.
//   x, y, comp  outputs handed through to the chosen loader
//   req_comp    requested channel count (0 means "use the image's own")
//   ri          reset here to 8 bits per channel / RGB order before loading
//   bpc         forwarded to the PSD loader only
// Returns the loader's buffer, or NULL with a failure reason recorded when no
// format test matches.
static void *stbi__load_main(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri, int bpc)
{
   memset(ri, 0, sizeof(*ri)); // make sure it's initialized if we add new fields
   ri->bits_per_channel = 8; // default is 8 so most paths don't have to be changed
   ri->channel_order = STBI_ORDER_RGB; // all current input & output are this, but this is here so we can add BGR order
   ri->num_channels = 0;

   #ifndef STBI_NO_JPEG
   if (stbi__jpeg_test(s)) return stbi__jpeg_load(s,x,y,comp,req_comp, ri);
   #endif
   #ifndef STBI_NO_PNG
   if (stbi__png_test(s))  return stbi__png_load(s,x,y,comp,req_comp, ri);
   #endif
   #ifndef STBI_NO_BMP
   if (stbi__bmp_test(s))  return stbi__bmp_load(s,x,y,comp,req_comp, ri);
   #endif
   #ifndef STBI_NO_GIF
   if (stbi__gif_test(s))  return stbi__gif_load(s,x,y,comp,req_comp, ri);
   #endif
   #ifndef STBI_NO_PSD
   if (stbi__psd_test(s))  return stbi__psd_load(s,x,y,comp,req_comp, ri, bpc);
   #endif
   #ifndef STBI_NO_PIC
   if (stbi__pic_test(s))  return stbi__pic_load(s,x,y,comp,req_comp, ri);
   #endif
   #ifndef STBI_NO_PNM
   if (stbi__pnm_test(s))  return stbi__pnm_load(s,x,y,comp,req_comp, ri);
   #endif

   #ifndef STBI_NO_HDR
   if (stbi__hdr_test(s)) {
      float *hdr = stbi__hdr_load(s, x,y,comp,req_comp, ri);
      // a failed load may not have written *x/*y/*comp, so stop before reading them
      if (hdr == NULL) return NULL;
      return stbi__hdr_to_ldr(hdr, *x, *y, req_comp ? req_comp : *comp);
   }
   #endif

   #ifndef STBI_NO_TGA
   // test tga last because it's a crappy test!
   if (stbi__tga_test(s))
      return stbi__tga_load(s,x,y,comp,req_comp, ri);
   #endif

   return stbi__errpuc("unknown image type", "Image not of any known type, or corrupt");
}
1011 
stbi__convert_16_to_8(stbi__uint16 * orig,int w,int h,int channels)1012 static stbi_uc *stbi__convert_16_to_8(stbi__uint16 *orig, int w, int h, int channels)
1013 {
1014    int i;
1015    int img_len = w * h * channels;
1016    stbi_uc *reduced;
1017 
1018    reduced = (stbi_uc *) stbi__malloc(img_len);
1019    if (reduced == NULL) return stbi__errpuc("outofmem", "Out of memory");
1020 
1021    for (i = 0; i < img_len; ++i)
1022       reduced[i] = (stbi_uc)((orig[i] >> 8) & 0xFF); // top half of each byte is sufficient approx of 16->8 bit scaling
1023 
1024    STBI_FREE(orig);
1025    return reduced;
1026 }
1027 
stbi__convert_8_to_16(stbi_uc * orig,int w,int h,int channels)1028 static stbi__uint16 *stbi__convert_8_to_16(stbi_uc *orig, int w, int h, int channels)
1029 {
1030    int i;
1031    int img_len = w * h * channels;
1032    stbi__uint16 *enlarged;
1033 
1034    enlarged = (stbi__uint16 *) stbi__malloc(img_len*2);
1035    if (enlarged == NULL) return (stbi__uint16 *) stbi__errpuc("outofmem", "Out of memory");
1036 
1037    for (i = 0; i < img_len; ++i)
1038       enlarged[i] = (stbi__uint16)((orig[i] << 8) + orig[i]); // replicate to high and low byte, maps 0->0, 255->0xffff
1039 
1040    STBI_FREE(orig);
1041    return enlarged;
1042 }
1043 
// Loads an image through stbi__load_main and returns it as 8 bits per channel:
// 16-bit results are converted down, and rows are swapped top-to-bottom when
// stbi__vertically_flip_on_load is set.
// Returns NULL if loading or the bit-depth conversion fails.
static unsigned char *stbi__load_and_postprocess_8bit(stbi__context *s, int *x, int *y, int *comp, int req_comp)
{
   stbi__result_info ri;
   void *result = stbi__load_main(s, x, y, comp, req_comp, &ri, 8);

   if (result == NULL)
      return NULL;

   if (ri.bits_per_channel != 8) {
      STBI_ASSERT(ri.bits_per_channel == 16);
      result = stbi__convert_16_to_8((stbi__uint16 *) result, *x, *y, req_comp == 0 ? *comp : req_comp);
      // the conversion allocates and can fail; stop before the flip below touches the buffer
      if (result == NULL)
         return NULL;
      ri.bits_per_channel = 8;
   }

   // @TODO: move stbi__convert_format to here

   if (stbi__vertically_flip_on_load) {
      int w = *x, h = *y;
      int channels = req_comp ? req_comp : *comp;
      int row,col,z;
      stbi_uc *image = (stbi_uc *) result;

      // @OPTIMIZE: use a bigger temp buffer and memcpy multiple pixels at once
      // swap row 'row' with its mirror row 'h - row - 1', one sample at a time
      for (row = 0; row < (h>>1); row++) {
         for (col = 0; col < w; col++) {
            for (z = 0; z < channels; z++) {
               stbi_uc temp = image[(row * w + col) * channels + z];
               image[(row * w + col) * channels + z] = image[((h - row - 1) * w + col) * channels + z];
               image[((h - row - 1) * w + col) * channels + z] = temp;
            }
         }
      }
   }

   return (unsigned char *) result;
}
1080 
stbi__load_and_postprocess_16bit(stbi__context * s,int * x,int * y,int * comp,int req_comp)1081 static stbi__uint16 *stbi__load_and_postprocess_16bit(stbi__context *s, int *x, int *y, int *comp, int req_comp)
1082 {
1083    stbi__result_info ri;
1084    void *result = stbi__load_main(s, x, y, comp, req_comp, &ri, 16);
1085 
1086    if (result == NULL)
1087       return NULL;
1088 
1089    if (ri.bits_per_channel != 16) {
1090       STBI_ASSERT(ri.bits_per_channel == 8);
1091       result = stbi__convert_8_to_16((stbi_uc *) result, *x, *y, req_comp == 0 ? *comp : req_comp);
1092       ri.bits_per_channel = 16;
1093    }
1094 
1095    // @TODO: move stbi__convert_format16 to here
1096    // @TODO: special case RGB-to-Y (and RGBA-to-YA) for 8-bit-to-16-bit case to keep more precision
1097 
1098    if (stbi__vertically_flip_on_load) {
1099       int w = *x, h = *y;
1100       int channels = req_comp ? req_comp : *comp;
1101       int row,col,z;
1102       stbi__uint16 *image = (stbi__uint16 *) result;
1103 
1104       // @OPTIMIZE: use a bigger temp buffer and memcpy multiple pixels at once
1105       for (row = 0; row < (h>>1); row++) {
1106          for (col = 0; col < w; col++) {
1107             for (z = 0; z < channels; z++) {
1108                stbi__uint16 temp = image[(row * w + col) * channels + z];
1109                image[(row * w + col) * channels + z] = image[((h - row - 1) * w + col) * channels + z];
1110                image[((h - row - 1) * w + col) * channels + z] = temp;
1111             }
1112          }
1113       }
1114    }
1115 
1116    return (stbi__uint16 *) result;
1117 }
1118 
1119 #ifndef STBI_NO_HDR
stbi__float_postprocess(float * result,int * x,int * y,int * comp,int req_comp)1120 static void stbi__float_postprocess(float *result, int *x, int *y, int *comp, int req_comp)
1121 {
1122    if (stbi__vertically_flip_on_load && result != NULL) {
1123       int w = *x, h = *y;
1124       int depth = req_comp ? req_comp : *comp;
1125       int row,col,z;
1126       float temp;
1127 
1128       // @OPTIMIZE: use a bigger temp buffer and memcpy multiple pixels at once
1129       for (row = 0; row < (h>>1); row++) {
1130          for (col = 0; col < w; col++) {
1131             for (z = 0; z < depth; z++) {
1132                temp = result[(row * w + col) * depth + z];
1133                result[(row * w + col) * depth + z] = result[((h - row - 1) * w + col) * depth + z];
1134                result[((h - row - 1) * w + col) * depth + z] = temp;
1135             }
1136          }
1137       }
1138    }
1139 }
1140 #endif
1141 
1142 #ifndef STBI_NO_STDIO
1143 
// Opens 'filename' with 'mode'; uses fopen_s on MSVC 2005 and later, fopen
// elsewhere.  Returns NULL when the file cannot be opened.
static FILE *stbi__fopen(char const *filename, char const *mode)
{
#if defined(_MSC_VER) && _MSC_VER >= 1400
   FILE *handle = 0;
   if (fopen_s(&handle, filename, mode) != 0)
      return 0;
   return handle;
#else
   return fopen(filename, mode);
#endif
}
1155 
1156 
stbi_load(char const * filename,int * x,int * y,int * comp,int req_comp)1157 STBIDEF stbi_uc *stbi_load(char const *filename, int *x, int *y, int *comp, int req_comp)
1158 {
1159    FILE *f = stbi__fopen(filename, "rb");
1160    unsigned char *result;
1161    if (!f) return stbi__errpuc("can't fopen", "Unable to open file");
1162    result = stbi_load_from_file(f,x,y,comp,req_comp);
1163    fclose(f);
1164    return result;
1165 }
1166 
stbi_load_from_file(FILE * f,int * x,int * y,int * comp,int req_comp)1167 STBIDEF stbi_uc *stbi_load_from_file(FILE *f, int *x, int *y, int *comp, int req_comp)
1168 {
1169    unsigned char *result;
1170    stbi__context s;
1171    stbi__start_file(&s,f);
1172    result = stbi__load_and_postprocess_8bit(&s,x,y,comp,req_comp);
1173    if (result) {
1174       // need to 'unget' all the characters in the IO buffer
1175       fseek(f, - (int) (s.img_buffer_end - s.img_buffer), SEEK_CUR);
1176    }
1177    return result;
1178 }
1179 
stbi_load_from_file_16(FILE * f,int * x,int * y,int * comp,int req_comp)1180 STBIDEF stbi__uint16 *stbi_load_from_file_16(FILE *f, int *x, int *y, int *comp, int req_comp)
1181 {
1182    stbi__uint16 *result;
1183    stbi__context s;
1184    stbi__start_file(&s,f);
1185    result = stbi__load_and_postprocess_16bit(&s,x,y,comp,req_comp);
1186    if (result) {
1187       // need to 'unget' all the characters in the IO buffer
1188       fseek(f, - (int) (s.img_buffer_end - s.img_buffer), SEEK_CUR);
1189    }
1190    return result;
1191 }
1192 
stbi_load_16(char const * filename,int * x,int * y,int * comp,int req_comp)1193 STBIDEF stbi_us *stbi_load_16(char const *filename, int *x, int *y, int *comp, int req_comp)
1194 {
1195    FILE *f = stbi__fopen(filename, "rb");
1196    stbi__uint16 *result;
1197    if (!f) return (stbi_us *) stbi__errpuc("can't fopen", "Unable to open file");
1198    result = stbi_load_from_file_16(f,x,y,comp,req_comp);
1199    fclose(f);
1200    return result;
1201 }
1202 
1203 
1204 #endif //!STBI_NO_STDIO
1205 
stbi_load_from_memory(stbi_uc const * buffer,int len,int * x,int * y,int * comp,int req_comp)1206 STBIDEF stbi_uc *stbi_load_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp)
1207 {
1208    stbi__context s;
1209    stbi__start_mem(&s,buffer,len);
1210    return stbi__load_and_postprocess_8bit(&s,x,y,comp,req_comp);
1211 }
1212 
stbi_load_from_callbacks(stbi_io_callbacks const * clbk,void * user,int * x,int * y,int * comp,int req_comp)1213 STBIDEF stbi_uc *stbi_load_from_callbacks(stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *comp, int req_comp)
1214 {
1215    stbi__context s;
1216    stbi__start_callbacks(&s, (stbi_io_callbacks *) clbk, user);
1217    return stbi__load_and_postprocess_8bit(&s,x,y,comp,req_comp);
1218 }
1219 
1220 #ifndef STBI_NO_LINEAR
// Loads an image as floats.  HDR input (when enabled) is decoded directly and
// flipped if requested; anything else is loaded as 8-bit and converted with
// stbi__ldr_to_hdr.  Returns NULL on failure.
static float *stbi__loadf_main(stbi__context *s, int *x, int *y, int *comp, int req_comp)
{
   unsigned char *ldr;
   #ifndef STBI_NO_HDR
   if (stbi__hdr_test(s)) {
      stbi__result_info ri;
      float *hdr = stbi__hdr_load(s,x,y,comp,req_comp, &ri);
      if (hdr != NULL)
         stbi__float_postprocess(hdr,x,y,comp,req_comp);
      return hdr;
   }
   #endif
   ldr = stbi__load_and_postprocess_8bit(s, x, y, comp, req_comp);
   if (ldr == NULL)
      return stbi__errpf("unknown image type", "Image not of any known type, or corrupt");
   return stbi__ldr_to_hdr(ldr, *x, *y, req_comp ? req_comp : *comp);
}
1238 
stbi_loadf_from_memory(stbi_uc const * buffer,int len,int * x,int * y,int * comp,int req_comp)1239 STBIDEF float *stbi_loadf_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp)
1240 {
1241    stbi__context s;
1242    stbi__start_mem(&s,buffer,len);
1243    return stbi__loadf_main(&s,x,y,comp,req_comp);
1244 }
1245 
stbi_loadf_from_callbacks(stbi_io_callbacks const * clbk,void * user,int * x,int * y,int * comp,int req_comp)1246 STBIDEF float *stbi_loadf_from_callbacks(stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *comp, int req_comp)
1247 {
1248    stbi__context s;
1249    stbi__start_callbacks(&s, (stbi_io_callbacks *) clbk, user);
1250    return stbi__loadf_main(&s,x,y,comp,req_comp);
1251 }
1252 
1253 #ifndef STBI_NO_STDIO
stbi_loadf(char const * filename,int * x,int * y,int * comp,int req_comp)1254 STBIDEF float *stbi_loadf(char const *filename, int *x, int *y, int *comp, int req_comp)
1255 {
1256    float *result;
1257    FILE *f = stbi__fopen(filename, "rb");
1258    if (!f) return stbi__errpf("can't fopen", "Unable to open file");
1259    result = stbi_loadf_from_file(f,x,y,comp,req_comp);
1260    fclose(f);
1261    return result;
1262 }
1263 
stbi_loadf_from_file(FILE * f,int * x,int * y,int * comp,int req_comp)1264 STBIDEF float *stbi_loadf_from_file(FILE *f, int *x, int *y, int *comp, int req_comp)
1265 {
1266    stbi__context s;
1267    stbi__start_file(&s,f);
1268    return stbi__loadf_main(&s,x,y,comp,req_comp);
1269 }
1270 #endif // !STBI_NO_STDIO
1271 
1272 #endif // !STBI_NO_LINEAR
1273 
// these is-hdr-or-not queries are defined independent of whether
// STBI_NO_LINEAR is defined, for API simplicity; the implementations below
// always report false when STBI_NO_HDR is defined!
1277 
stbi_is_hdr_from_memory(stbi_uc const * buffer,int len)1278 STBIDEF int stbi_is_hdr_from_memory(stbi_uc const *buffer, int len)
1279 {
1280    #ifndef STBI_NO_HDR
1281    stbi__context s;
1282    stbi__start_mem(&s,buffer,len);
1283    return stbi__hdr_test(&s);
1284    #else
1285    STBI_NOTUSED(buffer);
1286    STBI_NOTUSED(len);
1287    return 0;
1288    #endif
1289 }
1290 
1291 #ifndef STBI_NO_STDIO
stbi_is_hdr(char const * filename)1292 STBIDEF int      stbi_is_hdr          (char const *filename)
1293 {
1294    FILE *f = stbi__fopen(filename, "rb");
1295    int result=0;
1296    if (f) {
1297       result = stbi_is_hdr_from_file(f);
1298       fclose(f);
1299    }
1300    return result;
1301 }
1302 
stbi_is_hdr_from_file(FILE * f)1303 STBIDEF int      stbi_is_hdr_from_file(FILE *f)
1304 {
1305    #ifndef STBI_NO_HDR
1306    stbi__context s;
1307    stbi__start_file(&s,f);
1308    return stbi__hdr_test(&s);
1309    #else
1310    STBI_NOTUSED(f);
1311    return 0;
1312    #endif
1313 }
1314 #endif // !STBI_NO_STDIO
1315 
stbi_is_hdr_from_callbacks(stbi_io_callbacks const * clbk,void * user)1316 STBIDEF int      stbi_is_hdr_from_callbacks(stbi_io_callbacks const *clbk, void *user)
1317 {
1318    #ifndef STBI_NO_HDR
1319    stbi__context s;
1320    stbi__start_callbacks(&s, (stbi_io_callbacks *) clbk, user);
1321    return stbi__hdr_test(&s);
1322    #else
1323    STBI_NOTUSED(clbk);
1324    STBI_NOTUSED(user);
1325    return 0;
1326    #endif
1327 }
1328 
1329 #ifndef STBI_NO_LINEAR
1330 static float stbi__l2h_gamma=2.2f, stbi__l2h_scale=1.0f;
1331 
stbi_ldr_to_hdr_gamma(float gamma)1332 STBIDEF void   stbi_ldr_to_hdr_gamma(float gamma) { stbi__l2h_gamma = gamma; }
stbi_ldr_to_hdr_scale(float scale)1333 STBIDEF void   stbi_ldr_to_hdr_scale(float scale) { stbi__l2h_scale = scale; }
1334 #endif
1335 
1336 static float stbi__h2l_gamma_i=1.0f/2.2f, stbi__h2l_scale_i=1.0f;
1337 
stbi_hdr_to_ldr_gamma(float gamma)1338 STBIDEF void   stbi_hdr_to_ldr_gamma(float gamma) { stbi__h2l_gamma_i = 1/gamma; }
stbi_hdr_to_ldr_scale(float scale)1339 STBIDEF void   stbi_hdr_to_ldr_scale(float scale) { stbi__h2l_scale_i = 1/scale; }
1340 
1341 
1342 //////////////////////////////////////////////////////////////////////////////
1343 //
1344 // Common code used by all image loaders
1345 //
1346 
// scan mode constants shared by the image loaders
enum
{
   STBI__SCAN_load   = 0,
   STBI__SCAN_type   = 1,
   STBI__SCAN_header = 2
};
1353 
// Refills the context's buffer through the read callback and points the
// cursor at the start of the new data.  When the callback delivers nothing,
// the context switches to a one-byte zero buffer and stops using callbacks.
static void stbi__refill_buffer(stbi__context *s)
{
   int n = (s->io.read)(s->io_user_data,(char*)s->buffer_start,s->buflen);
   s->img_buffer = s->buffer_start;
   // a negative count (callback signalling an error) is handled like end of
   // file, so img_buffer_end can never end up before buffer_start
   if (n <= 0) {
      // at end of file, treat same as if from memory, but need to handle case
      // where s->img_buffer isn't pointing to safe memory, e.g. 0-byte file
      s->read_from_callbacks = 0;
      s->img_buffer_end = s->buffer_start+1;
      *s->img_buffer = 0;
   } else {
      s->img_buffer_end = s->buffer_start + n;
   }
}
1369 
stbi__get8(stbi__context * s)1370 stbi_inline static stbi_uc stbi__get8(stbi__context *s)
1371 {
1372    if (s->img_buffer < s->img_buffer_end)
1373       return *s->img_buffer++;
1374    if (s->read_from_callbacks) {
1375       stbi__refill_buffer(s);
1376       return *s->img_buffer++;
1377    }
1378    return 0;
1379 }
1380 
stbi__at_eof(stbi__context * s)1381 stbi_inline static int stbi__at_eof(stbi__context *s)
1382 {
1383    if (s->io.read) {
1384       if (!(s->io.eof)(s->io_user_data)) return 0;
1385       // if feof() is true, check if buffer = end
1386       // special case: we've only got the special 0 character at the end
1387       if (s->read_from_callbacks == 0) return 1;
1388    }
1389 
1390    return s->img_buffer >= s->img_buffer_end;
1391 }
1392 
// Advances the read position by n bytes.
//   n < 0           : jumps to the end of the buffered data
//   n > bytes left  : consumes the buffer; with callbacks installed the
//                     remainder is skipped through io.skip, otherwise the
//                     cursor simply stops at the end of the data
static void stbi__skip(stbi__context *s, int n)
{
   int blen;

   if (n < 0) {
      s->img_buffer = s->img_buffer_end;
      return;
   }

   blen = (int) (s->img_buffer_end - s->img_buffer);
   if (blen < n) {
      // never move the cursor beyond img_buffer_end: forming a pointer
      // further out than one-past-the-end is undefined behaviour
      s->img_buffer = s->img_buffer_end;
      if (s->io.read)
         (s->io.skip)(s->io_user_data, n - blen);
      return;
   }

   s->img_buffer += n;
}
1409 
// Copies exactly n bytes of input into 'buffer'.
// Returns 1 when all n bytes were delivered, 0 otherwise (including n < 0).
// With callbacks installed, a request larger than the buffered data drains
// the buffer and reads the remainder straight into 'buffer'.
static int stbi__getn(stbi__context *s, stbi_uc *buffer, int n)
{
   int blen;

   // a negative count would turn into an enormous size_t inside memcpy
   if (n < 0) return 0;

   blen = (int) (s->img_buffer_end - s->img_buffer);

   if (s->io.read) {
      if (blen < n) {
         int res, count;

         memcpy(buffer, s->img_buffer, blen);

         count = (s->io.read)(s->io_user_data, (char*) buffer + blen, n - blen);
         res = (count == (n-blen));
         s->img_buffer = s->img_buffer_end;
         return res;
      }
   }

   // compare lengths instead of forming s->img_buffer+n, which could point
   // outside the buffer
   if (n <= blen) {
      memcpy(buffer, s->img_buffer, n);
      s->img_buffer += n;
      return 1;
   } else
      return 0;
}
1433 
// Reads two bytes and combines them big-endian (first byte is the high byte).
static int stbi__get16be(stbi__context *s)
{
   int hi = stbi__get8(s);
   int lo = stbi__get8(s);
   return (hi << 8) + lo;
}
1439 
stbi__get32be(stbi__context * s)1440 static stbi__uint32 stbi__get32be(stbi__context *s)
1441 {
1442    stbi__uint32 z = stbi__get16be(s);
1443    return (z << 16) + stbi__get16be(s);
1444 }
1445 
1446 #if defined(STBI_NO_BMP) && defined(STBI_NO_TGA) && defined(STBI_NO_GIF)
1447 // nothing
1448 #else
// Reads two bytes and combines them little-endian (first byte is the low byte).
static int stbi__get16le(stbi__context *s)
{
   int lo = stbi__get8(s);
   int hi = stbi__get8(s);
   return lo + (hi << 8);
}
1454 #endif
1455 
1456 #ifndef STBI_NO_BMP
stbi__get32le(stbi__context * s)1457 static stbi__uint32 stbi__get32le(stbi__context *s)
1458 {
1459    stbi__uint32 z = stbi__get16le(s);
1460    return z + (stbi__get16le(s) << 16);
1461 }
1462 #endif
1463 
#define STBI__BYTECAST(x)  ((stbi_uc) ((x) & 0xFF))  // truncate int to byte without warnings
1465 
1466 
1467 //////////////////////////////////////////////////////////////////////////////
1468 //
1469 //  generic converter from built-in img_n to req_comp
1470 //    individual types do this automatically as much as possible (e.g. jpeg
1471 //    does all cases internally since it needs to colorspace convert anyway,
1472 //    and it never has alpha, so very few cases ). png can automatically
1473 //    interleave an alpha=255 channel, but falls back to this for other cases
1474 //
1475 //  assume data buffer is malloced, so malloc a new one and free that one
1476 //  only failure mode is malloc failing
1477 
stbi__compute_y(int r,int g,int b)1478 static stbi_uc stbi__compute_y(int r, int g, int b)
1479 {
1480    return (stbi_uc) (((r*77) + (g*150) +  (29*b)) >> 8);
1481 }
1482 
// Converts an 8-bit image of x*y pixels from img_n to req_comp components.
// Returns 'data' untouched when the counts already match.  Otherwise a new
// buffer is allocated and 'data' is freed, on success and on failure alike.
// Returns NULL with a failure reason recorded when the allocation fails or
// the img_n/req_comp combination is not one of the supported cases.
static unsigned char *stbi__convert_format(unsigned char *data, int img_n, int req_comp, unsigned int x, unsigned int y)
{
   int i,j;
   unsigned char *good;

   if (req_comp == img_n) return data;
   STBI_ASSERT(req_comp >= 1 && req_comp <= 4);

   good = (unsigned char *) stbi__malloc_mad3(req_comp, x, y, 0);
   if (good == NULL) {
      STBI_FREE(data);
      return stbi__errpuc("outofmem", "Out of memory");
   }

   for (j=0; j < (int) y; ++j) {
      unsigned char *src  = data + j * x * img_n   ;
      unsigned char *dest = good + j * x * req_comp;

      #define STBI__COMBO(a,b)  ((a)*8+(b))
      #define STBI__CASE(a,b)   case STBI__COMBO(a,b): for(i=x-1; i >= 0; --i, src += a, dest += b)
      // convert source image with img_n components to one with req_comp components;
      // avoid switch per pixel, so use switch per scanline and massive macros
      switch (STBI__COMBO(img_n, req_comp)) {
         STBI__CASE(1,2) { dest[0]=src[0], dest[1]=255;                                     } break;
         STBI__CASE(1,3) { dest[0]=dest[1]=dest[2]=src[0];                                  } break;
         STBI__CASE(1,4) { dest[0]=dest[1]=dest[2]=src[0], dest[3]=255;                     } break;
         STBI__CASE(2,1) { dest[0]=src[0];                                                  } break;
         STBI__CASE(2,3) { dest[0]=dest[1]=dest[2]=src[0];                                  } break;
         STBI__CASE(2,4) { dest[0]=dest[1]=dest[2]=src[0], dest[3]=src[1];                  } break;
         STBI__CASE(3,4) { dest[0]=src[0],dest[1]=src[1],dest[2]=src[2],dest[3]=255;        } break;
         STBI__CASE(3,1) { dest[0]=stbi__compute_y(src[0],src[1],src[2]);                   } break;
         STBI__CASE(3,2) { dest[0]=stbi__compute_y(src[0],src[1],src[2]), dest[1] = 255;    } break;
         STBI__CASE(4,1) { dest[0]=stbi__compute_y(src[0],src[1],src[2]);                   } break;
         STBI__CASE(4,2) { dest[0]=stbi__compute_y(src[0],src[1],src[2]), dest[1] = src[3]; } break;
         STBI__CASE(4,3) { dest[0]=src[0],dest[1]=src[1],dest[2]=src[2];                    } break;
         default:
            // with asserts compiled out this must not hand back an unwritten buffer
            STBI_ASSERT(0);
            STBI_FREE(data);
            STBI_FREE(good);
            return stbi__errpuc("unsupported", "Unsupported format conversion");
      }
      #undef STBI__CASE
   }

   STBI_FREE(data);
   return good;
}
1526 
stbi__compute_y_16(int r,int g,int b)1527 static stbi__uint16 stbi__compute_y_16(int r, int g, int b)
1528 {
1529    return (stbi__uint16) (((r*77) + (g*150) +  (29*b)) >> 8);
1530 }
1531 
// Converts a 16-bit image of x*y pixels from img_n to req_comp components.
// Returns 'data' untouched when the counts already match.  Otherwise a new
// buffer is allocated and 'data' is freed, on success and on failure alike.
// Returns NULL with a failure reason recorded when the allocation fails (or
// its size would overflow) or the img_n/req_comp combination is unsupported.
static stbi__uint16 *stbi__convert_format16(stbi__uint16 *data, int img_n, int req_comp, unsigned int x, unsigned int y)
{
   int i,j;
   stbi__uint16 *good;

   if (req_comp == img_n) return data;
   STBI_ASSERT(req_comp >= 1 && req_comp <= 4);

   // overflow-checked size computation, matching the 8-bit converter
   good = (stbi__uint16 *) stbi__malloc_mad4(req_comp, x, y, 2, 0);
   if (good == NULL) {
      STBI_FREE(data);
      return (stbi__uint16 *) stbi__errpuc("outofmem", "Out of memory");
   }

   for (j=0; j < (int) y; ++j) {
      stbi__uint16 *src  = data + j * x * img_n   ;
      stbi__uint16 *dest = good + j * x * req_comp;

      #define STBI__COMBO(a,b)  ((a)*8+(b))
      #define STBI__CASE(a,b)   case STBI__COMBO(a,b): for(i=x-1; i >= 0; --i, src += a, dest += b)
      // convert source image with img_n components to one with req_comp components;
      // avoid switch per pixel, so use switch per scanline and massive macros
      switch (STBI__COMBO(img_n, req_comp)) {
         STBI__CASE(1,2) { dest[0]=src[0], dest[1]=0xffff;                                     } break;
         STBI__CASE(1,3) { dest[0]=dest[1]=dest[2]=src[0];                                     } break;
         STBI__CASE(1,4) { dest[0]=dest[1]=dest[2]=src[0], dest[3]=0xffff;                     } break;
         STBI__CASE(2,1) { dest[0]=src[0];                                                     } break;
         STBI__CASE(2,3) { dest[0]=dest[1]=dest[2]=src[0];                                     } break;
         STBI__CASE(2,4) { dest[0]=dest[1]=dest[2]=src[0], dest[3]=src[1];                     } break;
         STBI__CASE(3,4) { dest[0]=src[0],dest[1]=src[1],dest[2]=src[2],dest[3]=0xffff;        } break;
         STBI__CASE(3,1) { dest[0]=stbi__compute_y_16(src[0],src[1],src[2]);                   } break;
         STBI__CASE(3,2) { dest[0]=stbi__compute_y_16(src[0],src[1],src[2]), dest[1] = 0xffff; } break;
         STBI__CASE(4,1) { dest[0]=stbi__compute_y_16(src[0],src[1],src[2]);                   } break;
         STBI__CASE(4,2) { dest[0]=stbi__compute_y_16(src[0],src[1],src[2]), dest[1] = src[3]; } break;
         STBI__CASE(4,3) { dest[0]=src[0],dest[1]=src[1],dest[2]=src[2];                       } break;
         default:
            // with asserts compiled out this must not hand back an unwritten buffer
            STBI_ASSERT(0);
            STBI_FREE(data);
            STBI_FREE(good);
            return (stbi__uint16 *) stbi__errpuc("unsupported", "Unsupported format conversion");
      }
      #undef STBI__CASE
   }

   STBI_FREE(data);
   return good;
}
1575 
1576 #ifndef STBI_NO_LINEAR
// Converts an 8-bit image of x*y pixels with 'comp' components to floats.
// Non-alpha components go through the stbi__l2h_gamma / stbi__l2h_scale
// curve; an alpha component (present when comp is even) is only divided by 255.
// Frees 'data' on success and on allocation failure; returns NULL for NULL input.
static float   *stbi__ldr_to_hdr(stbi_uc *data, int x, int y, int comp)
{
   int i,k,n;
   int pixel_count;
   float *output;

   if (data == NULL) return NULL;

   output = (float *) stbi__malloc_mad4(x, y, comp, sizeof(float), 0);
   if (output == NULL) {
      STBI_FREE(data);
      return stbi__errpf("outofmem", "Out of memory");
   }

   // compute number of non-alpha components
   n = (comp & 1) ? comp : comp-1;

   pixel_count = x*y;
   for (i=0; i < pixel_count; ++i) {
      const stbi_uc *src = data + i*comp;
      float *dst = output + i*comp;
      for (k=0; k < n; ++k)
         dst[k] = (float) (pow(src[k]/255.0f, stbi__l2h_gamma) * stbi__l2h_scale);
      // k now indexes the alpha component, if there is one
      if (k < comp)
         dst[k] = src[k]/255.0f;
   }

   STBI_FREE(data);
   return output;
}
1595 #endif
1596 
1597 #ifndef STBI_NO_HDR
1598 #define stbi__float2int(x)   ((int) (x))
stbi__hdr_to_ldr(float * data,int x,int y,int comp)1599 static stbi_uc *stbi__hdr_to_ldr(float   *data, int x, int y, int comp)
1600 {
1601    int i,k,n;
1602    stbi_uc *output;
1603    if (!data) return NULL;
1604    output = (stbi_uc *) stbi__malloc_mad3(x, y, comp, 0);
1605    if (output == NULL) { STBI_FREE(data); return stbi__errpuc("outofmem", "Out of memory"); }
1606    // compute number of non-alpha components
1607    if (comp & 1) n = comp; else n = comp-1;
1608    for (i=0; i < x*y; ++i) {
1609       for (k=0; k < n; ++k) {
1610          float z = (float) pow(data[i*comp+k]*stbi__h2l_scale_i, stbi__h2l_gamma_i) * 255 + 0.5f;
1611          if (z < 0) z = 0;
1612          if (z > 255) z = 255;
1613          output[i*comp + k] = (stbi_uc) stbi__float2int(z);
1614       }
1615       if (k < comp) {
1616          float z = data[i*comp+k] * 255 + 0.5f;
1617          if (z < 0) z = 0;
1618          if (z > 255) z = 255;
1619          output[i*comp + k] = (stbi_uc) stbi__float2int(z);
1620       }
1621    }
1622    STBI_FREE(data);
1623    return output;
1624 }
1625 #endif
1626 
1627 //////////////////////////////////////////////////////////////////////////////
1628 //
1629 //  "baseline" JPEG/JFIF decoder
1630 //
1631 //    simple implementation
1632 //      - doesn't support delayed output of y-dimension
1633 //      - simple interface (only one output format: 8-bit interleaved RGB)
1634 //      - doesn't try to recover corrupt jpegs
1635 //      - doesn't allow partial loading, loading multiple at once
1636 //      - still fast on x86 (copying globals into locals doesn't help x86)
1637 //      - allocates lots of intermediate memory (full size of all components)
1638 //        - non-interleaved case requires this anyway
1639 //        - allows good upsampling (see next)
1640 //    high-quality
1641 //      - upsampled channels are bilinearly interpolated, even across blocks
1642 //      - quality integer IDCT derived from IJG's 'slow'
1643 //    performance
1644 //      - fast huffman; reasonable integer IDCT
1645 //      - some SIMD kernels for common paths on targets with SSE2/NEON
1646 //      - uses a lot of intermediate memory, could cache poorly
1647 
1648 #ifndef STBI_NO_JPEG
1649 
// huffman decoding acceleration
// FAST_BITS is the number of leading stream bits used to index the direct
// lookup tables (stbi__huffman.fast and the fast_ac tables).
#define FAST_BITS   9  // larger handles more cases; smaller stomps less cache

// One Huffman table in the form produced by stbi__build_huffman.
// Symbols are identified by their index in code-length order.
typedef struct
{
   stbi_uc  fast[1 << FAST_BITS];   // top FAST_BITS stream bits -> symbol index; 255 means "not accelerated"
   // weirdly, repacking this into AoS is a 10% speed loss, instead of a win
   stbi__uint16 code[256];          // code assigned to each symbol index
   stbi_uc  values[256];            // value the decoder returns for each symbol index (not written by stbi__build_huffman)
   stbi_uc  size[257];              // code length in bits for each symbol index; list is terminated by a 0 entry
   unsigned int maxcode[18];        // per code length: largest code + 1, preshifted to 16 bits; entry 17 is an 0xffffffff sentinel
   int    delta[17];   // old 'firstsymbol' - old 'firstcode'
} stbi__huffman;
1663 
// Decoder state shared by the JPEG routines: input stream, decoding tables,
// per-component bookkeeping, the entropy-coded bit buffer, the parameters of
// the current scan, and the function pointers for the pixel kernels.
typedef struct
{
   stbi__context *s;                 // input stream; entropy-coded bytes are pulled from it with stbi__get8
   stbi__huffman huff_dc[4];         // DC Huffman tables
   stbi__huffman huff_ac[4];         // AC Huffman tables
   stbi__uint16 dequant[4][64];      // quantization tables (presumably the 'dequant' argument of the block decoder -- TODO confirm)
   stbi__int16 fast_ac[4][1 << FAST_BITS]; // combined value/run/length lookups, one entry per FAST_BITS-bit prefix (see stbi__build_fast_ac)

// sizes for components, interleaved MCUs
   int img_h_max, img_v_max;
   int img_mcu_x, img_mcu_y;
   int img_mcu_w, img_mcu_h;

// definition of jpeg image component
   struct
   {
      int id;
      int h,v;
      int tq;
      int hd,ha;
      int dc_pred;                   // running DC predictor: each decoded DC difference is added to it

      int x,y,w2,h2;
      stbi_uc *data;
      void *raw_data, *raw_coeff;
      stbi_uc *linebuf;
      short   *coeff;   // progressive only
      int      coeff_w, coeff_h; // number of 8x8 coefficient blocks
   } img_comp[4];

   stbi__uint32   code_buffer; // jpeg entropy-coded buffer
   int            code_bits;   // number of valid bits
   unsigned char  marker;      // marker seen while filling entropy buffer
   int            nomore;      // flag if we saw a marker so must stop

   int            progressive;
   int            spec_start;  // first zigzag index handled by the current progressive scan (0 = DC scan)
   int            spec_end;    // last zigzag index handled by the current progressive scan
   int            succ_high;   // 0 selects the first pass over a coefficient, nonzero a refinement pass
   int            succ_low;    // bit position (shift) applied to values decoded in the current scan
   int            eob_run;     // remaining blocks covered by an end-of-band run; decremented once per block
   int            jfif;
   int            app14_color_transform; // Adobe APP14 tag
   int            rgb;

   int scan_n, order[4];
   int restart_interval, todo;

// kernels
   void (*idct_block_kernel)(stbi_uc *out, int out_stride, short data[64]);
   void (*YCbCr_to_RGB_kernel)(stbi_uc *out, const stbi_uc *y, const stbi_uc *pcb, const stbi_uc *pcr, int count, int step);
   stbi_uc *(*resample_row_hv_2_kernel)(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs);
} stbi__jpeg;
1717 
// Build the decoding tables of 'h' from a code-length histogram.
//   h     - table to fill; size, code, delta, maxcode and fast are written,
//           values is left untouched
//   count - count[i] is the number of codes that are i+1 bits long, i = 0..15
// Returns 1 on success. Returns the value of stbi__err() when the histogram
// describes more than 256 symbols or assigns a code that does not fit in its
// own length.
static int stbi__build_huffman(stbi__huffman *h, int *count)
{
   int i,j,k=0,code;
   // build size list for each symbol (from JPEG spec)
   for (i=0; i < 16; ++i) {
      for (j=0; j < count[i]; ++j) {
         // size[] holds at most 256 symbols plus the terminator, and code[]
         // holds 256 entries; a corrupt histogram must not write past them
         if (k >= 256) return stbi__err("bad size list","Corrupt JPEG");
         h->size[k++] = (stbi_uc) (i+1);
      }
   }
   h->size[k] = 0;

   // compute actual symbols (from jpeg spec)
   code = 0;
   k = 0;
   for(j=1; j <= 16; ++j) {
      // compute delta to add to code to compute symbol id
      h->delta[j] = k - code;
      if (h->size[k] == j) {
         while (h->size[k] == j)
            h->code[k++] = (stbi__uint16) (code++);
         if (code-1 >= (1 << j)) return stbi__err("bad code lengths","Corrupt JPEG");
      }
      // compute largest code + 1 for this size, preshifted as needed later
      h->maxcode[j] = code << (16-j);
      code <<= 1;
   }
   // j is 17 here: sentinel that terminates the decoder's length search
   h->maxcode[j] = 0xffffffff;

   // build non-spec acceleration table; 255 is flag for not-accelerated
   memset(h->fast, 255, 1 << FAST_BITS);
   for (i=0; i < k; ++i) {
      int s = h->size[i];
      if (s <= FAST_BITS) {
         // every FAST_BITS-bit pattern that starts with this code maps to symbol i
         int c = h->code[i] << (FAST_BITS-s);
         int m = 1 << (FAST_BITS-s);
         for (j=0; j < m; ++j) {
            h->fast[c+j] = (stbi_uc) i;
         }
      }
   }
   return 1;
}
1758 
1759 // build a table that decodes both magnitude and value of small ACs in
1760 // one go.
stbi__build_fast_ac(stbi__int16 * fast_ac,stbi__huffman * h)1761 static void stbi__build_fast_ac(stbi__int16 *fast_ac, stbi__huffman *h)
1762 {
1763    int i;
1764    for (i=0; i < (1 << FAST_BITS); ++i) {
1765       stbi_uc fast = h->fast[i];
1766       fast_ac[i] = 0;
1767       if (fast < 255) {
1768          int rs = h->values[fast];
1769          int run = (rs >> 4) & 15;
1770          int magbits = rs & 15;
1771          int len = h->size[fast];
1772 
1773          if (magbits && len + magbits <= FAST_BITS) {
1774             // magnitude code followed by receive_extend code
1775             int k = ((i << len) & ((1 << FAST_BITS) - 1)) >> (FAST_BITS - magbits);
1776             int m = 1 << (magbits - 1);
1777             if (k < m) k += (~0U << magbits) + 1;
1778             // if the result is small enough, we can fit it in fast_ac table
1779             if (k >= -128 && k <= 127)
1780                fast_ac[i] = (stbi__int16) ((k << 8) + (run << 4) + (len + magbits));
1781          }
1782       }
1783    }
1784 }
1785 
// Top up the bit buffer of 'j' one byte at a time until it holds more than
// 24 valid bits. A 0xff byte followed (after any 0xff fill bytes) by a
// nonzero byte is a marker: it is recorded in j->marker, j->nomore is set and
// the function returns at once. Once j->nomore is set, zero bytes are
// appended instead of reading the stream.
static void stbi__grow_buffer_unsafe(stbi__jpeg *j)
{
   do {
      // unsigned so that shifting a byte >= 0x80 into the top of the buffer
      // cannot overflow a signed int
      unsigned int b = j->nomore ? 0 : stbi__get8(j->s);
      if (b == 0xff) {
         int c = stbi__get8(j->s);
         while (c == 0xff) c = stbi__get8(j->s); // consume fill bytes
         if (c != 0) {
            j->marker = (unsigned char) c;
            j->nomore = 1;
            return;
         }
         // 0xff 0x00 is a stuffed data byte: fall through and store the 0xff
      }
      j->code_buffer |= b << (24 - j->code_bits);
      j->code_bits += 8;
   } while (j->code_bits <= 24);
}
1803 
1804 // (1 << n) - 1
1805 static stbi__uint32 stbi__bmask[17]={0,1,3,7,15,31,63,127,255,511,1023,2047,4095,8191,16383,32767,65535};
1806 
1807 // decode a jpeg huffman value from the bitstream
stbi__jpeg_huff_decode(stbi__jpeg * j,stbi__huffman * h)1808 stbi_inline static int stbi__jpeg_huff_decode(stbi__jpeg *j, stbi__huffman *h)
1809 {
1810    unsigned int temp;
1811    int c,k;
1812 
1813    if (j->code_bits < 16) stbi__grow_buffer_unsafe(j);
1814 
1815    // look at the top FAST_BITS and determine what symbol ID it is,
1816    // if the code is <= FAST_BITS
1817    c = (j->code_buffer >> (32 - FAST_BITS)) & ((1 << FAST_BITS)-1);
1818    k = h->fast[c];
1819    if (k < 255) {
1820       int s = h->size[k];
1821       if (s > j->code_bits)
1822          return -1;
1823       j->code_buffer <<= s;
1824       j->code_bits -= s;
1825       return h->values[k];
1826    }
1827 
1828    // naive test is to shift the code_buffer down so k bits are
1829    // valid, then test against maxcode. To speed this up, we've
1830    // preshifted maxcode left so that it has (16-k) 0s at the
1831    // end; in other words, regardless of the number of bits, it
1832    // wants to be compared against something shifted to have 16;
1833    // that way we don't need to shift inside the loop.
1834    temp = j->code_buffer >> 16;
1835    for (k=FAST_BITS+1 ; ; ++k)
1836       if (temp < h->maxcode[k])
1837          break;
1838    if (k == 17) {
1839       // error! code not found
1840       j->code_bits -= 16;
1841       return -1;
1842    }
1843 
1844    if (k > j->code_bits)
1845       return -1;
1846 
1847    // convert the huffman code to the symbol id
1848    c = ((j->code_buffer >> (32 - k)) & stbi__bmask[k]) + h->delta[k];
1849    STBI_ASSERT((((j->code_buffer) >> (32 - h->size[c])) & stbi__bmask[h->size[c]]) == h->code[c]);
1850 
1851    // convert the id to a symbol
1852    j->code_bits -= k;
1853    j->code_buffer <<= k;
1854    return h->values[c];
1855 }
1856 
// bias[n] = (-1<<n) + 1
// Indexed by bit count n = 0..15. stbi__extend_receive adds bias[n] to an
// n-bit value whose leading bit is 0, which maps it into the negative range.
static int const stbi__jbias[16] = {0,-1,-3,-7,-15,-31,-63,-127,-255,-511,-1023,-2047,-4095,-8191,-16383,-32767};
1859 
1860 // combined JPEG 'receive' and JPEG 'extend', since baseline
1861 // always extends everything it receives.
stbi__extend_receive(stbi__jpeg * j,int n)1862 stbi_inline static int stbi__extend_receive(stbi__jpeg *j, int n)
1863 {
1864    unsigned int k;
1865    int sgn;
1866    if (j->code_bits < n) stbi__grow_buffer_unsafe(j);
1867 
1868    sgn = (stbi__int32)j->code_buffer >> 31; // sign bit is always in MSB
1869    k = stbi_lrot(j->code_buffer, n);
1870    STBI_ASSERT(n >= 0 && n < (int) (sizeof(stbi__bmask)/sizeof(*stbi__bmask)));
1871    j->code_buffer = k & ~stbi__bmask[n];
1872    k &= stbi__bmask[n];
1873    j->code_bits -= n;
1874    return k + (stbi__jbias[n] & ~sgn);
1875 }
1876 
1877 // get some unsigned bits
stbi__jpeg_get_bits(stbi__jpeg * j,int n)1878 stbi_inline static int stbi__jpeg_get_bits(stbi__jpeg *j, int n)
1879 {
1880    unsigned int k;
1881    if (j->code_bits < n) stbi__grow_buffer_unsafe(j);
1882    k = stbi_lrot(j->code_buffer, n);
1883    j->code_buffer = k & ~stbi__bmask[n];
1884    k &= stbi__bmask[n];
1885    j->code_bits -= n;
1886    return k;
1887 }
1888 
stbi__jpeg_get_bit(stbi__jpeg * j)1889 stbi_inline static int stbi__jpeg_get_bit(stbi__jpeg *j)
1890 {
1891    unsigned int k;
1892    if (j->code_bits < 1) stbi__grow_buffer_unsafe(j);
1893    k = j->code_buffer;
1894    j->code_buffer <<= 1;
1895    --j->code_bits;
1896    return k & 0x80000000;
1897 }
1898 
1899 // given a value that's at position X in the zigzag stream,
1900 // where does it appear in the 8x8 matrix coded as row-major?
1901 static stbi_uc stbi__jpeg_dezigzag[64+15] =
1902 {
1903     0,  1,  8, 16,  9,  2,  3, 10,
1904    17, 24, 32, 25, 18, 11,  4,  5,
1905    12, 19, 26, 33, 40, 48, 41, 34,
1906    27, 20, 13,  6,  7, 14, 21, 28,
1907    35, 42, 49, 56, 57, 50, 43, 36,
1908    29, 22, 15, 23, 30, 37, 44, 51,
1909    58, 59, 52, 45, 38, 31, 39, 46,
1910    53, 60, 61, 54, 47, 55, 62, 63,
1911    // let corrupt input sample past end
1912    63, 63, 63, 63, 63, 63, 63, 63,
1913    63, 63, 63, 63, 63, 63, 63
1914 };
1915 
1916 // decode one 64-entry block--
stbi__jpeg_decode_block(stbi__jpeg * j,short data[64],stbi__huffman * hdc,stbi__huffman * hac,stbi__int16 * fac,int b,stbi__uint16 * dequant)1917 static int stbi__jpeg_decode_block(stbi__jpeg *j, short data[64], stbi__huffman *hdc, stbi__huffman *hac, stbi__int16 *fac, int b, stbi__uint16 *dequant)
1918 {
1919    int diff,dc,k;
1920    int t;
1921 
1922    if (j->code_bits < 16) stbi__grow_buffer_unsafe(j);
1923    t = stbi__jpeg_huff_decode(j, hdc);
1924    if (t < 0) return stbi__err("bad huffman code","Corrupt JPEG");
1925 
1926    // 0 all the ac values now so we can do it 32-bits at a time
1927    memset(data,0,64*sizeof(data[0]));
1928 
1929    diff = t ? stbi__extend_receive(j, t) : 0;
1930    dc = j->img_comp[b].dc_pred + diff;
1931    j->img_comp[b].dc_pred = dc;
1932    data[0] = (short) (dc * dequant[0]);
1933 
1934    // decode AC components, see JPEG spec
1935    k = 1;
1936    do {
1937       unsigned int zig;
1938       int c,r,s;
1939       if (j->code_bits < 16) stbi__grow_buffer_unsafe(j);
1940       c = (j->code_buffer >> (32 - FAST_BITS)) & ((1 << FAST_BITS)-1);
1941       r = fac[c];
1942       if (r) { // fast-AC path
1943          k += (r >> 4) & 15; // run
1944          s = r & 15; // combined length
1945          j->code_buffer <<= s;
1946          j->code_bits -= s;
1947          // decode into unzigzag'd location
1948          zig = stbi__jpeg_dezigzag[k++];
1949          data[zig] = (short) ((r >> 8) * dequant[zig]);
1950       } else {
1951          int rs = stbi__jpeg_huff_decode(j, hac);
1952          if (rs < 0) return stbi__err("bad huffman code","Corrupt JPEG");
1953          s = rs & 15;
1954          r = rs >> 4;
1955          if (s == 0) {
1956             if (rs != 0xf0) break; // end block
1957             k += 16;
1958          } else {
1959             k += r;
1960             // decode into unzigzag'd location
1961             zig = stbi__jpeg_dezigzag[k++];
1962             data[zig] = (short) (stbi__extend_receive(j,s) * dequant[zig]);
1963          }
1964       }
1965    } while (k < 64);
1966    return 1;
1967 }
1968 
// Decode the DC coefficient of one block in a progressive scan.
//   j    - decoder state; spec_end must be 0 for a DC scan
//   data - the block's 64 coefficients; cleared on the first DC pass,
//          only data[0] is touched on a refinement pass
//   hdc  - DC Huffman table
//   b    - index into j->img_comp of the component whose predictor is used
// Returns 1 on success, or the value of stbi__err() on corrupt input.
static int stbi__jpeg_decode_block_prog_dc(stbi__jpeg *j, short data[64], stbi__huffman *hdc, int b)
{
   int diff,dc;
   int t;
   if (j->spec_end != 0) return stbi__err("can't merge dc and ac", "Corrupt JPEG");

   if (j->code_bits < 16) stbi__grow_buffer_unsafe(j);

   if (j->succ_high == 0) {
      // first scan for DC coefficient, must be first
      memset(data,0,64*sizeof(data[0])); // 0 all the ac values now
      t = stbi__jpeg_huff_decode(j, hdc);
      // t is the bit count of the DC difference: reject a failed decode and
      // counts that would index past the bias table in stbi__extend_receive
      if (t < 0 || t > 15) return stbi__err("bad huffman code","Corrupt JPEG");
      diff = t ? stbi__extend_receive(j, t) : 0;

      dc = j->img_comp[b].dc_pred + diff;
      j->img_comp[b].dc_pred = dc;
      // multiply instead of shifting: dc may be negative
      data[0] = (short) (dc * (1 << j->succ_low));
   } else {
      // refinement scan for DC coefficient
      if (stbi__jpeg_get_bit(j))
         data[0] += (short) (1 << j->succ_low);
   }
   return 1;
}
1993 
1994 // @OPTIMIZE: store non-zigzagged during the decode passes,
1995 // and only de-zigzag when dequantizing
stbi__jpeg_decode_block_prog_ac(stbi__jpeg * j,short data[64],stbi__huffman * hac,stbi__int16 * fac)1996 static int stbi__jpeg_decode_block_prog_ac(stbi__jpeg *j, short data[64], stbi__huffman *hac, stbi__int16 *fac)
1997 {
1998    int k;
1999    if (j->spec_start == 0) return stbi__err("can't merge dc and ac", "Corrupt JPEG");
2000 
2001    if (j->succ_high == 0) {
2002       int shift = j->succ_low;
2003 
2004       if (j->eob_run) {
2005          --j->eob_run;
2006          return 1;
2007       }
2008 
2009       k = j->spec_start;
2010       do {
2011          unsigned int zig;
2012          int c,r,s;
2013          if (j->code_bits < 16) stbi__grow_buffer_unsafe(j);
2014          c = (j->code_buffer >> (32 - FAST_BITS)) & ((1 << FAST_BITS)-1);
2015          r = fac[c];
2016          if (r) { // fast-AC path
2017             k += (r >> 4) & 15; // run
2018             s = r & 15; // combined length
2019             j->code_buffer <<= s;
2020             j->code_bits -= s;
2021             zig = stbi__jpeg_dezigzag[k++];
2022             data[zig] = (short) ((r >> 8) << shift);
2023          } else {
2024             int rs = stbi__jpeg_huff_decode(j, hac);
2025             if (rs < 0) return stbi__err("bad huffman code","Corrupt JPEG");
2026             s = rs & 15;
2027             r = rs >> 4;
2028             if (s == 0) {
2029                if (r < 15) {
2030                   j->eob_run = (1 << r);
2031                   if (r)
2032                      j->eob_run += stbi__jpeg_get_bits(j, r);
2033                   --j->eob_run;
2034                   break;
2035                }
2036                k += 16;
2037             } else {
2038                k += r;
2039                zig = stbi__jpeg_dezigzag[k++];
2040                data[zig] = (short) (stbi__extend_receive(j,s) << shift);
2041             }
2042          }
2043       } while (k <= j->spec_end);
2044    } else {
2045       // refinement scan for these AC coefficients
2046 
2047       short bit = (short) (1 << j->succ_low);
2048 
2049       if (j->eob_run) {
2050          --j->eob_run;
2051          for (k = j->spec_start; k <= j->spec_end; ++k) {
2052             short *p = &data[stbi__jpeg_dezigzag[k]];
2053             if (*p != 0)
2054                if (stbi__jpeg_get_bit(j))
2055                   if ((*p & bit)==0) {
2056                      if (*p > 0)
2057                         *p += bit;
2058                      else
2059                         *p -= bit;
2060                   }
2061          }
2062       } else {
2063          k = j->spec_start;
2064          do {
2065             int r,s;
2066             int rs = stbi__jpeg_huff_decode(j, hac); // @OPTIMIZE see if we can use the fast path here, advance-by-r is so slow, eh
2067             if (rs < 0) return stbi__err("bad huffman code","Corrupt JPEG");
2068             s = rs & 15;
2069             r = rs >> 4;
2070             if (s == 0) {
2071                if (r < 15) {
2072                   j->eob_run = (1 << r) - 1;
2073                   if (r)
2074                      j->eob_run += stbi__jpeg_get_bits(j, r);
2075                   r = 64; // force end of block
2076                } else {
2077                   // r=15 s=0 should write 16 0s, so we just do
2078                   // a run of 15 0s and then write s (which is 0),
2079                   // so we don't have to do anything special here
2080                }
2081             } else {
2082                if (s != 1) return stbi__err("bad huffman code", "Corrupt JPEG");
2083                // sign bit
2084                if (stbi__jpeg_get_bit(j))
2085                   s = bit;
2086                else
2087                   s = -bit;
2088             }
2089 
2090             // advance by r
2091             while (k <= j->spec_end) {
2092                short *p = &data[stbi__jpeg_dezigzag[k++]];
2093                if (*p != 0) {
2094                   if (stbi__jpeg_get_bit(j))
2095                      if ((*p & bit)==0) {
2096                         if (*p > 0)
2097                            *p += bit;
2098                         else
2099                            *p -= bit;
2100                      }
2101                } else {
2102                   if (r == 0) {
2103                      *p = (short) s;
2104                      break;
2105                   }
2106                   --r;
2107                }
2108             }
2109          } while (k <= j->spec_end);
2110       }
2111    }
2112    return 1;
2113 }
2114 
2115 // take a -128..127 value and stbi__clamp it and convert to 0..255
stbi__clamp(int x)2116 stbi_inline static stbi_uc stbi__clamp(int x)
2117 {
2118    // trick to use a single test to catch both cases
2119    if ((unsigned int) x > 255) {
2120       if (x < 0) return 0;
2121       if (x > 255) return 255;
2122    }
2123    return (stbi_uc) x;
2124 }
2125 
// Fixed-point helpers for the IDCT; the scale factor is 1 << 12 = 4096.
// stbi__f2f converts a floating-point constant to fixed point, rounding by
// adding 0.5 before the truncating cast.
#define stbi__f2f(x)  ((int) (((x) * 4096 + 0.5)))
// stbi__fsh scales an integer to fixed point. Written as a multiplication
// because left-shifting a negative value is undefined in C.
#define stbi__fsh(x)  ((x) * 4096)
2128 
// derived from jidctint -- DCT_ISLOW
// One 1-D 8-point inverse DCT step in 12-bit fixed point.
// s0..s7 are the eight inputs. The macro DECLARES its own locals, so it can
// be expanded only once per scope. On exit the even part is in x0..x3 and the
// odd part in t0..t3, all still scaled by 1 << 12; the caller adds its
// rounding bias to x0..x3 and forms the outputs as
//   x0+t3, x1+t2, x2+t1, x3+t0, x3-t0, x2-t1, x1-t2, x0-t3
// before shifting the scale back out.
#define STBI__IDCT_1D(s0,s1,s2,s3,s4,s5,s6,s7) \
   int t0,t1,t2,t3,p1,p2,p3,p4,p5,x0,x1,x2,x3; \
   p2 = s2;                                    \
   p3 = s6;                                    \
   p1 = (p2+p3) * stbi__f2f(0.5411961f);       \
   t2 = p1 + p3*stbi__f2f(-1.847759065f);      \
   t3 = p1 + p2*stbi__f2f( 0.765366865f);      \
   p2 = s0;                                    \
   p3 = s4;                                    \
   t0 = stbi__fsh(p2+p3);                      \
   t1 = stbi__fsh(p2-p3);                      \
   x0 = t0+t3;                                 \
   x3 = t0-t3;                                 \
   x1 = t1+t2;                                 \
   x2 = t1-t2;                                 \
   t0 = s7;                                    \
   t1 = s5;                                    \
   t2 = s3;                                    \
   t3 = s1;                                    \
   p3 = t0+t2;                                 \
   p4 = t1+t3;                                 \
   p1 = t0+t3;                                 \
   p2 = t1+t2;                                 \
   p5 = (p3+p4)*stbi__f2f( 1.175875602f);      \
   t0 = t0*stbi__f2f( 0.298631336f);           \
   t1 = t1*stbi__f2f( 2.053119869f);           \
   t2 = t2*stbi__f2f( 3.072711026f);           \
   t3 = t3*stbi__f2f( 1.501321110f);           \
   p1 = p5 + p1*stbi__f2f(-0.899976223f);      \
   p2 = p5 + p2*stbi__f2f(-2.562915447f);      \
   p3 = p3*stbi__f2f(-1.961570560f);           \
   p4 = p4*stbi__f2f(-0.390180644f);           \
   t3 += p1+p4;                                \
   t2 += p2+p3;                                \
   t1 += p2+p4;                                \
   t0 += p1+p3;
2166 
// Generic integer 8x8 inverse DCT.
//   out        - destination; 8 rows of 8 bytes are written
//   out_stride - distance in bytes between consecutive output rows
//   data       - 64 coefficients in row-major order
// A column pass writes intermediate values (with 2 extra bits of precision)
// into val[]; a row pass then produces the pixels, adds 128 and clamps them
// to 0..255.
static void stbi__idct_block(stbi_uc *out, int out_stride, short data[64])
{
   int i,val[64],*v=val;
   stbi_uc *o;
   short *d = data;

   // columns
   for (i=0; i < 8; ++i,++d, ++v) {
      // if all zeroes, shortcut -- this avoids dequantizing 0s and IDCTing
      if (d[ 8]==0 && d[16]==0 && d[24]==0 && d[32]==0
           && d[40]==0 && d[48]==0 && d[56]==0) {
         //    no shortcut                 0     seconds
         //    (1|2|3|4|5|6|7)==0          0     seconds
         //    all separate               -0.047 seconds
         //    1 && 2|3 && 4|5 && 6|7:    -0.047 seconds
         // multiply instead of shifting: d[0] may be negative
         int dcterm = d[0]*4;
         v[0] = v[8] = v[16] = v[24] = v[32] = v[40] = v[48] = v[56] = dcterm;
      } else {
         STBI__IDCT_1D(d[ 0],d[ 8],d[16],d[24],d[32],d[40],d[48],d[56])
         // constants scaled things up by 1<<12; let's bring them back
         // down, but keep 2 extra bits of precision
         x0 += 512; x1 += 512; x2 += 512; x3 += 512;
         v[ 0] = (x0+t3) >> 10;
         v[56] = (x0-t3) >> 10;
         v[ 8] = (x1+t2) >> 10;
         v[48] = (x1-t2) >> 10;
         v[16] = (x2+t1) >> 10;
         v[40] = (x2-t1) >> 10;
         v[24] = (x3+t0) >> 10;
         v[32] = (x3-t0) >> 10;
      }
   }

   // rows
   for (i=0, v=val, o=out; i < 8; ++i,v+=8,o+=out_stride) {
      // no fast case since the first 1D IDCT spread components out
      STBI__IDCT_1D(v[0],v[1],v[2],v[3],v[4],v[5],v[6],v[7])
      // constants scaled things up by 1<<12, plus we had 1<<2 from first
      // loop, plus horizontal and vertical each scale by sqrt(8) so together
      // we've got an extra 1<<3, so 1<<17 total we need to remove.
      // so we want to round that, which means adding 0.5 * 1<<17,
      // aka 65536. Also, we'll end up with -128 to 127 that we want
      // to encode as 0..255 by adding 128, so we'll add that before the shift
      x0 += 65536 + (128<<17);
      x1 += 65536 + (128<<17);
      x2 += 65536 + (128<<17);
      x3 += 65536 + (128<<17);
      // tried computing the shifts into temps, or'ing the temps to see
      // if any were out of range, but that was slower
      o[0] = stbi__clamp((x0+t3) >> 17);
      o[7] = stbi__clamp((x0-t3) >> 17);
      o[1] = stbi__clamp((x1+t2) >> 17);
      o[6] = stbi__clamp((x1-t2) >> 17);
      o[2] = stbi__clamp((x2+t1) >> 17);
      o[5] = stbi__clamp((x2-t1) >> 17);
      o[3] = stbi__clamp((x3+t0) >> 17);
      o[4] = stbi__clamp((x3-t0) >> 17);
   }
}
2225 
2226 #ifdef STBI_SSE2
// sse2 integer IDCT. not the fastest possible implementation but it
// produces bit-identical results to the generic C version so it's
// fully "transparent".
//   out        - destination; 8 rows of 8 bytes are written
//   out_stride - distance in bytes between consecutive output rows
//   data       - 64 coefficients in row-major order; read with
//                _mm_load_si128, i.e. the aligned 128-bit load
// Structure: column pass on 8 row registers, 16-bit transpose, row pass,
// saturating pack to bytes, 8-bit transpose, then eight 8-byte stores.
static void stbi__idct_simd(stbi_uc *out, int out_stride, short data[64])
{
   // This is constructed to match our regular (generic) integer IDCT exactly.
   __m128i row0, row1, row2, row3, row4, row5, row6, row7;
   __m128i tmp;   // scratch register used by the interleave macros

   // dot product constant: even elems=x, odd elems=y
   #define dct_const(x,y)  _mm_setr_epi16((x),(y),(x),(y),(x),(y),(x),(y))

   // out(0) = c0[even]*x + c0[odd]*y   (c0, x, y 16-bit, out 32-bit)
   // out(1) = c1[even]*x + c1[odd]*y
   #define dct_rot(out0,out1, x,y,c0,c1) \
      __m128i c0##lo = _mm_unpacklo_epi16((x),(y)); \
      __m128i c0##hi = _mm_unpackhi_epi16((x),(y)); \
      __m128i out0##_l = _mm_madd_epi16(c0##lo, c0); \
      __m128i out0##_h = _mm_madd_epi16(c0##hi, c0); \
      __m128i out1##_l = _mm_madd_epi16(c0##lo, c1); \
      __m128i out1##_h = _mm_madd_epi16(c0##hi, c1)

   // out = in << 12  (in 16-bit, out 32-bit)
   #define dct_widen(out, in) \
      __m128i out##_l = _mm_srai_epi32(_mm_unpacklo_epi16(_mm_setzero_si128(), (in)), 4); \
      __m128i out##_h = _mm_srai_epi32(_mm_unpackhi_epi16(_mm_setzero_si128(), (in)), 4)

   // wide add
   #define dct_wadd(out, a, b) \
      __m128i out##_l = _mm_add_epi32(a##_l, b##_l); \
      __m128i out##_h = _mm_add_epi32(a##_h, b##_h)

   // wide sub
   #define dct_wsub(out, a, b) \
      __m128i out##_l = _mm_sub_epi32(a##_l, b##_l); \
      __m128i out##_h = _mm_sub_epi32(a##_h, b##_h)

   // butterfly a/b, add bias, then shift by "s" and pack
   #define dct_bfly32o(out0, out1, a,b,bias,s) \
      { \
         __m128i abiased_l = _mm_add_epi32(a##_l, bias); \
         __m128i abiased_h = _mm_add_epi32(a##_h, bias); \
         dct_wadd(sum, abiased, b); \
         dct_wsub(dif, abiased, b); \
         out0 = _mm_packs_epi32(_mm_srai_epi32(sum_l, s), _mm_srai_epi32(sum_h, s)); \
         out1 = _mm_packs_epi32(_mm_srai_epi32(dif_l, s), _mm_srai_epi32(dif_h, s)); \
      }

   // 8-bit interleave step (for transposes)
   #define dct_interleave8(a, b) \
      tmp = a; \
      a = _mm_unpacklo_epi8(a, b); \
      b = _mm_unpackhi_epi8(tmp, b)

   // 16-bit interleave step (for transposes)
   #define dct_interleave16(a, b) \
      tmp = a; \
      a = _mm_unpacklo_epi16(a, b); \
      b = _mm_unpackhi_epi16(tmp, b)

   // one full 1-D IDCT over row0..row7 (in place); 'bias' is the rounding
   // constant added before the final arithmetic shift by 'shift'
   #define dct_pass(bias,shift) \
      { \
         /* even part */ \
         dct_rot(t2e,t3e, row2,row6, rot0_0,rot0_1); \
         __m128i sum04 = _mm_add_epi16(row0, row4); \
         __m128i dif04 = _mm_sub_epi16(row0, row4); \
         dct_widen(t0e, sum04); \
         dct_widen(t1e, dif04); \
         dct_wadd(x0, t0e, t3e); \
         dct_wsub(x3, t0e, t3e); \
         dct_wadd(x1, t1e, t2e); \
         dct_wsub(x2, t1e, t2e); \
         /* odd part */ \
         dct_rot(y0o,y2o, row7,row3, rot2_0,rot2_1); \
         dct_rot(y1o,y3o, row5,row1, rot3_0,rot3_1); \
         __m128i sum17 = _mm_add_epi16(row1, row7); \
         __m128i sum35 = _mm_add_epi16(row3, row5); \
         dct_rot(y4o,y5o, sum17,sum35, rot1_0,rot1_1); \
         dct_wadd(x4, y0o, y4o); \
         dct_wadd(x5, y1o, y5o); \
         dct_wadd(x6, y2o, y5o); \
         dct_wadd(x7, y3o, y4o); \
         dct_bfly32o(row0,row7, x0,x7,bias,shift); \
         dct_bfly32o(row1,row6, x1,x6,bias,shift); \
         dct_bfly32o(row2,row5, x2,x5,bias,shift); \
         dct_bfly32o(row3,row4, x3,x4,bias,shift); \
      }

   // rotation constants: the same fixed-point factors as STBI__IDCT_1D,
   // pre-combined into pairs for the multiply-add in dct_rot
   __m128i rot0_0 = dct_const(stbi__f2f(0.5411961f), stbi__f2f(0.5411961f) + stbi__f2f(-1.847759065f));
   __m128i rot0_1 = dct_const(stbi__f2f(0.5411961f) + stbi__f2f( 0.765366865f), stbi__f2f(0.5411961f));
   __m128i rot1_0 = dct_const(stbi__f2f(1.175875602f) + stbi__f2f(-0.899976223f), stbi__f2f(1.175875602f));
   __m128i rot1_1 = dct_const(stbi__f2f(1.175875602f), stbi__f2f(1.175875602f) + stbi__f2f(-2.562915447f));
   __m128i rot2_0 = dct_const(stbi__f2f(-1.961570560f) + stbi__f2f( 0.298631336f), stbi__f2f(-1.961570560f));
   __m128i rot2_1 = dct_const(stbi__f2f(-1.961570560f), stbi__f2f(-1.961570560f) + stbi__f2f( 3.072711026f));
   __m128i rot3_0 = dct_const(stbi__f2f(-0.390180644f) + stbi__f2f( 2.053119869f), stbi__f2f(-0.390180644f));
   __m128i rot3_1 = dct_const(stbi__f2f(-0.390180644f), stbi__f2f(-0.390180644f) + stbi__f2f( 1.501321110f));

   // rounding biases in column/row passes, see stbi__idct_block for explanation.
   __m128i bias_0 = _mm_set1_epi32(512);
   __m128i bias_1 = _mm_set1_epi32(65536 + (128<<17));

   // load
   row0 = _mm_load_si128((const __m128i *) (data + 0*8));
   row1 = _mm_load_si128((const __m128i *) (data + 1*8));
   row2 = _mm_load_si128((const __m128i *) (data + 2*8));
   row3 = _mm_load_si128((const __m128i *) (data + 3*8));
   row4 = _mm_load_si128((const __m128i *) (data + 4*8));
   row5 = _mm_load_si128((const __m128i *) (data + 5*8));
   row6 = _mm_load_si128((const __m128i *) (data + 6*8));
   row7 = _mm_load_si128((const __m128i *) (data + 7*8));

   // column pass
   dct_pass(bias_0, 10);

   {
      // 16bit 8x8 transpose pass 1
      dct_interleave16(row0, row4);
      dct_interleave16(row1, row5);
      dct_interleave16(row2, row6);
      dct_interleave16(row3, row7);

      // transpose pass 2
      dct_interleave16(row0, row2);
      dct_interleave16(row1, row3);
      dct_interleave16(row4, row6);
      dct_interleave16(row5, row7);

      // transpose pass 3
      dct_interleave16(row0, row1);
      dct_interleave16(row2, row3);
      dct_interleave16(row4, row5);
      dct_interleave16(row6, row7);
   }

   // row pass
   dct_pass(bias_1, 17);

   {
      // pack
      __m128i p0 = _mm_packus_epi16(row0, row1); // a0a1a2a3...a7b0b1b2b3...b7
      __m128i p1 = _mm_packus_epi16(row2, row3);
      __m128i p2 = _mm_packus_epi16(row4, row5);
      __m128i p3 = _mm_packus_epi16(row6, row7);

      // 8bit 8x8 transpose pass 1
      dct_interleave8(p0, p2); // a0e0a1e1...
      dct_interleave8(p1, p3); // c0g0c1g1...

      // transpose pass 2
      dct_interleave8(p0, p1); // a0c0e0g0...
      dct_interleave8(p2, p3); // b0d0f0h0...

      // transpose pass 3
      dct_interleave8(p0, p2); // a0b0c0d0...
      dct_interleave8(p1, p3); // a4b4c4d4...

      // store
      // each register holds two output rows: the low 8 bytes are stored
      // directly, the high 8 bytes after swapping halves with shuffle 0x4e
      _mm_storel_epi64((__m128i *) out, p0); out += out_stride;
      _mm_storel_epi64((__m128i *) out, _mm_shuffle_epi32(p0, 0x4e)); out += out_stride;
      _mm_storel_epi64((__m128i *) out, p2); out += out_stride;
      _mm_storel_epi64((__m128i *) out, _mm_shuffle_epi32(p2, 0x4e)); out += out_stride;
      _mm_storel_epi64((__m128i *) out, p1); out += out_stride;
      _mm_storel_epi64((__m128i *) out, _mm_shuffle_epi32(p1, 0x4e)); out += out_stride;
      _mm_storel_epi64((__m128i *) out, p3); out += out_stride;
      _mm_storel_epi64((__m128i *) out, _mm_shuffle_epi32(p3, 0x4e));
   }

#undef dct_const
#undef dct_rot
#undef dct_widen
#undef dct_wadd
#undef dct_wsub
#undef dct_bfly32o
#undef dct_interleave8
#undef dct_interleave16
#undef dct_pass
}
2404 
2405 #endif // STBI_SSE2
2406 
2407 #ifdef STBI_NEON
2408 
2409 // NEON integer IDCT. should produce bit-identical
2410 // results to the generic C version.
stbi__idct_simd(stbi_uc * out,int out_stride,short data[64])2411 static void stbi__idct_simd(stbi_uc *out, int out_stride, short data[64])
2412 {
2413    int16x8_t row0, row1, row2, row3, row4, row5, row6, row7;
2414 
2415    int16x4_t rot0_0 = vdup_n_s16(stbi__f2f(0.5411961f));
2416    int16x4_t rot0_1 = vdup_n_s16(stbi__f2f(-1.847759065f));
2417    int16x4_t rot0_2 = vdup_n_s16(stbi__f2f( 0.765366865f));
2418    int16x4_t rot1_0 = vdup_n_s16(stbi__f2f( 1.175875602f));
2419    int16x4_t rot1_1 = vdup_n_s16(stbi__f2f(-0.899976223f));
2420    int16x4_t rot1_2 = vdup_n_s16(stbi__f2f(-2.562915447f));
2421    int16x4_t rot2_0 = vdup_n_s16(stbi__f2f(-1.961570560f));
2422    int16x4_t rot2_1 = vdup_n_s16(stbi__f2f(-0.390180644f));
2423    int16x4_t rot3_0 = vdup_n_s16(stbi__f2f( 0.298631336f));
2424    int16x4_t rot3_1 = vdup_n_s16(stbi__f2f( 2.053119869f));
2425    int16x4_t rot3_2 = vdup_n_s16(stbi__f2f( 3.072711026f));
2426    int16x4_t rot3_3 = vdup_n_s16(stbi__f2f( 1.501321110f));
2427 
2428 #define dct_long_mul(out, inq, coeff) \
2429    int32x4_t out##_l = vmull_s16(vget_low_s16(inq), coeff); \
2430    int32x4_t out##_h = vmull_s16(vget_high_s16(inq), coeff)
2431 
2432 #define dct_long_mac(out, acc, inq, coeff) \
2433    int32x4_t out##_l = vmlal_s16(acc##_l, vget_low_s16(inq), coeff); \
2434    int32x4_t out##_h = vmlal_s16(acc##_h, vget_high_s16(inq), coeff)
2435 
2436 #define dct_widen(out, inq) \
2437    int32x4_t out##_l = vshll_n_s16(vget_low_s16(inq), 12); \
2438    int32x4_t out##_h = vshll_n_s16(vget_high_s16(inq), 12)
2439 
2440 // wide add
2441 #define dct_wadd(out, a, b) \
2442    int32x4_t out##_l = vaddq_s32(a##_l, b##_l); \
2443    int32x4_t out##_h = vaddq_s32(a##_h, b##_h)
2444 
2445 // wide sub
2446 #define dct_wsub(out, a, b) \
2447    int32x4_t out##_l = vsubq_s32(a##_l, b##_l); \
2448    int32x4_t out##_h = vsubq_s32(a##_h, b##_h)
2449 
2450 // butterfly a/b, then shift using "shiftop" by "s" and pack
2451 #define dct_bfly32o(out0,out1, a,b,shiftop,s) \
2452    { \
2453       dct_wadd(sum, a, b); \
2454       dct_wsub(dif, a, b); \
2455       out0 = vcombine_s16(shiftop(sum_l, s), shiftop(sum_h, s)); \
2456       out1 = vcombine_s16(shiftop(dif_l, s), shiftop(dif_h, s)); \
2457    }
2458 
2459 #define dct_pass(shiftop, shift) \
2460    { \
2461       /* even part */ \
2462       int16x8_t sum26 = vaddq_s16(row2, row6); \
2463       dct_long_mul(p1e, sum26, rot0_0); \
2464       dct_long_mac(t2e, p1e, row6, rot0_1); \
2465       dct_long_mac(t3e, p1e, row2, rot0_2); \
2466       int16x8_t sum04 = vaddq_s16(row0, row4); \
2467       int16x8_t dif04 = vsubq_s16(row0, row4); \
2468       dct_widen(t0e, sum04); \
2469       dct_widen(t1e, dif04); \
2470       dct_wadd(x0, t0e, t3e); \
2471       dct_wsub(x3, t0e, t3e); \
2472       dct_wadd(x1, t1e, t2e); \
2473       dct_wsub(x2, t1e, t2e); \
2474       /* odd part */ \
2475       int16x8_t sum15 = vaddq_s16(row1, row5); \
2476       int16x8_t sum17 = vaddq_s16(row1, row7); \
2477       int16x8_t sum35 = vaddq_s16(row3, row5); \
2478       int16x8_t sum37 = vaddq_s16(row3, row7); \
2479       int16x8_t sumodd = vaddq_s16(sum17, sum35); \
2480       dct_long_mul(p5o, sumodd, rot1_0); \
2481       dct_long_mac(p1o, p5o, sum17, rot1_1); \
2482       dct_long_mac(p2o, p5o, sum35, rot1_2); \
2483       dct_long_mul(p3o, sum37, rot2_0); \
2484       dct_long_mul(p4o, sum15, rot2_1); \
2485       dct_wadd(sump13o, p1o, p3o); \
2486       dct_wadd(sump24o, p2o, p4o); \
2487       dct_wadd(sump23o, p2o, p3o); \
2488       dct_wadd(sump14o, p1o, p4o); \
2489       dct_long_mac(x4, sump13o, row7, rot3_0); \
2490       dct_long_mac(x5, sump24o, row5, rot3_1); \
2491       dct_long_mac(x6, sump23o, row3, rot3_2); \
2492       dct_long_mac(x7, sump14o, row1, rot3_3); \
2493       dct_bfly32o(row0,row7, x0,x7,shiftop,shift); \
2494       dct_bfly32o(row1,row6, x1,x6,shiftop,shift); \
2495       dct_bfly32o(row2,row5, x2,x5,shiftop,shift); \
2496       dct_bfly32o(row3,row4, x3,x4,shiftop,shift); \
2497    }
2498 
2499    // load
2500    row0 = vld1q_s16(data + 0*8);
2501    row1 = vld1q_s16(data + 1*8);
2502    row2 = vld1q_s16(data + 2*8);
2503    row3 = vld1q_s16(data + 3*8);
2504    row4 = vld1q_s16(data + 4*8);
2505    row5 = vld1q_s16(data + 5*8);
2506    row6 = vld1q_s16(data + 6*8);
2507    row7 = vld1q_s16(data + 7*8);
2508 
2509    // add DC bias
2510    row0 = vaddq_s16(row0, vsetq_lane_s16(1024, vdupq_n_s16(0), 0));
2511 
2512    // column pass
2513    dct_pass(vrshrn_n_s32, 10);
2514 
2515    // 16bit 8x8 transpose
2516    {
2517 // these three map to a single VTRN.16, VTRN.32, and VSWP, respectively.
2518 // whether compilers actually get this is another story, sadly.
2519 #define dct_trn16(x, y) { int16x8x2_t t = vtrnq_s16(x, y); x = t.val[0]; y = t.val[1]; }
2520 #define dct_trn32(x, y) { int32x4x2_t t = vtrnq_s32(vreinterpretq_s32_s16(x), vreinterpretq_s32_s16(y)); x = vreinterpretq_s16_s32(t.val[0]); y = vreinterpretq_s16_s32(t.val[1]); }
2521 #define dct_trn64(x, y) { int16x8_t x0 = x; int16x8_t y0 = y; x = vcombine_s16(vget_low_s16(x0), vget_low_s16(y0)); y = vcombine_s16(vget_high_s16(x0), vget_high_s16(y0)); }
2522 
2523       // pass 1
2524       dct_trn16(row0, row1); // a0b0a2b2a4b4a6b6
2525       dct_trn16(row2, row3);
2526       dct_trn16(row4, row5);
2527       dct_trn16(row6, row7);
2528 
2529       // pass 2
2530       dct_trn32(row0, row2); // a0b0c0d0a4b4c4d4
2531       dct_trn32(row1, row3);
2532       dct_trn32(row4, row6);
2533       dct_trn32(row5, row7);
2534 
2535       // pass 3
2536       dct_trn64(row0, row4); // a0b0c0d0e0f0g0h0
2537       dct_trn64(row1, row5);
2538       dct_trn64(row2, row6);
2539       dct_trn64(row3, row7);
2540 
2541 #undef dct_trn16
2542 #undef dct_trn32
2543 #undef dct_trn64
2544    }
2545 
2546    // row pass
2547    // vrshrn_n_s32 only supports shifts up to 16, we need
2548    // 17. so do a non-rounding shift of 16 first then follow
2549    // up with a rounding shift by 1.
2550    dct_pass(vshrn_n_s32, 16);
2551 
2552    {
2553       // pack and round
2554       uint8x8_t p0 = vqrshrun_n_s16(row0, 1);
2555       uint8x8_t p1 = vqrshrun_n_s16(row1, 1);
2556       uint8x8_t p2 = vqrshrun_n_s16(row2, 1);
2557       uint8x8_t p3 = vqrshrun_n_s16(row3, 1);
2558       uint8x8_t p4 = vqrshrun_n_s16(row4, 1);
2559       uint8x8_t p5 = vqrshrun_n_s16(row5, 1);
2560       uint8x8_t p6 = vqrshrun_n_s16(row6, 1);
2561       uint8x8_t p7 = vqrshrun_n_s16(row7, 1);
2562 
2563       // again, these can translate into one instruction, but often don't.
2564 #define dct_trn8_8(x, y) { uint8x8x2_t t = vtrn_u8(x, y); x = t.val[0]; y = t.val[1]; }
2565 #define dct_trn8_16(x, y) { uint16x4x2_t t = vtrn_u16(vreinterpret_u16_u8(x), vreinterpret_u16_u8(y)); x = vreinterpret_u8_u16(t.val[0]); y = vreinterpret_u8_u16(t.val[1]); }
2566 #define dct_trn8_32(x, y) { uint32x2x2_t t = vtrn_u32(vreinterpret_u32_u8(x), vreinterpret_u32_u8(y)); x = vreinterpret_u8_u32(t.val[0]); y = vreinterpret_u8_u32(t.val[1]); }
2567 
2568       // sadly can't use interleaved stores here since we only write
2569       // 8 bytes to each scan line!
2570 
2571       // 8x8 8-bit transpose pass 1
2572       dct_trn8_8(p0, p1);
2573       dct_trn8_8(p2, p3);
2574       dct_trn8_8(p4, p5);
2575       dct_trn8_8(p6, p7);
2576 
2577       // pass 2
2578       dct_trn8_16(p0, p2);
2579       dct_trn8_16(p1, p3);
2580       dct_trn8_16(p4, p6);
2581       dct_trn8_16(p5, p7);
2582 
2583       // pass 3
2584       dct_trn8_32(p0, p4);
2585       dct_trn8_32(p1, p5);
2586       dct_trn8_32(p2, p6);
2587       dct_trn8_32(p3, p7);
2588 
2589       // store
2590       vst1_u8(out, p0); out += out_stride;
2591       vst1_u8(out, p1); out += out_stride;
2592       vst1_u8(out, p2); out += out_stride;
2593       vst1_u8(out, p3); out += out_stride;
2594       vst1_u8(out, p4); out += out_stride;
2595       vst1_u8(out, p5); out += out_stride;
2596       vst1_u8(out, p6); out += out_stride;
2597       vst1_u8(out, p7);
2598 
2599 #undef dct_trn8_8
2600 #undef dct_trn8_16
2601 #undef dct_trn8_32
2602    }
2603 
2604 #undef dct_long_mul
2605 #undef dct_long_mac
2606 #undef dct_widen
2607 #undef dct_wadd
2608 #undef dct_wsub
2609 #undef dct_bfly32o
2610 #undef dct_pass
2611 }
2612 
2613 #endif // STBI_NEON
2614 
2615 #define STBI__MARKER_none  0xff
2616 // if there's a pending marker from the entropy stream, return that
2617 // otherwise, fetch from the stream and get a marker. if there's no
2618 // marker, return 0xff, which is never a valid marker value
stbi__get_marker(stbi__jpeg * j)2619 static stbi_uc stbi__get_marker(stbi__jpeg *j)
2620 {
2621    stbi_uc x;
2622    if (j->marker != STBI__MARKER_none) { x = j->marker; j->marker = STBI__MARKER_none; return x; }
2623    x = stbi__get8(j->s);
2624    if (x != 0xff) return STBI__MARKER_none;
2625    while (x == 0xff)
2626       x = stbi__get8(j->s); // consume repeated 0xff fill bytes
2627    return x;
2628 }
2629 
2630 // in each scan, we'll have scan_n components, and the order
2631 // of the components is specified by order[]
2632 #define STBI__RESTART(x)     ((x) >= 0xd0 && (x) <= 0xd7)
2633 
2634 // after a restart interval, stbi__jpeg_reset the entropy decoder and
2635 // the dc prediction
stbi__jpeg_reset(stbi__jpeg * j)2636 static void stbi__jpeg_reset(stbi__jpeg *j)
2637 {
2638    j->code_bits = 0;
2639    j->code_buffer = 0;
2640    j->nomore = 0;
2641    j->img_comp[0].dc_pred = j->img_comp[1].dc_pred = j->img_comp[2].dc_pred = j->img_comp[3].dc_pred = 0;
2642    j->marker = STBI__MARKER_none;
2643    j->todo = j->restart_interval ? j->restart_interval : 0x7fffffff;
2644    j->eob_run = 0;
2645    // no more than 1<<31 MCUs if no restart_interal? that's plenty safe,
2646    // since we don't even allow 1<<30 pixels
2647 }
2648 
// Count one completed MCU against the restart interval.
// Returns 1 if the caller should keep decoding the scan. Returns 0 when the
// interval has run out and the pending marker is NOT a restart marker; the
// caller then ends the scan early but still reports success, so we get
// corrupt data rather than no data.
static int stbi__jpeg_mcu_countdown(stbi__jpeg *z)
{
   if (--z->todo > 0) return 1;
   if (z->code_bits < 24) stbi__grow_buffer_unsafe(z);
   if (!STBI__RESTART(z->marker)) return 0;
   stbi__jpeg_reset(z);
   return 1;
}

// Decode the entropy-coded data of the current scan.
// Returns 1 on success (including an early end at a missing restart marker)
// and 0 as soon as a block decoder fails.
//
// Four cases, chosen by z->progressive and z->scan_n:
//   baseline, one component    - decode each block and IDCT it straight into
//                                the component's data plane
//   baseline, interleaved      - same, walking MCUs and each component's
//                                h x v blocks inside the MCU
//   progressive, one component - decode into the component's coeff buffer,
//                                using the DC decoder when spec_start == 0
//                                and the AC decoder otherwise
//   progressive, interleaved   - DC decoder only
// Progressive scans only fill the coefficient buffers; no IDCT happens here.
static int stbi__parse_entropy_coded_data(stbi__jpeg *z)
{
   stbi__jpeg_reset(z);
   if (!z->progressive) {
      if (z->scan_n == 1) {
         int i,j;
         STBI_SIMD_ALIGN(short, data[64]);
         int n = z->order[0];
         // non-interleaved data, we just need to process one block at a time,
         // in trivial scanline order
         // number of blocks to do just depends on how many actual "pixels" this
         // component has, independent of interleaved MCU blocking and such
         int w = (z->img_comp[n].x+7) >> 3;
         int h = (z->img_comp[n].y+7) >> 3;
         for (j=0; j < h; ++j) {
            for (i=0; i < w; ++i) {
               int ha = z->img_comp[n].ha;
               if (!stbi__jpeg_decode_block(z, data, z->huff_dc+z->img_comp[n].hd, z->huff_ac+ha, z->fast_ac[ha], n, z->dequant[z->img_comp[n].tq])) return 0;
               z->idct_block_kernel(z->img_comp[n].data+z->img_comp[n].w2*j*8+i*8, z->img_comp[n].w2, data);
               // every data block is an MCU, so countdown the restart interval
               if (!stbi__jpeg_mcu_countdown(z)) return 1;
            }
         }
         return 1;
      } else { // interleaved
         int i,j,k,x,y;
         STBI_SIMD_ALIGN(short, data[64]);
         for (j=0; j < z->img_mcu_y; ++j) {
            for (i=0; i < z->img_mcu_x; ++i) {
               // scan an interleaved mcu... process scan_n components in order
               for (k=0; k < z->scan_n; ++k) {
                  int n = z->order[k];
                  // scan out an mcu's worth of this component; that's just determined
                  // by the basic H and V specified for the component
                  for (y=0; y < z->img_comp[n].v; ++y) {
                     for (x=0; x < z->img_comp[n].h; ++x) {
                        int x2 = (i*z->img_comp[n].h + x)*8;
                        int y2 = (j*z->img_comp[n].v + y)*8;
                        int ha = z->img_comp[n].ha;
                        if (!stbi__jpeg_decode_block(z, data, z->huff_dc+z->img_comp[n].hd, z->huff_ac+ha, z->fast_ac[ha], n, z->dequant[z->img_comp[n].tq])) return 0;
                        z->idct_block_kernel(z->img_comp[n].data+z->img_comp[n].w2*y2+x2, z->img_comp[n].w2, data);
                     }
                  }
               }
               // after all interleaved components, that's an interleaved MCU,
               // so now count down the restart interval
               if (!stbi__jpeg_mcu_countdown(z)) return 1;
            }
         }
         return 1;
      }
   } else {
      if (z->scan_n == 1) {
         int i,j;
         int n = z->order[0];
         // non-interleaved data, we just need to process one block at a time,
         // in trivial scanline order
         // number of blocks to do just depends on how many actual "pixels" this
         // component has, independent of interleaved MCU blocking and such
         int w = (z->img_comp[n].x+7) >> 3;
         int h = (z->img_comp[n].y+7) >> 3;
         for (j=0; j < h; ++j) {
            for (i=0; i < w; ++i) {
               // coefficients for block (i,j) live in the component's coeff buffer
               short *data = z->img_comp[n].coeff + 64 * (i + j * z->img_comp[n].coeff_w);
               if (z->spec_start == 0) {
                  if (!stbi__jpeg_decode_block_prog_dc(z, data, &z->huff_dc[z->img_comp[n].hd], n))
                     return 0;
               } else {
                  int ha = z->img_comp[n].ha;
                  if (!stbi__jpeg_decode_block_prog_ac(z, data, &z->huff_ac[ha], z->fast_ac[ha]))
                     return 0;
               }
               // every data block is an MCU, so countdown the restart interval
               if (!stbi__jpeg_mcu_countdown(z)) return 1;
            }
         }
         return 1;
      } else { // interleaved
         int i,j,k,x,y;
         for (j=0; j < z->img_mcu_y; ++j) {
            for (i=0; i < z->img_mcu_x; ++i) {
               // scan an interleaved mcu... process scan_n components in order
               for (k=0; k < z->scan_n; ++k) {
                  int n = z->order[k];
                  // scan out an mcu's worth of this component; that's just determined
                  // by the basic H and V specified for the component
                  for (y=0; y < z->img_comp[n].v; ++y) {
                     for (x=0; x < z->img_comp[n].h; ++x) {
                        int x2 = (i*z->img_comp[n].h + x);
                        int y2 = (j*z->img_comp[n].v + y);
                        short *data = z->img_comp[n].coeff + 64 * (x2 + y2 * z->img_comp[n].coeff_w);
                        if (!stbi__jpeg_decode_block_prog_dc(z, data, &z->huff_dc[z->img_comp[n].hd], n))
                           return 0;
                     }
                  }
               }
               // after all interleaved components, that's an interleaved MCU,
               // so now count down the restart interval
               if (!stbi__jpeg_mcu_countdown(z)) return 1;
            }
         }
         return 1;
      }
   }
}
2772 
// Scale the 64 coefficients in 'data' in place, each by the entry of
// 'dequant' at the same index.
static void stbi__jpeg_dequantize(short *data, stbi__uint16 *dequant)
{
   short *coef = data;
   short *stop = data + 64;
   stbi__uint16 *scale = dequant;
   while (coef != stop) {
      *coef *= *scale;
      ++coef;
      ++scale;
   }
}
2779 
// Final step for progressive images: walk every component's coefficient
// buffer, dequantize each 8x8 block in place and IDCT it into the
// component's data plane. Does nothing when z->progressive is not set.
static void stbi__jpeg_finish(stbi__jpeg *z)
{
   int comp;

   if (!z->progressive)
      return;

   for (comp = 0; comp < z->s->img_n; ++comp) {
      // block counts come from the component's pixel size, rounded up to whole blocks
      int blocks_wide = (z->img_comp[comp].x + 7) >> 3;
      int blocks_high = (z->img_comp[comp].y + 7) >> 3;
      int bx, by;
      for (by = 0; by < blocks_high; ++by) {
         for (bx = 0; bx < blocks_wide; ++bx) {
            short *block = z->img_comp[comp].coeff + 64 * (bx + by * z->img_comp[comp].coeff_w);
            stbi_uc *dest = z->img_comp[comp].data + z->img_comp[comp].w2*by*8 + bx*8;
            stbi__jpeg_dequantize(block, z->dequant[z->img_comp[comp].tq]);
            z->idct_block_kernel(dest, z->img_comp[comp].w2, block);
         }
      }
   }
}
2798 
// Handle one marker segment, reading its payload from z->s.
//   z - decoder state
//   m - marker code, or STBI__MARKER_none if no marker was found
// Returns 1 on success; on failure returns 0 or the value of stbi__err().
//
// Handled here:
//   0xDD DRI   - stores the restart interval
//   0xDB DQT   - fills one or more quantization tables (8- or 16-bit entries),
//                stored in de-zigzagged order
//   0xC4 DHT   - builds one or more DC/AC huffman tables
//   0xE0..0xEF APPn and 0xFE COM - skipped, except that a JFIF tag in APP0
//                sets z->jfif and an Adobe tag in APP14 supplies
//                z->app14_color_transform
// Any other marker is reported as an error.
static int stbi__process_marker(stbi__jpeg *z, int m)
{
   int L;
   switch (m) {
      case STBI__MARKER_none: // no marker found
         return stbi__err("expected marker","Corrupt JPEG");

      case 0xDD: // DRI - specify restart interval
         if (stbi__get16be(z->s) != 4) return stbi__err("bad DRI len","Corrupt JPEG");
         z->restart_interval = stbi__get16be(z->s);
         return 1;

      case 0xDB: // DQT - define quantization table
         L = stbi__get16be(z->s)-2;
         while (L > 0) {
            int q = stbi__get8(z->s);
            int p = q >> 4, sixteen = (p != 0);
            int t = q & 15,i;
            if (p != 0 && p != 1) return stbi__err("bad DQT type","Corrupt JPEG");
            if (t > 3) return stbi__err("bad DQT table","Corrupt JPEG");

            for (i=0; i < 64; ++i)
               z->dequant[t][stbi__jpeg_dezigzag[i]] = sixteen ? stbi__get16be(z->s) : stbi__get8(z->s);
            // one table id byte plus 64 entries of 1 or 2 bytes each
            L -= (sixteen ? 129 : 65);
         }
         // the tables must consume the segment exactly
         return L==0;

      case 0xC4: // DHT - define huffman table
         L = stbi__get16be(z->s)-2;
         while (L > 0) {
            stbi_uc *v;
            int sizes[16],i,n=0;
            int q = stbi__get8(z->s);
            int tc = q >> 4;   // table class: 0 = DC, 1 = AC
            int th = q & 15;   // table slot
            if (tc > 1 || th > 3) return stbi__err("bad DHT header","Corrupt JPEG");
            for (i=0; i < 16; ++i) {
               sizes[i] = stbi__get8(z->s);
               n += sizes[i];
            }
            // A huffman table holds at most 256 symbols (JPEG spec B.2.4.2),
            // but sixteen byte-sized counts can sum to 4080. Reject that here,
            // before the table is built and before n bytes are written into
            // values[] below (presumably sized for 256 symbols -- confirm
            // against the stbi__huffman declaration).
            if (n > 256) return stbi__err("bad DHT header","Corrupt JPEG");
            L -= 17;
            if (tc == 0) {
               if (!stbi__build_huffman(z->huff_dc+th, sizes)) return 0;
               v = z->huff_dc[th].values;
            } else {
               if (!stbi__build_huffman(z->huff_ac+th, sizes)) return 0;
               v = z->huff_ac[th].values;
            }
            for (i=0; i < n; ++i)
               v[i] = stbi__get8(z->s);
            if (tc != 0)
               stbi__build_fast_ac(z->fast_ac[th], z->huff_ac + th);
            L -= n;
         }
         // the tables must consume the segment exactly
         return L==0;
   }

   // check for comment block or APP blocks
   if ((m >= 0xE0 && m <= 0xEF) || m == 0xFE) {
      L = stbi__get16be(z->s);
      if (L < 2) {
         if (m == 0xFE)
            return stbi__err("bad COM len","Corrupt JPEG");
         else
            return stbi__err("bad APP len","Corrupt JPEG");
      }
      L -= 2;

      if (m == 0xE0 && L >= 5) { // JFIF APP0 segment
         static const unsigned char tag[5] = {'J','F','I','F','\0'};
         int ok = 1;
         int i;
         // all 5 bytes are consumed even after a mismatch so L stays in sync
         for (i=0; i < 5; ++i)
            if (stbi__get8(z->s) != tag[i])
               ok = 0;
         L -= 5;
         if (ok)
            z->jfif = 1;
      } else if (m == 0xEE && L >= 12) { // Adobe APP14 segment
         static const unsigned char tag[6] = {'A','d','o','b','e','\0'};
         int ok = 1;
         int i;
         for (i=0; i < 6; ++i)
            if (stbi__get8(z->s) != tag[i])
               ok = 0;
         L -= 6;
         if (ok) {
            stbi__get8(z->s); // version
            stbi__get16be(z->s); // flags0
            stbi__get16be(z->s); // flags1
            z->app14_color_transform = stbi__get8(z->s); // color transform
            L -= 6;
         }
      }

      // whatever is left of the segment is not interpreted
      stbi__skip(z->s, L);
      return 1;
   }

   return stbi__err("unknown marker","Corrupt JPEG");
}
2900 
2901 // after we see SOS
stbi__process_scan_header(stbi__jpeg * z)2902 static int stbi__process_scan_header(stbi__jpeg *z)
2903 {
2904    int i;
2905    int Ls = stbi__get16be(z->s);
2906    z->scan_n = stbi__get8(z->s);
2907    if (z->scan_n < 1 || z->scan_n > 4 || z->scan_n > (int) z->s->img_n) return stbi__err("bad SOS component count","Corrupt JPEG");
2908    if (Ls != 6+2*z->scan_n) return stbi__err("bad SOS len","Corrupt JPEG");
2909    for (i=0; i < z->scan_n; ++i) {
2910       int id = stbi__get8(z->s), which;
2911       int q = stbi__get8(z->s);
2912       for (which = 0; which < z->s->img_n; ++which)
2913          if (z->img_comp[which].id == id)
2914             break;
2915       if (which == z->s->img_n) return 0; // no match
2916       z->img_comp[which].hd = q >> 4;   if (z->img_comp[which].hd > 3) return stbi__err("bad DC huff","Corrupt JPEG");
2917       z->img_comp[which].ha = q & 15;   if (z->img_comp[which].ha > 3) return stbi__err("bad AC huff","Corrupt JPEG");
2918       z->order[i] = which;
2919    }
2920 
2921    {
2922       int aa;
2923       z->spec_start = stbi__get8(z->s);
2924       z->spec_end   = stbi__get8(z->s); // should be 63, but might be 0
2925       aa = stbi__get8(z->s);
2926       z->succ_high = (aa >> 4);
2927       z->succ_low  = (aa & 15);
2928       if (z->progressive) {
2929          if (z->spec_start > 63 || z->spec_end > 63  || z->spec_start > z->spec_end || z->succ_high > 13 || z->succ_low > 13)
2930             return stbi__err("bad SOS", "Corrupt JPEG");
2931       } else {
2932          if (z->spec_start != 0) return stbi__err("bad SOS","Corrupt JPEG");
2933          if (z->succ_high != 0 || z->succ_low != 0) return stbi__err("bad SOS","Corrupt JPEG");
2934          z->spec_end = 63;
2935       }
2936    }
2937 
2938    return 1;
2939 }
2940 
// Free the buffers owned by the first 'ncomp' components (raw_data,
// raw_coeff, linebuf) and clear the pointers that referred to them.
// 'why' is returned untouched, so a caller can release everything and
// return its status in one statement.
static int stbi__free_jpeg_components(stbi__jpeg *z, int ncomp, int why)
{
   int c = 0;
   while (c < ncomp) {
      if (z->img_comp[c].raw_data != NULL) {
         STBI_FREE(z->img_comp[c].raw_data);
         // data is the aligned view into raw_data, so it goes with it
         z->img_comp[c].data = NULL;
         z->img_comp[c].raw_data = NULL;
      }
      if (z->img_comp[c].raw_coeff != NULL) {
         STBI_FREE(z->img_comp[c].raw_coeff);
         // likewise coeff is the aligned view into raw_coeff
         z->img_comp[c].coeff = 0;
         z->img_comp[c].raw_coeff = 0;
      }
      if (z->img_comp[c].linebuf != NULL) {
         STBI_FREE(z->img_comp[c].linebuf);
         z->img_comp[c].linebuf = NULL;
      }
      ++c;
   }
   return why;
}
2962 
// Parse the frame header (SOF) and, when scan == STBI__SCAN_load, size and
// allocate the per-component decode buffers.
//   z    - decoder state; image size and component count are stored in z->s
//   scan - for any value other than STBI__SCAN_load the function returns
//          right after the header fields are validated, allocating nothing
// Returns 1 on success; on failure returns the value of stbi__err()
// (allocation failures first free what this call allocated).
static int stbi__process_frame_header(stbi__jpeg *z, int scan)
{
   stbi__context *s = z->s;
   int Lf,p,i,q, h_max=1,v_max=1,c;
   Lf = stbi__get16be(s);         if (Lf < 11) return stbi__err("bad SOF len","Corrupt JPEG"); // JPEG
   p  = stbi__get8(s);            if (p != 8) return stbi__err("only 8-bit","JPEG format not supported: 8-bit only"); // JPEG baseline
   s->img_y = stbi__get16be(s);   if (s->img_y == 0) return stbi__err("no header height", "JPEG format not supported: delayed height"); // Legal, but we don't handle it--but neither does IJG
   s->img_x = stbi__get16be(s);   if (s->img_x == 0) return stbi__err("0 width","Corrupt JPEG"); // JPEG requires
   c = stbi__get8(s);
   if (c != 3 && c != 1 && c != 4) return stbi__err("bad component count","Corrupt JPEG");
   s->img_n = c;
   for (i=0; i < c; ++i) {
      z->img_comp[i].data = NULL;
      z->img_comp[i].linebuf = NULL;
   }

   if (Lf != 8+3*s->img_n) return stbi__err("bad SOF len","Corrupt JPEG");

   // z->rgb counts how many of the three component ids spell 'R','G','B' in order
   z->rgb = 0;
   for (i=0; i < s->img_n; ++i) {
      static const unsigned char rgb[3] = { 'R', 'G', 'B' };
      z->img_comp[i].id = stbi__get8(s);
      if (s->img_n == 3 && z->img_comp[i].id == rgb[i])
         ++z->rgb;
      q = stbi__get8(s);
      z->img_comp[i].h = (q >> 4);  if (!z->img_comp[i].h || z->img_comp[i].h > 4) return stbi__err("bad H","Corrupt JPEG");
      z->img_comp[i].v = q & 15;    if (!z->img_comp[i].v || z->img_comp[i].v > 4) return stbi__err("bad V","Corrupt JPEG");
      z->img_comp[i].tq = stbi__get8(s);  if (z->img_comp[i].tq > 3) return stbi__err("bad TQ","Corrupt JPEG");
   }

   if (scan != STBI__SCAN_load) return 1;

   if (!stbi__mad3sizes_valid(s->img_x, s->img_y, s->img_n, 0)) return stbi__err("too large", "Image too large to decode");

   for (i=0; i < s->img_n; ++i) {
      if (z->img_comp[i].h > h_max) h_max = z->img_comp[i].h;
      if (z->img_comp[i].v > v_max) v_max = z->img_comp[i].v;
   }

   // Every component's sampling factor must divide the maximum. With a
   // fractional ratio (e.g. 3 against 4) an integer scale h_max / h no
   // longer matches the plane width allocated below, so a consumer that
   // derives its row length from that scale would read outside the plane.
   for (i=0; i < s->img_n; ++i) {
      if (h_max % z->img_comp[i].h != 0) return stbi__err("bad H","Corrupt JPEG");
      if (v_max % z->img_comp[i].v != 0) return stbi__err("bad V","Corrupt JPEG");
   }

   // compute interleaved mcu info
   z->img_h_max = h_max;
   z->img_v_max = v_max;
   z->img_mcu_w = h_max * 8;
   z->img_mcu_h = v_max * 8;
   // these sizes can't be more than 17 bits
   z->img_mcu_x = (s->img_x + z->img_mcu_w-1) / z->img_mcu_w;
   z->img_mcu_y = (s->img_y + z->img_mcu_h-1) / z->img_mcu_h;

   for (i=0; i < s->img_n; ++i) {
      // number of effective pixels (e.g. for non-interleaved MCU)
      z->img_comp[i].x = (s->img_x * z->img_comp[i].h + h_max-1) / h_max;
      z->img_comp[i].y = (s->img_y * z->img_comp[i].v + v_max-1) / v_max;
      // to simplify generation, we'll allocate enough memory to decode
      // the bogus oversized data from using interleaved MCUs and their
      // big blocks (e.g. a 16x16 iMCU on an image of width 33); we won't
      // discard the extra data until colorspace conversion
      //
      // img_mcu_x, img_mcu_y: <=17 bits; comp[i].h and .v are <=4 (checked earlier)
      // so these muls can't overflow with 32-bit ints (which we require)
      z->img_comp[i].w2 = z->img_mcu_x * z->img_comp[i].h * 8;
      z->img_comp[i].h2 = z->img_mcu_y * z->img_comp[i].v * 8;
      z->img_comp[i].coeff = 0;
      z->img_comp[i].raw_coeff = 0;
      z->img_comp[i].linebuf = NULL;
      // 15 extra bytes leave room to round the pointer up to a 16-byte boundary
      z->img_comp[i].raw_data = stbi__malloc_mad2(z->img_comp[i].w2, z->img_comp[i].h2, 15);
      if (z->img_comp[i].raw_data == NULL)
         return stbi__free_jpeg_components(z, i+1, stbi__err("outofmem", "Out of memory"));
      // align blocks for idct using mmx/sse
      z->img_comp[i].data = (stbi_uc*) (((size_t) z->img_comp[i].raw_data + 15) & ~15);
      if (z->progressive) {
         // w2, h2 are multiples of 8 (see above)
         z->img_comp[i].coeff_w = z->img_comp[i].w2 / 8;
         z->img_comp[i].coeff_h = z->img_comp[i].h2 / 8;
         z->img_comp[i].raw_coeff = stbi__malloc_mad3(z->img_comp[i].w2, z->img_comp[i].h2, sizeof(short), 15);
         if (z->img_comp[i].raw_coeff == NULL)
            return stbi__free_jpeg_components(z, i+1, stbi__err("outofmem", "Out of memory"));
         z->img_comp[i].coeff = (short*) (((size_t) z->img_comp[i].raw_coeff + 15) & ~15);
      }
   }

   return 1;
}
3045 
3046 // use comparisons since in some cases we handle more than one case (e.g. SOF)
3047 #define stbi__DNL(x)         ((x) == 0xdc)
3048 #define stbi__SOI(x)         ((x) == 0xd8)
3049 #define stbi__EOI(x)         ((x) == 0xd9)
3050 #define stbi__SOF(x)         ((x) == 0xc0 || (x) == 0xc1 || (x) == 0xc2)
3051 #define stbi__SOS(x)         ((x) == 0xda)
3052 
3053 #define stbi__SOF_progressive(x)   ((x) == 0xc2)
3054 
// Read the start of a JPEG stream: require SOI, then process marker segments
// until a frame header (SOF) marker shows up and parse that frame header.
// With scan == STBI__SCAN_type only the SOI check is performed.
// Returns 1 on success; on failure returns 0 or the value of stbi__err().
static int stbi__decode_jpeg_header(stbi__jpeg *z, int scan)
{
   int m;

   z->marker = STBI__MARKER_none; // initialize cached marker to empty
   z->app14_color_transform = -1; // valid values are 0,1,2
   z->jfif = 0;

   m = stbi__get_marker(z);
   if (!stbi__SOI(m)) return stbi__err("no SOI","Corrupt JPEG");
   if (scan == STBI__SCAN_type) return 1;

   for (m = stbi__get_marker(z); !stbi__SOF(m); ) {
      if (!stbi__process_marker(z,m)) return 0;
      // some files have extra padding after their blocks, so keep reading
      // until a marker turns up or the input runs out
      for (;;) {
         m = stbi__get_marker(z);
         if (m != STBI__MARKER_none) break;
         if (stbi__at_eof(z->s)) return stbi__err("no SOF", "Corrupt JPEG");
      }
   }

   z->progressive = stbi__SOF_progressive(m);
   return stbi__process_frame_header(z, scan) ? 1 : 0;
}
3078 
3079 // decode image to YCbCr format
stbi__decode_jpeg_image(stbi__jpeg * j)3080 static int stbi__decode_jpeg_image(stbi__jpeg *j)
3081 {
3082    int m;
3083    for (m = 0; m < 4; m++) {
3084       j->img_comp[m].raw_data = NULL;
3085       j->img_comp[m].raw_coeff = NULL;
3086    }
3087    j->restart_interval = 0;
3088    if (!stbi__decode_jpeg_header(j, STBI__SCAN_load)) return 0;
3089    m = stbi__get_marker(j);
3090    while (!stbi__EOI(m)) {
3091       if (stbi__SOS(m)) {
3092          if (!stbi__process_scan_header(j)) return 0;
3093          if (!stbi__parse_entropy_coded_data(j)) return 0;
3094          if (j->marker == STBI__MARKER_none ) {
3095             // handle 0s at the end of image data from IP Kamera 9060
3096             while (!stbi__at_eof(j->s)) {
3097                int x = stbi__get8(j->s);
3098                if (x == 255) {
3099                   j->marker = stbi__get8(j->s);
3100                   break;
3101                }
3102             }
3103             // if we reach eof without hitting a marker, stbi__get_marker() below will fail and we'll eventually return 0
3104          }
3105       } else if (stbi__DNL(m)) {
3106          int Ld = stbi__get16be(j->s);
3107          stbi__uint32 NL = stbi__get16be(j->s);
3108          if (Ld != 4) stbi__err("bad DNL len", "Corrupt JPEG");
3109          if (NL != j->s->img_y) stbi__err("bad DNL height", "Corrupt JPEG");
3110       } else {
3111          if (!stbi__process_marker(j, m)) return 0;
3112       }
3113       m = stbi__get_marker(j);
3114    }
3115    if (j->progressive)
3116       stbi__jpeg_finish(j);
3117    return 1;
3118 }
3119 
// static jfif-centered resampling (across block boundaries)

// Signature shared by the row resamplers below: an output buffer, two input
// rows (in0/in1), the input width w in samples and the horizontal expansion
// factor hs. The returned pointer is the row to use; it is not necessarily
// `out` (resample_row_1 hands back its input row instead).
typedef stbi_uc *(*resample_row_func)(stbi_uc *out, stbi_uc *in0, stbi_uc *in1,
                                    int w, int hs);

// shift right by 2 (divide by 4) and narrow to a byte
#define stbi__div4(x) ((stbi_uc) ((x) >> 2))
3126 
// 1:1 "resampler": nothing to expand, so the near row is returned as-is.
// out, in_far, w and hs are ignored.
static stbi_uc *resample_row_1(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs)
{
   stbi_uc *row = in_near;
   STBI_NOTUSED(hs);
   STBI_NOTUSED(w);
   STBI_NOTUSED(in_far);
   STBI_NOTUSED(out);
   return row;
}
3135 
// Vertical 2x expansion: every output sample is a rounded 3:1 blend of the
// near and far input rows. Writes w samples to out and returns out.
static stbi_uc* stbi__resample_row_v_2(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs)
{
   int x = 0;
   STBI_NOTUSED(hs);
   while (x < w) {
      out[x] = stbi__div4(in_far[x] + 3*in_near[x] + 2);
      ++x;
   }
   return out;
}
3145 
// Horizontal 2x expansion: each input sample yields two outputs, each a
// rounded 3:1 blend with the left/right neighbour. The two outermost outputs
// copy the edge samples. Writes 2*w samples to out and returns out.
static stbi_uc*  stbi__resample_row_h_2(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs)
{
   int x;
   stbi_uc *src = in_near;

   STBI_NOTUSED(in_far);
   STBI_NOTUSED(hs);

   if (w == 1) {
      // a single sample leaves nothing to interpolate against
      stbi_uc only = src[0];
      out[1] = only;
      out[0] = only;
      return out;
   }

   // left edge
   out[0] = src[0];
   out[1] = stbi__div4(src[0]*3 + src[1] + 2);

   // interior samples
   x = 1;
   while (x < w-1) {
      int weighted = 3*src[x]+2;
      out[x*2+0] = stbi__div4(weighted+src[x-1]);
      out[x*2+1] = stbi__div4(weighted+src[x+1]);
      ++x;
   }

   // right edge (x is the index of the last input sample here)
   out[x*2+0] = stbi__div4(src[w-2]*3 + src[w-1] + 2);
   out[x*2+1] = src[w-1];

   return out;
}
3173 
3174 #define stbi__div16(x) ((stbi_uc) ((x) >> 4))
3175 
stbi__resample_row_hv_2(stbi_uc * out,stbi_uc * in_near,stbi_uc * in_far,int w,int hs)3176 static stbi_uc *stbi__resample_row_hv_2(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs)
3177 {
3178    // need to generate 2x2 samples for every one in input
3179    int i,t0,t1;
3180    if (w == 1) {
3181       out[0] = out[1] = stbi__div4(3*in_near[0] + in_far[0] + 2);
3182       return out;
3183    }
3184 
3185    t1 = 3*in_near[0] + in_far[0];
3186    out[0] = stbi__div4(t1+2);
3187    for (i=1; i < w; ++i) {
3188       t0 = t1;
3189       t1 = 3*in_near[i]+in_far[i];
3190       out[i*2-1] = stbi__div16(3*t0 + t1 + 8);
3191       out[i*2  ] = stbi__div16(3*t1 + t0 + 8);
3192    }
3193    out[w*2-1] = stbi__div4(t1+2);
3194 
3195    STBI_NOTUSED(hs);
3196 
3197    return out;
3198 }
3199 
#if defined(STBI_SSE2) || defined(STBI_NEON)
// SIMD variant of the 2x2 (horizontal + vertical) row resampler.
// Processes 8 input samples per iteration with SSE2 or NEON, then finishes the
// remaining samples (always including the last one) with scalar code.
// Writes 2*w samples to out and returns out; hs is unused.
static stbi_uc *stbi__resample_row_hv_2_simd(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs)
{
   // need to generate 2x2 samples for every one in input
   int i=0,t0,t1;

   if (w == 1) {
      out[0] = out[1] = stbi__div4(3*in_near[0] + in_far[0] + 2);
      return out;
   }

   // t1 carries the vertically filtered value of the sample just left of the
   // current group (initially sample 0 itself)
   t1 = 3*in_near[0] + in_far[0];
   // process groups of 8 pixels for as long as we can.
   // note we can't handle the last pixel in a row in this loop
   // because we need to handle the filter boundary conditions.
   // (the bound (w-1) & ~7 also keeps the in_near[i+8]/in_far[i+8] reads
   // below inside the row: i+8 <= w-1 on every iteration)
   for (; i < ((w-1) & ~7); i += 8) {
#if defined(STBI_SSE2)
      // load and perform the vertical filtering pass
      // this uses 3*x + y = 4*x + (y - x)
      __m128i zero  = _mm_setzero_si128();
      __m128i farb  = _mm_loadl_epi64((__m128i *) (in_far + i));
      __m128i nearb = _mm_loadl_epi64((__m128i *) (in_near + i));
      __m128i farw  = _mm_unpacklo_epi8(farb, zero);
      __m128i nearw = _mm_unpacklo_epi8(nearb, zero);
      __m128i diff  = _mm_sub_epi16(farw, nearw);
      __m128i nears = _mm_slli_epi16(nearw, 2);
      __m128i curr  = _mm_add_epi16(nears, diff); // current row

      // horizontal filter works the same based on shifted vers of current
      // row. "prev" is current row shifted right by 1 pixel; we need to
      // insert the previous pixel value (from t1).
      // "next" is current row shifted left by 1 pixel, with first pixel
      // of next block of 8 pixels added in.
      __m128i prv0 = _mm_slli_si128(curr, 2);
      __m128i nxt0 = _mm_srli_si128(curr, 2);
      __m128i prev = _mm_insert_epi16(prv0, t1, 0);
      __m128i next = _mm_insert_epi16(nxt0, 3*in_near[i+8] + in_far[i+8], 7);

      // horizontal filter, polyphase implementation since it's convenient:
      // even pixels = 3*cur + prev = cur*4 + (prev - cur)
      // odd  pixels = 3*cur + next = cur*4 + (next - cur)
      // note the shared term.
      __m128i bias  = _mm_set1_epi16(8);
      __m128i curs = _mm_slli_epi16(curr, 2);
      __m128i prvd = _mm_sub_epi16(prev, curr);
      __m128i nxtd = _mm_sub_epi16(next, curr);
      __m128i curb = _mm_add_epi16(curs, bias);
      __m128i even = _mm_add_epi16(prvd, curb);
      __m128i odd  = _mm_add_epi16(nxtd, curb);

      // interleave even and odd pixels, then undo scaling.
      __m128i int0 = _mm_unpacklo_epi16(even, odd);
      __m128i int1 = _mm_unpackhi_epi16(even, odd);
      __m128i de0  = _mm_srli_epi16(int0, 4);
      __m128i de1  = _mm_srli_epi16(int1, 4);

      // pack and write output
      __m128i outv = _mm_packus_epi16(de0, de1);
      _mm_storeu_si128((__m128i *) (out + i*2), outv);
#elif defined(STBI_NEON)
      // load and perform the vertical filtering pass
      // this uses 3*x + y = 4*x + (y - x)
      uint8x8_t farb  = vld1_u8(in_far + i);
      uint8x8_t nearb = vld1_u8(in_near + i);
      int16x8_t diff  = vreinterpretq_s16_u16(vsubl_u8(farb, nearb));
      int16x8_t nears = vreinterpretq_s16_u16(vshll_n_u8(nearb, 2));
      int16x8_t curr  = vaddq_s16(nears, diff); // current row

      // horizontal filter works the same based on shifted vers of current
      // row. "prev" is current row shifted right by 1 pixel; we need to
      // insert the previous pixel value (from t1).
      // "next" is current row shifted left by 1 pixel, with first pixel
      // of next block of 8 pixels added in.
      int16x8_t prv0 = vextq_s16(curr, curr, 7);
      int16x8_t nxt0 = vextq_s16(curr, curr, 1);
      int16x8_t prev = vsetq_lane_s16(t1, prv0, 0);
      int16x8_t next = vsetq_lane_s16(3*in_near[i+8] + in_far[i+8], nxt0, 7);

      // horizontal filter, polyphase implementation since it's convenient:
      // even pixels = 3*cur + prev = cur*4 + (prev - cur)
      // odd  pixels = 3*cur + next = cur*4 + (next - cur)
      // note the shared term.
      int16x8_t curs = vshlq_n_s16(curr, 2);
      int16x8_t prvd = vsubq_s16(prev, curr);
      int16x8_t nxtd = vsubq_s16(next, curr);
      int16x8_t even = vaddq_s16(curs, prvd);
      int16x8_t odd  = vaddq_s16(curs, nxtd);

      // undo scaling and round, then store with even/odd phases interleaved
      uint8x8x2_t o;
      o.val[0] = vqrshrun_n_s16(even, 4);
      o.val[1] = vqrshrun_n_s16(odd,  4);
      vst2_u8(out + i*2, o);
#endif

      // "previous" value for next iter
      t1 = 3*in_near[i+7] + in_far[i+7];
   }

   // scalar tail. The even output of sample i is produced here; its odd
   // predecessor out[i*2-1] was already stored by the last SIMD iteration.
   // If no SIMD iteration ran (i == 0), t0 == t1 and the expression reduces to
   // stbi__div4(t1+2), matching the scalar resampler's left edge.
   t0 = t1;
   t1 = 3*in_near[i] + in_far[i];
   out[i*2] = stbi__div16(3*t1 + t0 + 8);

   for (++i; i < w; ++i) {
      t0 = t1;
      t1 = 3*in_near[i]+in_far[i];
      out[i*2-1] = stbi__div16(3*t0 + t1 + 8);
      out[i*2  ] = stbi__div16(3*t1 + t0 + 8);
   }
   // right edge: no right neighbour, vertical blend only
   out[w*2-1] = stbi__div4(t1+2);

   STBI_NOTUSED(hs);

   return out;
}
#endif
3316 
// Fallback resampler for any expansion factor: nearest-neighbour, i.e. each of
// the w input samples is replicated hs times. Returns out; in_far is unused.
static stbi_uc *stbi__resample_row_generic(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs)
{
   int src, rep;
   stbi_uc *dst = out;
   STBI_NOTUSED(in_far);
   for (src = 0; src < w; ++src) {
      for (rep = 0; rep < hs; ++rep)
         *dst++ = in_near[src];
   }
   return out;
}
3327 
3328 // this is a reduced-precision calculation of YCbCr-to-RGB introduced
3329 // to make sure the code produces the same results in both SIMD and scalar
3330 #define stbi__float2fixed(x)  (((int) ((x) * 4096.0f + 0.5f)) << 8)
stbi__YCbCr_to_RGB_row(stbi_uc * out,const stbi_uc * y,const stbi_uc * pcb,const stbi_uc * pcr,int count,int step)3331 static void stbi__YCbCr_to_RGB_row(stbi_uc *out, const stbi_uc *y, const stbi_uc *pcb, const stbi_uc *pcr, int count, int step)
3332 {
3333    int i;
3334    for (i=0; i < count; ++i) {
3335       int y_fixed = (y[i] << 20) + (1<<19); // rounding
3336       int r,g,b;
3337       int cr = pcr[i] - 128;
3338       int cb = pcb[i] - 128;
3339       r = y_fixed +  cr* stbi__float2fixed(1.40200f);
3340       g = y_fixed + (cr*-stbi__float2fixed(0.71414f)) + ((cb*-stbi__float2fixed(0.34414f)) & 0xffff0000);
3341       b = y_fixed                                     +   cb* stbi__float2fixed(1.77200f);
3342       r >>= 20;
3343       g >>= 20;
3344       b >>= 20;
3345       if ((unsigned) r > 255) { if (r < 0) r = 0; else r = 255; }
3346       if ((unsigned) g > 255) { if (g < 0) g = 0; else g = 255; }
3347       if ((unsigned) b > 255) { if (b < 0) b = 0; else b = 255; }
3348       out[0] = (stbi_uc)r;
3349       out[1] = (stbi_uc)g;
3350       out[2] = (stbi_uc)b;
3351       out[3] = 255;
3352       out += step;
3353    }
3354 }
3355 
#if defined(STBI_SSE2) || defined(STBI_NEON)
// SIMD variant of the YCbCr-to-RGB row converter.
// When step == 4 the SSE2/NEON path converts 8 pixels per iteration; whatever
// is left over (and every pixel when step != 4) goes through the scalar loop at
// the bottom, which mirrors stbi__YCbCr_to_RGB_row.
static void stbi__YCbCr_to_RGB_simd(stbi_uc *out, stbi_uc const *y, stbi_uc const *pcb, stbi_uc const *pcr, int count, int step)
{
   int i = 0;

#ifdef STBI_SSE2
   // step == 3 is pretty ugly on the final interleave, and i'm not convinced
   // it's useful in practice (you wouldn't use it for textures, for example).
   // so just accelerate step == 4 case.
   if (step == 4) {
      // this is a fairly straightforward implementation and not super-optimized.
      __m128i signflip  = _mm_set1_epi8(-0x80);
      __m128i cr_const0 = _mm_set1_epi16(   (short) ( 1.40200f*4096.0f+0.5f));
      __m128i cr_const1 = _mm_set1_epi16( - (short) ( 0.71414f*4096.0f+0.5f));
      __m128i cb_const0 = _mm_set1_epi16( - (short) ( 0.34414f*4096.0f+0.5f));
      __m128i cb_const1 = _mm_set1_epi16(   (short) ( 1.77200f*4096.0f+0.5f));
      __m128i y_bias = _mm_set1_epi8((char) (unsigned char) 128);
      __m128i xw = _mm_set1_epi16(255); // alpha channel

      for (; i+7 < count; i += 8) {
         // load
         __m128i y_bytes = _mm_loadl_epi64((__m128i *) (y+i));
         __m128i cr_bytes = _mm_loadl_epi64((__m128i *) (pcr+i));
         __m128i cb_bytes = _mm_loadl_epi64((__m128i *) (pcb+i));
         __m128i cr_biased = _mm_xor_si128(cr_bytes, signflip); // -128
         __m128i cb_biased = _mm_xor_si128(cb_bytes, signflip); // -128

         // unpack to short (and left-shift cr, cb by 8)
         __m128i yw  = _mm_unpacklo_epi8(y_bias, y_bytes);
         __m128i crw = _mm_unpacklo_epi8(_mm_setzero_si128(), cr_biased);
         __m128i cbw = _mm_unpacklo_epi8(_mm_setzero_si128(), cb_biased);

         // color transform
         __m128i yws = _mm_srli_epi16(yw, 4);
         __m128i cr0 = _mm_mulhi_epi16(cr_const0, crw);
         __m128i cb0 = _mm_mulhi_epi16(cb_const0, cbw);
         __m128i cb1 = _mm_mulhi_epi16(cbw, cb_const1);
         __m128i cr1 = _mm_mulhi_epi16(crw, cr_const1);
         __m128i rws = _mm_add_epi16(cr0, yws);
         __m128i gwt = _mm_add_epi16(cb0, yws);
         __m128i bws = _mm_add_epi16(yws, cb1);
         __m128i gws = _mm_add_epi16(gwt, cr1);

         // descale
         __m128i rw = _mm_srai_epi16(rws, 4);
         __m128i bw = _mm_srai_epi16(bws, 4);
         __m128i gw = _mm_srai_epi16(gws, 4);

         // back to byte, set up for transpose
         __m128i brb = _mm_packus_epi16(rw, bw);
         __m128i gxb = _mm_packus_epi16(gw, xw);

         // transpose to interleave channels
         __m128i t0 = _mm_unpacklo_epi8(brb, gxb);
         __m128i t1 = _mm_unpackhi_epi8(brb, gxb);
         __m128i o0 = _mm_unpacklo_epi16(t0, t1);
         __m128i o1 = _mm_unpackhi_epi16(t0, t1);

         // store: 8 pixels * 4 bytes = 32 bytes
         _mm_storeu_si128((__m128i *) (out + 0), o0);
         _mm_storeu_si128((__m128i *) (out + 16), o1);
         out += 32;
      }
   }
#endif

#ifdef STBI_NEON
   // in this version, step=3 support would be easy to add. but is there demand?
   if (step == 4) {
      // this is a fairly straightforward implementation and not super-optimized.
      uint8x8_t signflip = vdup_n_u8(0x80);
      int16x8_t cr_const0 = vdupq_n_s16(   (short) ( 1.40200f*4096.0f+0.5f));
      int16x8_t cr_const1 = vdupq_n_s16( - (short) ( 0.71414f*4096.0f+0.5f));
      int16x8_t cb_const0 = vdupq_n_s16( - (short) ( 0.34414f*4096.0f+0.5f));
      int16x8_t cb_const1 = vdupq_n_s16(   (short) ( 1.77200f*4096.0f+0.5f));

      for (; i+7 < count; i += 8) {
         // load
         uint8x8_t y_bytes  = vld1_u8(y + i);
         uint8x8_t cr_bytes = vld1_u8(pcr + i);
         uint8x8_t cb_bytes = vld1_u8(pcb + i);
         int8x8_t cr_biased = vreinterpret_s8_u8(vsub_u8(cr_bytes, signflip));
         int8x8_t cb_biased = vreinterpret_s8_u8(vsub_u8(cb_bytes, signflip));

         // expand to s16
         int16x8_t yws = vreinterpretq_s16_u16(vshll_n_u8(y_bytes, 4));
         int16x8_t crw = vshll_n_s8(cr_biased, 7);
         int16x8_t cbw = vshll_n_s8(cb_biased, 7);

         // color transform
         int16x8_t cr0 = vqdmulhq_s16(crw, cr_const0);
         int16x8_t cb0 = vqdmulhq_s16(cbw, cb_const0);
         int16x8_t cr1 = vqdmulhq_s16(crw, cr_const1);
         int16x8_t cb1 = vqdmulhq_s16(cbw, cb_const1);
         int16x8_t rws = vaddq_s16(yws, cr0);
         int16x8_t gws = vaddq_s16(vaddq_s16(yws, cb0), cr1);
         int16x8_t bws = vaddq_s16(yws, cb1);

         // undo scaling, round, convert to byte
         uint8x8x4_t o;
         o.val[0] = vqrshrun_n_s16(rws, 4);
         o.val[1] = vqrshrun_n_s16(gws, 4);
         o.val[2] = vqrshrun_n_s16(bws, 4);
         o.val[3] = vdup_n_u8(255);

         // store, interleaving r/g/b/a
         vst4_u8(out, o);
         out += 8*4;
      }
   }
#endif

   // scalar path: pixels not covered by a SIMD loop above. Same arithmetic as
   // stbi__YCbCr_to_RGB_row; note out[3] is stored for every pixel whatever
   // the value of step.
   for (; i < count; ++i) {
      int y_fixed = (y[i] << 20) + (1<<19); // rounding
      int r,g,b;
      int cr = pcr[i] - 128;
      int cb = pcb[i] - 128;
      r = y_fixed + cr* stbi__float2fixed(1.40200f);
      g = y_fixed + cr*-stbi__float2fixed(0.71414f) + ((cb*-stbi__float2fixed(0.34414f)) & 0xffff0000);
      b = y_fixed                                   +   cb* stbi__float2fixed(1.77200f);
      r >>= 20;
      g >>= 20;
      b >>= 20;
      // clamp to 0..255 (the unsigned compare catches both negatives and >255)
      if ((unsigned) r > 255) { if (r < 0) r = 0; else r = 255; }
      if ((unsigned) g > 255) { if (g < 0) g = 0; else g = 255; }
      if ((unsigned) b > 255) { if (b < 0) b = 0; else b = 255; }
      out[0] = (stbi_uc)r;
      out[1] = (stbi_uc)g;
      out[2] = (stbi_uc)b;
      out[3] = 255;
      out += step;
   }
}
#endif
3490 
3491 // set up the kernels
stbi__setup_jpeg(stbi__jpeg * j)3492 static void stbi__setup_jpeg(stbi__jpeg *j)
3493 {
3494    j->idct_block_kernel = stbi__idct_block;
3495    j->YCbCr_to_RGB_kernel = stbi__YCbCr_to_RGB_row;
3496    j->resample_row_hv_2_kernel = stbi__resample_row_hv_2;
3497 
3498 #ifdef STBI_SSE2
3499    if (stbi__sse2_available()) {
3500       j->idct_block_kernel = stbi__idct_simd;
3501       j->YCbCr_to_RGB_kernel = stbi__YCbCr_to_RGB_simd;
3502       j->resample_row_hv_2_kernel = stbi__resample_row_hv_2_simd;
3503    }
3504 #endif
3505 
3506 #ifdef STBI_NEON
3507    j->idct_block_kernel = stbi__idct_simd;
3508    j->YCbCr_to_RGB_kernel = stbi__YCbCr_to_RGB_simd;
3509    j->resample_row_hv_2_kernel = stbi__resample_row_hv_2_simd;
3510 #endif
3511 }
3512 
3513 // clean up the temporary component buffers
stbi__cleanup_jpeg(stbi__jpeg * j)3514 static void stbi__cleanup_jpeg(stbi__jpeg *j)
3515 {
3516    stbi__free_jpeg_components(j, j->s->img_n, 0);
3517 }
3518 
3519 typedef struct
3520 {
3521    resample_row_func resample;
3522    stbi_uc *line0,*line1;
3523    int hs,vs;   // expansion factor in each axis
3524    int w_lores; // horizontal pixels pre-expansion
3525    int ystep;   // how far through vertical expansion we are
3526    int ypos;    // which pre-expansion row we're on
3527 } stbi__resample;
3528 
3529 // fast 0..255 * 0..255 => 0..255 rounded multiplication
stbi__blinn_8x8(stbi_uc x,stbi_uc y)3530 static stbi_uc stbi__blinn_8x8(stbi_uc x, stbi_uc y)
3531 {
3532    unsigned int t = x*y + 128;
3533    return (stbi_uc) ((t + (t >>8)) >> 8);
3534 }
3535 
// Decode the JPEG attached to z and return a newly allocated buffer of
// interleaved 8-bit pixels.
//   out_x, out_y: receive the image width and height (written unconditionally).
//   comp:         if non-NULL, receives 3 when the source has >= 3 components,
//                 otherwise 1 (the source layout, not the output layout).
//   req_comp:     0 to take the default (3 for >= 3 source components, else 1),
//                 or 1..4 to force that many output channels per pixel.
// On failure the component buffers are released and the function returns NULL
// or the value of stbi__errpuc().
static stbi_uc *load_jpeg_image(stbi__jpeg *z, int *out_x, int *out_y, int *comp, int req_comp)
{
   int n, decode_n, is_rgb;
   z->s->img_n = 0; // make stbi__cleanup_jpeg safe

   // validate req_comp
   if (req_comp < 0 || req_comp > 4) return stbi__errpuc("bad req_comp", "Internal error");

   // load a jpeg image from whichever source, but leave in YCbCr format
   if (!stbi__decode_jpeg_image(z)) { stbi__cleanup_jpeg(z); return NULL; }

   // determine actual number of components to generate
   n = req_comp ? req_comp : z->s->img_n >= 3 ? 3 : 1;

   // a 3-component image is treated as already-RGB when z->rgb == 3, or when
   // the APP14 transform is 0 and no JFIF marker was seen
   is_rgb = z->s->img_n == 3 && (z->rgb == 3 || (z->app14_color_transform == 0 && !z->jfif));

   // grey output from a YCbCr source only needs the first (luma) component
   if (z->s->img_n == 3 && n < 3 && !is_rgb)
      decode_n = 1;
   else
      decode_n = z->s->img_n;

   // resample and color-convert
   {
      int k;
      unsigned int i,j;
      stbi_uc *output;
      stbi_uc *coutput[4];

      stbi__resample res_comp[4];

      // set up one resampler per component that will be decoded
      for (k=0; k < decode_n; ++k) {
         stbi__resample *r = &res_comp[k];

         // allocate line buffer big enough for upsampling off the edges
         // with upsample factor of 4
         z->img_comp[k].linebuf = (stbi_uc *) stbi__malloc(z->s->img_x + 3);
         if (!z->img_comp[k].linebuf) { stbi__cleanup_jpeg(z); return stbi__errpuc("outofmem", "Out of memory"); }

         r->hs      = z->img_h_max / z->img_comp[k].h;
         r->vs      = z->img_v_max / z->img_comp[k].v;
         r->ystep   = r->vs >> 1;
         r->w_lores = (z->s->img_x + r->hs-1) / r->hs; // width before expansion, rounded up
         r->ypos    = 0;
         r->line0   = r->line1 = z->img_comp[k].data;

         // pick the specialised resampler for the common 1x/2x factors,
         // otherwise fall back to the generic nearest-neighbour one
         if      (r->hs == 1 && r->vs == 1) r->resample = resample_row_1;
         else if (r->hs == 1 && r->vs == 2) r->resample = stbi__resample_row_v_2;
         else if (r->hs == 2 && r->vs == 1) r->resample = stbi__resample_row_h_2;
         else if (r->hs == 2 && r->vs == 2) r->resample = z->resample_row_hv_2_kernel;
         else                               r->resample = stbi__resample_row_generic;
      }

      // this allocation is the last step that can fail
      // NOTE(review): several conversion loops below store out[3] even when n < 4;
      // the trailing `1` argument presumably reserves a spare byte for the very
      // last pixel -- confirm against stbi__malloc_mad3.
      output = (stbi_uc *) stbi__malloc_mad3(n, z->s->img_x, z->s->img_y, 1);
      if (!output) { stbi__cleanup_jpeg(z); return stbi__errpuc("outofmem", "Out of memory"); }

      // now go ahead and resample
      for (j=0; j < z->s->img_y; ++j) {
         stbi_uc *out = output + n * z->s->img_x * j;
         // produce one full-resolution row per decoded component in coutput[]
         for (k=0; k < decode_n; ++k) {
            stbi__resample *r = &res_comp[k];
            // y_bot selects which of line0/line1 is passed as the "near" row
            int y_bot = r->ystep >= (r->vs >> 1);
            coutput[k] = r->resample(z->img_comp[k].linebuf,
                                     y_bot ? r->line1 : r->line0,
                                     y_bot ? r->line0 : r->line1,
                                     r->w_lores, r->hs);
            // after vs output rows, advance to the next source row
            // (line1 stops advancing once the last source row is reached)
            if (++r->ystep >= r->vs) {
               r->ystep = 0;
               r->line0 = r->line1;
               if (++r->ypos < z->img_comp[k].y)
                  r->line1 += z->img_comp[k].w2;
            }
         }
         if (n >= 3) {
            // colour output (3 or 4 channels)
            stbi_uc *y = coutput[0];
            if (z->s->img_n == 3) {
               if (is_rgb) {
                  // components are already R,G,B: just interleave
                  for (i=0; i < z->s->img_x; ++i) {
                     out[0] = y[i];
                     out[1] = coutput[1][i];
                     out[2] = coutput[2][i];
                     out[3] = 255;
                     out += n;
                  }
               } else {
                  z->YCbCr_to_RGB_kernel(out, y, coutput[1], coutput[2], z->s->img_x, n);
               }
            } else if (z->s->img_n == 4) {
               if (z->app14_color_transform == 0) { // CMYK
                  // scale each of the first three channels by the fourth
                  for (i=0; i < z->s->img_x; ++i) {
                     stbi_uc k = coutput[3][i];
                     out[0] = stbi__blinn_8x8(coutput[0][i], k);
                     out[1] = stbi__blinn_8x8(coutput[1][i], k);
                     out[2] = stbi__blinn_8x8(coutput[2][i], k);
                     out[3] = 255;
                     out += n;
                  }
               } else if (z->app14_color_transform == 2) { // YCCK
                  // convert to RGB first, then invert each channel and scale by the fourth component
                  z->YCbCr_to_RGB_kernel(out, y, coutput[1], coutput[2], z->s->img_x, n);
                  for (i=0; i < z->s->img_x; ++i) {
                     stbi_uc k = coutput[3][i];
                     out[0] = stbi__blinn_8x8(255 - out[0], k);
                     out[1] = stbi__blinn_8x8(255 - out[1], k);
                     out[2] = stbi__blinn_8x8(255 - out[2], k);
                     out += n;
                  }
               } else { // YCbCr + alpha?  Ignore the fourth channel for now
                  z->YCbCr_to_RGB_kernel(out, y, coutput[1], coutput[2], z->s->img_x, n);
               }
            } else
               // single-component source: replicate grey into R,G,B
               for (i=0; i < z->s->img_x; ++i) {
                  out[0] = out[1] = out[2] = y[i];
                  out[3] = 255; // not used if n==3
                  out += n;
               }
         } else {
            // grey output (1 or 2 channels)
            if (is_rgb) {
               // RGB source: compute luminance per pixel
               if (n == 1)
                  for (i=0; i < z->s->img_x; ++i)
                     *out++ = stbi__compute_y(coutput[0][i], coutput[1][i], coutput[2][i]);
               else {
                  for (i=0; i < z->s->img_x; ++i, out += 2) {
                     out[0] = stbi__compute_y(coutput[0][i], coutput[1][i], coutput[2][i]);
                     out[1] = 255;
                  }
               }
            } else if (z->s->img_n == 4 && z->app14_color_transform == 0) {
               // 4-component source with transform 0: same scaling as the colour
               // path above, then reduce to luminance
               for (i=0; i < z->s->img_x; ++i) {
                  stbi_uc k = coutput[3][i];
                  stbi_uc r = stbi__blinn_8x8(coutput[0][i], k);
                  stbi_uc g = stbi__blinn_8x8(coutput[1][i], k);
                  stbi_uc b = stbi__blinn_8x8(coutput[2][i], k);
                  out[0] = stbi__compute_y(r, g, b);
                  out[1] = 255;
                  out += n;
               }
            } else if (z->s->img_n == 4 && z->app14_color_transform == 2) {
               // 4-component source with transform 2: inverted first component scaled by the fourth
               for (i=0; i < z->s->img_x; ++i) {
                  out[0] = stbi__blinn_8x8(255 - coutput[0][i], coutput[3][i]);
                  out[1] = 255;
                  out += n;
               }
            } else {
               // otherwise the first component is used directly as grey
               stbi_uc *y = coutput[0];
               if (n == 1)
                  for (i=0; i < z->s->img_x; ++i) out[i] = y[i];
               else
                  for (i=0; i < z->s->img_x; ++i) *out++ = y[i], *out++ = 255;
            }
         }
      }
      stbi__cleanup_jpeg(z);
      *out_x = z->s->img_x;
      *out_y = z->s->img_y;
      if (comp) *comp = z->s->img_n >= 3 ? 3 : 1; // report original components, not output
      return output;
   }
}
3694 
// Loader entry point for JPEG: allocates a temporary decoder state, installs
// the kernels, decodes via load_jpeg_image() and frees the state again.
//   s:           input context the decoder reads from
//   x, y, comp:  forwarded to load_jpeg_image()
//   req_comp:    requested channel count, forwarded to load_jpeg_image()
//   ri:          unused here
// Returns the pixel buffer produced by load_jpeg_image(), or the value of
// stbi__errpuc() if the decoder state cannot be allocated.
static void *stbi__jpeg_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri)
{
   unsigned char* result;
   stbi__jpeg* j = (stbi__jpeg*) stbi__malloc(sizeof(stbi__jpeg));
   STBI_NOTUSED(ri);
   // the allocation can fail; do not dereference a NULL state
   if (!j) return stbi__errpuc("outofmem", "Out of memory");
   j->s = s;
   stbi__setup_jpeg(j);
   result = load_jpeg_image(j, x,y,comp,req_comp);
   STBI_FREE(j);
   return result;
}
3706 
// Check whether the stream in s starts like a JPEG (header parsed in
// STBI__SCAN_type mode). The context is rewound afterwards.
// Returns the result of stbi__decode_jpeg_header(), or the value of
// stbi__err() if the temporary decoder state cannot be allocated.
static int stbi__jpeg_test(stbi__context *s)
{
   int r;
   stbi__jpeg* j = (stbi__jpeg*)stbi__malloc(sizeof(stbi__jpeg));
   // the allocation can fail; nothing has been read yet, so no rewind is needed
   if (!j) return stbi__err("outofmem", "Out of memory");
   j->s = s;
   stbi__setup_jpeg(j);
   r = stbi__decode_jpeg_header(j, STBI__SCAN_type);
   stbi__rewind(s);
   STBI_FREE(j);
   return r;
}
3718 
// Parse just the header (STBI__SCAN_header) and report the image dimensions.
// Each of x, y, comp may be NULL. comp receives 3 when the image has three or
// more components, otherwise 1. On failure the context is rewound and 0 is
// returned; on success returns 1.
static int stbi__jpeg_info_raw(stbi__jpeg *j, int *x, int *y, int *comp)
{
   if (stbi__decode_jpeg_header(j, STBI__SCAN_header)) {
      if (x)
         *x = j->s->img_x;
      if (y)
         *y = j->s->img_y;
      if (comp)
         *comp = (j->s->img_n < 3) ? 1 : 3;
      return 1;
   }

   stbi__rewind( j->s );
   return 0;
}
3730 
// Report width/height/component count of the JPEG in s without decoding the
// image data: allocates a temporary decoder state, delegates to
// stbi__jpeg_info_raw() and frees the state.
// Returns the result of stbi__jpeg_info_raw(), or the value of stbi__err() if
// the decoder state cannot be allocated.
static int stbi__jpeg_info(stbi__context *s, int *x, int *y, int *comp)
{
   int result;
   stbi__jpeg* j = (stbi__jpeg*) (stbi__malloc(sizeof(stbi__jpeg)));
   // the allocation can fail; do not dereference a NULL state
   if (!j) return stbi__err("outofmem", "Out of memory");
   j->s = s;
   result = stbi__jpeg_info_raw(j, x, y, comp);
   STBI_FREE(j);
   return result;
}
3740 #endif
3741 
3742 // public domain zlib decode    v0.2  Sean Barrett 2006-11-18
3743 //    simple implementation
3744 //      - all input must be provided in an upfront buffer
3745 //      - all output is written to a single output buffer (can malloc/realloc)
3746 //    performance
3747 //      - fast huffman
3748 
3749 #ifndef STBI_NO_ZLIB
3750 
// fast-way is faster to check than jpeg huffman, but slow way is slower
// STBI__ZFAST_BITS: number of low-order code-buffer bits used to index the
// fast[] lookup table; codes no longer than this are resolved by the table.
#define STBI__ZFAST_BITS  9 // accelerate all cases in default tables
// mask selecting those low-order bits
#define STBI__ZFAST_MASK  ((1 << STBI__ZFAST_BITS) - 1)
3754 
// zlib-style huffman encoding
// (jpeg packs from the left, zlib from the right, so they can't share code)
typedef struct
{
   stbi__uint16 fast[1 << STBI__ZFAST_BITS]; // (code length << 9) | symbol, indexed by bit-reversed code; 0 = not in fast table
   stbi__uint16 firstcode[16];               // first code value of each code length
   int maxcode[17];                          // one past the last code of each length, preshifted to 16 bits; [16] is a sentinel
   stbi__uint16 firstsymbol[16];             // index into size[]/value[] of the first symbol of each code length
   stbi_uc  size[288];                       // code length of each slot
   stbi__uint16 value[288];                  // symbol stored in each slot
} stbi__zhuffman;
3766 
stbi__bitreverse16(int n)3767 stbi_inline static int stbi__bitreverse16(int n)
3768 {
3769   n = ((n & 0xAAAA) >>  1) | ((n & 0x5555) << 1);
3770   n = ((n & 0xCCCC) >>  2) | ((n & 0x3333) << 2);
3771   n = ((n & 0xF0F0) >>  4) | ((n & 0x0F0F) << 4);
3772   n = ((n & 0xFF00) >>  8) | ((n & 0x00FF) << 8);
3773   return n;
3774 }
3775 
stbi__bit_reverse(int v,int bits)3776 stbi_inline static int stbi__bit_reverse(int v, int bits)
3777 {
3778    STBI_ASSERT(bits <= 16);
3779    // to bit reverse n bits, reverse 16 and shift
3780    // e.g. 11 bits, bit reverse and shift away 5
3781    return stbi__bitreverse16(v) >> (16-bits);
3782 }
3783 
// Build the decoding tables in z from a list of per-symbol code lengths.
//   sizelist: code length of each symbol (0 = symbol not used)
//   num:      number of entries in sizelist
// Returns 1 on success, or the value of stbi__err() when the lengths do not
// describe a valid code.
static int stbi__zbuild_huffman(stbi__zhuffman *z, const stbi_uc *sizelist, int num)
{
   int sym, len;
   int code = 0, symbols_so_far = 0;
   int next_code[16], sizes[17];

   // DEFLATE spec for generating codes
   memset(sizes, 0, sizeof(sizes));
   memset(z->fast, 0, sizeof(z->fast));

   // count how many symbols use each code length; length 0 does not count
   for (sym = 0; sym < num; ++sym)
      sizes[sizelist[sym]] += 1;
   sizes[0] = 0;

   for (len = 1; len < 16; ++len) {
      if (sizes[len] > (1 << len))
         return stbi__err("bad sizes", "Corrupt PNG");
   }

   // assign the first code and first symbol slot of every length
   for (len = 1; len < 16; ++len) {
      next_code[len] = code;
      z->firstcode[len] = (stbi__uint16) code;
      z->firstsymbol[len] = (stbi__uint16) symbols_so_far;
      code += sizes[len];
      if (sizes[len] && code-1 >= (1 << len))
         return stbi__err("bad codelengths","Corrupt PNG");
      z->maxcode[len] = code << (16-len); // preshift for inner loop
      code <<= 1;
      symbols_so_far += sizes[len];
   }
   z->maxcode[16] = 0x10000; // sentinel

   // place every used symbol in its slot and, for short codes, in the fast table
   for (sym = 0; sym < num; ++sym) {
      int s = sizelist[sym];
      int slot, rev;
      stbi__uint16 fastv;

      if (!s)
         continue;

      slot  = next_code[s] - z->firstcode[s] + z->firstsymbol[s];
      fastv = (stbi__uint16) ((s << 9) | sym);
      z->size [slot] = (stbi_uc) s;
      z->value[slot] = (stbi__uint16) sym;

      if (s <= STBI__ZFAST_BITS) {
         // fill every fast-table entry whose low s bits match the reversed code
         for (rev = stbi__bit_reverse(next_code[s],s); rev < (1 << STBI__ZFAST_BITS); rev += (1 << s))
            z->fast[rev] = fastv;
      }
      ++next_code[s];
   }
   return 1;
}
3830 
3831 // zlib-from-memory implementation for PNG reading
3832 //    because PNG allows splitting the zlib stream arbitrarily,
3833 //    and it's annoying structurally to have PNG call ZLIB call PNG,
3834 //    we require PNG read all the IDATs and combine them into a single
3835 //    memory buffer
3836 
// state of one zlib decode: input cursor, bit buffer, output buffer and the
// two huffman tables
typedef struct
{
   stbi_uc *zbuffer, *zbuffer_end; // next input byte to read / one past the last input byte
   int num_bits;                   // number of valid bits currently held in code_buffer
   stbi__uint32 code_buffer;       // bit buffer; new bytes are OR-ed in above the valid bits

   // output buffer pointers -- presumably write cursor, start and end of the
   // buffer (not exercised in this part of the file; verify against the users)
   char *zout;
   char *zout_start;
   char *zout_end;
   int   z_expandable;             // NOTE(review): looks like "output may be grown" -- confirm

   stbi__zhuffman z_length, z_distance; // huffman tables; by their names, for length and distance codes
} stbi__zbuf;
3850 
stbi__zget8(stbi__zbuf * z)3851 stbi_inline static stbi_uc stbi__zget8(stbi__zbuf *z)
3852 {
3853    if (z->zbuffer >= z->zbuffer_end) return 0;
3854    return *z->zbuffer++;
3855 }
3856 
// Top up the bit reservoir one byte at a time until it holds more than 24 bits.
// At least one byte is always fetched.
static void stbi__fill_bits(stbi__zbuf *z)
{
   for (;;) {
      unsigned int next_byte;
      STBI_ASSERT(z->code_buffer < (1U << z->num_bits));
      next_byte = stbi__zget8(z);
      z->code_buffer |= next_byte << z->num_bits;
      z->num_bits += 8;
      if (z->num_bits > 24)
         break;
   }
}
3865 
stbi__zreceive(stbi__zbuf * z,int n)3866 stbi_inline static unsigned int stbi__zreceive(stbi__zbuf *z, int n)
3867 {
3868    unsigned int k;
3869    if (z->num_bits < n) stbi__fill_bits(z);
3870    k = z->code_buffer & ((1 << n) - 1);
3871    z->code_buffer >>= n;
3872    z->num_bits -= n;
3873    return k;
3874 }
3875 
// Decode one huffman symbol whose code is too long for the fast lookup table.
//    a : bit source; the decoded code's bits are consumed on success
//    z : huffman table to decode against
// Returns the decoded symbol, or -1 if the bits do not form a valid code.
static int stbi__zhuffman_decode_slowpath(stbi__zbuf *a, stbi__zhuffman *z)
{
   int b,s,k;
   // not resolved by fast table, so compute it the slow way
   // use jpeg approach, which requires MSbits at top
   k = stbi__bit_reverse(a->code_buffer, 16);
   for (s=STBI__ZFAST_BITS+1; ; ++s)
      if (k < z->maxcode[s])
         break;
   if (s >= 16) return -1; // invalid code!
   // code size is s, so:
   b = (k >> (16-s)) - z->firstcode[s] + z->firstsymbol[s];
   // Corrupt data can produce an index outside the table. The largest alphabet
   // built in this file has 288 symbols, so anything beyond that is invalid.
   if (b < 0 || b >= 288) return -1;
   // This used to be an assert; corrupt input must fail cleanly instead.
   if (z->size[b] != s) return -1;
   a->code_buffer >>= s;
   a->num_bits -= s;
   return z->value[b];
}
3893 
stbi__zhuffman_decode(stbi__zbuf * a,stbi__zhuffman * z)3894 stbi_inline static int stbi__zhuffman_decode(stbi__zbuf *a, stbi__zhuffman *z)
3895 {
3896    int b,s;
3897    if (a->num_bits < 16) stbi__fill_bits(a);
3898    b = z->fast[a->code_buffer & STBI__ZFAST_MASK];
3899    if (b) {
3900       s = b >> 9;
3901       a->code_buffer >>= s;
3902       a->num_bits -= s;
3903       return b & 511;
3904    }
3905    return stbi__zhuffman_decode_slowpath(a, z);
3906 }
3907 
// Grow the output buffer so that at least n more bytes fit after zout.
//    z    : decoder state; zout_start/zout/zout_end are updated on success
//    zout : the caller's current write position (stored into z->zout first)
//    n    : number of bytes that must fit
// Returns 1 on success. Returns 0 (with the stbi error set) if the buffer is
// not expandable, the required size would overflow, or reallocation fails.
static int stbi__zexpand(stbi__zbuf *z, char *zout, int n)  // need to make room for n bytes
{
   char *q;
   unsigned int cur, limit, old_limit;
   // largest size that still fits a non-negative int (INT_MAX on usual targets);
   // the public API reports output sizes as int
   const unsigned int max_limit = ~0u >> 1;
   z->zout = zout;
   if (!z->z_expandable) return stbi__err("output buffer limit","Corrupt PNG");
   cur   = (unsigned int) (z->zout     - z->zout_start);
   limit = old_limit = (unsigned int) (z->zout_end - z->zout_start);
   // reject requests whose total size cannot be represented
   if (n < 0 || cur > max_limit || (unsigned int) n > max_limit - cur)
      return stbi__err("outofmem", "Out of memory");
   // doubling 0 never makes progress, so start from 1 for an empty buffer
   if (limit == 0) limit = 1;
   while (cur + (unsigned int) n > limit) {
      if (limit > max_limit / 2) return stbi__err("outofmem", "Out of memory");
      limit *= 2;
   }
   q = (char *) STBI_REALLOC_SIZED(z->zout_start, old_limit, limit);
   STBI_NOTUSED(old_limit);
   if (q == NULL) return stbi__err("outofmem", "Out of memory");
   z->zout_start = q;
   z->zout       = q + cur;
   z->zout_end   = q + limit;
   return 1;
}
3926 
// DEFLATE length symbols, indexed by (symbol - 257): base match length and
// the number of extra bits that follow. Trailing zero entries pad the tables.
// These are read-only lookup tables, hence const.
static const int stbi__zlength_base[31] = {
   3,4,5,6,7,8,9,10,11,13,
   15,17,19,23,27,31,35,43,51,59,
   67,83,99,115,131,163,195,227,258,0,0 };

static const int stbi__zlength_extra[31]=
{ 0,0,0,0,0,0,0,0,1,1,1,1,2,2,2,2,3,3,3,3,4,4,4,4,5,5,5,5,0,0,0 };

// DEFLATE distance symbols: base distance and number of extra bits.
// Entries 30 and 31 are zero.
static const int stbi__zdist_base[32] = { 1,2,3,4,5,7,9,13,17,25,33,49,65,97,129,193,
257,385,513,769,1025,1537,2049,3073,4097,6145,8193,12289,16385,24577,0,0};

static const int stbi__zdist_extra[32] =
{ 0,0,0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,8,8,9,9,10,10,11,11,12,12,13,13};
3940 
// Decode one huffman-compressed DEFLATE block using a->z_length and
// a->z_distance, appending the output at a->zout (growing the buffer when
// it is expandable).
// Returns 1 when the end-of-block symbol (256) is reached, 0 on error.
static int stbi__parse_huffman_block(stbi__zbuf *a)
{
   char *zout = a->zout;
   for(;;) {
      int z = stbi__zhuffman_decode(a, &a->z_length);
      if (z < 256) {
         if (z < 0) return stbi__err("bad huffman code","Corrupt PNG"); // error in huffman codes
         if (zout >= a->zout_end) {
            if (!stbi__zexpand(a, zout, 1)) return 0;
            zout = a->zout;
         }
         *zout++ = (char) z;
      } else {
         stbi_uc *p;
         int len,dist;
         if (z == 256) {
            a->zout = zout;
            return 1;
         }
         // length symbols 286 and 287 exist in the fixed code but must not
         // appear in compressed data
         if (z >= 286) return stbi__err("bad huffman code","Corrupt PNG");
         z -= 257;
         len = stbi__zlength_base[z];
         if (stbi__zlength_extra[z]) len += stbi__zreceive(a, stbi__zlength_extra[z]);
         z = stbi__zhuffman_decode(a, &a->z_distance);
         // distance symbols 30 and 31 are likewise reserved; their table entries
         // are 0, and a zero distance would copy not-yet-written output bytes
         if (z < 0 || z >= 30) return stbi__err("bad huffman code","Corrupt PNG");
         dist = stbi__zdist_base[z];
         if (stbi__zdist_extra[z]) dist += stbi__zreceive(a, stbi__zdist_extra[z]);
         if (zout - a->zout_start < dist) return stbi__err("bad dist","Corrupt PNG");
         // compare sizes rather than forming a pointer past the end of the buffer
         if (len > a->zout_end - zout) {
            if (!stbi__zexpand(a, zout, len)) return 0;
            zout = a->zout;
         }
         p = (stbi_uc *) (zout - dist);
         if (dist == 1) { // run of one byte; common in images.
            stbi_uc v = *p;
            if (len) { do *zout++ = v; while (--len); }
         } else {
            // byte-by-byte on purpose: source and destination may overlap
            if (len) { do *zout++ = *p++; while (--len); }
         }
      }
   }
}
3982 
// Read the dynamic-huffman block header: the code-length code, then the
// run-length-encoded code lengths for the literal/length and distance
// alphabets, and build a->z_length and a->z_distance from them.
// Returns 1 on success, 0 on error.
static int stbi__compute_huffman_codes(stbi__zbuf *a)
{
   static stbi_uc length_dezigzag[19] = { 16,17,18,0,8,7,9,6,10,5,11,4,12,3,13,2,14,1,15 };
   stbi__zhuffman z_codelength;
   stbi_uc lencodes[286+32+137];//padding for maximum single op
   stbi_uc codelength_sizes[19];
   int idx;
   int filled = 0;   // number of code lengths decoded so far

   int hlit  = stbi__zreceive(a,5) + 257;
   int hdist = stbi__zreceive(a,5) + 1;
   int hclen = stbi__zreceive(a,4) + 4;
   int ntot  = hlit + hdist;

   // code lengths for the code-length alphabet arrive in a fixed permuted order
   memset(codelength_sizes, 0, sizeof(codelength_sizes));
   for (idx=0; idx < hclen; ++idx)
      codelength_sizes[length_dezigzag[idx]] = (stbi_uc) stbi__zreceive(a,3);
   if (!stbi__zbuild_huffman(&z_codelength, codelength_sizes, 19)) return 0;

   while (filled < ntot) {
      int run;
      stbi_uc fill;
      int sym = stbi__zhuffman_decode(a, &z_codelength);
      if (sym < 0 || sym >= 19) return stbi__err("bad codelengths", "Corrupt PNG");
      if (sym < 16) {
         // literal code length
         lencodes[filled++] = (stbi_uc) sym;
         continue;
      }
      // symbols 16..18 are repeat codes
      fill = 0;
      switch (sym) {
         case 16: // repeat the previous length 3..6 times
            run = stbi__zreceive(a,2)+3;
            if (filled == 0) return stbi__err("bad codelengths", "Corrupt PNG");
            fill = lencodes[filled-1];
            break;
         case 17: // 3..10 zeros
            run = stbi__zreceive(a,3)+3;
            break;
         default: // 11..138 zeros
            STBI_ASSERT(sym == 18);
            run = stbi__zreceive(a,7)+11;
            break;
      }
      if (ntot - filled < run) return stbi__err("bad codelengths", "Corrupt PNG");
      memset(lencodes+filled, fill, run);
      filled += run;
   }
   if (filled != ntot) return stbi__err("bad codelengths","Corrupt PNG");
   if (!stbi__zbuild_huffman(&a->z_length, lencodes, hlit)) return 0;
   if (!stbi__zbuild_huffman(&a->z_distance, lencodes+hlit, hdist)) return 0;
   return 1;
}
4031 
// Copy one stored (uncompressed) DEFLATE block to the output.
// Discards bits up to the next byte boundary, reads the 4-byte LEN/NLEN
// header (partly from the bit reservoir, partly from the input), validates
// it, then copies LEN bytes from the input to a->zout.
// Returns 1 on success, 0 on error.
static int stbi__parse_uncompressed_block(stbi__zbuf *a)
{
   stbi_uc header[4];
   int len,nlen,k;
   if (a->num_bits & 7)
      stbi__zreceive(a, a->num_bits & 7); // discard
   // drain the bit-packed data into header
   k = 0;
   while (a->num_bits > 0) {
      header[k++] = (stbi_uc) (a->code_buffer & 255); // suppress MSVC run-time check
      a->code_buffer >>= 8;
      a->num_bits -= 8;
   }
   STBI_ASSERT(a->num_bits == 0);
   // now fill header the normal way
   while (k < 4)
      header[k++] = stbi__zget8(a);
   len  = header[1] * 256 + header[0];
   nlen = header[3] * 256 + header[2];
   if (nlen != (len ^ 0xffff)) return stbi__err("zlib corrupt","Corrupt PNG");
   // compare against the remaining sizes; computing "ptr + len" first could
   // form a pointer beyond the end of the buffer, which is undefined behavior
   if (len > a->zbuffer_end - a->zbuffer) return stbi__err("read past buffer","Corrupt PNG");
   if (len > a->zout_end - a->zout)
      if (!stbi__zexpand(a, a->zout, len)) return 0;
   memcpy(a->zout, a->zbuffer, len);
   a->zbuffer += len;
   a->zout += len;
   return 1;
}
4060 
// Read and validate the two-byte zlib header (CMF, FLG).
// Returns 1 if acceptable, 0 (with the stbi error set) otherwise.
static int stbi__parse_zlib_header(stbi__zbuf *a)
{
   int cmf    = stbi__zget8(a);
   int flg    = stbi__zget8(a);
   int method = cmf & 15;
   /* int cinfo = cmf >> 4; */
   // zlib spec: the header read as a 16-bit big-endian value is a multiple of 31
   if ((cmf*256+flg) % 31 != 0)
      return stbi__err("bad zlib header","Corrupt PNG");
   // preset dictionary not allowed in png
   if (flg & 32)
      return stbi__err("no preset dict","Corrupt PNG");
   // DEFLATE required for png
   if (method != 8)
      return stbi__err("bad compression","Corrupt PNG");
   // window = 1 << (8 + cinfo)... but who cares, we fully buffer output
   return 1;
}
4073 
4074 static const stbi_uc stbi__zdefault_length[288] =
4075 {
4076    8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
4077    8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
4078    8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
4079    8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
4080    8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,
4081    9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,
4082    9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,
4083    9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,
4084    7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, 7,7,7,7,7,7,7,7,8,8,8,8,8,8,8,8
4085 };
4086 static const stbi_uc stbi__zdefault_distance[32] =
4087 {
4088    5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5
4089 };
4090 /*
4091 Init algorithm:
4092 {
4093    int i;   // use <= to match clearly with spec
4094    for (i=0; i <= 143; ++i)     stbi__zdefault_length[i]   = 8;
4095    for (   ; i <= 255; ++i)     stbi__zdefault_length[i]   = 9;
4096    for (   ; i <= 279; ++i)     stbi__zdefault_length[i]   = 7;
4097    for (   ; i <= 287; ++i)     stbi__zdefault_length[i]   = 8;
4098 
4099    for (i=0; i <=  31; ++i)     stbi__zdefault_distance[i] = 5;
4100 }
4101 */
4102 
// Decode a complete DEFLATE stream, optionally preceded by a zlib header.
// Processes blocks until one is flagged as final.
// Returns 1 on success, 0 on any error.
static int stbi__parse_zlib(stbi__zbuf *a, int parse_header)
{
   int is_last, btype;
   if (parse_header && !stbi__parse_zlib_header(a))
      return 0;
   a->num_bits = 0;
   a->code_buffer = 0;
   do {
      is_last = stbi__zreceive(a,1);
      btype   = stbi__zreceive(a,2);
      switch (btype) {
         case 0: // stored
            if (!stbi__parse_uncompressed_block(a)) return 0;
            break;
         case 3: // reserved block type
            return 0;
         case 1: // use fixed code lengths
            if (!stbi__zbuild_huffman(&a->z_length  , stbi__zdefault_length  , 288)) return 0;
            if (!stbi__zbuild_huffman(&a->z_distance, stbi__zdefault_distance,  32)) return 0;
            if (!stbi__parse_huffman_block(a)) return 0;
            break;
         default: // 2: code lengths are stored in the stream
            if (!stbi__compute_huffman_codes(a)) return 0;
            if (!stbi__parse_huffman_block(a)) return 0;
            break;
      }
   } while (!is_last);
   return 1;
}
4130 
// Point the decoder at an output buffer of olen bytes starting at obuf,
// record whether it may be grown (exp), then decode the whole stream.
static int stbi__do_zlib(stbi__zbuf *a, char *obuf, int olen, int exp, int parse_header)
{
   a->z_expandable = exp;
   a->zout_start   = obuf;
   a->zout_end     = obuf + olen;
   a->zout         = a->zout_start;

   return stbi__parse_zlib(a, parse_header);
}
4140 
stbi_zlib_decode_malloc_guesssize(const char * buffer,int len,int initial_size,int * outlen)4141 STBIDEF char *stbi_zlib_decode_malloc_guesssize(const char *buffer, int len, int initial_size, int *outlen)
4142 {
4143    stbi__zbuf a;
4144    char *p = (char *) stbi__malloc(initial_size);
4145    if (p == NULL) return NULL;
4146    a.zbuffer = (stbi_uc *) buffer;
4147    a.zbuffer_end = (stbi_uc *) buffer + len;
4148    if (stbi__do_zlib(&a, p, initial_size, 1, 1)) {
4149       if (outlen) *outlen = (int) (a.zout - a.zout_start);
4150       return a.zout_start;
4151    } else {
4152       STBI_FREE(a.zout_start);
4153       return NULL;
4154    }
4155 }
4156 
stbi_zlib_decode_malloc(char const * buffer,int len,int * outlen)4157 STBIDEF char *stbi_zlib_decode_malloc(char const *buffer, int len, int *outlen)
4158 {
4159    return stbi_zlib_decode_malloc_guesssize(buffer, len, 16384, outlen);
4160 }
4161 
stbi_zlib_decode_malloc_guesssize_headerflag(const char * buffer,int len,int initial_size,int * outlen,int parse_header)4162 STBIDEF char *stbi_zlib_decode_malloc_guesssize_headerflag(const char *buffer, int len, int initial_size, int *outlen, int parse_header)
4163 {
4164    stbi__zbuf a;
4165    char *p = (char *) stbi__malloc(initial_size);
4166    if (p == NULL) return NULL;
4167    a.zbuffer = (stbi_uc *) buffer;
4168    a.zbuffer_end = (stbi_uc *) buffer + len;
4169    if (stbi__do_zlib(&a, p, initial_size, 1, parse_header)) {
4170       if (outlen) *outlen = (int) (a.zout - a.zout_start);
4171       return a.zout_start;
4172    } else {
4173       STBI_FREE(a.zout_start);
4174       return NULL;
4175    }
4176 }
4177 
stbi_zlib_decode_buffer(char * obuffer,int olen,char const * ibuffer,int ilen)4178 STBIDEF int stbi_zlib_decode_buffer(char *obuffer, int olen, char const *ibuffer, int ilen)
4179 {
4180    stbi__zbuf a;
4181    a.zbuffer = (stbi_uc *) ibuffer;
4182    a.zbuffer_end = (stbi_uc *) ibuffer + ilen;
4183    if (stbi__do_zlib(&a, obuffer, olen, 0, 1))
4184       return (int) (a.zout - a.zout_start);
4185    else
4186       return -1;
4187 }
4188 
stbi_zlib_decode_noheader_malloc(char const * buffer,int len,int * outlen)4189 STBIDEF char *stbi_zlib_decode_noheader_malloc(char const *buffer, int len, int *outlen)
4190 {
4191    stbi__zbuf a;
4192    char *p = (char *) stbi__malloc(16384);
4193    if (p == NULL) return NULL;
4194    a.zbuffer = (stbi_uc *) buffer;
4195    a.zbuffer_end = (stbi_uc *) buffer+len;
4196    if (stbi__do_zlib(&a, p, 16384, 1, 0)) {
4197       if (outlen) *outlen = (int) (a.zout - a.zout_start);
4198       return a.zout_start;
4199    } else {
4200       STBI_FREE(a.zout_start);
4201       return NULL;
4202    }
4203 }
4204 
stbi_zlib_decode_noheader_buffer(char * obuffer,int olen,const char * ibuffer,int ilen)4205 STBIDEF int stbi_zlib_decode_noheader_buffer(char *obuffer, int olen, const char *ibuffer, int ilen)
4206 {
4207    stbi__zbuf a;
4208    a.zbuffer = (stbi_uc *) ibuffer;
4209    a.zbuffer_end = (stbi_uc *) ibuffer + ilen;
4210    if (stbi__do_zlib(&a, obuffer, olen, 0, 0))
4211       return (int) (a.zout - a.zout_start);
4212    else
4213       return -1;
4214 }
4215 #endif
4216 
4217 // public domain "baseline" PNG decoder   v0.10  Sean Barrett 2006-11-18
4218 //    simple implementation
4219 //      - only 8-bit samples
4220 //      - no CRC checking
4221 //      - allocates lots of intermediate memory
4222 //        - avoids problem of streaming data between subsystems
4223 //        - avoids explicit window management
4224 //    performance
4225 //      - uses stb_zlib, a PD zlib implementation with fast huffman decoding
4226 
4227 #ifndef STBI_NO_PNG
4228 typedef struct
4229 {
4230    stbi__uint32 length;
4231    stbi__uint32 type;
4232 } stbi__pngchunk;
4233 
stbi__get_chunk_header(stbi__context * s)4234 static stbi__pngchunk stbi__get_chunk_header(stbi__context *s)
4235 {
4236    stbi__pngchunk c;
4237    c.length = stbi__get32be(s);
4238    c.type   = stbi__get32be(s);
4239    return c;
4240 }
4241 
// Consume bytes from the stream and compare them with the 8-byte PNG
// signature. Returns 1 on a match; stops at the first mismatching byte and
// returns 0 with the stbi error set.
static int stbi__check_png_header(stbi__context *s)
{
   static stbi_uc png_sig[8] = { 137,80,78,71,13,10,26,10 };
   stbi_uc *expected = png_sig;
   stbi_uc *sig_end  = png_sig + 8;
   while (expected != sig_end) {
      if (stbi__get8(s) != *expected)
         return stbi__err("bad png sig","Not a PNG");
      ++expected;
   }
   return 1;
}
4250 
4251 typedef struct
4252 {
4253    stbi__context *s;
4254    stbi_uc *idata, *expanded, *out;
4255    int depth;
4256 } stbi__png;
4257 
4258 
// Scanline filter types. Values 0..4 are the filter bytes stored in the PNG
// data; 5 and 6 are internal variants.
enum {
   STBI__F_none        = 0,
   STBI__F_sub         = 1,
   STBI__F_up          = 2,
   STBI__F_avg         = 3,
   STBI__F_paeth       = 4,
   // synthetic filters used for first scanline to avoid needing a dummy row of 0s
   STBI__F_avg_first   = 5,
   STBI__F_paeth_first = 6
};
4269 
4270 static stbi_uc first_row_filter[5] =
4271 {
4272    STBI__F_none,
4273    STBI__F_sub,
4274    STBI__F_none,
4275    STBI__F_avg_first,
4276    STBI__F_paeth_first
4277 };
4278 
// Paeth predictor: of a, b and c, return the one closest to a + b - c.
// Ties are resolved in the order a, then b, then c.
static int stbi__paeth(int a, int b, int c)
{
   int estimate = a + b - c;
   int dist_a = abs(estimate - a);
   int dist_b = abs(estimate - b);
   int dist_c = abs(estimate - c);
   if (dist_a > dist_b || dist_a > dist_c)
      return (dist_b <= dist_c) ? b : c;
   return a;
}
4289 
4290 static stbi_uc stbi__depth_scale_table[9] = { 0, 0xff, 0x55, 0, 0x11, 0,0,0, 0x01 };
4291 
4292 // create the png data from post-deflated data
stbi__create_png_image_raw(stbi__png * a,stbi_uc * raw,stbi__uint32 raw_len,int out_n,stbi__uint32 x,stbi__uint32 y,int depth,int color)4293 static int stbi__create_png_image_raw(stbi__png *a, stbi_uc *raw, stbi__uint32 raw_len, int out_n, stbi__uint32 x, stbi__uint32 y, int depth, int color)
4294 {
4295    int bytes = (depth == 16? 2 : 1);
4296    stbi__context *s = a->s;
4297    stbi__uint32 i,j,stride = x*out_n*bytes;
4298    stbi__uint32 img_len, img_width_bytes;
4299    int k;
4300    int img_n = s->img_n; // copy it into a local for later
4301 
4302    int output_bytes = out_n*bytes;
4303    int filter_bytes = img_n*bytes;
4304    int width = x;
4305 
4306    STBI_ASSERT(out_n == s->img_n || out_n == s->img_n+1);
4307    a->out = (stbi_uc *) stbi__malloc_mad3(x, y, output_bytes, 0); // extra bytes to write off the end into
4308    if (!a->out) return stbi__err("outofmem", "Out of memory");
4309 
4310    img_width_bytes = (((img_n * x * depth) + 7) >> 3);
4311    img_len = (img_width_bytes + 1) * y;
4312    if (s->img_x == x && s->img_y == y) {
4313       if (raw_len != img_len) return stbi__err("not enough pixels","Corrupt PNG");
4314    } else { // interlaced:
4315       if (raw_len < img_len) return stbi__err("not enough pixels","Corrupt PNG");
4316    }
4317 
4318    for (j=0; j < y; ++j) {
4319       stbi_uc *cur = a->out + stride*j;
4320       stbi_uc *prior;
4321       int filter = *raw++;
4322 
4323       if (filter > 4)
4324          return stbi__err("invalid filter","Corrupt PNG");
4325 
4326       if (depth < 8) {
4327          STBI_ASSERT(img_width_bytes <= x);
4328          cur += x*out_n - img_width_bytes; // store output to the rightmost img_len bytes, so we can decode in place
4329          filter_bytes = 1;
4330          width = img_width_bytes;
4331       }
4332       prior = cur - stride; // bugfix: need to compute this after 'cur +=' computation above
4333 
4334       // if first row, use special filter that doesn't sample previous row
4335       if (j == 0) filter = first_row_filter[filter];
4336 
4337       // handle first byte explicitly
4338       for (k=0; k < filter_bytes; ++k) {
4339          switch (filter) {
4340             case STBI__F_none       : cur[k] = raw[k]; break;
4341             case STBI__F_sub        : cur[k] = raw[k]; break;
4342             case STBI__F_up         : cur[k] = STBI__BYTECAST(raw[k] + prior[k]); break;
4343             case STBI__F_avg        : cur[k] = STBI__BYTECAST(raw[k] + (prior[k]>>1)); break;
4344             case STBI__F_paeth      : cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(0,prior[k],0)); break;
4345             case STBI__F_avg_first  : cur[k] = raw[k]; break;
4346             case STBI__F_paeth_first: cur[k] = raw[k]; break;
4347          }
4348       }
4349 
4350       if (depth == 8) {
4351          if (img_n != out_n)
4352             cur[img_n] = 255; // first pixel
4353          raw += img_n;
4354          cur += out_n;
4355          prior += out_n;
4356       } else if (depth == 16) {
4357          if (img_n != out_n) {
4358             cur[filter_bytes]   = 255; // first pixel top byte
4359             cur[filter_bytes+1] = 255; // first pixel bottom byte
4360          }
4361          raw += filter_bytes;
4362          cur += output_bytes;
4363          prior += output_bytes;
4364       } else {
4365          raw += 1;
4366          cur += 1;
4367          prior += 1;
4368       }
4369 
4370       // this is a little gross, so that we don't switch per-pixel or per-component
4371       if (depth < 8 || img_n == out_n) {
4372          int nk = (width - 1)*filter_bytes;
4373          #define STBI__CASE(f) \
4374              case f:     \
4375                 for (k=0; k < nk; ++k)
4376          switch (filter) {
4377             // "none" filter turns into a memcpy here; make that explicit.
4378             case STBI__F_none:         memcpy(cur, raw, nk); break;
4379             STBI__CASE(STBI__F_sub)          { cur[k] = STBI__BYTECAST(raw[k] + cur[k-filter_bytes]); } break;
4380             STBI__CASE(STBI__F_up)           { cur[k] = STBI__BYTECAST(raw[k] + prior[k]); } break;
4381             STBI__CASE(STBI__F_avg)          { cur[k] = STBI__BYTECAST(raw[k] + ((prior[k] + cur[k-filter_bytes])>>1)); } break;
4382             STBI__CASE(STBI__F_paeth)        { cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(cur[k-filter_bytes],prior[k],prior[k-filter_bytes])); } break;
4383             STBI__CASE(STBI__F_avg_first)    { cur[k] = STBI__BYTECAST(raw[k] + (cur[k-filter_bytes] >> 1)); } break;
4384             STBI__CASE(STBI__F_paeth_first)  { cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(cur[k-filter_bytes],0,0)); } break;
4385          }
4386          #undef STBI__CASE
4387          raw += nk;
4388       } else {
4389          STBI_ASSERT(img_n+1 == out_n);
4390          #define STBI__CASE(f) \
4391              case f:     \
4392                 for (i=x-1; i >= 1; --i, cur[filter_bytes]=255,raw+=filter_bytes,cur+=output_bytes,prior+=output_bytes) \
4393                    for (k=0; k < filter_bytes; ++k)
4394          switch (filter) {
4395             STBI__CASE(STBI__F_none)         { cur[k] = raw[k]; } break;
4396             STBI__CASE(STBI__F_sub)          { cur[k] = STBI__BYTECAST(raw[k] + cur[k- output_bytes]); } break;
4397             STBI__CASE(STBI__F_up)           { cur[k] = STBI__BYTECAST(raw[k] + prior[k]); } break;
4398             STBI__CASE(STBI__F_avg)          { cur[k] = STBI__BYTECAST(raw[k] + ((prior[k] + cur[k- output_bytes])>>1)); } break;
4399             STBI__CASE(STBI__F_paeth)        { cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(cur[k- output_bytes],prior[k],prior[k- output_bytes])); } break;
4400             STBI__CASE(STBI__F_avg_first)    { cur[k] = STBI__BYTECAST(raw[k] + (cur[k- output_bytes] >> 1)); } break;
4401             STBI__CASE(STBI__F_paeth_first)  { cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(cur[k- output_bytes],0,0)); } break;
4402          }
4403          #undef STBI__CASE
4404 
4405          // the loop above sets the high byte of the pixels' alpha, but for
4406          // 16 bit png files we also need the low byte set. we'll do that here.
4407          if (depth == 16) {
4408             cur = a->out + stride*j; // start at the beginning of the row again
4409             for (i=0; i < x; ++i,cur+=output_bytes) {
4410                cur[filter_bytes+1] = 255;
4411             }
4412          }
4413       }
4414    }
4415 
4416    // we make a separate pass to expand bits to pixels; for performance,
4417    // this could run two scanlines behind the above code, so it won't
4418    // intefere with filtering but will still be in the cache.
4419    if (depth < 8) {
4420       for (j=0; j < y; ++j) {
4421          stbi_uc *cur = a->out + stride*j;
4422          stbi_uc *in  = a->out + stride*j + x*out_n - img_width_bytes;
4423          // unpack 1/2/4-bit into a 8-bit buffer. allows us to keep the common 8-bit path optimal at minimal cost for 1/2/4-bit
4424          // png guarante byte alignment, if width is not multiple of 8/4/2 we'll decode dummy trailing data that will be skipped in the later loop
4425          stbi_uc scale = (color == 0) ? stbi__depth_scale_table[depth] : 1; // scale grayscale values to 0..255 range
4426 
4427          // note that the final byte might overshoot and write more data than desired.
4428          // we can allocate enough data that this never writes out of memory, but it
4429          // could also overwrite the next scanline. can it overwrite non-empty data
4430          // on the next scanline? yes, consider 1-pixel-wide scanlines with 1-bit-per-pixel.
4431          // so we need to explicitly clamp the final ones
4432 
4433          if (depth == 4) {
4434             for (k=x*img_n; k >= 2; k-=2, ++in) {
4435                *cur++ = scale * ((*in >> 4)       );
4436                *cur++ = scale * ((*in     ) & 0x0f);
4437             }
4438             if (k > 0) *cur++ = scale * ((*in >> 4)       );
4439          } else if (depth == 2) {
4440             for (k=x*img_n; k >= 4; k-=4, ++in) {
4441                *cur++ = scale * ((*in >> 6)       );
4442                *cur++ = scale * ((*in >> 4) & 0x03);
4443                *cur++ = scale * ((*in >> 2) & 0x03);
4444                *cur++ = scale * ((*in     ) & 0x03);
4445             }
4446             if (k > 0) *cur++ = scale * ((*in >> 6)       );
4447             if (k > 1) *cur++ = scale * ((*in >> 4) & 0x03);
4448             if (k > 2) *cur++ = scale * ((*in >> 2) & 0x03);
4449          } else if (depth == 1) {
4450             for (k=x*img_n; k >= 8; k-=8, ++in) {
4451                *cur++ = scale * ((*in >> 7)       );
4452                *cur++ = scale * ((*in >> 6) & 0x01);
4453                *cur++ = scale * ((*in >> 5) & 0x01);
4454                *cur++ = scale * ((*in >> 4) & 0x01);
4455                *cur++ = scale * ((*in >> 3) & 0x01);
4456                *cur++ = scale * ((*in >> 2) & 0x01);
4457                *cur++ = scale * ((*in >> 1) & 0x01);
4458                *cur++ = scale * ((*in     ) & 0x01);
4459             }
4460             if (k > 0) *cur++ = scale * ((*in >> 7)       );
4461             if (k > 1) *cur++ = scale * ((*in >> 6) & 0x01);
4462             if (k > 2) *cur++ = scale * ((*in >> 5) & 0x01);
4463             if (k > 3) *cur++ = scale * ((*in >> 4) & 0x01);
4464             if (k > 4) *cur++ = scale * ((*in >> 3) & 0x01);
4465             if (k > 5) *cur++ = scale * ((*in >> 2) & 0x01);
4466             if (k > 6) *cur++ = scale * ((*in >> 1) & 0x01);
4467          }
4468          if (img_n != out_n) {
4469             int q;
4470             // insert alpha = 255
4471             cur = a->out + stride*j;
4472             if (img_n == 1) {
4473                for (q=x-1; q >= 0; --q) {
4474                   cur[q*2+1] = 255;
4475                   cur[q*2+0] = cur[q];
4476                }
4477             } else {
4478                STBI_ASSERT(img_n == 3);
4479                for (q=x-1; q >= 0; --q) {
4480                   cur[q*4+3] = 255;
4481                   cur[q*4+2] = cur[q*3+2];
4482                   cur[q*4+1] = cur[q*3+1];
4483                   cur[q*4+0] = cur[q*3+0];
4484                }
4485             }
4486          }
4487       }
4488    } else if (depth == 16) {
4489       // force the image data from big-endian to platform-native.
4490       // this is done in a separate pass due to the decoding relying
4491       // on the data being untouched, but could probably be done
4492       // per-line during decode if care is taken.
4493       stbi_uc *cur = a->out;
4494       stbi__uint16 *cur16 = (stbi__uint16*)cur;
4495 
4496       for(i=0; i < x*y*out_n; ++i,cur16++,cur+=2) {
4497          *cur16 = (cur[0] << 8) | cur[1];
4498       }
4499    }
4500 
4501    return 1;
4502 }
4503 
// Build the final pixel buffer a->out from the inflated image data.
// Non-interlaced images are unfiltered directly. Interlaced images are decoded
// as seven passes, and each pass's pixels are scattered to their positions in
// a full-size buffer.
// Returns 1 on success, 0 on error.
static int stbi__create_png_image(stbi__png *a, stbi_uc *image_data, stbi__uint32 image_data_len, int out_n, int depth, int color, int interlaced)
{
   int bytes = (depth == 16 ? 2 : 1);
   int out_bytes = out_n * bytes;
   stbi_uc *final;
   int p;
   if (!interlaced)
      return stbi__create_png_image_raw(a, image_data, image_data_len, out_n, a->s->img_x, a->s->img_y, depth, color);

   // de-interlacing
   final = (stbi_uc *) stbi__malloc_mad3(a->s->img_x, a->s->img_y, out_bytes, 0);
   // without this check the memcpy below would write through a NULL pointer
   if (!final) return stbi__err("outofmem", "Out of memory");
   for (p=0; p < 7; ++p) {
      // origin and spacing of each pass's pixels within the full image
      int xorig[] = { 0,4,0,2,0,1,0 };
      int yorig[] = { 0,0,4,0,2,0,1 };
      int xspc[]  = { 8,8,4,4,2,2,1 };
      int yspc[]  = { 8,8,8,4,4,2,2 };
      int i,j,x,y;
      // pass1_x[4] = 0, pass1_x[5] = 1, pass1_x[12] = 1
      x = (a->s->img_x - xorig[p] + xspc[p]-1) / xspc[p];
      y = (a->s->img_y - yorig[p] + yspc[p]-1) / yspc[p];
      if (x && y) {
         // bytes of filtered data this pass occupies (one filter byte per row)
         stbi__uint32 img_len = ((((a->s->img_n * x * depth) + 7) >> 3) + 1) * y;
         if (!stbi__create_png_image_raw(a, image_data, image_data_len, out_n, x, y, depth, color)) {
            STBI_FREE(final);
            return 0;
         }
         for (j=0; j < y; ++j) {
            for (i=0; i < x; ++i) {
               int out_y = j*yspc[p]+yorig[p];
               int out_x = i*xspc[p]+xorig[p];
               memcpy(final + out_y*a->s->img_x*out_bytes + out_x*out_bytes,
                      a->out + (j*x+i)*out_bytes, out_bytes);
            }
         }
         STBI_FREE(a->out);
         image_data += img_len;
         image_data_len -= img_len;
      }
   }
   a->out = final;

   return 1;
}
4547 
// Apply color-key transparency to 8-bit output in place.
//    out_n == 2 : alpha becomes 0 where gray equals tc[0], 255 otherwise
//    out_n == 4 : alpha becomes 0 where RGB equals tc[0..2], else left untouched
// Always returns 1.
static int stbi__compute_transparency(stbi__png *z, stbi_uc tc[3], int out_n)
{
   stbi__context *s = z->s;
   stbi__uint32 remaining = s->img_x * s->img_y;
   stbi_uc *px = z->out;

   // compute color-based transparency, assuming we've
   // already got 255 as the alpha value in the output
   STBI_ASSERT(out_n == 2 || out_n == 4);

   if (out_n == 2) {
      for (; remaining > 0; --remaining, px += 2)
         px[1] = (px[0] == tc[0]) ? 0 : 255;
   } else {
      for (; remaining > 0; --remaining, px += 4) {
         if (px[0] == tc[0] && px[1] == tc[1] && px[2] == tc[2])
            px[3] = 0;
      }
   }
   return 1;
}
4572 
// Apply color-key transparency to 16-bit output in place.
//    out_n == 2 : alpha becomes 0 where gray equals tc[0], 65535 otherwise
//    out_n == 4 : alpha becomes 0 where RGB equals tc[0..2], else left untouched
// Always returns 1.
static int stbi__compute_transparency16(stbi__png *z, stbi__uint16 tc[3], int out_n)
{
   stbi__context *s = z->s;
   stbi__uint32 remaining = s->img_x * s->img_y;
   stbi__uint16 *px = (stbi__uint16*) z->out;

   // compute color-based transparency, assuming we've
   // already got 65535 as the alpha value in the output
   STBI_ASSERT(out_n == 2 || out_n == 4);

   if (out_n == 2) {
      for (; remaining > 0; --remaining, px += 2)
         px[1] = (px[0] == tc[0]) ? 0 : 65535;
   } else {
      for (; remaining > 0; --remaining, px += 4) {
         if (px[0] == tc[0] && px[1] == tc[1] && px[2] == tc[2])
            px[3] = 0;
      }
   }
   return 1;
}
4597 
// Replace the palette indices in a->out with their palette colors.
//    palette   : table of 4-byte entries, indexed by pixel value
//    len       : unused
//    pal_img_n : 3 writes three bytes per pixel; any other value writes four
// Frees the index buffer and stores the new buffer in a->out.
// Returns 1 on success, 0 if the new buffer cannot be allocated.
static int stbi__expand_png_palette(stbi__png *a, stbi_uc *palette, int len, int pal_img_n)
{
   stbi__uint32 pixel_count = a->s->img_x * a->s->img_y;
   stbi_uc *indices = a->out;
   stbi_uc *indices_end;
   stbi_uc *expanded, *dst;

   STBI_NOTUSED(len);

   expanded = (stbi_uc *) stbi__malloc_mad2(pixel_count, pal_img_n, 0);
   if (expanded == NULL) return stbi__err("outofmem", "Out of memory");

   // between here and the free of a->out below, exiting would leak
   dst = expanded;
   indices_end = indices + pixel_count;

   if (pal_img_n == 3) {
      for (; indices != indices_end; ++indices, dst += 3) {
         int n = (*indices)*4;
         dst[0] = palette[n  ];
         dst[1] = palette[n+1];
         dst[2] = palette[n+2];
      }
   } else {
      for (; indices != indices_end; ++indices, dst += 4) {
         int n = (*indices)*4;
         dst[0] = palette[n  ];
         dst[1] = palette[n+1];
         dst[2] = palette[n+2];
         dst[3] = palette[n+3];
      }
   }
   STBI_FREE(a->out);
   a->out = expanded;

   return 1;
}
4634 
// File-scope options for CgBI ("iPhone") PNGs. Both default to 0 (off) and are
// changed through stbi_set_unpremultiply_on_load() and
// stbi_convert_iphone_png_to_rgb(). They are plain statics, so one setting
// applies to every decode that runs afterwards.
4635 static int stbi__unpremultiply_on_load = 0;
4636 static int stbi__de_iphone_flag = 0;
4637 
stbi_set_unpremultiply_on_load(int flag_true_if_should_unpremultiply)4638 STBIDEF void stbi_set_unpremultiply_on_load(int flag_true_if_should_unpremultiply)
4639 {
4640    stbi__unpremultiply_on_load = flag_true_if_should_unpremultiply;
4641 }
4642 
stbi_convert_iphone_png_to_rgb(int flag_true_if_should_convert)4643 STBIDEF void stbi_convert_iphone_png_to_rgb(int flag_true_if_should_convert)
4644 {
4645    stbi__de_iphone_flag = flag_true_if_should_convert;
4646 }
4647 
// Converts the decoded image in z->out from BGR(A) to RGB(A) byte order in
// place. For 4-channel images, if stbi__unpremultiply_on_load is set, the
// color channels are additionally divided by alpha (pixels with alpha 0 are
// only swapped). Expects s->img_out_n to be 3 or 4.
static void stbi__de_iphone(stbi__png *z)
{
   stbi__context *ctx = z->s;
   stbi__uint32 n, pixel_count = ctx->img_x * ctx->img_y;
   stbi_uc *px = z->out;

   if (ctx->img_out_n == 3) {
      // 3 channels: plain swap of first and third byte
      for (n = 0; n < pixel_count; ++n) {
         stbi_uc first = px[0];
         px[0] = px[2];
         px[2] = first;
         px += 3;
      }
      return;
   }

   STBI_ASSERT(ctx->img_out_n == 4);

   if (!stbi__unpremultiply_on_load) {
      // 4 channels, no unpremultiply: swap only, alpha untouched
      for (n = 0; n < pixel_count; ++n) {
         stbi_uc first = px[0];
         px[0] = px[2];
         px[2] = first;
         px += 4;
      }
      return;
   }

   // 4 channels: swap and undo the alpha premultiplication
   for (n = 0; n < pixel_count; ++n) {
      stbi_uc alpha = px[3];
      stbi_uc first = px[0];
      if (alpha == 0) {
         // cannot divide by zero alpha; just reorder
         px[0] = px[2];
         px[2] = first;
      } else {
         px[0] = px[2] * 255 / alpha;
         px[1] = px[1] * 255 / alpha;
         px[2] = first * 255 / alpha;
      }
      px += 4;
   }
}
4689 
// Packs four chunk-name characters into a single integer tag with `a` in the
// most significant byte and `d` in the least significant byte, so it can be
// used as a `case` label against the chunk type read from the stream.
4690 #define STBI__PNG_TYPE(a,b,c,d)  (((a) << 24) + ((b) << 16) + ((c) << 8) + (d))
4691 
// Reads a PNG chunk by chunk from z->s.
//   scan == STBI__SCAN_type   : only the signature is checked.
//   scan == STBI__SCAN_header : returns as soon as the dimensions and the
//                               component count (s->img_n) are known.
//   scan == STBI__SCAN_load   : collects all IDAT data and decodes the image
//                               when IEND is reached.
//   req_comp                  : requested output channel count; used at IEND
//                               to choose s->img_out_n.
// Returns 1 on success and 0 on failure. Error returns do not free z->idata,
// z->expanded or z->out; stbi__do_png() frees them after calling this.
stbi__parse_png_file(stbi__png * z,int scan,int req_comp)4692 static int stbi__parse_png_file(stbi__png *z, int scan, int req_comp)
4693 {
   // palette: up to 256 entries of 4 bytes (r,g,b,a).
   // pal_img_n: 0 for non-paletted images, otherwise 3 (RGB) or 4 (RGBA after tRNS).
4694    stbi_uc palette[1024], pal_img_n=0;
   // has_trans/tc/tc16: color-key transparency for non-paletted images (8-bit and 16-bit key).
4695    stbi_uc has_trans=0, tc[3];
4696    stbi__uint16 tc16[3];
   // ioff: bytes of IDAT data gathered so far; idata_limit: current capacity of z->idata.
4697    stbi__uint32 ioff=0, idata_limit=0, i, pal_len=0;
   // first: stays 1 until IHDR has been seen.
4698    int first=1,k,interlace=0, color=0, is_iphone=0;
4699    stbi__context *s = z->s;
4700 
4701    z->expanded = NULL;
4702    z->idata = NULL;
4703    z->out = NULL;
4704 
4705    if (!stbi__check_png_header(s)) return 0;
4706 
4707    if (scan == STBI__SCAN_type) return 1;
4708 
4709    for (;;) {
4710       stbi__pngchunk c = stbi__get_chunk_header(s);
4711       switch (c.type) {
         // CgBI: remember that this is an iPhone-style PNG; the payload itself is skipped.
4712          case STBI__PNG_TYPE('C','g','B','I'):
4713             is_iphone = 1;
4714             stbi__skip(s, c.length);
4715             break;
4716          case STBI__PNG_TYPE('I','H','D','R'): {
4717             int comp,filter;
4718             if (!first) return stbi__err("multiple IHDR","Corrupt PNG");
4719             first = 0;
4720             if (c.length != 13) return stbi__err("bad IHDR len","Corrupt PNG");
4721             s->img_x = stbi__get32be(s); if (s->img_x > (1 << 24)) return stbi__err("too large","Very large image (corrupt?)");
4722             s->img_y = stbi__get32be(s); if (s->img_y > (1 << 24)) return stbi__err("too large","Very large image (corrupt?)");
4723             z->depth = stbi__get8(s);  if (z->depth != 1 && z->depth != 2 && z->depth != 4 && z->depth != 8 && z->depth != 16)  return stbi__err("1/2/4/8/16-bit only","PNG not supported: 1/2/4/8/16-bit only");
4724             color = stbi__get8(s);  if (color > 6)         return stbi__err("bad ctype","Corrupt PNG");
4725             if (color == 3 && z->depth == 16)                  return stbi__err("bad ctype","Corrupt PNG");
            // color type 3 is the paletted type; any other type with bit 0 set is rejected
4726             if (color == 3) pal_img_n = 3; else if (color & 1) return stbi__err("bad ctype","Corrupt PNG");
4727             comp  = stbi__get8(s);  if (comp) return stbi__err("bad comp method","Corrupt PNG");
4728             filter= stbi__get8(s);  if (filter) return stbi__err("bad filter method","Corrupt PNG");
4729             interlace = stbi__get8(s); if (interlace>1) return stbi__err("bad interlace method","Corrupt PNG");
4730             if (!s->img_x || !s->img_y) return stbi__err("0-pixel image","Corrupt PNG");
4731             if (!pal_img_n) {
               // bit 1 of the color type selects RGB vs grey, bit 2 adds an alpha channel
4732                s->img_n = (color & 2 ? 3 : 1) + (color & 4 ? 1 : 0);
               // reject images whose x * y * img_n would exceed 1 << 30
4733                if ((1 << 30) / s->img_x / s->img_n < s->img_y) return stbi__err("too large", "Image too large to decode");
4734                if (scan == STBI__SCAN_header) return 1;
4735             } else {
4736                // if paletted, then pal_n is our final components, and
4737                // img_n is # components to decompress/filter.
4738                s->img_n = 1;
4739                if ((1 << 30) / s->img_x / 4 < s->img_y) return stbi__err("too large","Corrupt PNG");
4740                // if SCAN_header, have to scan to see if we have a tRNS
4741             }
4742             break;
4743          }
4744 
4745          case STBI__PNG_TYPE('P','L','T','E'):  {
4746             if (first) return stbi__err("first not IHDR", "Corrupt PNG");
4747             if (c.length > 256*3) return stbi__err("invalid PLTE","Corrupt PNG");
4748             pal_len = c.length / 3;
4749             if (pal_len * 3 != c.length) return stbi__err("invalid PLTE","Corrupt PNG");
            // expand each 3-byte entry to 4 bytes, with alpha preset to opaque
4750             for (i=0; i < pal_len; ++i) {
4751                palette[i*4+0] = stbi__get8(s);
4752                palette[i*4+1] = stbi__get8(s);
4753                palette[i*4+2] = stbi__get8(s);
4754                palette[i*4+3] = 255;
4755             }
4756             break;
4757          }
4758 
4759          case STBI__PNG_TYPE('t','R','N','S'): {
4760             if (first) return stbi__err("first not IHDR", "Corrupt PNG");
4761             if (z->idata) return stbi__err("tRNS after IDAT","Corrupt PNG");
4762             if (pal_img_n) {
               // paletted: tRNS supplies per-entry alpha, so the image becomes 4-channel
4763                if (scan == STBI__SCAN_header) { s->img_n = 4; return 1; }
4764                if (pal_len == 0) return stbi__err("tRNS before PLTE","Corrupt PNG");
4765                if (c.length > pal_len) return stbi__err("bad tRNS len","Corrupt PNG");
4766                pal_img_n = 4;
4767                for (i=0; i < c.length; ++i)
4768                   palette[i*4+3] = stbi__get8(s);
4769             } else {
               // non-paletted: tRNS is a single color key, one 16-bit value per channel;
               // only valid when img_n is odd (i.e. the image has no alpha channel)
4770                if (!(s->img_n & 1)) return stbi__err("tRNS with alpha","Corrupt PNG");
4771                if (c.length != (stbi__uint32) s->img_n*2) return stbi__err("bad tRNS len","Corrupt PNG");
4772                has_trans = 1;
4773                if (z->depth == 16) {
4774                   for (k = 0; k < s->img_n; ++k) tc16[k] = (stbi__uint16)stbi__get16be(s); // copy the values as-is
4775                } else {
4776                   for (k = 0; k < s->img_n; ++k) tc[k] = (stbi_uc)(stbi__get16be(s) & 255) * stbi__depth_scale_table[z->depth]; // non 8-bit images will be larger
4777                }
4778             }
4779             break;
4780          }
4781 
4782          case STBI__PNG_TYPE('I','D','A','T'): {
4783             if (first) return stbi__err("first not IHDR", "Corrupt PNG");
4784             if (pal_img_n && !pal_len) return stbi__err("no PLTE","Corrupt PNG");
4785             if (scan == STBI__SCAN_header) { s->img_n = pal_img_n; return 1; }
            // reject a total size that would wrap when viewed as a signed int
4786             if ((int)(ioff + c.length) < (int)ioff) return 0;
            // append this chunk to z->idata, doubling the capacity until it fits
4787             if (ioff + c.length > idata_limit) {
4788                stbi__uint32 idata_limit_old = idata_limit;
4789                stbi_uc *p;
4790                if (idata_limit == 0) idata_limit = c.length > 4096 ? c.length : 4096;
4791                while (ioff + c.length > idata_limit)
4792                   idata_limit *= 2;
4793                STBI_NOTUSED(idata_limit_old);
               // the result goes through a temporary so z->idata stays valid if the realloc fails
4794                p = (stbi_uc *) STBI_REALLOC_SIZED(z->idata, idata_limit_old, idata_limit); if (p == NULL) return stbi__err("outofmem", "Out of memory");
4795                z->idata = p;
4796             }
4797             if (!stbi__getn(s, z->idata+ioff,c.length)) return stbi__err("outofdata","Corrupt PNG");
4798             ioff += c.length;
4799             break;
4800          }
4801 
4802          case STBI__PNG_TYPE('I','E','N','D'): {
4803             stbi__uint32 raw_len, bpl;
4804             if (first) return stbi__err("first not IHDR", "Corrupt PNG");
4805             if (scan != STBI__SCAN_load) return 1;
4806             if (z->idata == NULL) return stbi__err("no IDAT","Corrupt PNG");
4807             // initial guess for decoded data size to avoid unnecessary reallocs
4808             bpl = (s->img_x * z->depth + 7) / 8; // bytes per line, per component
4809             raw_len = bpl * s->img_y * s->img_n /* pixels */ + s->img_y /* filter mode per row */;
            // raw_len is passed both as the size guess and as the out-parameter for the
            // decoded length; the last argument is 0 for CgBI files and 1 otherwise
4810             z->expanded = (stbi_uc *) stbi_zlib_decode_malloc_guesssize_headerflag((char *) z->idata, ioff, raw_len, (int *) &raw_len, !is_iphone);
4811             if (z->expanded == NULL) return 0; // zlib should set error
4812             STBI_FREE(z->idata); z->idata = NULL;
            // add an alpha channel if the caller asked for exactly one more channel
            // (and that is not 3) on a non-paletted image, or if a color key is present
4813             if ((req_comp == s->img_n+1 && req_comp != 3 && !pal_img_n) || has_trans)
4814                s->img_out_n = s->img_n+1;
4815             else
4816                s->img_out_n = s->img_n;
4817             if (!stbi__create_png_image(z, z->expanded, raw_len, s->img_out_n, z->depth, color, interlace)) return 0;
4818             if (has_trans) {
4819                if (z->depth == 16) {
4820                   if (!stbi__compute_transparency16(z, tc16, s->img_out_n)) return 0;
4821                } else {
4822                   if (!stbi__compute_transparency(z, tc, s->img_out_n)) return 0;
4823                }
4824             }
4825             if (is_iphone && stbi__de_iphone_flag && s->img_out_n > 2)
4826                stbi__de_iphone(z);
4827             if (pal_img_n) {
4828                // pal_img_n == 3 or 4
4829                s->img_n = pal_img_n; // record the actual colors we had
4830                s->img_out_n = pal_img_n;
4831                if (req_comp >= 3) s->img_out_n = req_comp;
4832                if (!stbi__expand_png_palette(z, palette, pal_len, s->img_out_n))
4833                   return 0;
4834             }
4835             STBI_FREE(z->expanded); z->expanded = NULL;
4836             return 1;
4837          }
4838 
4839          default:
4840             // if critical, fail
4841             if (first) return stbi__err("first not IHDR", "Corrupt PNG");
            // bit 29 of the tag is bit 5 of the first name character; when it is clear
            // the chunk is treated as critical and the file is rejected
4842             if ((c.type & (1 << 29)) == 0) {
4843                #ifndef STBI_NO_FAILURE_STRINGS
4844                // not threadsafe
4845                static char invalid_chunk[] = "XXXX PNG chunk not known";
4846                invalid_chunk[0] = STBI__BYTECAST(c.type >> 24);
4847                invalid_chunk[1] = STBI__BYTECAST(c.type >> 16);
4848                invalid_chunk[2] = STBI__BYTECAST(c.type >>  8);
4849                invalid_chunk[3] = STBI__BYTECAST(c.type >>  0);
4850                #endif
4851                return stbi__err(invalid_chunk, "PNG not supported: unknown PNG chunk type");
4852             }
4853             stbi__skip(s, c.length);
4854             break;
4855       }
4856       // end of PNG chunk, read and skip CRC
4857       stbi__get32be(s);
4858    }
4859 }
4860 
// Decodes the PNG attached to p and returns the pixel buffer, or NULL on
// failure.
//   x, y     - receive the image dimensions
//   n        - if non-NULL, receives the channel count found in the file
//   req_comp - 0 to keep the file's channel count, or 1..4 to convert
//   ri       - receives bits_per_channel (8 for depths below 8, otherwise
//              the PNG bit depth)
// Ownership of the returned buffer passes to the caller; all scratch buffers
// held in p are released before returning.
static void *stbi__do_png(stbi__png *p, int *x, int *y, int *n, int req_comp, stbi__result_info *ri)
{
   void *pixels = NULL;

   if (req_comp < 0 || req_comp > 4) return stbi__errpuc("bad req_comp", "Internal error");

   if (stbi__parse_png_file(p, STBI__SCAN_load, req_comp)) {
      stbi__context *ctx = p->s;

      ri->bits_per_channel = p->depth;
      if (p->depth < 8)
         ri->bits_per_channel = 8;

      // take ownership of the decoded image away from p
      pixels = p->out;
      p->out = NULL;

      if (req_comp != 0 && req_comp != ctx->img_out_n) {
         if (ri->bits_per_channel == 8) {
            pixels = stbi__convert_format((unsigned char *) pixels, ctx->img_out_n, req_comp, ctx->img_x, ctx->img_y);
         } else {
            pixels = stbi__convert_format16((stbi__uint16 *) pixels, ctx->img_out_n, req_comp, ctx->img_x, ctx->img_y);
         }
         ctx->img_out_n = req_comp;
         if (pixels == NULL) return pixels;
      }

      *x = ctx->img_x;
      *y = ctx->img_y;
      if (n) *n = ctx->img_n;
   }

   STBI_FREE(p->out);
   p->out = NULL;
   STBI_FREE(p->expanded);
   p->expanded = NULL;
   STBI_FREE(p->idata);
   p->idata = NULL;

   return pixels;
}
4890 
// Loader entry point for PNG: wraps the context in a temporary stbi__png
// decoder state and forwards everything to stbi__do_png().
static void *stbi__png_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri)
{
   stbi__png png;
   png.s = s;
   return stbi__do_png(&png, x, y, comp, req_comp, ri);
}
4897 
// Returns nonzero if the stream starts with a valid PNG signature.
// The stream is rewound afterwards regardless of the outcome.
static int stbi__png_test(stbi__context *s)
{
   int is_png = stbi__check_png_header(s);
   stbi__rewind(s);
   return is_png;
}
4905 
// Runs a header-only scan of the PNG attached to p.
// On success returns 1 and stores width, height and channel count into the
// non-NULL output pointers. On failure rewinds the stream and returns 0.
static int stbi__png_info_raw(stbi__png *p, int *x, int *y, int *comp)
{
   if (stbi__parse_png_file(p, STBI__SCAN_header, 0)) {
      if (x)    *x    = p->s->img_x;
      if (y)    *y    = p->s->img_y;
      if (comp) *comp = p->s->img_n;
      return 1;
   }
   stbi__rewind( p->s );
   return 0;
}
4917 
// Info entry point for PNG: wraps the context in a temporary stbi__png
// decoder state and forwards to stbi__png_info_raw().
static int stbi__png_info(stbi__context *s, int *x, int *y, int *comp)
{
   stbi__png png;
   png.s = s;
   return stbi__png_info_raw(&png, x, y, comp);
}
4924 #endif
4925 
4926 // Microsoft/Windows BMP image
4927 
4928 #ifndef STBI_NO_BMP
// Checks whether the stream looks like a BMP: "BM" magic followed by an
// info-header size of 12, 40, 56, 108 or 124.
// Returns 1 if so, 0 otherwise. Does not rewind the stream.
static int stbi__bmp_test_raw(stbi__context *s)
{
   int header_size;

   if (stbi__get8(s) != 'B') return 0;
   if (stbi__get8(s) != 'M') return 0;
   stbi__get32le(s); // discard filesize
   stbi__get16le(s); // discard reserved
   stbi__get16le(s); // discard reserved
   stbi__get32le(s); // discard data offset
   header_size = stbi__get32le(s);

   switch (header_size) {
      case 12:
      case 40:
      case 56:
      case 108:
      case 124:
         return 1;
      default:
         return 0;
   }
}
4943 
// Returns nonzero if the stream looks like a BMP (see stbi__bmp_test_raw).
// The stream is rewound afterwards regardless of the outcome.
static int stbi__bmp_test(stbi__context *s)
{
   int is_bmp = stbi__bmp_test_raw(s);
   stbi__rewind(s);
   return is_bmp;
}
4950 
4951 
// Returns the index (0..31) of the highest set bit of z, or -1 if z is 0.
static int stbi__high_bit(unsigned int z)
{
   static const int steps[5] = { 16, 8, 4, 2, 1 };
   int bit = 0;
   int k;

   if (z == 0) return -1;

   // binary search: whenever anything remains above the current half,
   // drop the lower half and account for it in the result
   for (k = 0; k < 5; ++k) {
      if ((z >> steps[k]) != 0) {
         bit += steps[k];
         z >>= steps[k];
      }
   }
   return bit;
}
4964 
// Returns the number of set bits in a (for values that fit in 32 bits).
static int stbi__bitcount(unsigned int a)
{
   // sum adjacent bits into 2-bit fields, then 2-bit fields into nibbles
   unsigned int pairs   = (a & 0x55555555) + ((a >> 1) & 0x55555555);
   unsigned int nibbles = (pairs & 0x33333333) + ((pairs >> 2) & 0x33333333);
   // each byte now gets its own count (at most 8)
   unsigned int bytes   = (nibbles + (nibbles >> 4)) & 0x0f0f0f0f;
   // fold the four byte counts down into the lowest byte
   unsigned int folded  = bytes + (bytes >> 8);
   folded += folded >> 16;
   return folded & 0xff;
}
4974 
// Scales a masked channel value to the 0..255 range.
//   v     - the channel bits, still in their original position in the pixel
//   shift - right shift (or left shift, if negative) that moves the
//           channel's highest bit to bit 7
//   bits  - number of significant bits in the channel
// After shifting, the channel's bit pattern is replicated downwards so that
// a channel narrower than 8 bits still spans the full range (e.g. 5-bit 31
// becomes 255).
//
// All shifting is done on an unsigned copy of v. A mask that includes bit 31
// makes v negative; shifting that as a signed int is implementation-defined
// (right) or undefined (left), and on arithmetic-shift targets the
// sign-extended bits corrupted the replicated result for bits < 8.
// bits <= 0 returns the shifted value unchanged instead of looping forever.
static int stbi__shiftsigned(int v, int shift, int bits)
{
   unsigned int u = (unsigned int) v;
   unsigned int result;
   int z;

   if (shift < 0) u <<= -shift;
   else u >>= shift;
   result = u;

   if (bits <= 0) return (int) result;

   // replicate the top `bits` bits into the lower part of the byte
   for (z = bits; z < 8; z += bits)
      result += u >> z;

   return (int) result;
}
4991 
// Values collected by stbi__bmp_parse_header() for use by the BMP loader.
typedef struct
{
   int bpp;             // bits-per-pixel field from the info header
   int offset;          // data-offset field from the file header
   int hsz;             // info header size (12, 40, 56, 108 or 124)
   unsigned int mr;     // red channel bit mask (0 if none)
   unsigned int mg;     // green channel bit mask (0 if none)
   unsigned int mb;     // blue channel bit mask (0 if none)
   unsigned int ma;     // alpha channel bit mask (0 if none)
   unsigned int all_a;  // OR of all alpha values seen; 0 at the end means the alpha channel was all zero
} stbi__bmp_data;
4997 
// Parses the BMP file header and info header from s.
// Fills s->img_x / s->img_y and the fields of `info` (offset, hsz, bpp and
// the channel masks; all_a is reset to 0 only for uncompressed 32-bit images
// with a 40/56-byte header).
// Returns a non-NULL dummy pointer on success, or NULL (via stbi__errpuc) if
// the file is not a BMP or uses an unsupported variant (unknown header size,
// 1 bit per pixel, RLE compression, inconsistent bitfields).
static void *stbi__bmp_parse_header(stbi__context *s, stbi__bmp_data *info)
{
   int header_size, compression, k;

   if (stbi__get8(s) != 'B' || stbi__get8(s) != 'M') return stbi__errpuc("not BMP", "Corrupt BMP");
   stbi__get32le(s); // discard filesize
   stbi__get16le(s); // discard reserved
   stbi__get16le(s); // discard reserved
   info->offset = stbi__get32le(s);
   header_size = stbi__get32le(s);
   info->hsz = header_size;
   info->mr = 0;
   info->mg = 0;
   info->mb = 0;
   info->ma = 0;

   switch (header_size) {
      case 12: case 40: case 56: case 108: case 124:
         break;
      default:
         return stbi__errpuc("unknown BMP", "BMP type not supported: unknown");
   }

   // the 12-byte header stores 16-bit dimensions, all others 32-bit
   if (header_size == 12) {
      s->img_x = stbi__get16le(s);
      s->img_y = stbi__get16le(s);
   } else {
      s->img_x = stbi__get32le(s);
      s->img_y = stbi__get32le(s);
   }
   if (stbi__get16le(s) != 1) return stbi__errpuc("bad BMP", "bad BMP");
   info->bpp = stbi__get16le(s);
   if (info->bpp == 1) return stbi__errpuc("monochrome", "BMP type not supported: 1-bit");

   // the 12-byte header ends here
   if (header_size == 12) return (void *) 1;

   compression = stbi__get32le(s);
   if (compression == 1 || compression == 2) return stbi__errpuc("BMP RLE", "BMP type not supported: RLE");
   // discard sizeof, hres, vres, colorsused, max important
   for (k = 0; k < 5; ++k)
      stbi__get32le(s);

   if (header_size == 108 || header_size == 124) {
      // these headers always carry explicit channel masks
      info->mr = stbi__get32le(s);
      info->mg = stbi__get32le(s);
      info->mb = stbi__get32le(s);
      info->ma = stbi__get32le(s);
      stbi__get32le(s); // discard color space
      for (k = 0; k < 12; ++k)
         stbi__get32le(s); // discard color space parameters
      if (header_size == 124) {
         // discard rendering intent, offset of profile data,
         // size of profile data, reserved
         for (k = 0; k < 4; ++k)
            stbi__get32le(s);
      }
      return (void *) 1;
   }

   // header_size is 40 or 56 from here on
   if (header_size == 56) {
      for (k = 0; k < 4; ++k)
         stbi__get32le(s);
   }
   if (info->bpp != 16 && info->bpp != 32)
      return (void *) 1;

   if (compression == 0) {
      // no bitfields given: use the default channel layout
      if (info->bpp == 32) {
         info->mr = 0xffu << 16;
         info->mg = 0xffu <<  8;
         info->mb = 0xffu <<  0;
         info->ma = 0xffu << 24;
         info->all_a = 0; // if all_a is 0 at end, then we loaded alpha channel but it was all 0
      } else {
         info->mr = 31u << 10;
         info->mg = 31u <<  5;
         info->mb = 31u <<  0;
      }
   } else if (compression == 3) {
      // explicit bitfields follow the header
      info->mr = stbi__get32le(s);
      info->mg = stbi__get32le(s);
      info->mb = stbi__get32le(s);
      // not documented, but generated by photoshop and handled by mspaint
      if (info->mr == info->mg && info->mg == info->mb) {
         // three identical masks cannot describe distinct channels
         return stbi__errpuc("bad BMP", "bad BMP");
      }
   } else {
      return stbi__errpuc("bad BMP", "bad BMP");
   }
   return (void *) 1;
}
5081 
5082 
// Decodes a BMP from s into a newly allocated 8-bit-per-channel buffer.
//   x, y     - receive the image dimensions
//   comp     - if non-NULL, receives the channel count found in the file
//              (4 if the header supplied an alpha mask, else 3)
//   req_comp - 0 to keep the file's channel count, or the channel count to
//              convert to
//   ri       - unused
// Returns the pixel buffer, or NULL on error (via stbi__errpuc or a failed
// stbi__convert_format).
stbi__bmp_load(stbi__context * s,int * x,int * y,int * comp,int req_comp,stbi__result_info * ri)5083 static void *stbi__bmp_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri)
5084 {
5085    stbi_uc *out;
5086    unsigned int mr=0,mg=0,mb=0,ma=0, all_a;
5087    stbi_uc pal[256][4];
5088    int psize=0,i,j,width;
5089    int flip_vertically, pad, target;
5090    stbi__bmp_data info;
5091    STBI_NOTUSED(ri);
5092 
   // start with all_a nonzero; the header parser resets it to 0 only for
   // uncompressed 32-bit images, where an all-zero alpha channel is later replaced
5093    info.all_a = 255;
5094    if (stbi__bmp_parse_header(s, &info) == NULL)
5095       return NULL; // error code already set
5096 
   // a positive height triggers the row reversal after decoding; a negative
   // height is made positive and the rows are kept in file order
5097    flip_vertically = ((int) s->img_y) > 0;
5098    s->img_y = abs((int) s->img_y);
5099 
5100    mr = info.mr;
5101    mg = info.mg;
5102    mb = info.mb;
5103    ma = info.ma;
5104    all_a = info.all_a;
5105 
   // palette entry count, derived from the space between the headers and the pixel data
   // NOTE(review): the 12-byte-header case subtracts 24 rather than info.hsz (12) - confirm this is intended
5106    if (info.hsz == 12) {
5107       if (info.bpp < 24)
5108          psize = (info.offset - 14 - 24) / 3;
5109    } else {
5110       if (info.bpp < 16)
5111          psize = (info.offset - 14 - info.hsz) >> 2;
5112    }
5113 
5114    s->img_n = ma ? 4 : 3;
5115    if (req_comp && req_comp >= 3) // we can directly decode 3 or 4
5116       target = req_comp;
5117    else
5118       target = s->img_n; // if they want monochrome, we'll post-convert
5119 
5120    // sanity-check size
5121    if (!stbi__mad3sizes_valid(target, s->img_x, s->img_y, 0))
5122       return stbi__errpuc("too large", "Corrupt BMP");
5123 
5124    out = (stbi_uc *) stbi__malloc_mad3(target, s->img_x, s->img_y, 0);
5125    if (!out) return stbi__errpuc("outofmem", "Out of memory");
   // ---- paletted images (fewer than 16 bits per pixel) ----
5126    if (info.bpp < 16) {
5127       int z=0;
5128       if (psize == 0 || psize > 256) { STBI_FREE(out); return stbi__errpuc("invalid", "Corrupt BMP"); }
      // the first three bytes of each entry are stored into pal[i][2], [1], [0];
      // headers other than the 12-byte one have a fourth byte per entry, which is discarded
5129       for (i=0; i < psize; ++i) {
5130          pal[i][2] = stbi__get8(s);
5131          pal[i][1] = stbi__get8(s);
5132          pal[i][0] = stbi__get8(s);
5133          if (info.hsz != 12) stbi__get8(s);
5134          pal[i][3] = 255;
5135       }
      // skip whatever remains between the end of the palette and the pixel data
5136       stbi__skip(s, info.offset - 14 - info.hsz - psize * (info.hsz == 12 ? 3 : 4));
      // width = bytes of pixel data per row
5137       if (info.bpp == 4) width = (s->img_x + 1) >> 1;
5138       else if (info.bpp == 8) width = s->img_x;
5139       else { STBI_FREE(out); return stbi__errpuc("bad bpp", "Corrupt BMP"); }
      // pad = bytes needed to round the row length up to a multiple of 4
5140       pad = (-width)&3;
5141       for (j=0; j < (int) s->img_y; ++j) {
         // two pixels per iteration: one byte holds both at 4 bpp; at 8 bpp a second byte is read
5142          for (i=0; i < (int) s->img_x; i += 2) {
5143             int v=stbi__get8(s),v2=0;
5144             if (info.bpp == 4) {
5145                v2 = v & 15;
5146                v >>= 4;
5147             }
            // NOTE(review): v is not checked against psize, so an index >= psize reads a pal entry that was never filled in
5148             out[z++] = pal[v][0];
5149             out[z++] = pal[v][1];
5150             out[z++] = pal[v][2];
5151             if (target == 4) out[z++] = 255;
            // odd width: the final iteration has only one pixel
5152             if (i+1 == (int) s->img_x) break;
5153             v = (info.bpp == 8) ? stbi__get8(s) : v2;
5154             out[z++] = pal[v][0];
5155             out[z++] = pal[v][1];
5156             out[z++] = pal[v][2];
5157             if (target == 4) out[z++] = 255;
5158          }
5159          stbi__skip(s, pad);
5160       }
   // ---- direct-color images (16 bits per pixel and up) ----
5161    } else {
5162       int rshift=0,gshift=0,bshift=0,ashift=0,rcount=0,gcount=0,bcount=0,acount=0;
5163       int z = 0;
      // easy: 0 = decode through the channel masks, 1 = 24-bit, 2 = 32-bit with the default masks
5164       int easy=0;
5165       stbi__skip(s, info.offset - 14 - info.hsz);
5166       if (info.bpp == 24) width = 3 * s->img_x;
5167       else if (info.bpp == 16) width = 2*s->img_x;
5168       else /* bpp = 32 and pad = 0 */ width=0;
5169       pad = (-width) & 3;
5170       if (info.bpp == 24) {
5171          easy = 1;
5172       } else if (info.bpp == 32) {
5173          if (mb == 0xff && mg == 0xff00 && mr == 0x00ff0000 && ma == 0xff000000)
5174             easy = 2;
5175       }
5176       if (!easy) {
5177          if (!mr || !mg || !mb) { STBI_FREE(out); return stbi__errpuc("bad masks", "Corrupt BMP"); }
5178          // right shift amt to put high bit in position #7
5179          rshift = stbi__high_bit(mr)-7; rcount = stbi__bitcount(mr);
5180          gshift = stbi__high_bit(mg)-7; gcount = stbi__bitcount(mg);
5181          bshift = stbi__high_bit(mb)-7; bcount = stbi__bitcount(mb);
5182          ashift = stbi__high_bit(ma)-7; acount = stbi__bitcount(ma);
5183       }
5184       for (j=0; j < (int) s->img_y; ++j) {
5185          if (easy) {
5186             for (i=0; i < (int) s->img_x; ++i) {
5187                unsigned char a;
               // the three color bytes are written in reverse of the order they are read
5188                out[z+2] = stbi__get8(s);
5189                out[z+1] = stbi__get8(s);
5190                out[z+0] = stbi__get8(s);
5191                z += 3;
               // the alpha byte is always consumed for 32-bit data, but stored only when target == 4
5192                a = (easy == 2 ? stbi__get8(s) : 255);
5193                all_a |= a;
5194                if (target == 4) out[z++] = a;
5195             }
5196          } else {
5197             int bpp = info.bpp;
5198             for (i=0; i < (int) s->img_x; ++i) {
5199                stbi__uint32 v = (bpp == 16 ? (stbi__uint32) stbi__get16le(s) : stbi__get32le(s));
5200                int a;
5201                out[z++] = STBI__BYTECAST(stbi__shiftsigned(v & mr, rshift, rcount));
5202                out[z++] = STBI__BYTECAST(stbi__shiftsigned(v & mg, gshift, gcount));
5203                out[z++] = STBI__BYTECAST(stbi__shiftsigned(v & mb, bshift, bcount));
5204                a = (ma ? stbi__shiftsigned(v & ma, ashift, acount) : 255);
5205                all_a |= a;
5206                if (target == 4) out[z++] = STBI__BYTECAST(a);
5207             }
5208          }
5209          stbi__skip(s, pad);
5210       }
5211    }
5212 
5213    // if alpha channel is all 0s, replace with all 255s
5214    if (target == 4 && all_a == 0)
5215       for (i=4*s->img_x*s->img_y-1; i >= 0; i -= 4)
5216          out[i] = 255;
5217 
   // reverse the row order in place by swapping row j with row (img_y-1-j)
5218    if (flip_vertically) {
5219       stbi_uc t;
5220       for (j=0; j < (int) s->img_y>>1; ++j) {
5221          stbi_uc *p1 = out +      j     *s->img_x*target;
5222          stbi_uc *p2 = out + (s->img_y-1-j)*s->img_x*target;
5223          for (i=0; i < (int) s->img_x*target; ++i) {
5224             t = p1[i], p1[i] = p2[i], p2[i] = t;
5225          }
5226       }
5227    }
5228 
5229    if (req_comp && req_comp != target) {
5230       out = stbi__convert_format(out, target, req_comp, s->img_x, s->img_y);
5231       if (out == NULL) return out; // stbi__convert_format frees input on failure
5232    }
5233 
5234    *x = s->img_x;
5235    *y = s->img_y;
5236    if (comp) *comp = s->img_n;
5237    return out;
5238 }
5239 #endif
5240 
5241 // Targa Truevision - TGA
5242 // by Jonathan Dummer
5243 #ifndef STBI_NO_TGA
5244 // returns STBI_rgb or whatever, 0 on error
stbi__tga_get_comp(int bits_per_pixel,int is_grey,int * is_rgb16)5245 static int stbi__tga_get_comp(int bits_per_pixel, int is_grey, int* is_rgb16)
5246 {
5247    // only RGB or RGBA (incl. 16bit) or grey allowed
5248    if(is_rgb16) *is_rgb16 = 0;
5249    switch(bits_per_pixel) {
5250       case 8:  return STBI_grey;
5251       case 16: if(is_grey) return STBI_grey_alpha;
5252             // else: fall-through
5253       case 15: if(is_rgb16) *is_rgb16 = 1;
5254             return STBI_rgb;
5255       case 24: // fall-through
5256       case 32: return bits_per_pixel/8;
5257       default: return 0;
5258    }
5259 }
5260 
// Reads the TGA header from s and reports the image's width, height and
// channel count through the non-NULL output pointers.
// Returns 1 on success. If the header is not an accepted TGA variant, the
// stream is rewound and 0 is returned.
static int stbi__tga_info(stbi__context *s, int *x, int *y, int *comp)
{
    int width, height, components, image_type, pixel_bits, colormap_type;
    int colormap_bits = 0;   // stays 0 for images without a colormap

    stbi__get8(s);                     // discard Offset
    colormap_type = stbi__get8(s);     // colormap type
    if (colormap_type > 1) goto reject;   // only RGB or indexed allowed
    image_type = stbi__get8(s);        // image type

    if (colormap_type == 1) {
        // colormapped (paletted) image
        if (image_type != 1 && image_type != 9) goto reject;
        stbi__skip(s,4);       // skip index of first colormap entry and number of entries
        colormap_bits = stbi__get8(s);    // bits per palette color entry
        if (colormap_bits != 8 && colormap_bits != 15 && colormap_bits != 16 &&
            colormap_bits != 24 && colormap_bits != 32) goto reject;
        stbi__skip(s,4);       // skip image x and y origin
    } else {
        // "normal" image w/o colormap - only RGB or grey allowed, +/- RLE
        if (image_type != 2 && image_type != 3 && image_type != 10 && image_type != 11) goto reject;
        stbi__skip(s,9);       // skip colormap specification and image x/y origin
    }

    width = stbi__get16le(s);
    if (width < 1) goto reject;
    height = stbi__get16le(s);
    if (height < 1) goto reject;
    pixel_bits = stbi__get8(s);   // bits per pixel
    stbi__get8(s);                // ignore alpha bits

    if (colormap_bits != 0) {
        // when using a colormap, pixel_bits is the size of the indexes;
        // only 8 or 16bit indexes are accepted
        if (pixel_bits != 8 && pixel_bits != 16) goto reject;
        components = stbi__tga_get_comp(colormap_bits, 0, NULL);
    } else {
        components = stbi__tga_get_comp(pixel_bits, (image_type == 3) || (image_type == 11), NULL);
    }
    if (!components) goto reject;

    if (x) *x = width;
    if (y) *y = height;
    if (comp) *comp = components;
    return 1;                   // seems to have passed everything

reject:
    stbi__rewind(s);
    return 0;
}
5325 
// Returns 1 if the header in s looks like a TGA variant this loader accepts,
// 0 otherwise. The stream is rewound afterwards regardless of the outcome.
static int stbi__tga_test(stbi__context *s)
{
   int accepted = 0;
   int field, color_type;

   // single-pass block: any failed check breaks out with accepted == 0
   do {
      stbi__get8(s);                 // discard Offset
      color_type = stbi__get8(s);    // color type
      if (color_type > 1) break;     // only RGB or indexed allowed
      field = stbi__get8(s);         // image type

      if (color_type == 1) {
         // colormapped (paletted) image: demands image type 1 or 9
         if (field != 1 && field != 9) break;
         stbi__skip(s,4);            // skip index of first colormap entry and number of entries
         field = stbi__get8(s);      // bits per palette color entry
         if (field != 8 && field != 15 && field != 16 && field != 24 && field != 32) break;
         stbi__skip(s,4);            // skip image x and y origin
      } else {
         // "normal" image w/o colormap: only RGB or grey allowed, +/- RLE
         if (field != 2 && field != 3 && field != 10 && field != 11) break;
         stbi__skip(s,9);            // skip colormap specification and image x/y origin
      }

      if (stbi__get16le(s) < 1) break;   // test width
      if (stbi__get16le(s) < 1) break;   // test height
      field = stbi__get8(s);             // bits per pixel
      // for colormapped images, bpp is size of an index
      if (color_type == 1 && field != 8 && field != 16) break;
      if (field != 8 && field != 15 && field != 16 && field != 24 && field != 32) break;

      accepted = 1;   // every check passed
   } while (0);

   stbi__rewind(s);
   return accepted;
}
5356 
5357 // read 16bit value and convert to 24bit RGB
// Reads one little-endian 16-bit pixel from the stream and writes three bytes
// to out[0..2]; each byte is a 5-bit field rescaled from 0..31 to 0..255.
static void stbi__tga_read_rgb16(stbi__context *s, stbi_uc* out)
{
   stbi__uint16 packed = (stbi__uint16)stbi__get16le(s);
   int shift = 10;   // bits 10..14 go to out[0], 5..9 to out[1], 0..4 to out[2]
   int c;

   // Stored in RGB(A) order, so the caller does not need to swap afterwards.
   for (c = 0; c < 3; ++c, shift -= 5) {
      int level = (packed >> shift) & 31;
      out[c] = (stbi_uc)((level * 255)/31);
   }

   // The top bit is deliberately ignored: treating it as alpha made 16-bit
   // test images fully translucent, so 15- and 16-bit TGAs are decoded as
   // plain RGB with no alpha.
}
5376 
// Decode a TGA image (raw or RLE; true-color, grey or colormapped) from 's'.
//   x, y     - receive the image width and height (written unconditionally)
//   comp     - if non-NULL, receives the component count found in the file
//   req_comp - if non-zero and different from the file's component count, the
//              decoded buffer is passed through stbi__convert_format()
//   ri       - unused by this loader
// Returns the decoded pixel buffer, or the result of stbi__errpuc() on failure.
// Rows are flipped when bit 5 of the image descriptor byte is clear, and the
// first and third components are swapped unless the source was 15/16-bit
// (which stbi__tga_read_rgb16 already stores in RGB order).
static void *stbi__tga_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri)
{
   //   read in the TGA header stuff
   int tga_offset = stbi__get8(s);
   int tga_indexed = stbi__get8(s);
   int tga_image_type = stbi__get8(s);
   int tga_is_RLE = 0;
   int tga_palette_start = stbi__get16le(s);
   int tga_palette_len = stbi__get16le(s);
   int tga_palette_bits = stbi__get8(s);
   int tga_x_origin = stbi__get16le(s);
   int tga_y_origin = stbi__get16le(s);
   int tga_width = stbi__get16le(s);
   int tga_height = stbi__get16le(s);
   int tga_bits_per_pixel = stbi__get8(s);
   int tga_comp, tga_rgb16=0;
   int tga_inverted = stbi__get8(s);
   // int tga_alpha_bits = tga_inverted & 15; // the 4 lowest bits - unused (useless?)
   //   image data
   unsigned char *tga_data;
   unsigned char *tga_palette = NULL;
   int i, j;
   unsigned char raw_data[4] = {0};
   int RLE_count = 0;
   int RLE_repeating = 0;
   int read_next_pixel = 1;
   STBI_NOTUSED(ri);

   //   do a tiny bit of preprocessing
   if ( tga_image_type >= 8 )
   {
      // image types 9/10/11 are the RLE variants of 1/2/3
      tga_image_type -= 8;
      tga_is_RLE = 1;
   }
   // descriptor bit 5 clear => rows have to be flipped after decoding
   tga_inverted = 1 - ((tga_inverted >> 5) & 1);

   //   If I'm paletted, then I'll use the number of bits from the palette
   if ( tga_indexed ) tga_comp = stbi__tga_get_comp(tga_palette_bits, 0, &tga_rgb16);
   else tga_comp = stbi__tga_get_comp(tga_bits_per_pixel, (tga_image_type == 3), &tga_rgb16);

   if(!tga_comp) // shouldn't really happen, stbi__tga_test() should have ensured basic consistency
      return stbi__errpuc("bad format", "Can't find out TGA pixelformat");

   //   tga info
   *x = tga_width;
   *y = tga_height;
   if (comp) *comp = tga_comp;

   if (!stbi__mad3sizes_valid(tga_width, tga_height, tga_comp, 0))
      return stbi__errpuc("too large", "Corrupt TGA");

   tga_data = (unsigned char*)stbi__malloc_mad3(tga_width, tga_height, tga_comp, 0);
   if (!tga_data) return stbi__errpuc("outofmem", "Out of memory");

   // skip to the data's starting position (offset usually = 0)
   stbi__skip(s, tga_offset );

   if ( !tga_indexed && !tga_is_RLE && !tga_rgb16 ) {
      // fast path: raw, non-paletted, 8 bits per component - read whole rows
      // straight into their final (possibly flipped) position
      for (i=0; i < tga_height; ++i) {
         int row = tga_inverted ? tga_height -i - 1 : i;
         stbi_uc *tga_row = tga_data + row*tga_width*tga_comp;
         stbi__getn(s, tga_row, tga_width * tga_comp);
      }
   } else  {
      //   do I need to load a palette?
      if ( tga_indexed)
      {
         // An empty palette cannot be indexed: the lookup below clamps every
         // out-of-range index to entry 0, which would read past a zero-byte
         // allocation. Reject such files up front.
         if (tga_palette_len == 0) {
            STBI_FREE(tga_data);
            return stbi__errpuc("bad palette", "Corrupt TGA");
         }

         //   any data to skip? (offset usually = 0)
         stbi__skip(s, tga_palette_start );
         //   load the palette
         tga_palette = (unsigned char*)stbi__malloc_mad2(tga_palette_len, tga_comp, 0);
         if (!tga_palette) {
            STBI_FREE(tga_data);
            return stbi__errpuc("outofmem", "Out of memory");
         }
         if (tga_rgb16) {
            stbi_uc *pal_entry = tga_palette;
            STBI_ASSERT(tga_comp == STBI_rgb);
            for (i=0; i < tga_palette_len; ++i) {
               stbi__tga_read_rgb16(s, pal_entry);
               pal_entry += tga_comp;
            }
         } else if (!stbi__getn(s, tga_palette, tga_palette_len * tga_comp)) {
               STBI_FREE(tga_data);
               STBI_FREE(tga_palette);
               return stbi__errpuc("bad palette", "Corrupt TGA");
         }
      }
      //   load the data
      for (i=0; i < tga_width * tga_height; ++i)
      {
         //   if I'm in RLE mode, do I need to get a RLE chunk?
         if ( tga_is_RLE )
         {
            if ( RLE_count == 0 )
            {
               //   yep, get the next byte as a RLE command
               int RLE_cmd = stbi__get8(s);
               RLE_count = 1 + (RLE_cmd & 127);
               RLE_repeating = RLE_cmd >> 7;
               read_next_pixel = 1;
            } else if ( !RLE_repeating )
            {
               read_next_pixel = 1;
            }
         } else
         {
            read_next_pixel = 1;
         }
         //   OK, if I need to read a pixel, do it now
         if ( read_next_pixel )
         {
            //   load however much data we did have
            if ( tga_indexed )
            {
               // read in index, then perform the lookup
               int pal_idx = (tga_bits_per_pixel == 8) ? stbi__get8(s) : stbi__get16le(s);
               if ( pal_idx >= tga_palette_len ) {
                  // invalid index
                  pal_idx = 0;
               }
               pal_idx *= tga_comp;
               for (j = 0; j < tga_comp; ++j) {
                  raw_data[j] = tga_palette[pal_idx+j];
               }
            } else if(tga_rgb16) {
               STBI_ASSERT(tga_comp == STBI_rgb);
               stbi__tga_read_rgb16(s, raw_data);
            } else {
               //   read in the data raw
               for (j = 0; j < tga_comp; ++j) {
                  raw_data[j] = stbi__get8(s);
               }
            }
            //   clear the reading flag for the next pixel
            read_next_pixel = 0;
         } // end of reading a pixel

         // copy data (for a repeating RLE run raw_data still holds the run's pixel)
         for (j = 0; j < tga_comp; ++j)
           tga_data[i*tga_comp+j] = raw_data[j];

         //   in case we're in RLE mode, keep counting down
         --RLE_count;
      }
      //   do I need to invert the image?
      if ( tga_inverted )
      {
         // swap row j with row (height-1-j), byte by byte
         for (j = 0; j*2 < tga_height; ++j)
         {
            int index1 = j * tga_width * tga_comp;
            int index2 = (tga_height - 1 - j) * tga_width * tga_comp;
            for (i = tga_width * tga_comp; i > 0; --i)
            {
               unsigned char temp = tga_data[index1];
               tga_data[index1] = tga_data[index2];
               tga_data[index2] = temp;
               ++index1;
               ++index2;
            }
         }
      }
      //   clear my palette, if I had one
      if ( tga_palette != NULL )
      {
         STBI_FREE( tga_palette );
      }
   }

   // swap RGB - if the source data was RGB16, it already is in the right order
   if (tga_comp >= 3 && !tga_rgb16)
   {
      unsigned char* tga_pixel = tga_data;
      for (i=0; i < tga_width * tga_height; ++i)
      {
         unsigned char temp = tga_pixel[0];
         tga_pixel[0] = tga_pixel[2];
         tga_pixel[2] = temp;
         tga_pixel += tga_comp;
      }
   }

   // convert to target component count
   if (req_comp && req_comp != tga_comp)
      tga_data = stbi__convert_format(tga_data, tga_comp, req_comp, tga_width, tga_height);

   //   the things I do to get rid of an error message, and yet keep
   //   Microsoft's C compilers happy... [8^(
   tga_palette_start = tga_palette_len = tga_palette_bits =
         tga_x_origin = tga_y_origin = 0;
   //   OK, done
   return tga_data;
}
5570 #endif
5571 
5572 // *************************************************************************************************
5573 // Photoshop PSD loader -- PD by Thatcher Ulrich, integration by Nicolas Schulz, tweaked by STB
5574 
5575 #ifndef STBI_NO_PSD
// Returns 1 if the next four bytes, read big-endian, equal 0x38425053
// (the PSD signature "8BPS"), otherwise 0. The stream is rewound either way.
static int stbi__psd_test(stbi__context *s)
{
   int matches = 0;

   if (stbi__get32be(s) == 0x38425053)
      matches = 1;

   stbi__rewind(s);
   return matches;
}
5582 
// Decodes one RLE-compressed channel of 'pixelCount' samples from the stream.
// Samples are written through 'p' with a stride of 4 bytes, so the caller can
// point 'p' at one channel of an interleaved 4-channel buffer.
// Returns 1 on success, 0 if a run would write more than 'pixelCount' samples.
static int stbi__psd_decode_rle(stbi__context *s, stbi_uc *p, int pixelCount)
{
   int remaining = pixelCount;

   while (remaining > 0) {
      int header = stbi__get8(s);
      int run, k;

      if (header == 128)
         continue;                       // no-op marker

      if (header < 128) {
         // literal run: the next header+1 bytes are copied as-is
         run = header + 1;
         if (run > remaining) return 0;  // corrupt data
         for (k = 0; k < run; ++k, p += 4)
            *p = stbi__get8(s);
      } else {
         // replicate run: header is a negative 8-bit count, so the next
         // byte is repeated (257 - header) times
         stbi_uc fill;
         run = 257 - header;
         if (run > remaining) return 0;  // corrupt data
         fill = stbi__get8(s);
         for (k = 0; k < run; ++k, p += 4)
            *p = fill;
      }

      remaining -= run;
   }

   return 1;
}
5620 
// Decode the image data of a PSD file from 's'.
//   x, y     - receive the image width and height (written only on success)
//   comp     - if non-NULL, receives 4 (the decoder always builds 4 channels internally)
//   req_comp - if non-zero and not 4, the buffer is converted with
//              stbi__convert_format / stbi__convert_format16 before returning
//   ri       - ri->bits_per_channel is set to 16 when 16-bit output is produced
//   bpc      - requested bits per channel; 16-bit output is only produced when the
//              file is uncompressed, has bit depth 16, and bpc == 16
// Returns the pixel buffer, or the result of stbi__errpuc() / NULL on failure.
// NOTE(review): ri->bits_per_channel is read below without being assigned in the
// 8-bit paths; presumably the caller initializes it before the call - confirm.
static void *stbi__psd_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri, int bpc)
{
   int pixelCount;
   int channelCount, compression;
   int channel, i;
   int bitdepth;
   int w,h;
   stbi_uc *out;
   // NOTE(review): ri is in fact used further down; this only silences warnings.
   STBI_NOTUSED(ri);

   // Check identifier
   if (stbi__get32be(s) != 0x38425053)   // "8BPS"
      return stbi__errpuc("not PSD", "Corrupt PSD image");

   // Check file type version.
   if (stbi__get16be(s) != 1)
      return stbi__errpuc("wrong version", "Unsupported version of PSD image");

   // Skip 6 reserved bytes.
   stbi__skip(s, 6 );

   // Read the number of channels (R, G, B, A, etc).
   channelCount = stbi__get16be(s);
   if (channelCount < 0 || channelCount > 16)
      return stbi__errpuc("wrong channel count", "Unsupported number of channels in PSD image");

   // Read the rows and columns of the image.
   h = stbi__get32be(s);
   w = stbi__get32be(s);

   // Make sure the depth is 8 or 16 bits.
   bitdepth = stbi__get16be(s);
   if (bitdepth != 8 && bitdepth != 16)
      return stbi__errpuc("unsupported bit depth", "PSD bit depth is not 8 or 16 bit");

   // Make sure the color mode is RGB.
   // Valid options are:
   //   0: Bitmap
   //   1: Grayscale
   //   2: Indexed color
   //   3: RGB color
   //   4: CMYK color
   //   7: Multichannel
   //   8: Duotone
   //   9: Lab color
   if (stbi__get16be(s) != 3)
      return stbi__errpuc("wrong color format", "PSD is not in RGB color format");

   // Each of the next three sections starts with a 32-bit big-endian length,
   // which is read and then used as the number of bytes to skip.

   // Skip the Mode Data.  (It's the palette for indexed color; other info for other modes.)
   stbi__skip(s,stbi__get32be(s) );

   // Skip the image resources.  (resolution, pen tool paths, etc)
   stbi__skip(s, stbi__get32be(s) );

   // Skip the reserved data.
   stbi__skip(s, stbi__get32be(s) );

   // Find out if the data is compressed.
   // Known values:
   //   0: no compression
   //   1: RLE compressed
   compression = stbi__get16be(s);
   if (compression > 1)
      return stbi__errpuc("bad compression", "PSD has an unknown compression format");

   // Check size (this is what makes the plain 4*w*h multiplication below safe)
   if (!stbi__mad3sizes_valid(4, w, h, 0))
      return stbi__errpuc("too large", "Corrupt PSD");

   // Create the destination image.

   if (!compression && bitdepth == 16 && bpc == 16) {
      // 16-bit output: 4 channels * 2 bytes per pixel
      out = (stbi_uc *) stbi__malloc_mad3(8, w, h, 0);
      ri->bits_per_channel = 16;
   } else
      out = (stbi_uc *) stbi__malloc(4 * w*h);

   if (!out) return stbi__errpuc("outofmem", "Out of memory");
   pixelCount = w*h;

   // Initialize the data to zero.
   //memset( out, 0, pixelCount * 4 );

   // Finally, the image data.
   if (compression) {
      // RLE as used by .PSD and .TIFF
      // Loop until you get the number of unpacked bytes you are expecting:
      //     Read the next source byte into n.
      //     If n is between 0 and 127 inclusive, copy the next n+1 bytes literally.
      //     Else if n is between -127 and -1 inclusive, copy the next byte -n+1 times.
      //     Else if n is 128, noop.
      // Endloop

      // The RLE-compressed data is preceded by a 2-byte data count for each row in the data,
      // which we're going to just skip.
      stbi__skip(s, h * channelCount * 2 );

      // Read the RLE data by channel.
      for (channel = 0; channel < 4; channel++) {
         stbi_uc *p;

         // channel data is planar in the file but interleaved (stride 4) in 'out'
         p = out+channel;
         if (channel >= channelCount) {
            // Fill this channel with default data.
            for (i = 0; i < pixelCount; i++, p += 4)
               *p = (channel == 3 ? 255 : 0);
         } else {
            // Read the RLE data.
            if (!stbi__psd_decode_rle(s, p, pixelCount)) {
               STBI_FREE(out);
               return stbi__errpuc("corrupt", "bad RLE data");
            }
         }
      }

   } else {
      // We're at the raw image data.  It's each channel in order (Red, Green, Blue, Alpha, ...)
      // where each channel consists of an 8-bit (or 16-bit) value for each pixel in the image.

      // Read the data by channel.
      for (channel = 0; channel < 4; channel++) {
         if (channel >= channelCount) {
            // Fill this channel with default data.
            if (bitdepth == 16 && bpc == 16) {
               stbi__uint16 *q = ((stbi__uint16 *) out) + channel;
               stbi__uint16 val = channel == 3 ? 65535 : 0;
               for (i = 0; i < pixelCount; i++, q += 4)
                  *q = val;
            } else {
               stbi_uc *p = out+channel;
               stbi_uc val = channel == 3 ? 255 : 0;
               for (i = 0; i < pixelCount; i++, p += 4)
                  *p = val;
            }
         } else {
            if (ri->bits_per_channel == 16) {    // output bpc
               stbi__uint16 *q = ((stbi__uint16 *) out) + channel;
               for (i = 0; i < pixelCount; i++, q += 4)
                  *q = (stbi__uint16) stbi__get16be(s);
            } else {
               stbi_uc *p = out+channel;
               if (bitdepth == 16) {  // input bpc
                  // 16-bit input, 8-bit output: keep only the high byte
                  for (i = 0; i < pixelCount; i++, p += 4)
                     *p = (stbi_uc) (stbi__get16be(s) >> 8);
               } else {
                  for (i = 0; i < pixelCount; i++, p += 4)
                     *p = stbi__get8(s);
               }
            }
         }
      }
   }

   // remove weird white matte from PSD
   // (only pixels whose alpha is neither 0 nor the maximum value are adjusted)
   if (channelCount >= 4) {
      if (ri->bits_per_channel == 16) {
         for (i=0; i < w*h; ++i) {
            stbi__uint16 *pixel = (stbi__uint16 *) out + 4*i;
            if (pixel[3] != 0 && pixel[3] != 65535) {
               float a = pixel[3] / 65535.0f;
               float ra = 1.0f / a;
               float inv_a = 65535.0f * (1 - ra);
               pixel[0] = (stbi__uint16) (pixel[0]*ra + inv_a);
               pixel[1] = (stbi__uint16) (pixel[1]*ra + inv_a);
               pixel[2] = (stbi__uint16) (pixel[2]*ra + inv_a);
            }
         }
      } else {
         for (i=0; i < w*h; ++i) {
            unsigned char *pixel = out + 4*i;
            if (pixel[3] != 0 && pixel[3] != 255) {
               float a = pixel[3] / 255.0f;
               float ra = 1.0f / a;
               float inv_a = 255.0f * (1 - ra);
               pixel[0] = (unsigned char) (pixel[0]*ra + inv_a);
               pixel[1] = (unsigned char) (pixel[1]*ra + inv_a);
               pixel[2] = (unsigned char) (pixel[2]*ra + inv_a);
            }
         }
      }
   }

   // convert to desired output format
   if (req_comp && req_comp != 4) {
      if (ri->bits_per_channel == 16)
         out = (stbi_uc *) stbi__convert_format16((stbi__uint16 *) out, 4, req_comp, w, h);
      else
         out = stbi__convert_format(out, 4, req_comp, w, h);
      if (out == NULL) return out; // stbi__convert_format frees input on failure
   }

   // the reported component count is always 4, independent of req_comp
   if (comp) *comp = 4;
   *y = h;
   *x = w;

   return out;
}
5818 #endif
5819 
5820 // *************************************************************************************************
5821 // Softimage PIC loader
5822 // by Tom Seddon
5823 //
5824 // See http://softimage.wiki.softimage.com/index.php/INFO:_PIC_file_format
5825 // See http://ozviz.wasp.uwa.edu.au/~pbourke/dataformats/softimagepic/
5826 
5827 #ifndef STBI_NO_PIC
// Reads up to four bytes from the stream and compares them with str[0..3].
// Stops at the first mismatch and returns 0; returns 1 if all four match.
static int stbi__pic_is4(stbi__context *s,const char *str)
{
   int remaining = 4;

   while (remaining-- > 0) {
      stbi_uc expected = (stbi_uc) *str++;
      if (stbi__get8(s) != expected)
         return 0;
   }

   return 1;
}
5837 
// Checks for the PIC signature: the bytes 53 80 F6 34, then 84 bytes that are
// skipped, then the text "PICT". Returns 1 on a match, 0 otherwise.
// Does not rewind the stream.
static int stbi__pic_test_core(stbi__context *s)
{
   int to_skip;

   if (stbi__pic_is4(s,"\x53\x80\xF6\x34")) {
      for (to_skip = 84; to_skip > 0; --to_skip)
         stbi__get8(s);

      if (stbi__pic_is4(s,"PICT"))
         return 1;
   }

   return 0;
}
5853 
// One channel-packet descriptor from a PIC header, as filled in by
// stbi__pic_load_core (three consecutive bytes from the stream).
//   size    - must be 8, otherwise the loader fails with "packet isn't 8bpp"
//   type    - compression: 0 = uncompressed, 1 = pure RLE, 2 = mixed RLE
//   channel - bit mask of the components the packet carries; bits 0x80, 0x40,
//             0x20, 0x10 select destination bytes 0..3 (0x10 is treated as alpha)
typedef struct
{
   stbi_uc size,type,channel;
} stbi__pic_packet;
5858 
// Reads one byte from the stream for every component selected in 'channel'
// (bit 0x80 -> dest[0], 0x40 -> dest[1], 0x20 -> dest[2], 0x10 -> dest[3]).
// Unselected components of 'dest' are left untouched.
// Returns 'dest', or the result of stbi__errpuc() if the stream is at EOF
// before a selected component could be read.
static stbi_uc *stbi__readval(stbi__context *s, int channel, stbi_uc *dest)
{
   int bit, slot = 0;

   for (bit = 0x80; slot < 4; bit >>= 1, ++slot) {
      if (!(channel & bit))
         continue;
      if (stbi__at_eof(s)) return stbi__errpuc("bad file","PIC file too short");
      dest[slot] = stbi__get8(s);
   }

   return dest;
}
5872 
// Copies the components selected in 'channel' from src to dest
// (bit 0x80 -> index 0, 0x40 -> 1, 0x20 -> 2, 0x10 -> 3); other components
// of dest keep their current value.
static void stbi__copyval(int channel,stbi_uc *dest,const stbi_uc *src)
{
   if (channel & 0x80) dest[0] = src[0];
   if (channel & 0x40) dest[1] = src[1];
   if (channel & 0x20) dest[2] = src[2];
   if (channel & 0x10) dest[3] = src[3];
}
5881 
// Reads the PIC packet descriptors and then decodes 'height' scanlines of
// 'width' pixels into 'result', which is addressed as 4 bytes per pixel.
//   comp   - receives 4 if any packet has channel bit 0x10 set, otherwise 3
//            (only written once all packet descriptors were read successfully)
//   result - caller-provided buffer; only the components named by a packet's
//            channel mask are overwritten
// Returns 'result' on success, or 0 / the result of stbi__errpuc() on failure.
static stbi_uc *stbi__pic_load_core(stbi__context *s,int width,int height,int *comp, stbi_uc *result)
{
   int act_comp=0,num_packets=0,y,chained;
   stbi__pic_packet packets[10];

   // this will (should...) cater for even some bizarre stuff like having data
   // for the same channel in multiple packets.
   do {
      stbi__pic_packet *packet;

      // refuse to read more descriptors than the fixed-size array can hold
      if (num_packets==sizeof(packets)/sizeof(packets[0]))
         return stbi__errpuc("bad format","too many packets");

      packet = &packets[num_packets++];

      // each descriptor is 4 bytes; a non-zero first byte means another one follows
      chained = stbi__get8(s);
      packet->size    = stbi__get8(s);
      packet->type    = stbi__get8(s);
      packet->channel = stbi__get8(s);

      // union of all channel masks seen so far
      act_comp |= packet->channel;

      if (stbi__at_eof(s))          return stbi__errpuc("bad file","file too short (reading packets)");
      if (packet->size != 8)  return stbi__errpuc("bad format","packet isn't 8bpp");
   } while (chained);

   *comp = (act_comp & 0x10 ? 4 : 3); // has alpha channel?

   // every scanline stores the data of each packet in turn
   for(y=0; y<height; ++y) {
      int packet_idx;

      for(packet_idx=0; packet_idx < num_packets; ++packet_idx) {
         stbi__pic_packet *packet = &packets[packet_idx];
         // each packet starts again at the first pixel of the current row
         stbi_uc *dest = result+y*width*4;

         switch (packet->type) {
            default:
               return stbi__errpuc("bad format","packet has bad compression type");

            case 0: {//uncompressed
               int x;

               for(x=0;x<width;++x, dest+=4)
                  if (!stbi__readval(s,packet->channel,dest))
                     return 0;
               break;
            }

            case 1://Pure RLE
               {
                  int left=width, i;

                  // each run is a count byte followed by one pixel value
                  while (left>0) {
                     stbi_uc count,value[4];

                     count=stbi__get8(s);
                     if (stbi__at_eof(s))   return stbi__errpuc("bad file","file too short (pure read count)");

                     // a run longer than the rest of the row is clamped, not rejected
                     if (count > left)
                        count = (stbi_uc) left;

                     if (!stbi__readval(s,packet->channel,value))  return 0;

                     for(i=0; i<count; ++i,dest+=4)
                        stbi__copyval(packet->channel,dest,value);
                     left -= count;
                  }
               }
               break;

            case 2: {//Mixed RLE
               int left=width;
               while (left>0) {
                  int count = stbi__get8(s), i;
                  if (stbi__at_eof(s))  return stbi__errpuc("bad file","file too short (mixed read count)");

                  if (count >= 128) { // Repeated
                     stbi_uc value[4];

                     // 128 is an escape: the real count follows as a 16-bit
                     // big-endian value; otherwise the run length is count-127
                     if (count==128)
                        count = stbi__get16be(s);
                     else
                        count -= 127;
                     if (count > left)
                        return stbi__errpuc("bad file","scanline overrun");

                     if (!stbi__readval(s,packet->channel,value))
                        return 0;

                     for(i=0;i<count;++i, dest += 4)
                        stbi__copyval(packet->channel,dest,value);
                  } else { // Raw
                     // count+1 literal pixels follow
                     ++count;
                     if (count>left) return stbi__errpuc("bad file","scanline overrun");

                     for(i=0;i<count;++i, dest+=4)
                        if (!stbi__readval(s,packet->channel,dest))
                           return 0;
                  }
                  left-=count;
               }
               break;
            }
         }
      }
   }

   return result;
}
5991 
// Decode a Softimage PIC image from 's'.
//   px, py   - receive the image width and height (written once the pixel
//              buffer has been allocated)
//   comp     - if non-NULL, receives the component count reported by
//              stbi__pic_load_core (3, or 4 when an alpha channel is present)
//   req_comp - requested component count; 0 means "use the file's count"
//   ri       - unused by this loader
// Returns the pixel buffer after conversion from the internal RGBA layout,
// or 0 / the result of stbi__errpuc() on failure.
static void *stbi__pic_load(stbi__context *s,int *px,int *py,int *comp,int req_comp, stbi__result_info *ri)
{
   stbi_uc *result;
   int i, x,y, internal_comp;
   STBI_NOTUSED(ri);

   // let the core always write a component count, even if the caller doesn't want it
   if (!comp) comp = &internal_comp;

   // skip the first 92 header bytes
   for (i=0; i<92; ++i)
      stbi__get8(s);

   x = stbi__get16be(s);
   y = stbi__get16be(s);
   if (stbi__at_eof(s))  return stbi__errpuc("bad file","file too short (pic header)");
   if (!stbi__mad3sizes_valid(x, y, 4, 0)) return stbi__errpuc("too large", "PIC image too large to decode");

   stbi__get32be(s); //skip `ratio'
   stbi__get16be(s); //skip `fields'
   stbi__get16be(s); //skip `pad'

   // intermediate buffer is RGBA
   result = (stbi_uc *) stbi__malloc_mad3(x, y, 4, 0);
   // the allocation can fail; never hand a NULL buffer to memset or the decoder
   if (!result) return stbi__errpuc("outofmem", "Out of memory");
   // components not covered by any packet stay 0xff
   memset(result, 0xff, x*y*4);

   *px = x;
   *py = y;

   if (!stbi__pic_load_core(s,x,y,comp, result)) {
      // Stop here: *comp may not have been written on failure, and
      // stbi__convert_format must not be called without a source buffer.
      STBI_FREE(result);
      return 0;
   }

   if (req_comp == 0) req_comp = *comp;
   result=stbi__convert_format(result,4,req_comp,x,y);

   return result;
}
6027 
// Returns the result of stbi__pic_test_core(); the stream is rewound
// afterwards whether or not the signature matched.
static int stbi__pic_test(stbi__context *s)
{
   int is_pic;

   is_pic = stbi__pic_test_core(s);
   stbi__rewind(s);

   return is_pic;
}
6034 #endif
6035 
6036 // *************************************************************************************************
6037 // GIF loader -- public domain by Jean-Marc Lienher -- simplified/shrunk by stb
6038 
6039 #ifndef STBI_NO_GIF
// One entry of the LZW code table used by the GIF decoder.
typedef struct
{
   stbi__int16 prefix;   // code of the preceding string; negative means "no prefix"
   stbi_uc first;        // first byte of the string this code expands to
   stbi_uc suffix;       // last byte of the string; used as the color table index on output
} stbi__gif_lzw;
6046 
// Decoder state for one GIF stream.
// NOTE(review): fields whose use is not visible in this part of the file
// (old_out, eflags, delay, lpal, lflags) are documented from their names only.
typedef struct
{
   int w,h;                            // logical screen size read by stbi__gif_header
   stbi_uc *out, *old_out;             // output buffer (always 4 components)
   // flags/bgindex/ratio are single header bytes; transparent starts out as -1
   int flags, bgindex, ratio, transparent, eflags, delay;
   stbi_uc  pal[256][4];               // global color table, filled when (flags & 0x80)
   stbi_uc lpal[256][4];               // presumably the local (per-image) color table
   stbi__gif_lzw codes[4096];          // LZW code table
   stbi_uc *color_table;               // table used for output, 4 bytes per entry
   int parse, step;                    // row-advance state used by stbi__out_gif_code
   int lflags;
   // The x/y values below are used as byte offsets into 'out' by
   // stbi__out_gif_code (cur_x advances by 4 per pixel, cur_y by 'step').
   int start_x, start_y;
   int max_x, max_y;
   int cur_x, cur_y;
   int line_size;
} stbi__gif;
6063 
// Checks for the six-byte GIF signature "GIF87a" or "GIF89a".
// Reading stops at the first byte that does not match.
// Returns 1 on a match, 0 otherwise; does not rewind the stream.
static int stbi__gif_test_raw(stbi__context *s)
{
   const char *magic = "GIF8";
   int k, version;

   for (k = 0; k < 4; ++k)
      if (stbi__get8(s) != magic[k]) return 0;

   version = stbi__get8(s);
   if (!(version == '9' || version == '7')) return 0;

   return stbi__get8(s) == 'a';
}
6073 
// Returns the result of stbi__gif_test_raw(); the stream is rewound
// afterwards whether or not the signature matched.
static int stbi__gif_test(stbi__context *s)
{
   int is_gif;

   is_gif = stbi__gif_test_raw(s);
   stbi__rewind(s);

   return is_gif;
}
6080 
// Reads 'num_entries' three-byte colors from the stream into 'pal'.
// The three bytes of each color are stored in reverse order (first byte read
// goes to index 2, last to index 0). Index 3 is set to 0 for the entry whose
// number equals 'transp' and to 255 for every other entry.
static void stbi__gif_parse_colortable(stbi__context *s, stbi_uc pal[256][4], int num_entries, int transp)
{
   int idx = 0;

   while (idx < num_entries) {
      stbi_uc *entry = pal[idx];

      entry[2] = stbi__get8(s);
      entry[1] = stbi__get8(s);
      entry[0] = stbi__get8(s);
      entry[3] = (idx == transp) ? 0 : 255;

      ++idx;
   }
}
6091 
// Parses the GIF signature and logical screen descriptor into 'g'.
//   comp    - if non-NULL, receives 4
//   is_info - when non-zero, parsing stops after the screen descriptor;
//             otherwise the global color table is read too if flag 0x80 is set
// Returns 1 on success, or the result of stbi__err() if the signature is wrong.
static int stbi__gif_header(stbi__context *s, stbi__gif *g, int *comp, int is_info)
{
   const char *magic = "GIF8";
   stbi_uc version;
   int k;

   for (k = 0; k < 4; ++k)
      if (stbi__get8(s) != magic[k])
         return stbi__err("not GIF", "Corrupt GIF");

   version = stbi__get8(s);
   if (!(version == '7' || version == '9'))
      return stbi__err("not GIF", "Corrupt GIF");
   if (stbi__get8(s) != 'a')
      return stbi__err("not GIF", "Corrupt GIF");

   stbi__g_failure_reason = "";

   // logical screen descriptor
   g->w       = stbi__get16le(s);
   g->h       = stbi__get16le(s);
   g->flags   = stbi__get8(s);
   g->bgindex = stbi__get8(s);
   g->ratio   = stbi__get8(s);
   g->transparent = -1;

   // can't actually tell whether it's 3 or 4 until we parse the comments
   if (comp) *comp = 4;

   // global color table: 2 << (low three flag bits) entries
   if (!is_info && (g->flags & 0x80))
      stbi__gif_parse_colortable(s,g->pal, 2 << (g->flags & 7), -1);

   return 1;
}
6119 
// Reads only the GIF header to report the image dimensions.
//   x, y - if non-NULL, receive the width and height from the header
//   comp - passed on to stbi__gif_header (which stores 4 when non-NULL)
// Returns 1 on success. Returns 0 if the temporary decoder state cannot be
// allocated or the header is not a GIF header; in the latter case the stream
// is rewound.
static int stbi__gif_info_raw(stbi__context *s, int *x, int *y, int *comp)
{
   // stbi__gif is large (color tables + LZW table), so it lives on the heap
   stbi__gif* g = (stbi__gif*) stbi__malloc(sizeof(stbi__gif));
   // stbi__gif_header writes through g, so a failed allocation must stop here;
   // nothing has been read from the stream yet, so no rewind is needed
   if (!g) return stbi__err("outofmem", "Out of memory");
   if (!stbi__gif_header(s, g, comp, 1)) {
      STBI_FREE(g);
      stbi__rewind( s );
      return 0;
   }
   if (x) *x = g->w;
   if (y) *y = g->h;
   STBI_FREE(g);
   return 1;
}
6133 
// Emits the pixel string that LZW code 'code' stands for into g->out.
// The string is stored as a backwards-linked list (each entry points at its
// prefix), so the prefix is emitted first by recursion and this entry's own
// suffix byte is emitted last.
static void stbi__out_gif_code(stbi__gif *g, stbi__uint16 code)
{
   stbi__gif_lzw *entry = &g->codes[code];
   stbi_uc *dst, *color;

   // recurse to decode the prefixes, since the linked-list is backwards,
   // and working backwards through an interleaved image would be nasty
   if (entry->prefix >= 0)
      stbi__out_gif_code(g, entry->prefix);

   // already past the last row: drop the pixel
   if (g->cur_y >= g->max_y) return;

   dst   = g->out + (g->cur_x + g->cur_y);
   color = g->color_table + entry->suffix * 4;

   // only entries whose fourth byte is >= 128 are written; bytes 0 and 2 are swapped
   if (color[3] >= 128) {
      dst[0] = color[2];
      dst[1] = color[1];
      dst[2] = color[0];
      dst[3] = color[3];
   }
   g->cur_x += 4;

   if (g->cur_x < g->max_x) return;

   // end of row: go back to the left edge and advance by the current row step
   g->cur_x = g->start_x;
   g->cur_y += g->step;

   // ran off the bottom with passes still pending: start the next pass
   // with a smaller step
   while (g->cur_y >= g->max_y && g->parse > 0) {
      g->step = (1 << g->parse) * g->line_size;
      g->cur_y = g->start_y + (g->step >> 1);
      --g->parse;
   }
}
6167 
// Decodes the LZW-compressed raster data of one GIF image from 's', emitting
// pixels through stbi__out_gif_code().
// Returns g->out when the data ends (end-of-stream code or a zero-length
// sub-block), NULL when the initial code-size byte is larger than 12, and the
// result of stbi__errpuc() when the code stream is corrupt.
static stbi_uc *stbi__process_gif_raster(stbi__context *s, stbi__gif *g)
{
   stbi_uc lzw_cs;
   stbi__int32 len, init_code;
   stbi__uint32 first;
   stbi__int32 codesize, codemask, avail, oldcode, bits, valid_bits, clear;
   stbi__gif_lzw *p;

   // first byte of the raster data: the initial LZW code size
   lzw_cs = stbi__get8(s);
   if (lzw_cs > 12) return NULL;
   clear = 1 << lzw_cs;                 // clear code; clear+1 is the end-of-stream code
   first = 1;                           // stays 1 until the first clear code is seen
   codesize = lzw_cs + 1;
   codemask = (1 << codesize) - 1;
   bits = 0;                            // bit accumulator, filled from the low end
   valid_bits = 0;                      // number of unread bits in 'bits'
   // codes below 'clear' stand for themselves
   for (init_code = 0; init_code < clear; init_code++) {
      g->codes[init_code].prefix = -1;
      g->codes[init_code].first = (stbi_uc) init_code;
      g->codes[init_code].suffix = (stbi_uc) init_code;
   }

   // support no starting clear code
   avail = clear+2;                     // next free table slot
   oldcode = -1;                        // previous code; -1 right after a clear

   len = 0;                             // bytes left in the current data sub-block
   for(;;) {
      if (valid_bits < codesize) {
         // not enough bits for a full code: pull in one more byte
         if (len == 0) {
            len = stbi__get8(s); // start new block
            if (len == 0)
               return g->out;
         }
         --len;
         bits |= (stbi__int32) stbi__get8(s) << valid_bits;
         valid_bits += 8;
      } else {
         stbi__int32 code = bits & codemask;
         bits >>= codesize;
         valid_bits -= codesize;
         // @OPTIMIZE: is there some way we can accelerate the non-clear path?
         if (code == clear) {  // clear code
            // reset the table to its initial state
            codesize = lzw_cs + 1;
            codemask = (1 << codesize) - 1;
            avail = clear + 2;
            oldcode = -1;
            first = 0;
         } else if (code == clear + 1) { // end of stream code
            // skip the rest of this sub-block and any that follow, up to the
            // zero-length terminator
            stbi__skip(s, len);
            while ((len = stbi__get8(s)) > 0)
               stbi__skip(s,len);
            return g->out;
         } else if (code <= avail) {
            // a data code before any clear code is rejected
            if (first) return stbi__errpuc("no clear code", "Corrupt GIF");

            if (oldcode >= 0) {
               // add a new table entry: the previous string plus one byte
               p = &g->codes[avail++];
               if (avail > 4096)        return stbi__errpuc("too many codes", "Corrupt GIF");
               p->prefix = (stbi__int16) oldcode;
               p->first = g->codes[oldcode].first;
               // avail was already incremented above; when 'code' names the
               // entry being built right now, g->codes[code] is *p, whose
               // .first was assigned on the previous line
               p->suffix = (code == avail) ? p->first : g->codes[code].first;
            } else if (code == avail)
               return stbi__errpuc("illegal code in raster", "Corrupt GIF");

            stbi__out_gif_code(g, (stbi__uint16) code);

            // table filled up to the current code width: widen the codes,
            // but never past the 4096-entry table
            if ((avail & codemask) == 0 && avail <= 0x0FFF) {
               codesize++;
               codemask = (1 << codesize) - 1;
            }

            oldcode = code;
         } else {
            return stbi__errpuc("illegal code in raster", "Corrupt GIF");
         }
      }
   }
}
6247 
// Paints the background palette entry (g->pal[g->bgindex]) into g->out.
//
// x0/x1 are byte offsets within a row (stepped by 4, one pixel), y0/y1 are
// byte offsets of row starts (stepped by 4 * g->w, one row). Palette bytes
// 2,1,0 are written to output bytes 0,1,2 and the output alpha is set to 0.
static void stbi__fill_gif_background(stbi__gif *g, int x0, int y0, int x1, int y1)
{
   int row_off, col_off;
   stbi_uc *bg = g->pal[g->bgindex];

   row_off = y0;
   while (row_off < y1) {
      col_off = x0;
      while (col_off < x1) {
         stbi_uc *px = &g->out[row_off + col_off];
         px[0] = bg[2];
         px[1] = bg[1];
         px[2] = bg[0];
         px[3] = 0;
         col_off += 4;
      }
      row_off += 4 * g->w;
   }
}
6262 
6263 // this function is designed to support animated gifs, although stb_image doesn't support it
stbi__gif_load_next(stbi__context * s,stbi__gif * g,int * comp,int req_comp)6264 static stbi_uc *stbi__gif_load_next(stbi__context *s, stbi__gif *g, int *comp, int req_comp)
6265 {
6266    int i;
6267    stbi_uc *prev_out = 0;
6268 
6269    if (g->out == 0 && !stbi__gif_header(s, g, comp,0))
6270       return 0; // stbi__g_failure_reason set by stbi__gif_header
6271 
6272    if (!stbi__mad3sizes_valid(g->w, g->h, 4, 0))
6273       return stbi__errpuc("too large", "GIF too large");
6274 
6275    prev_out = g->out;
6276    g->out = (stbi_uc *) stbi__malloc_mad3(4, g->w, g->h, 0);
6277    if (g->out == 0) return stbi__errpuc("outofmem", "Out of memory");
6278 
6279    switch ((g->eflags & 0x1C) >> 2) {
6280       case 0: // unspecified (also always used on 1st frame)
6281          stbi__fill_gif_background(g, 0, 0, 4 * g->w, 4 * g->w * g->h);
6282          break;
6283       case 1: // do not dispose
6284          if (prev_out) memcpy(g->out, prev_out, 4 * g->w * g->h);
6285          g->old_out = prev_out;
6286          break;
6287       case 2: // dispose to background
6288          if (prev_out) memcpy(g->out, prev_out, 4 * g->w * g->h);
6289          stbi__fill_gif_background(g, g->start_x, g->start_y, g->max_x, g->max_y);
6290          break;
6291       case 3: // dispose to previous
6292          if (g->old_out) {
6293             for (i = g->start_y; i < g->max_y; i += 4 * g->w)
6294                memcpy(&g->out[i + g->start_x], &g->old_out[i + g->start_x], g->max_x - g->start_x);
6295          }
6296          break;
6297    }
6298 
6299    for (;;) {
6300       switch (stbi__get8(s)) {
6301          case 0x2C: /* Image Descriptor */
6302          {
6303             int prev_trans = -1;
6304             stbi__int32 x, y, w, h;
6305             stbi_uc *o;
6306 
6307             x = stbi__get16le(s);
6308             y = stbi__get16le(s);
6309             w = stbi__get16le(s);
6310             h = stbi__get16le(s);
6311             if (((x + w) > (g->w)) || ((y + h) > (g->h)))
6312                return stbi__errpuc("bad Image Descriptor", "Corrupt GIF");
6313 
6314             g->line_size = g->w * 4;
6315             g->start_x = x * 4;
6316             g->start_y = y * g->line_size;
6317             g->max_x   = g->start_x + w * 4;
6318             g->max_y   = g->start_y + h * g->line_size;
6319             g->cur_x   = g->start_x;
6320             g->cur_y   = g->start_y;
6321 
6322             g->lflags = stbi__get8(s);
6323 
6324             if (g->lflags & 0x40) {
6325                g->step = 8 * g->line_size; // first interlaced spacing
6326                g->parse = 3;
6327             } else {
6328                g->step = g->line_size;
6329                g->parse = 0;
6330             }
6331 
6332             if (g->lflags & 0x80) {
6333                stbi__gif_parse_colortable(s,g->lpal, 2 << (g->lflags & 7), g->eflags & 0x01 ? g->transparent : -1);
6334                g->color_table = (stbi_uc *) g->lpal;
6335             } else if (g->flags & 0x80) {
6336                if (g->transparent >= 0 && (g->eflags & 0x01)) {
6337                   prev_trans = g->pal[g->transparent][3];
6338                   g->pal[g->transparent][3] = 0;
6339                }
6340                g->color_table = (stbi_uc *) g->pal;
6341             } else
6342                return stbi__errpuc("missing color table", "Corrupt GIF");
6343 
6344             o = stbi__process_gif_raster(s, g);
6345             if (o == NULL) return NULL;
6346 
6347             if (prev_trans != -1)
6348                g->pal[g->transparent][3] = (stbi_uc) prev_trans;
6349 
6350             return o;
6351          }
6352 
6353          case 0x21: // Comment Extension.
6354          {
6355             int len;
6356             if (stbi__get8(s) == 0xF9) { // Graphic Control Extension.
6357                len = stbi__get8(s);
6358                if (len == 4) {
6359                   g->eflags = stbi__get8(s);
6360                   g->delay = stbi__get16le(s);
6361                   g->transparent = stbi__get8(s);
6362                } else {
6363                   stbi__skip(s, len);
6364                   break;
6365                }
6366             }
6367             while ((len = stbi__get8(s)) != 0)
6368                stbi__skip(s, len);
6369             break;
6370          }
6371 
6372          case 0x3B: // gif stream termination code
6373             return (stbi_uc *) s; // using '1' causes warning on some compilers
6374 
6375          default:
6376             return stbi__errpuc("unknown code", "Corrupt GIF");
6377       }
6378    }
6379 
6380    STBI_NOTUSED(req_comp);
6381 }
6382 
// Loads the first frame of a GIF.
//
// A temporary stbi__gif decoder state is heap-allocated, zeroed and freed
// before returning. On success *x / *y receive the image size and the
// returned buffer holds the pixels, converted from 4 channels to req_comp
// channels when req_comp is non-zero and not 4. Returns 0 on failure,
// including when the stream ends before any image or when the decoder state
// cannot be allocated. ri is unused.
static void *stbi__gif_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri)
{
   stbi_uc *u = 0;
   stbi__gif* g = (stbi__gif*) stbi__malloc(sizeof(stbi__gif));
   STBI_NOTUSED(ri);

   // the allocation can fail; never hand a null pointer to memset
   if (g == NULL) return stbi__errpuc("outofmem", "Out of memory");
   memset(g, 0, sizeof(*g));

   u = stbi__gif_load_next(s, g, comp, req_comp);
   if (u == (stbi_uc *) s) u = 0;  // end of animated gif marker
   if (u) {
      *x = g->w;
      *y = g->h;
      if (req_comp && req_comp != 4)
         u = stbi__convert_format(u, 4, req_comp, g->w, g->h);
   }
   else if (g->out)
      STBI_FREE(g->out); // decoding failed after the frame buffer was allocated
   STBI_FREE(g);
   return u;
}
6403 
// Reports GIF dimensions / component count; a thin forwarder to
// stbi__gif_info_raw() whose result is returned unchanged.
static int stbi__gif_info(stbi__context *s, int *x, int *y, int *comp)
{
   int is_gif = stbi__gif_info_raw(s, x, y, comp);
   return is_gif;
}
6408 #endif
6409 
6410 // *************************************************************************************************
6411 // Radiance RGBE HDR loader
6412 // originally by Nicolas Schulz
6413 #ifndef STBI_NO_HDR
// Compares the next bytes of the stream with `signature`.
// Returns 0 at the first mismatch (without rewinding); on a full match the
// stream is rewound and 1 is returned.
static int stbi__hdr_test_core(stbi__context *s, const char *signature)
{
   const char *expected = signature;

   while (*expected) {
      if (stbi__get8(s) != *expected)
         return 0;
      ++expected;
   }

   stbi__rewind(s);
   return 1;
}
6423 
// Returns non-zero when the stream starts with either of the accepted
// signatures, "#?RADIANCE\n" or "#?RGBE\n". The stream is rewound after
// every probe.
static int stbi__hdr_test(stbi__context* s)
{
   int matched;

   matched = stbi__hdr_test_core(s, "#?RADIANCE\n");
   stbi__rewind(s);
   if (matched)
      return matched;

   matched = stbi__hdr_test_core(s, "#?RGBE\n");
   stbi__rewind(s);
   return matched;
}
6434 
6435 #define STBI__HDR_BUFLEN  1024
// Reads one line (up to, and not including, '\n') from the stream into
// `buffer` and NUL-terminates it. At most STBI__HDR_BUFLEN-1 characters are
// stored; when that limit is hit the rest of the line is discarded.
// Returns `buffer`.
static char *stbi__hdr_gettoken(stbi__context *z, char *buffer)
{
   int used = 0;
   char ch = (char) stbi__get8(z);

   for (;;) {
      if (stbi__at_eof(z) || ch == '\n')
         break;

      buffer[used++] = ch;

      if (used == STBI__HDR_BUFLEN-1) {
         // buffer full: flush to end of line
         for (;;) {
            if (stbi__at_eof(z)) break;
            if (stbi__get8(z) == '\n') break;
         }
         break;
      }

      ch = (char) stbi__get8(z);
   }

   buffer[used] = 0;
   return buffer;
}
6457 
// Converts one 4-byte RGBE pixel (`input`) into `req_comp` floats.
//
// A zero exponent byte yields black. Otherwise each mantissa is scaled by
// 2^(exponent - 136). For req_comp <= 2 the three channels are averaged into
// output[0]. The extra channel (output[1] for 2 components, output[3] for 4)
// is always set to 1.
static void stbi__hdr_convert(float *output, stbi_uc *input, int req_comp)
{
   if ( input[3] == 0 ) {
      // zero exponent: all colour channels are 0
      if (req_comp == 4 || req_comp == 3) {
         if (req_comp == 4) output[3] = 1;
         output[0] = output[1] = output[2] = 0;
      } else if (req_comp == 2 || req_comp == 1) {
         if (req_comp == 2) output[1] = 1;
         output[0] = 0;
      }
      return;
   }

   {
      // shared scale factor derived from the exponent byte
      float scale = (float) ldexp(1.0f, input[3] - (int)(128 + 8));

      if (req_comp > 2) {
         output[0] = input[0] * scale;
         output[1] = input[1] * scale;
         output[2] = input[2] * scale;
      } else {
         output[0] = (input[0] + input[1] + input[2]) * scale / 3;
      }

      if (req_comp == 2) output[1] = 1;
      if (req_comp == 4) output[3] = 1;
   }
}
6484 
// Loads a Radiance RGBE image as floating point data.
//
// The header must start with "#?RADIANCE" or "#?RGBE", contain the line
// "FORMAT=32-bit_rle_rgbe" and be followed by a "-Y <height> +X <width>"
// resolution line. *x / *y receive the size, *comp (if non-NULL) is set to 3,
// and req_comp == 0 is treated as 3.
//
// Returns a buffer of width * height * req_comp floats allocated with
// stbi__malloc_mad4(), or the result of stbi__errpf() on failure. ri is unused.
static float *stbi__hdr_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri)
{
   char buffer[STBI__HDR_BUFLEN];
   char *token;
   int valid = 0;
   int width, height;
   stbi_uc *scanline;
   float *hdr_data;
   int len;
   unsigned char count, value;
   int i, j, k, c1,c2, z;
   const char *headerToken;
   STBI_NOTUSED(ri);

   // Check identifier
   headerToken = stbi__hdr_gettoken(s,buffer);
   if (strcmp(headerToken, "#?RADIANCE") != 0 && strcmp(headerToken, "#?RGBE") != 0)
      return stbi__errpf("not HDR", "Corrupt HDR image");

   // Parse header (terminated by an empty line)
   for(;;) {
      token = stbi__hdr_gettoken(s,buffer);
      if (token[0] == 0) break;
      if (strcmp(token, "FORMAT=32-bit_rle_rgbe") == 0) valid = 1;
   }

   if (!valid)    return stbi__errpf("unsupported format", "Unsupported HDR format");

   // Parse width and height
   // can't use sscanf() if we're not using stdio!
   token = stbi__hdr_gettoken(s,buffer);
   if (strncmp(token, "-Y ", 3))  return stbi__errpf("unsupported data layout", "Unsupported HDR format");
   token += 3;
   height = (int) strtol(token, &token, 10);
   while (*token == ' ') ++token;
   if (strncmp(token, "+X ", 3))  return stbi__errpf("unsupported data layout", "Unsupported HDR format");
   token += 3;
   width = (int) strtol(token, NULL, 10);

   *x = width;
   *y = height;

   if (comp) *comp = 3;
   if (req_comp == 0) req_comp = 3;

   if (!stbi__mad4sizes_valid(width, height, req_comp, sizeof(float), 0))
      return stbi__errpf("too large", "HDR image is too large");

   // Read data
   hdr_data = (float *) stbi__malloc_mad4(width, height, req_comp, sizeof(float), 0);
   if (!hdr_data)
      return stbi__errpf("outofmem", "Out of memory");

   // Load image data
   // image data is stored as some number of scanlines, either flat
   // (4 bytes per pixel) or run-length encoded per channel
   if ( width < 8 || width >= 32768) {
      // Read flat data
      for (j=0; j < height; ++j) {
         for (i=0; i < width; ++i) {
            stbi_uc rgbe[4];
           main_decode_loop:
            stbi__getn(s, rgbe, 4);
            stbi__hdr_convert(hdr_data + j * width * req_comp + i * req_comp, rgbe, req_comp);
         }
      }
   } else {
      // Read RLE-encoded data
      scanline = NULL;

      for (j = 0; j < height; ++j) {
         c1 = stbi__get8(s);
         c2 = stbi__get8(s);
         len = stbi__get8(s);
         if (c1 != 2 || c2 != 2 || (len & 0x80)) {
            // not run-length encoded, so we have to actually use THIS data as a decoded
            // pixel (note this can't be a valid pixel--one of RGB must be >= 128)
            stbi_uc rgbe[4];
            rgbe[0] = (stbi_uc) c1;
            rgbe[1] = (stbi_uc) c2;
            rgbe[2] = (stbi_uc) len;
            rgbe[3] = (stbi_uc) stbi__get8(s);
            stbi__hdr_convert(hdr_data, rgbe, req_comp);
            i = 1;
            j = 0;
            STBI_FREE(scanline);
            goto main_decode_loop; // yes, this makes no sense
         }
         len <<= 8;
         len |= stbi__get8(s);
         if (len != width) { STBI_FREE(hdr_data); STBI_FREE(scanline); return stbi__errpf("invalid decoded scanline length", "corrupt HDR"); }
         if (scanline == NULL) {
            scanline = (stbi_uc *) stbi__malloc_mad2(width, 4, 0);
            if (!scanline) {
               STBI_FREE(hdr_data);
               return stbi__errpf("outofmem", "Out of memory");
            }
         }

         // each of the 4 channels (R, G, B, E) is run-length encoded separately
         for (k = 0; k < 4; ++k) {
            int nleft;
            i = 0;
            while ((nleft = width - i) > 0) {
               count = stbi__get8(s);
               if (count > 128) {
                  // Run
                  value = stbi__get8(s);
                  count -= 128;
                  if (count > nleft) { STBI_FREE(hdr_data); STBI_FREE(scanline); return stbi__errpf("corrupt", "bad RLE data in HDR"); }
                  for (z = 0; z < count; ++z)
                     scanline[i++ * 4 + k] = value;
               } else {
                  // Dump
                  // A zero-length dump makes no progress; since reads past the
                  // end of the data yield 0, accepting it would spin forever
                  // on a truncated file, so treat it as corrupt.
                  if (count == 0 || count > nleft) { STBI_FREE(hdr_data); STBI_FREE(scanline); return stbi__errpf("corrupt", "bad RLE data in HDR"); }
                  for (z = 0; z < count; ++z)
                     scanline[i++ * 4 + k] = stbi__get8(s);
               }
            }
         }
         for (i=0; i < width; ++i)
            stbi__hdr_convert(hdr_data+(j*width + i)*req_comp, scanline + i*4, req_comp);
      }
      if (scanline)
         STBI_FREE(scanline);
   }

   return hdr_data;
}
6612 
// Reads the size of a Radiance HDR image without decoding it.
//
// Any of x, y, comp may be NULL. Returns 1 and stores width, height and a
// component count of 3 on success. Returns 0 after rewinding the stream when
// the signature, the "FORMAT=32-bit_rle_rgbe" line or the "-Y h +X w"
// resolution line is missing.
static int stbi__hdr_info(stbi__context *s, int *x, int *y, int *comp)
{
   char buffer[STBI__HDR_BUFLEN];
   char *token;
   int valid = 0;
   int dummy;

   // route missing outputs to a scratch variable
   x    = x    ? x    : &dummy;
   y    = y    ? y    : &dummy;
   comp = comp ? comp : &dummy;

   if (stbi__hdr_test(s) == 0)
      goto not_hdr;

   // scan header lines until the first empty one
   token = stbi__hdr_gettoken(s,buffer);
   while (token[0] != 0) {
      if (strcmp(token, "FORMAT=32-bit_rle_rgbe") == 0) valid = 1;
      token = stbi__hdr_gettoken(s,buffer);
   }
   if (!valid)
      goto not_hdr;

   // resolution line: "-Y <height> +X <width>"
   token = stbi__hdr_gettoken(s,buffer);
   if (strncmp(token, "-Y ", 3))
      goto not_hdr;
   token += 3;
   *y = (int) strtol(token, &token, 10);

   while (*token == ' ') ++token;
   if (strncmp(token, "+X ", 3))
      goto not_hdr;
   token += 3;
   *x = (int) strtol(token, NULL, 10);

   *comp = 3;
   return 1;

not_hdr:
   stbi__rewind( s );
   return 0;
}
6656 #endif // STBI_NO_HDR
6657 
6658 #ifndef STBI_NO_BMP
// Reports the size and component count of a BMP by parsing only its header.
// The stream is always rewound afterwards. Returns 0 if the header does not
// parse; otherwise 1, with 4 components when an alpha mask is present
// (info.ma != 0) and 3 otherwise. x, y and comp may each be NULL.
static int stbi__bmp_info(stbi__context *s, int *x, int *y, int *comp)
{
   stbi__bmp_data header;
   void *parsed;

   header.all_a = 255;
   parsed = stbi__bmp_parse_header(s, &header);
   stbi__rewind( s );

   if (parsed != NULL) {
      if (x) *x = s->img_x;
      if (y) *y = s->img_y;
      if (comp) {
         if (header.ma)
            *comp = 4;
         else
            *comp = 3;
      }
      return 1;
   }
   return 0;
}
6674 #endif
6675 
6676 #ifndef STBI_NO_PSD
// Reads the size of a PSD image from its header without decoding it.
//
// x, y and comp may each be NULL. Returns 1 and reports 4 components on
// success; on any header mismatch the stream is rewound and 0 is returned.
// Both 8 and 16 bits per channel are accepted, matching what the loader
// advertises as supported.
static int stbi__psd_info(stbi__context *s, int *x, int *y, int *comp)
{
   int channelCount, dummy, depth;
   if (!x) x = &dummy;
   if (!y) y = &dummy;
   if (!comp) comp = &dummy;
   if (stbi__get32be(s) != 0x38425053) {   // signature "8BPS"
       stbi__rewind( s );
       return 0;
   }
   if (stbi__get16be(s) != 1) {            // only version 1 is accepted
       stbi__rewind( s );
       return 0;
   }
   stbi__skip(s, 6);                       // 6 header bytes that are not inspected
   channelCount = stbi__get16be(s);
   if (channelCount < 0 || channelCount > 16) {
       stbi__rewind( s );
       return 0;
   }
   *y = stbi__get32be(s);
   *x = stbi__get32be(s);
   depth = stbi__get16be(s);               // bits per channel
   if (depth != 8 && depth != 16) {
       stbi__rewind( s );
       return 0;
   }
   if (stbi__get16be(s) != 3) {            // color mode must be 3
       stbi__rewind( s );
       return 0;
   }
   *comp = 4;
   return 1;
}
6710 #endif
6711 
6712 #ifndef STBI_NO_PIC
// Reads the size and component count of a Softimage PIC file.
//
// x, y and comp may each be NULL. Returns 1 on success, with *comp set to 4
// when any packet's channel mask has bit 0x10 set and 3 otherwise. Every
// failure path rewinds the stream and returns 0.
static int stbi__pic_info(stbi__context *s, int *x, int *y, int *comp)
{
   int act_comp=0,num_packets=0,chained,dummy;
   stbi__pic_packet packets[10];

   if (!x) x = &dummy;
   if (!y) y = &dummy;
   if (!comp) comp = &dummy;

   if (!stbi__pic_is4(s,"\x53\x80\xF6\x34")) {
      stbi__rewind(s);
      return 0;
   }

   stbi__skip(s, 88);

   *x = stbi__get16be(s);
   *y = stbi__get16be(s);
   if (stbi__at_eof(s)) {
      stbi__rewind( s);
      return 0;
   }
   // reject images with more than 2^28 pixels
   if ( (*x) != 0 && (1 << 28) / (*x) < (*y)) {
      stbi__rewind( s );
      return 0;
   }

   stbi__skip(s, 8);

   // walk the chained packet descriptors, OR-ing their channel masks together
   do {
      stbi__pic_packet *packet;

      if (num_packets==sizeof(packets)/sizeof(packets[0])) {
         // too many packets: rewind like every other failure path so the
         // next format probe starts at the beginning of the stream
         stbi__rewind( s );
         return 0;
      }

      packet = &packets[num_packets++];
      chained = stbi__get8(s);
      packet->size    = stbi__get8(s);
      packet->type    = stbi__get8(s);
      packet->channel = stbi__get8(s);
      act_comp |= packet->channel;

      if (stbi__at_eof(s)) {
          stbi__rewind( s );
          return 0;
      }
      if (packet->size != 8) {
          stbi__rewind( s );
          return 0;
      }
   } while (chained);

   *comp = (act_comp & 0x10 ? 4 : 3);

   return 1;
}
6769 #endif
6770 
6771 // *************************************************************************************************
6772 // Portable Gray Map and Portable Pixel Map loader
6773 // by Ken Miller
6774 //
6775 // PGM: http://netpbm.sourceforge.net/doc/pgm.html
6776 // PPM: http://netpbm.sourceforge.net/doc/ppm.html
6777 //
6778 // Known limitations:
6779 //    (Comments in the header section ARE supported since 2.09; no longer a limitation)
6780 //    Does not support ASCII image data (formats P2 and P3)
6781 //    Does not support 16-bit-per-channel
6782 
6783 #ifndef STBI_NO_PNM
6784 
// Returns 1 when the stream starts with "P5" or "P6". Otherwise the stream
// is rewound and 0 is returned; on a match the two bytes stay consumed.
static int      stbi__pnm_test(stbi__context *s)
{
   char magic   = (char) stbi__get8(s);
   char variant = (char) stbi__get8(s);

   if (magic == 'P' && (variant == '5' || variant == '6'))
      return 1;

   stbi__rewind( s );
   return 0;
}
6796 
// Loads a binary PGM/PPM image.
//
// The header is parsed by stbi__pnm_info() into s->img_x / img_y / img_n,
// which are copied to *x, *y and (if non-NULL) *comp. The raw pixel bytes
// are then read in one go and, when req_comp is non-zero and differs from
// the file's component count, converted with stbi__convert_format().
// Returns the pixel buffer, or 0 / NULL on failure. ri is unused.
static void *stbi__pnm_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri)
{
   stbi_uc *pixels;
   STBI_NOTUSED(ri);

   if (!stbi__pnm_info(s, (int *)&s->img_x, (int *)&s->img_y, (int *)&s->img_n))
      return 0;

   *x = s->img_x;
   *y = s->img_y;
   if (comp) *comp = s->img_n;

   if (!stbi__mad3sizes_valid(s->img_n, s->img_x, s->img_y, 0))
      return stbi__errpuc("too large", "PNM too large");

   pixels = (stbi_uc *) stbi__malloc_mad3(s->img_n, s->img_x, s->img_y, 0);
   if (!pixels) return stbi__errpuc("outofmem", "Out of memory");
   stbi__getn(s, pixels, s->img_n * s->img_x * s->img_y);

   // no conversion requested, or the file already has the requested layout
   if (req_comp == 0 || req_comp == s->img_n)
      return pixels;

   // stbi__convert_format frees input on failure, so its result (possibly
   // NULL) can be returned directly
   return stbi__convert_format(pixels, s->img_n, req_comp, s->img_x, s->img_y);
}
6822 
// Returns 1 for space, tab, newline, vertical tab, form feed and carriage
// return; 0 for every other character.
static int      stbi__pnm_isspace(char c)
{
   switch (c) {
      case ' ':
      case '\t':
      case '\n':
      case '\v':
      case '\f':
      case '\r':
         return 1;
      default:
         return 0;
   }
}
6827 
// Advances past whitespace and '#' comments in a PNM header.
//
// *c holds the current look-ahead character on entry and the first
// character that is neither whitespace nor part of a comment on exit
// (or whatever was read last if the end of the stream is reached).
// A comment runs from '#' to the next '\n' or '\r'.
static void     stbi__pnm_skip_whitespace(stbi__context *s, char *c)
{
   char ch = *c;

   for (;;) {
      while (!stbi__at_eof(s) && stbi__pnm_isspace(ch))
         ch = (char) stbi__get8(s);

      if (stbi__at_eof(s) || ch != '#')
         break;

      // inside a comment: consume up to the end of the line
      while (!stbi__at_eof(s) && ch != '\n' && ch != '\r' )
         ch = (char) stbi__get8(s);
   }

   *c = ch;
}
6841 
// Returns 1 when c is an ASCII decimal digit ('0'..'9'), 0 otherwise.
static int      stbi__pnm_isdigit(char c)
{
   if (c < '0')
      return 0;
   return c <= '9';
}
6846 
// Parses an unsigned decimal integer from a PNM header.
//
// *c is the current look-ahead character; digits are consumed from the
// stream until a non-digit or the end of the stream, and *c is left holding
// the first unconsumed character. Returns the parsed value, or 0 when no
// digit was present.
//
// The header comes from an untrusted file, so the accumulation saturates at
// the largest int instead of overflowing (signed overflow is undefined
// behaviour). All digits are still consumed.
static int      stbi__pnm_getinteger(stbi__context *s, char *c)
{
   // largest int, computed without <limits.h>
   const int int_max = (int) (~0u >> 1);
   int value = 0;

   while (!stbi__at_eof(s) && stbi__pnm_isdigit(*c)) {
      int digit = *c - '0';
      if (value > (int_max - digit) / 10)
         value = int_max;              // would overflow: clamp and stay clamped
      else
         value = value*10 + digit;
      *c = (char) stbi__get8(s);
   }

   return value;
}
6858 
// Parses a binary PNM header ("P5" or "P6", width, height, max value).
//
// The stream is rewound before parsing. x, y and comp may each be NULL.
// Returns 0 (after rewinding) when the magic number does not match, the
// result of stbi__err() when the max value exceeds 255, and 1 otherwise.
// On success the stream is left positioned after the header.
static int      stbi__pnm_info(stbi__context *s, int *x, int *y, int *comp)
{
   int max_value, sink;
   char look, magic, variant;

   // route missing outputs to a scratch variable
   x    = x    ? x    : &sink;
   y    = y    ? y    : &sink;
   comp = comp ? comp : &sink;

   stbi__rewind(s);

   // Get identifier
   magic   = (char) stbi__get8(s);
   variant = (char) stbi__get8(s);
   if (!(magic == 'P' && (variant == '5' || variant == '6'))) {
       stbi__rewind(s);
       return 0;
   }

   // '5' is 1-component .pgm; '6' is 3-component .ppm
   if (variant == '6')
      *comp = 3;
   else
      *comp = 1;

   look = (char) stbi__get8(s);
   stbi__pnm_skip_whitespace(s, &look);

   *x = stbi__pnm_getinteger(s, &look);          // width
   stbi__pnm_skip_whitespace(s, &look);

   *y = stbi__pnm_getinteger(s, &look);          // height
   stbi__pnm_skip_whitespace(s, &look);

   max_value = stbi__pnm_getinteger(s, &look);   // max sample value

   if (max_value <= 255)
      return 1;
   return stbi__err("max value > 255", "PPM image not 8-bit");
}
6896 #endif
6897 
// Tries each compiled-in format's info routine in a fixed order and returns
// 1 as soon as one of them recognises the stream. If none does, the result
// of stbi__err() is returned.
static int stbi__info_main(stbi__context *s, int *x, int *y, int *comp)
{
#ifndef STBI_NO_JPEG
   if (stbi__jpeg_info(s, x, y, comp)) {
      return 1;
   }
#endif

#ifndef STBI_NO_PNG
   if (stbi__png_info(s, x, y, comp)) {
      return 1;
   }
#endif

#ifndef STBI_NO_GIF
   if (stbi__gif_info(s, x, y, comp)) {
      return 1;
   }
#endif

#ifndef STBI_NO_BMP
   if (stbi__bmp_info(s, x, y, comp)) {
      return 1;
   }
#endif

#ifndef STBI_NO_PSD
   if (stbi__psd_info(s, x, y, comp)) {
      return 1;
   }
#endif

#ifndef STBI_NO_PIC
   if (stbi__pic_info(s, x, y, comp)) {
      return 1;
   }
#endif

#ifndef STBI_NO_PNM
   if (stbi__pnm_info(s, x, y, comp)) {
      return 1;
   }
#endif

#ifndef STBI_NO_HDR
   if (stbi__hdr_info(s, x, y, comp)) {
      return 1;
   }
#endif

   // test tga last because it's a crappy test!
#ifndef STBI_NO_TGA
   if (stbi__tga_info(s, x, y, comp)) {
      return 1;
   }
#endif

   return stbi__err("unknown image type", "Image not of any known type, or corrupt");
}
6939 
6940 #ifndef STBI_NO_STDIO
stbi_info(char const * filename,int * x,int * y,int * comp)6941 STBIDEF int stbi_info(char const *filename, int *x, int *y, int *comp)
6942 {
6943     FILE *f = stbi__fopen(filename, "rb");
6944     int result;
6945     if (!f) return stbi__err("can't fopen", "Unable to open file");
6946     result = stbi_info_from_file(f, x, y, comp);
6947     fclose(f);
6948     return result;
6949 }
6950 
stbi_info_from_file(FILE * f,int * x,int * y,int * comp)6951 STBIDEF int stbi_info_from_file(FILE *f, int *x, int *y, int *comp)
6952 {
6953    int r;
6954    stbi__context s;
6955    long pos = ftell(f);
6956    stbi__start_file(&s, f);
6957    r = stbi__info_main(&s,x,y,comp);
6958    fseek(f,pos,SEEK_SET);
6959    return r;
6960 }
6961 #endif // !STBI_NO_STDIO
6962 
stbi_info_from_memory(stbi_uc const * buffer,int len,int * x,int * y,int * comp)6963 STBIDEF int stbi_info_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp)
6964 {
6965    stbi__context s;
6966    stbi__start_mem(&s,buffer,len);
6967    return stbi__info_main(&s,x,y,comp);
6968 }
6969 
stbi_info_from_callbacks(stbi_io_callbacks const * c,void * user,int * x,int * y,int * comp)6970 STBIDEF int stbi_info_from_callbacks(stbi_io_callbacks const *c, void *user, int *x, int *y, int *comp)
6971 {
6972    stbi__context s;
6973    stbi__start_callbacks(&s, (stbi_io_callbacks *) c, user);
6974    return stbi__info_main(&s,x,y,comp);
6975 }
6976 
6977 #endif // STB_IMAGE_IMPLEMENTATION
6978 
6979 /*
6980    revision history:
6981       2.15  (2017-03-18) fix png-1,2,4 bug; now all Imagenet JPGs decode;
6982                          warning fixes; disable run-time SSE detection on gcc;
6983                          uniform handling of optional "return" values;
6984                          thread-safe initialization of zlib tables
6985       2.14  (2017-03-03) remove deprecated STBI_JPEG_OLD; fixes for Imagenet JPGs
6986       2.13  (2016-11-29) add 16-bit API, only supported for PNG right now
6987       2.12  (2016-04-02) fix typo in 2.11 PSD fix that caused crashes
6988       2.11  (2016-04-02) allocate large structures on the stack
6989                          remove white matting for transparent PSD
6990                          fix reported channel count for PNG & BMP
6991                          re-enable SSE2 in non-gcc 64-bit
6992                          support RGB-formatted JPEG
6993                          read 16-bit PNGs (only as 8-bit)
6994       2.10  (2016-01-22) avoid warning introduced in 2.09 by STBI_REALLOC_SIZED
6995       2.09  (2016-01-16) allow comments in PNM files
6996                          16-bit-per-pixel TGA (not bit-per-component)
6997                          info() for TGA could break due to .hdr handling
6998                          info() for BMP now shares code instead of sloppy parse
6999                          can use STBI_REALLOC_SIZED if allocator doesn't support realloc
7000                          code cleanup
7001       2.08  (2015-09-13) fix to 2.07 cleanup, reading RGB PSD as RGBA
7002       2.07  (2015-09-13) fix compiler warnings
7003                          partial animated GIF support
7004                          limited 16-bpc PSD support
7005                          #ifdef unused functions
7006                          bug with < 92 byte PIC,PNM,HDR,TGA
7007       2.06  (2015-04-19) fix bug where PSD returns wrong '*comp' value
7008       2.05  (2015-04-19) fix bug in progressive JPEG handling, fix warning
7009       2.04  (2015-04-15) try to re-enable SIMD on MinGW 64-bit
7010       2.03  (2015-04-12) extra corruption checking (mmozeiko)
7011                          stbi_set_flip_vertically_on_load (nguillemot)
7012                          fix NEON support; fix mingw support
7013       2.02  (2015-01-19) fix incorrect assert, fix warning
7014       2.01  (2015-01-17) fix various warnings; suppress SIMD on gcc 32-bit without -msse2
7015       2.00b (2014-12-25) fix STBI_MALLOC in progressive JPEG
7016       2.00  (2014-12-25) optimize JPG, including x86 SSE2 & NEON SIMD (ryg)
7017                          progressive JPEG (stb)
7018                          PGM/PPM support (Ken Miller)
7019                          STBI_MALLOC,STBI_REALLOC,STBI_FREE
7020                          GIF bugfix -- seemingly never worked
7021                          STBI_NO_*, STBI_ONLY_*
7022       1.48  (2014-12-14) fix incorrectly-named assert()
7023       1.47  (2014-12-14) 1/2/4-bit PNG support, both direct and paletted (Omar Cornut & stb)
7024                          optimize PNG (ryg)
7025                          fix bug in interlaced PNG with user-specified channel count (stb)
7026       1.46  (2014-08-26)
7027               fix broken tRNS chunk (colorkey-style transparency) in non-paletted PNG
7028       1.45  (2014-08-16)
7029               fix MSVC-ARM internal compiler error by wrapping malloc
7030       1.44  (2014-08-07)
7031               various warning fixes from Ronny Chevalier
7032       1.43  (2014-07-15)
7033               fix MSVC-only compiler problem in code changed in 1.42
7034       1.42  (2014-07-09)
7035               don't define _CRT_SECURE_NO_WARNINGS (affects user code)
7036               fixes to stbi__cleanup_jpeg path
7037               added STBI_ASSERT to avoid requiring assert.h
7038       1.41  (2014-06-25)
7039               fix search&replace from 1.36 that messed up comments/error messages
7040       1.40  (2014-06-22)
7041               fix gcc struct-initialization warning
7042       1.39  (2014-06-15)
7043               fix to TGA optimization when req_comp != number of components in TGA;
7044               fix to GIF loading because BMP wasn't rewinding (whoops, no GIFs in my test suite)
7045               add support for BMP version 5 (more ignored fields)
7046       1.38  (2014-06-06)
7047               suppress MSVC warnings on integer casts truncating values
7048               fix accidental rename of 'skip' field of I/O
7049       1.37  (2014-06-04)
7050               remove duplicate typedef
7051       1.36  (2014-06-03)
7052               convert to header file single-file library
7053               if de-iphone isn't set, load iphone images color-swapped instead of returning NULL
7054       1.35  (2014-05-27)
7055               various warnings
7056               fix broken STBI_SIMD path
7057               fix bug where stbi_load_from_file no longer left file pointer in correct place
7058               fix broken non-easy path for 32-bit BMP (possibly never used)
7059               TGA optimization by Arseny Kapoulkine
7060       1.34  (unknown)
7061               use STBI_NOTUSED in stbi__resample_row_generic(), fix one more leak in tga failure case
7062       1.33  (2011-07-14)
7063               make stbi_is_hdr work in STBI_NO_HDR (as specified), minor compiler-friendly improvements
7064       1.32  (2011-07-13)
7065               support for "info" function for all supported filetypes (SpartanJ)
7066       1.31  (2011-06-20)
7067               a few more leak fixes, bug in PNG handling (SpartanJ)
7068       1.30  (2011-06-11)
7069               added ability to load files via callbacks to accommodate custom input streams (Ben Wenger)
7070               removed deprecated format-specific test/load functions
7071               removed support for installable file formats (stbi_loader) -- would have been broken for IO callbacks anyway
7072               error cases in bmp and tga give messages and don't leak (Raymond Barbiero, grisha)
7073               fix inefficiency in decoding 32-bit BMP (David Woo)
7074       1.29  (2010-08-16)
7075               various warning fixes from Aurelien Pocheville
7076       1.28  (2010-08-01)
7077               fix bug in GIF palette transparency (SpartanJ)
7078       1.27  (2010-08-01)
7079               cast-to-stbi_uc to fix warnings
7080       1.26  (2010-07-24)
7081               fix bug in file buffering for PNG reported by SpartanJ
7082       1.25  (2010-07-17)
7083               refix trans_data warning (Won Chun)
7084       1.24  (2010-07-12)
7085               perf improvements reading from files on platforms with lock-heavy fgetc()
7086               minor perf improvements for jpeg
7087               deprecated type-specific functions so we'll get feedback if they're needed
7088               attempt to fix trans_data warning (Won Chun)
7089       1.23    fixed bug in iPhone support
7090       1.22  (2010-07-10)
7091               removed image *writing* support
7092               stbi_info support from Jetro Lauha
7093               GIF support from Jean-Marc Lienher
7094               iPhone PNG-extensions from James Brown
7095               warning-fixes from Nicolas Schulz and Janez Zemva (i.e. Janez (U+017D)emva)
7096       1.21    fix use of 'stbi_uc' in header (reported by jon blow)
7097       1.20    added support for Softimage PIC, by Tom Seddon
7098       1.19    bug in interlaced PNG corruption check (found by ryg)
7099       1.18  (2008-08-02)
7100               fix a threading bug (local mutable static)
7101       1.17    support interlaced PNG
7102       1.16    major bugfix - stbi__convert_format converted one too many pixels
7103       1.15    initialize some fields for thread safety
7104       1.14    fix threadsafe conversion bug
7105               header-file-only version (#define STBI_HEADER_FILE_ONLY before including)
7106       1.13    threadsafe
7107       1.12    const qualifiers in the API
7108       1.11    Support installable IDCT, colorspace conversion routines
7109       1.10    Fixes for 64-bit (don't use "unsigned long")
7110               optimized upsampling by Fabian "ryg" Giesen
7111       1.09    Fix format-conversion for PSD code (bad global variables!)
7112       1.08    Thatcher Ulrich's PSD code integrated by Nicolas Schulz
7113       1.07    attempt to fix C++ warning/errors again
7114       1.06    attempt to fix C++ warning/errors again
7115       1.05    fix TGA loading to return correct *comp and use good luminance calc
7116       1.04    default float alpha is 1, not 255; use 'void *' for stbi_image_free
7117       1.03    bugfixes to STBI_NO_STDIO, STBI_NO_HDR
7118       1.02    support for (subset of) HDR files, float interface for preferred access to them
7119       1.01    fix bug: possible bug in handling right-side up bmps... not sure
7120               fix bug: the stbi__bmp_load() and stbi__tga_load() functions didn't work at all
7121       1.00    interface to zlib that skips zlib header
7122       0.99    correct handling of alpha in palette
7123       0.98    TGA loader by lonesock; dynamically add loaders (untested)
7124       0.97    jpeg errors on too large a file; also catch another malloc failure
7125       0.96    fix detection of invalid v value - particleman@mollyrocket forum
7126       0.95    during header scan, seek to markers in case of padding
7127       0.94    STBI_NO_STDIO to disable stdio usage; rename all #defines the same
7128       0.93    handle jpegtran output; verbose errors
7129       0.92    read 4,8,16,24,32-bit BMP files of several formats
7130       0.91    output 24-bit Windows 3.0 BMP files
7131       0.90    fix a few more warnings; bump version number to approach 1.0
7132       0.61    bugfixes due to Marc LeBlanc, Christopher Lloyd
7133       0.60    fix compiling as c++
7134       0.59    fix warnings: merge Dave Moore's -Wall fixes
7135       0.58    fix bug: zlib uncompressed mode len/nlen was wrong endian
7136       0.57    fix bug: jpg last huffman symbol before marker was >9 bits but less than 16 available
7137       0.56    fix bug: zlib uncompressed mode len vs. nlen
7138       0.55    fix bug: restart_interval not initialized to 0
7139       0.54    allow NULL for 'int *comp'
7140       0.53    fix bug in png 3->4; speedup png decoding
7141       0.52    png handles req_comp=3,4 directly; minor cleanup; jpeg comments
7142       0.51    obey req_comp requests, 1-component jpegs return as 1-component,
7143               on 'test' only check type, not whether we support this variant
7144       0.50  (2006-11-19)
7145               first released version
7146 */
7147 
7148 
7149 /*
7150 ------------------------------------------------------------------------------
7151 This software is available under 2 licenses -- choose whichever you prefer.
7152 ------------------------------------------------------------------------------
7153 ALTERNATIVE A - MIT License
7154 Copyright (c) 2017 Sean Barrett
7155 Permission is hereby granted, free of charge, to any person obtaining a copy of
7156 this software and associated documentation files (the "Software"), to deal in
7157 the Software without restriction, including without limitation the rights to
7158 use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
7159 of the Software, and to permit persons to whom the Software is furnished to do
7160 so, subject to the following conditions:
7161 The above copyright notice and this permission notice shall be included in all
7162 copies or substantial portions of the Software.
7163 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
7164 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
7165 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
7166 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
7167 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
7168 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
7169 SOFTWARE.
7170 ------------------------------------------------------------------------------
7171 ALTERNATIVE B - Public Domain (www.unlicense.org)
7172 This is free and unencumbered software released into the public domain.
7173 Anyone is free to copy, modify, publish, use, compile, sell, or distribute this
7174 software, either in source code form or as a compiled binary, for any purpose,
7175 commercial or non-commercial, and by any means.
7176 In jurisdictions that recognize copyright laws, the author or authors of this
7177 software dedicate any and all copyright interest in the software to the public
7178 domain. We make this dedication for the benefit of the public at large and to
7179 the detriment of our heirs and successors. We intend this dedication to be an
7180 overt act of relinquishment in perpetuity of all present and future rights to
7181 this software under copyright law.
7182 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
7183 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
7184 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
7185 AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
7186 ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
7187 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
7188 ------------------------------------------------------------------------------
7189 */
7190