1 /* stb_image - v2.16 - public domain image loader - http://nothings.org/stb_image.h
2                                      no warranty implied; use at your own risk
3 
4    Do this:
5       #define STB_IMAGE_IMPLEMENTATION
6    before you include this file in *one* C or C++ file to create the implementation.
7 
8    // i.e. it should look like this:
9    #include ...
10    #include ...
11    #include ...
12    #define STB_IMAGE_IMPLEMENTATION
13    #include "stb_image.h"
14 
15    You can #define STBI_ASSERT(x) before the #include to avoid using assert.h.
16    And #define STBI_MALLOC, STBI_REALLOC, and STBI_FREE to avoid using malloc,realloc,free
17 
18 
19    QUICK NOTES:
20       Primarily of interest to game developers and other people who can
21           avoid problematic images and only need the trivial interface
22 
23       JPEG baseline & progressive (12 bpc/arithmetic not supported, same as stock IJG lib)
24       PNG 1/2/4/8/16-bit-per-channel
25 
26       TGA (not sure what subset, if a subset)
27       BMP non-1bpp, non-RLE
28       PSD (composited view only, no extra channels, 8/16 bit-per-channel)
29 
30       GIF (*comp always reports as 4-channel)
31       HDR (radiance rgbE format)
32       PIC (Softimage PIC)
33       PNM (PPM and PGM binary only)
34 
35       Animated GIF still needs a proper API, but here's one way to do it:
36           http://gist.github.com/urraka/685d9a6340b26b830d49
37 
38       - decode from memory or through FILE (define STBI_NO_STDIO to remove code)
39       - decode from arbitrary I/O callbacks
40       - SIMD acceleration on x86/x64 (SSE2) and ARM (NEON)
41 
42    Full documentation under "DOCUMENTATION" below.
43 
44 
45 LICENSE
46 
47   See end of file for license information.
48 
49 RECENT REVISION HISTORY:
50 
51       2.16  (2017-07-23) all functions have 16-bit variants; optimizations; bugfixes
52       2.15  (2017-03-18) fix png-1,2,4; all Imagenet JPGs; no runtime SSE detection on GCC
53       2.14  (2017-03-03) remove deprecated STBI_JPEG_OLD; fixes for Imagenet JPGs
54       2.13  (2016-12-04) experimental 16-bit API, only for PNG so far; fixes
55       2.12  (2016-04-02) fix typo in 2.11 PSD fix that caused crashes
56       2.11  (2016-04-02) 16-bit PNGS; enable SSE2 in non-gcc x64
57                          RGB-format JPEG; remove white matting in PSD;
58                          allocate large structures on the stack;
59                          correct channel count for PNG & BMP
60       2.10  (2016-01-22) avoid warning introduced in 2.09
61       2.09  (2016-01-16) 16-bit TGA; comments in PNM files; STBI_REALLOC_SIZED
62 
63    See end of file for full revision history.
64 
65 
66  ============================    Contributors    =========================
67 
68  Image formats                          Extensions, features
69     Sean Barrett (jpeg, png, bmp)          Jetro Lauha (stbi_info)
70     Nicolas Schulz (hdr, psd)              Martin "SpartanJ" Golini (stbi_info)
71     Jonathan Dummer (tga)                  James "moose2000" Brown (iPhone PNG)
72     Jean-Marc Lienher (gif)                Ben "Disch" Wenger (io callbacks)
73     Tom Seddon (pic)                       Omar Cornut (1/2/4-bit PNG)
74     Thatcher Ulrich (psd)                  Nicolas Guillemot (vertical flip)
75     Ken Miller (pgm, ppm)                  Richard Mitton (16-bit PSD)
76     github:urraka (animated gif)           Junggon Kim (PNM comments)
77                                            Daniel Gibson (16-bit TGA)
78                                            socks-the-fox (16-bit PNG)
79                                            Jeremy Sawicki (handle all ImageNet JPGs)
80  Optimizations & bugfixes
81     Fabian "ryg" Giesen
82     Arseny Kapoulkine
83     John-Mark Allen
84 
85  Bug & warning fixes
86     Marc LeBlanc            David Woo          Guillaume George   Martins Mozeiko
87     Christpher Lloyd        Jerry Jansson      Joseph Thomson     Phil Jordan
88     Dave Moore              Roy Eltham         Hayaki Saito       Nathan Reed
89     Won Chun                Luke Graham        Johan Duparc       Nick Verigakis
90     the Horde3D community   Thomas Ruf         Ronny Chevalier    Baldur Karlsson
91     Janez Zemva             John Bartholomew   Michal Cichon      github:rlyeh
92     Jonathan Blow           Ken Hamada         Tero Hanninen      github:romigrou
93     Laurent Gomila          Cort Stratton      Sergio Gonzalez    github:svdijk
94     Aruelien Pocheville     Thibault Reuille   Cass Everitt       github:snagar
95     Ryamond Barbiero        Paul Du Bois       Engin Manap        github:Zelex
96     Michaelangel007@github  Philipp Wiesemann  Dale Weiler        github:grim210
97     Oriol Ferrer Mesia      Josh Tobin         Matthew Gregan     github:sammyhw
98     Blazej Dariusz Roszkowski                  Gregory Mullen     github:phprus
99     Christian Floisand      Kevin Schmidt                         github:poppolopoppo
100 */
101 
102 #ifndef STBI_INCLUDE_STB_IMAGE_H
103 #define STBI_INCLUDE_STB_IMAGE_H
104 
105 // DOCUMENTATION
106 //
107 // Limitations:
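//    - 16-bit-per-channel PNG is reduced to 8 bits per channel by the 8-bit
//      interface; use the stbi_load_16 functions to keep 16-bit data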
109 //    - no 12-bit-per-channel JPEG
110 //    - no JPEGs with arithmetic coding
111 //    - no 1-bit BMP
112 //    - GIF always returns *comp=4
113 //
114 // Basic usage (see HDR discussion below for HDR usage):
115 //    int x,y,n;
116 //    unsigned char *data = stbi_load(filename, &x, &y, &n, 0);
117 //    // ... process data if not NULL ...
118 //    // ... x = width, y = height, n = # 8-bit components per pixel ...
119 //    // ... replace '0' with '1'..'4' to force that many components per pixel
120 //    // ... but 'n' will always be the number that it would have been if you said 0
121 //    stbi_image_free(data)
122 //
123 // Standard parameters:
124 //    int *x                 -- outputs image width in pixels
125 //    int *y                 -- outputs image height in pixels
126 //    int *channels_in_file  -- outputs # of image components in image file
127 //    int desired_channels   -- if non-zero, # of image components requested in result
128 //
129 // The return value from an image loader is an 'unsigned char *' which points
130 // to the pixel data, or NULL on an allocation failure or if the image is
131 // corrupt or invalid. The pixel data consists of *y scanlines of *x pixels,
132 // with each pixel consisting of N interleaved 8-bit components; the first
133 // pixel pointed to is top-left-most in the image. There is no padding between
134 // image scanlines or between pixels, regardless of format. The number of
135 // components N is 'desired_channels' if desired_channels is non-zero, or
136 // *channels_in_file otherwise. If desired_channels is non-zero,
137 // *channels_in_file has the number of components that _would_ have been
138 // output otherwise. E.g. if you set desired_channels to 4, you will always
139 // get RGBA output, but you can check *channels_in_file to see if it's trivially
140 // opaque because e.g. there were only 3 channels in the source image.
141 //
142 // An output image with N components has the following components interleaved
143 // in this order in each pixel:
144 //
145 //     N=#comp     components
146 //       1           grey
147 //       2           grey, alpha
148 //       3           red, green, blue
149 //       4           red, green, blue, alpha
150 //
151 // If image loading fails for any reason, the return value will be NULL,
152 // and *x, *y, *channels_in_file will be unchanged. The function
153 // stbi_failure_reason() can be queried for an extremely brief, end-user
154 // unfriendly explanation of why the load failed. Define STBI_NO_FAILURE_STRINGS
155 // to avoid compiling these strings at all, and STBI_FAILURE_USERMSG to get slightly
156 // more user-friendly ones.
157 //
158 // Paletted PNG, BMP, GIF, and PIC images are automatically depalettized.
159 //
160 // ===========================================================================
161 //
162 // Philosophy
163 //
164 // stb libraries are designed with the following priorities:
165 //
166 //    1. easy to use
167 //    2. easy to maintain
168 //    3. good performance
169 //
170 // Sometimes I let "good performance" creep up in priority over "easy to maintain",
171 // and for best performance I may provide less-easy-to-use APIs that give higher
172 // performance, in addition to the easy to use ones. Nevertheless, it's important
173 // to keep in mind that from the standpoint of you, a client of this library,
174 // all you care about is #1 and #3, and stb libraries DO NOT emphasize #3 above all.
175 //
176 // Some secondary priorities arise directly from the first two, some of which
177 // make more explicit reasons why performance can't be emphasized.
178 //
179 //    - Portable ("ease of use")
180 //    - Small source code footprint ("easy to maintain")
181 //    - No dependencies ("ease of use")
182 //
183 // ===========================================================================
184 //
185 // I/O callbacks
186 //
187 // I/O callbacks allow you to read from arbitrary sources, like packaged
188 // files or some other source. Data read from callbacks are processed
189 // through a small internal buffer (currently 128 bytes) to try to reduce
190 // overhead.
191 //
192 // The three functions you must define are "read" (reads some bytes of data),
193 // "skip" (skips some bytes of data), "eof" (reports if the stream is at the end).
194 //
195 // ===========================================================================
196 //
197 // SIMD support
198 //
199 // The JPEG decoder will try to automatically use SIMD kernels on x86 when
200 // supported by the compiler. For ARM Neon support, you must explicitly
201 // request it.
202 //
203 // (The old do-it-yourself SIMD API is no longer supported in the current
204 // code.)
205 //
// On x86, SSE2 will automatically be used when available. With Visual C++ this
// is decided by a run-time test, and the generic C versions are used as a
// fall-back; with GCC/Clang there is no run-time test, so SSE2 is used only if
// the compiler is already targeting it (e.g. -msse2). On ARM targets,
// the typical path is to have separate builds for NEON and non-NEON devices
// (at least this is true for iOS and Android). Therefore, the NEON support is
// toggled by a build flag: define STBI_NEON to get NEON loops.
211 //
212 // If for some reason you do not want to use any of SIMD code, or if
213 // you have issues compiling it, you can disable it entirely by
214 // defining STBI_NO_SIMD.
215 //
216 // ===========================================================================
217 //
218 // HDR image support   (disable by defining STBI_NO_HDR)
219 //
220 // stb_image now supports loading HDR images in general, and currently
221 // the Radiance .HDR file format, although the support is provided
222 // generically. You can still load any file through the existing interface;
223 // if you attempt to load an HDR file, it will be automatically remapped to
224 // LDR, assuming gamma 2.2 and an arbitrary scale factor defaulting to 1;
225 // both of these constants can be reconfigured through this interface:
226 //
227 //     stbi_hdr_to_ldr_gamma(2.2f);
228 //     stbi_hdr_to_ldr_scale(1.0f);
229 //
230 // (note, do not use _inverse_ constants; stbi_image will invert them
231 // appropriately).
232 //
233 // Additionally, there is a new, parallel interface for loading files as
234 // (linear) floats to preserve the full dynamic range:
235 //
236 //    float *data = stbi_loadf(filename, &x, &y, &n, 0);
237 //
238 // If you load LDR images through this interface, those images will
239 // be promoted to floating point values, run through the inverse of
240 // constants corresponding to the above:
241 //
242 //     stbi_ldr_to_hdr_scale(1.0f);
243 //     stbi_ldr_to_hdr_gamma(2.2f);
244 //
245 // Finally, given a filename (or an open file or memory block--see header
246 // file for details) containing image data, you can query for the "most
247 // appropriate" interface to use (that is, whether the image is HDR or
248 // not), using:
249 //
250 //     stbi_is_hdr(char *filename);
251 //
252 // ===========================================================================
253 //
254 // iPhone PNG support:
255 //
256 // By default we convert iphone-formatted PNGs back to RGB, even though
257 // they are internally encoded differently. You can disable this conversion
// by calling stbi_convert_iphone_png_to_rgb(0), in which case
259 // you will always just get the native iphone "format" through (which
260 // is BGR stored in RGB).
261 //
262 // Call stbi_set_unpremultiply_on_load(1) as well to force a divide per
263 // pixel to remove any premultiplied alpha *only* if the image file explicitly
264 // says there's premultiplied data (currently only happens in iPhone images,
265 // and only if iPhone convert-to-rgb processing is on).
266 //
267 // ===========================================================================
268 //
269 // ADDITIONAL CONFIGURATION
270 //
271 //  - You can suppress implementation of any of the decoders to reduce
272 //    your code footprint by #defining one or more of the following
273 //    symbols before creating the implementation.
274 //
275 //        STBI_NO_JPEG
276 //        STBI_NO_PNG
277 //        STBI_NO_BMP
278 //        STBI_NO_PSD
279 //        STBI_NO_TGA
280 //        STBI_NO_GIF
281 //        STBI_NO_HDR
282 //        STBI_NO_PIC
283 //        STBI_NO_PNM   (.ppm and .pgm)
284 //
285 //  - You can request *only* certain decoders and suppress all other ones
286 //    (this will be more forward-compatible, as addition of new decoders
287 //    doesn't require you to disable them explicitly):
288 //
289 //        STBI_ONLY_JPEG
290 //        STBI_ONLY_PNG
291 //        STBI_ONLY_BMP
292 //        STBI_ONLY_PSD
293 //        STBI_ONLY_TGA
294 //        STBI_ONLY_GIF
295 //        STBI_ONLY_HDR
296 //        STBI_ONLY_PIC
297 //        STBI_ONLY_PNM   (.ppm and .pgm)
298 //
299 //   - If you use STBI_NO_PNG (or _ONLY_ without PNG), and you still
300 //     want the zlib decoder to be available, #define STBI_SUPPORT_ZLIB
301 //
302 
303 
304 #ifndef STBI_NO_STDIO
305 #include <stdio.h>
306 #endif // STBI_NO_STDIO
307 
308 #define STBI_VERSION 1
309 
// Channel-count constants for desired_channels. Apart from STBI_default,
// each value equals the number of interleaved components per pixel.
enum
{
   // 0: only used for desired_channels
   STBI_default    = 0,

   STBI_grey       = 1,   // grey
   STBI_grey_alpha = 2,   // grey, alpha
   STBI_rgb        = 3,   // red, green, blue
   STBI_rgb_alpha  = 4    // red, green, blue, alpha
};
319 
320 typedef unsigned char stbi_uc;
321 typedef unsigned short stbi_us;
322 
323 #ifdef __cplusplus
324 extern "C" {
325 #endif
326 
327 #ifdef STB_IMAGE_STATIC
328 #define STBIDEF static
329 #else
330 #define STBIDEF extern
331 #endif
332 
333 //////////////////////////////////////////////////////////////////////////////
334 //
335 // PRIMARY API - works on images of any type
336 //
337 
338 //
339 // load image by filename, open file, or memory buffer
340 //
341 
// Table of user-supplied I/O hooks. Every hook receives the caller's 'user'
// pointer as its first argument.
typedef struct
{
   // fill 'data' with 'size' bytes.  return number of bytes actually read
   int  (*read)(void *user, char *data, int size);

   // skip the next 'n' bytes, or 'unget' the last -n bytes if negative
   void (*skip)(void *user, int n);

   // returns nonzero if we are at end of file/data
   int  (*eof)(void *user);
} stbi_io_callbacks;
348 
349 ////////////////////////////////////
350 //
351 // 8-bits-per-channel interface
352 //
353 
354 STBIDEF stbi_uc *stbi_load_from_memory   (stbi_uc           const *buffer, int len   , int *x, int *y, int *channels_in_file, int desired_channels);
355 STBIDEF stbi_uc *stbi_load_from_callbacks(stbi_io_callbacks const *clbk  , void *user, int *x, int *y, int *channels_in_file, int desired_channels);
356 
357 #ifndef STBI_NO_STDIO
358 STBIDEF stbi_uc *stbi_load            (char const *filename, int *x, int *y, int *channels_in_file, int desired_channels);
359 STBIDEF stbi_uc *stbi_load_from_file  (FILE *f, int *x, int *y, int *channels_in_file, int desired_channels);
360 // for stbi_load_from_file, file pointer is left pointing immediately after image
361 #endif
362 
363 ////////////////////////////////////
364 //
365 // 16-bits-per-channel interface
366 //
367 
368 STBIDEF stbi_us *stbi_load_16_from_memory   (stbi_uc const *buffer, int len, int *x, int *y, int *channels_in_file, int desired_channels);
369 STBIDEF stbi_us *stbi_load_16_from_callbacks(stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *channels_in_file, int desired_channels);
370 
371 #ifndef STBI_NO_STDIO
372 STBIDEF stbi_us *stbi_load_16          (char const *filename, int *x, int *y, int *channels_in_file, int desired_channels);
373 STBIDEF stbi_us *stbi_load_from_file_16(FILE *f, int *x, int *y, int *channels_in_file, int desired_channels);
374 #endif
375 
376 ////////////////////////////////////
377 //
378 // float-per-channel interface
379 //
380 #ifndef STBI_NO_LINEAR
381    STBIDEF float *stbi_loadf_from_memory     (stbi_uc const *buffer, int len, int *x, int *y, int *channels_in_file, int desired_channels);
382    STBIDEF float *stbi_loadf_from_callbacks  (stbi_io_callbacks const *clbk, void *user, int *x, int *y,  int *channels_in_file, int desired_channels);
383 
384    #ifndef STBI_NO_STDIO
385    STBIDEF float *stbi_loadf            (char const *filename, int *x, int *y, int *channels_in_file, int desired_channels);
386    STBIDEF float *stbi_loadf_from_file  (FILE *f, int *x, int *y, int *channels_in_file, int desired_channels);
387    #endif
388 #endif
389 
390 #ifndef STBI_NO_HDR
391    STBIDEF void   stbi_hdr_to_ldr_gamma(float gamma);
392    STBIDEF void   stbi_hdr_to_ldr_scale(float scale);
393 #endif // STBI_NO_HDR
394 
395 #ifndef STBI_NO_LINEAR
396    STBIDEF void   stbi_ldr_to_hdr_gamma(float gamma);
397    STBIDEF void   stbi_ldr_to_hdr_scale(float scale);
398 #endif // STBI_NO_LINEAR
399 
400 // stbi_is_hdr is always defined, but always returns false if STBI_NO_HDR
401 STBIDEF int    stbi_is_hdr_from_callbacks(stbi_io_callbacks const *clbk, void *user);
402 STBIDEF int    stbi_is_hdr_from_memory(stbi_uc const *buffer, int len);
403 #ifndef STBI_NO_STDIO
404 STBIDEF int      stbi_is_hdr          (char const *filename);
405 STBIDEF int      stbi_is_hdr_from_file(FILE *f);
406 #endif // STBI_NO_STDIO
407 
408 
409 // get a VERY brief reason for failure
410 // NOT THREADSAFE
411 STBIDEF const char *stbi_failure_reason  (void);
412 
413 // free the loaded image -- this is just free()
414 STBIDEF void     stbi_image_free      (void *retval_from_stbi_load);
415 
416 // get image dimensions & components without fully decoding
417 STBIDEF int      stbi_info_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp);
418 STBIDEF int      stbi_info_from_callbacks(stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *comp);
419 
420 #ifndef STBI_NO_STDIO
421 STBIDEF int      stbi_info            (char const *filename,     int *x, int *y, int *comp);
422 STBIDEF int      stbi_info_from_file  (FILE *f,                  int *x, int *y, int *comp);
423 
424 #endif
425 
426 
427 
428 // for image formats that explicitly notate that they have premultiplied alpha,
429 // we just return the colors as stored in the file. set this flag to force
// unpremultiplication. results are undefined if the unpremultiply overflows.
431 STBIDEF void stbi_set_unpremultiply_on_load(int flag_true_if_should_unpremultiply);
432 
433 // indicate whether we should process iphone images back to canonical format,
434 // or just pass them through "as-is"
435 STBIDEF void stbi_convert_iphone_png_to_rgb(int flag_true_if_should_convert);
436 
437 // flip the image vertically, so the first pixel in the output array is the bottom left
438 STBIDEF void stbi_set_flip_vertically_on_load(int flag_true_if_should_flip);
439 
440 // ZLIB client - used by PNG, available for other purposes
441 
442 STBIDEF char *stbi_zlib_decode_malloc_guesssize(const char *buffer, int len, int initial_size, int *outlen);
443 STBIDEF char *stbi_zlib_decode_malloc_guesssize_headerflag(const char *buffer, int len, int initial_size, int *outlen, int parse_header);
444 STBIDEF char *stbi_zlib_decode_malloc(const char *buffer, int len, int *outlen);
445 STBIDEF int   stbi_zlib_decode_buffer(char *obuffer, int olen, const char *ibuffer, int ilen);
446 
447 STBIDEF char *stbi_zlib_decode_noheader_malloc(const char *buffer, int len, int *outlen);
448 STBIDEF int   stbi_zlib_decode_noheader_buffer(char *obuffer, int olen, const char *ibuffer, int ilen);
449 
450 
451 #ifdef __cplusplus
452 }
453 #endif
454 
455 //
456 //
457 ////   end header file   /////////////////////////////////////////////////////
458 #endif // STBI_INCLUDE_STB_IMAGE_H
459 
460 #ifdef STB_IMAGE_IMPLEMENTATION
461 
462 #if defined(STBI_ONLY_JPEG) || defined(STBI_ONLY_PNG) || defined(STBI_ONLY_BMP) \
463   || defined(STBI_ONLY_TGA) || defined(STBI_ONLY_GIF) || defined(STBI_ONLY_PSD) \
464   || defined(STBI_ONLY_HDR) || defined(STBI_ONLY_PIC) || defined(STBI_ONLY_PNM) \
465   || defined(STBI_ONLY_ZLIB)
466    #ifndef STBI_ONLY_JPEG
467    #define STBI_NO_JPEG
468    #endif
469    #ifndef STBI_ONLY_PNG
470    #define STBI_NO_PNG
471    #endif
472    #ifndef STBI_ONLY_BMP
473    #define STBI_NO_BMP
474    #endif
475    #ifndef STBI_ONLY_PSD
476    #define STBI_NO_PSD
477    #endif
478    #ifndef STBI_ONLY_TGA
479    #define STBI_NO_TGA
480    #endif
481    #ifndef STBI_ONLY_GIF
482    #define STBI_NO_GIF
483    #endif
484    #ifndef STBI_ONLY_HDR
485    #define STBI_NO_HDR
486    #endif
487    #ifndef STBI_ONLY_PIC
488    #define STBI_NO_PIC
489    #endif
490    #ifndef STBI_ONLY_PNM
491    #define STBI_NO_PNM
492    #endif
493 #endif
494 
495 #if defined(STBI_NO_PNG) && !defined(STBI_SUPPORT_ZLIB) && !defined(STBI_NO_ZLIB)
496 #define STBI_NO_ZLIB
497 #endif
498 
499 
500 #include <stdarg.h>
501 #include <stddef.h> // ptrdiff_t on osx
502 #include <stdlib.h>
503 #include <string.h>
504 #include <limits.h>
505 
506 #if !defined(STBI_NO_LINEAR) || !defined(STBI_NO_HDR)
507 #include <math.h>  // ldexp
508 #endif
509 
510 #ifndef STBI_NO_STDIO
511 #include <stdio.h>
512 #endif
513 
514 #ifndef STBI_ASSERT
515 #include <assert.h>
516 #define STBI_ASSERT(x) assert(x)
517 #endif
518 
519 
520 #ifndef _MSC_VER
521    #ifdef __cplusplus
522    #define stbi_inline inline
523    #else
524    #define stbi_inline
525    #endif
526 #else
527    #define stbi_inline __forceinline
528 #endif
529 
530 
531 #ifdef _MSC_VER
532 typedef unsigned short stbi__uint16;
533 typedef   signed short stbi__int16;
534 typedef unsigned int   stbi__uint32;
535 typedef   signed int   stbi__int32;
536 #else
537 #include <stdint.h>
538 typedef uint16_t stbi__uint16;
539 typedef int16_t  stbi__int16;
540 typedef uint32_t stbi__uint32;
541 typedef int32_t  stbi__int32;
542 #endif
543 
544 // should produce compiler error if size is wrong
545 typedef unsigned char validate_uint32[sizeof(stbi__uint32)==4 ? 1 : -1];
546 
547 #ifdef _MSC_VER
548 #define STBI_NOTUSED(v)  (void)(v)
549 #else
550 #define STBI_NOTUSED(v)  (void)sizeof(v)
551 #endif
552 
553 #ifdef _MSC_VER
554 #define STBI_HAS_LROTL
555 #endif
556 
// stbi_lrot(x,y): rotate the 32-bit unsigned value x left by y bits.
#ifdef STBI_HAS_LROTL
   #define stbi_lrot(x,y)  _lrotl(x,y)
#else
   // The right-shift count is (-(y) & 31) rather than (32 - (y)): both agree
   // for y in 1..31, but a rotate by 0 would otherwise shift a 32-bit value
   // by 32 bits, which is undefined behavior in C.
   #define stbi_lrot(x,y)  (((x) << (y)) | ((x) >> (-(y) & 31)))
#endif
562 
563 #if defined(STBI_MALLOC) && defined(STBI_FREE) && (defined(STBI_REALLOC) || defined(STBI_REALLOC_SIZED))
564 // ok
565 #elif !defined(STBI_MALLOC) && !defined(STBI_FREE) && !defined(STBI_REALLOC) && !defined(STBI_REALLOC_SIZED)
566 // ok
567 #else
568 #error "Must define all or none of STBI_MALLOC, STBI_FREE, and STBI_REALLOC (or STBI_REALLOC_SIZED)."
569 #endif
570 
571 #ifndef STBI_MALLOC
572 #define STBI_MALLOC(sz)           malloc(sz)
573 #define STBI_REALLOC(p,newsz)     realloc(p,newsz)
574 #define STBI_FREE(p)              free(p)
575 #endif
576 
577 #ifndef STBI_REALLOC_SIZED
578 #define STBI_REALLOC_SIZED(p,oldsz,newsz) STBI_REALLOC(p,newsz)
579 #endif
580 
581 // x86/x64 detection
582 #if defined(__x86_64__) || defined(_M_X64)
583 #define STBI__X64_TARGET
584 #elif defined(__i386) || defined(_M_IX86)
585 #define STBI__X86_TARGET
586 #endif
587 
588 #if defined(__GNUC__) && defined(STBI__X86_TARGET) && !defined(__SSE2__) && !defined(STBI_NO_SIMD)
589 // gcc doesn't support sse2 intrinsics unless you compile with -msse2,
590 // which in turn means it gets to use SSE2 everywhere. This is unfortunate,
591 // but previous attempts to provide the SSE2 functions with runtime
592 // detection caused numerous issues. The way architecture extensions are
593 // exposed in GCC/Clang is, sadly, not really suited for one-file libs.
594 // New behavior: if compiled with -msse2, we use SSE2 without any
595 // detection; if not, we don't use it at all.
596 #define STBI_NO_SIMD
597 #endif
598 
599 #if defined(__MINGW32__) && defined(STBI__X86_TARGET) && !defined(STBI_MINGW_ENABLE_SSE2) && !defined(STBI_NO_SIMD)
600 // Note that __MINGW32__ doesn't actually mean 32-bit, so we have to avoid STBI__X64_TARGET
601 //
602 // 32-bit MinGW wants ESP to be 16-byte aligned, but this is not in the
603 // Windows ABI and VC++ as well as Windows DLLs don't maintain that invariant.
604 // As a result, enabling SSE2 on 32-bit MinGW is dangerous when not
605 // simultaneously enabling "-mstackrealign".
606 //
607 // See https://github.com/nothings/stb/issues/81 for more information.
608 //
609 // So default to no SSE2 on 32-bit MinGW. If you've read this far and added
610 // -mstackrealign to your build settings, feel free to #define STBI_MINGW_ENABLE_SSE2.
611 #define STBI_NO_SIMD
612 #endif
613 
614 #if !defined(STBI_NO_SIMD) && (defined(STBI__X86_TARGET) || defined(STBI__X64_TARGET))
615 #define STBI_SSE2
616 #include <emmintrin.h>
617 
618 #ifdef _MSC_VER
619 
620 #if _MSC_VER >= 1400  // not VC6
621 #include <intrin.h> // __cpuid
// Runs CPUID function 1 through the MSVC __cpuid intrinsic and returns the
// fourth output word (the EDX register value).
static int stbi__cpuid3(void)
{
   int regs[4];   // filled by __cpuid; index 3 is the word returned
   __cpuid(regs, 1);
   return regs[3];
}
628 #else
// Variant for MSVC versions with _MSC_VER < 1400 (the <intrin.h> path is not
// used there): runs CPUID with EAX = 1 via inline assembly and returns the
// value left in EDX.
static int stbi__cpuid3(void)
{
   int res;
   __asm {
      mov  eax,1
      cpuid
      mov  res,edx
   }
   return res;
}
639 #endif
640 
641 #define STBI_SIMD_ALIGN(type, name) __declspec(align(16)) type name
642 
// Run-time SSE2 check for MSVC builds: returns nonzero when bit 26 of the
// CPUID feature word from stbi__cpuid3() is set.
static int stbi__sse2_available(void)
{
   const int sse2_mask = 1 << 26;
   return (stbi__cpuid3() & sse2_mask) != 0;
}
648 #else // assume GCC-style if not VC++
649 #define STBI_SIMD_ALIGN(type, name) type name __attribute__((aligned(16)))
650 
// GCC/Clang flavor of the SSE2 check: there is no run-time probe.
static int stbi__sse2_available(void)
{
   // If this code is being compiled at all on GCC/Clang, -msse2 is on, so
   // the compiler is already free to emit SSE2 instructions wherever it
   // likes -- and therefore so are we.
   const int always_available = 1;
   return always_available;
}
658 #endif
659 #endif
660 
661 // ARM NEON
662 #if defined(STBI_NO_SIMD) && defined(STBI_NEON)
663 #undef STBI_NEON
664 #endif
665 
666 #ifdef STBI_NEON
667 #include <arm_neon.h>
668 // assume GCC or Clang on ARM targets
669 #define STBI_SIMD_ALIGN(type, name) type name __attribute__((aligned(16)))
670 #endif
671 
672 #ifndef STBI_SIMD_ALIGN
673 #define STBI_SIMD_ALIGN(type, name) type name
674 #endif
675 
// FALLTHROUGH marks an intentional fall-through between switch cases. It
// expands to the fallthrough attribute on GCC 7 and newer and to nothing
// elsewhere; follow it with a ';' at the point of use.
#if defined(__GNUC__) && (__GNUC__ >= 7)
#define FALLTHROUGH __attribute__ ((fallthrough))
#else
#define FALLTHROUGH
#endif
681 
682 ///////////////////////////////////////////////
683 //
684 //  stbi__context struct and start_xxx functions
685 
686 // stbi__context structure is our basic context used by all images, so it
687 // contains all the IO context, plus some basic image information
688 typedef struct
689 {
690    stbi__uint32 img_x, img_y;
691    int img_n, img_out_n;
692 
693    stbi_io_callbacks io;
694    void *io_user_data;
695 
696    int read_from_callbacks;
697    int buflen;
698    stbi_uc buffer_start[128];
699 
700    stbi_uc *img_buffer, *img_buffer_end;
701    stbi_uc *img_buffer_original, *img_buffer_original_end;
702 } stbi__context;
703 
704 
705 static void stbi__refill_buffer(stbi__context *s);
706 
707 // initialize a memory-decode context
stbi__start_mem(stbi__context * s,stbi_uc const * buffer,int len)708 static void stbi__start_mem(stbi__context *s, stbi_uc const *buffer, int len)
709 {
710    s->io.read = NULL;
711    s->read_from_callbacks = 0;
712    s->img_buffer = s->img_buffer_original = (stbi_uc *) buffer;
713    s->img_buffer_end = s->img_buffer_original_end = (stbi_uc *) buffer+len;
714 }
715 
716 // initialize a callback-based context
stbi__start_callbacks(stbi__context * s,stbi_io_callbacks * c,void * user)717 static void stbi__start_callbacks(stbi__context *s, stbi_io_callbacks *c, void *user)
718 {
719    s->io = *c;
720    s->io_user_data = user;
721    s->buflen = sizeof(s->buffer_start);
722    s->read_from_callbacks = 1;
723    s->img_buffer_original = s->buffer_start;
724    stbi__refill_buffer(s);
725    s->img_buffer_original_end = s->img_buffer_end;
726 }
727 
728 #ifndef STBI_NO_STDIO
729 
// stbi_io_callbacks 'read' hook for stdio; 'user' is the FILE* to read from.
// Returns the number of bytes fread() actually stored in 'data'.
static int stbi__stdio_read(void *user, char *data, int size)
{
   FILE *stream = (FILE *) user;
   size_t got = fread(data, 1, size, stream);
   return (int) got;
}
734 
// stbi_io_callbacks 'skip' hook for stdio; 'user' is the FILE* to reposition.
// Moves the stream position by n bytes relative to the current position
// (n may be negative).
static void stbi__stdio_skip(void *user, int n)
{
   FILE *stream = (FILE *) user;
   int ch;

   fseek(stream, n, SEEK_CUR);

   // fseek() clears the end-of-file indicator even when it lands past the end
   // of the data, so a following feof() would wrongly claim more data is
   // available. Probe one byte to refresh the indicator, and push the byte
   // back if it was real so that no data is consumed.
   ch = fgetc(stream);
   if (ch != EOF) {
      ungetc(ch, stream);
   }
}
739 
// stbi_io_callbacks 'eof' hook for stdio; 'user' is the FILE* to query.
// Returns nonzero when no more data can be read from the stream. A stream in
// an error state is reported as ended too, because further reads from it
// cannot succeed and callers polling this hook would otherwise never stop.
static int stbi__stdio_eof(void *user)
{
   FILE *stream = (FILE *) user;
   return feof(stream) || ferror(stream);
}
744 
// Callback table that adapts a stdio FILE* (passed as the 'user' pointer) to
// the stbi_io_callbacks interface. Entries are positional and follow the
// struct's member order: read, skip, eof.
static stbi_io_callbacks stbi__stdio_callbacks =
{
   stbi__stdio_read,
   stbi__stdio_skip,
   stbi__stdio_eof,
};
751 
// Set up a context that reads from an already-open stdio stream: the stream
// is routed through the stdio callback table, with the FILE* itself serving
// as the callbacks' user pointer.
static void stbi__start_file(stbi__context *s, FILE *f)
{
   void *user = (void *) f;
   stbi__start_callbacks(s, &stbi__stdio_callbacks, user);
}
756 
757 //static void stop_file(stbi__context *s) { }
758 
759 #endif // !STBI_NO_STDIO
760 
// Restore the read window that was recorded when the context was started.
static void stbi__rewind(stbi__context *s)
{
   // Conceptually this SHOULD go back to the very beginning of the stream,
   // but returning to the beginning of the initial buffer is enough: rewind
   // is only used after a format 'test', which never looks at more than
   // 92 bytes.
   s->img_buffer_end = s->img_buffer_original_end;
   s->img_buffer     = s->img_buffer_original;
}
769 
// Channel-order tags (presumably the values stored in
// stbi__result_info.channel_order -- confirm against the decoders).
enum
{
   STBI_ORDER_RGB = 0,
   STBI_ORDER_BGR = 1
};
775 
// Extra details a decoder reports about its result through the
// stbi__result_info* argument of the stbi__*_load functions.
typedef struct
{
   int bits_per_channel;   // bits per channel
   int num_channels;       // number of channels
   int channel_order;      // NOTE(review): presumably an STBI_ORDER_* value -- confirm
} stbi__result_info;
782 
// Forward declarations of the per-format decoder entry points.
// Each format can be compiled out by defining its STBI_NO_<FORMAT> macro.
//   stbi__<fmt>_test  - format probe; stbi__load_main calls these to pick a decoder
//   stbi__<fmt>_load  - decoder; returns the decoded image (float data for HDR)
//   stbi__<fmt>_info  - NOTE(review): presumably reports dimensions/channels without decoding -- confirm
// The PSD loader additionally takes the requested bits per channel (bpc).
#ifndef STBI_NO_JPEG
static int      stbi__jpeg_test(stbi__context *s);
static void    *stbi__jpeg_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri);
static int      stbi__jpeg_info(stbi__context *s, int *x, int *y, int *comp);
#endif

#ifndef STBI_NO_PNG
static int      stbi__png_test(stbi__context *s);
static void    *stbi__png_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri);
static int      stbi__png_info(stbi__context *s, int *x, int *y, int *comp);
#endif

#ifndef STBI_NO_BMP
static int      stbi__bmp_test(stbi__context *s);
static void    *stbi__bmp_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri);
static int      stbi__bmp_info(stbi__context *s, int *x, int *y, int *comp);
#endif

#ifndef STBI_NO_TGA
static int      stbi__tga_test(stbi__context *s);
static void    *stbi__tga_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri);
static int      stbi__tga_info(stbi__context *s, int *x, int *y, int *comp);
#endif

#ifndef STBI_NO_PSD
static int      stbi__psd_test(stbi__context *s);
static void    *stbi__psd_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri, int bpc);
static int      stbi__psd_info(stbi__context *s, int *x, int *y, int *comp);
#endif

#ifndef STBI_NO_HDR
static int      stbi__hdr_test(stbi__context *s);
static float   *stbi__hdr_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri);
static int      stbi__hdr_info(stbi__context *s, int *x, int *y, int *comp);
#endif

#ifndef STBI_NO_PIC
static int      stbi__pic_test(stbi__context *s);
static void    *stbi__pic_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri);
static int      stbi__pic_info(stbi__context *s, int *x, int *y, int *comp);
#endif

#ifndef STBI_NO_GIF
static int      stbi__gif_test(stbi__context *s);
static void    *stbi__gif_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri);
static int      stbi__gif_info(stbi__context *s, int *x, int *y, int *comp);
#endif

#ifndef STBI_NO_PNM
static int      stbi__pnm_test(stbi__context *s);
static void    *stbi__pnm_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri);
static int      stbi__pnm_info(stbi__context *s, int *x, int *y, int *comp);
#endif
836 
// Message recorded by the most recent stbi__err call. It has static storage,
// so it is NULL until an error has been recorded.
// this is not threadsafe
static const char *stbi__g_failure_reason;

// Returns the most recently recorded failure message (a single global shared
// by all callers, hence not threadsafe).
STBIDEF const char *stbi_failure_reason(void)
{
   return stbi__g_failure_reason;
}
844 
// Records 'str' as the current failure reason and returns 0, so a caller can
// report failure with a single "return stbi__err(...)" statement.
// The pointer itself is stored; the string is not copied.
static int stbi__err(const char *str)
{
   stbi__g_failure_reason = str;
   return 0;
}
850 
// Single allocation entry point: forwards to the STBI_MALLOC macro, which
// users may override before including this file.
static void *stbi__malloc(size_t size)
{
    return STBI_MALLOC(size);
}
855 
856 // stb_image uses ints pervasively, including for offset calculations.
857 // therefore the largest decoded image size we can support with the
858 // current code, even on 64-bit targets, is INT_MAX. this is not a
859 // significant limitation for the intended use case.
860 //
861 // we do, however, need to make sure our size calculations don't
862 // overflow. hence a few helper functions for size calculations that
863 // multiply integers together, making sure that they're non-negative
864 // and no overflow occurs.
865 
// returns 1 if "a + b" is a valid size, 0 otherwise.
// the sum is valid when neither term is negative and a + b does not
// overflow an int.
static int stbi__addsizes_valid(int a, int b)
{
   // negative terms are considered invalid -- this applies to both operands
   if (a < 0 || b < 0) return 0;
   // now 0 <= b <= INT_MAX, hence also
   // 0 <= INT_MAX - b <= INT_MAX.
   // And "a + b <= INT_MAX" (which might overflow) is the
   // same as a <= INT_MAX - b (no overflow)
   return a <= INT_MAX - b;
}
877 
// overflow-safe validity check for the product of two sizes.
// yields 1 when both factors are non-negative and a*b fits in an int,
// and 0 otherwise.
static int stbi__mul2sizes_valid(int a, int b)
{
   int largest_a;
   if (a < 0) return 0;
   if (b < 0) return 0;
   if (b == 0) return 1; // a product with zero cannot overflow (and we must not divide by it)
   // dividing the limit instead of multiplying the operands keeps the test itself overflow-free
   largest_a = INT_MAX / b;
   return a <= largest_a;
}
887 
// validity check for the size expression "a*b + add": yields 1 when the
// helpers accept every step (no negative operands, no int overflow), else 0.
static int stbi__mad2sizes_valid(int a, int b, int add)
{
   // a*b may only be evaluated once the product is known not to overflow
   if (!stbi__mul2sizes_valid(a, b)) return 0;
   return stbi__addsizes_valid(a*b, add);
}
893 
// validity check for the size expression "a*b*c + add": yields 1 when the
// helpers accept every step (no negative operands, no int overflow), else 0.
static int stbi__mad3sizes_valid(int a, int b, int c, int add)
{
   // each partial product is validated before it is actually computed
   if (!stbi__mul2sizes_valid(a, b)) return 0;
   if (!stbi__mul2sizes_valid(a*b, c)) return 0;
   return stbi__addsizes_valid(a*b*c, add);
}
900 
// validity check for the size expression "a*b*c*d + add": yields 1 when the
// helpers accept every step (no negative operands, no int overflow), else 0.
static int stbi__mad4sizes_valid(int a, int b, int c, int d, int add)
{
   // each partial product is validated before it is actually computed
   if (!stbi__mul2sizes_valid(a, b)) return 0;
   if (!stbi__mul2sizes_valid(a*b, c)) return 0;
   if (!stbi__mul2sizes_valid(a*b*c, d)) return 0;
   return stbi__addsizes_valid(a*b*c*d, add);
}
907 
908 // mallocs with size overflow checking
stbi__malloc_mad2(int a,int b,int add)909 static void *stbi__malloc_mad2(int a, int b, int add)
910 {
911    if (!stbi__mad2sizes_valid(a, b, add)) return NULL;
912    return stbi__malloc(a*b + add);
913 }
914 
stbi__malloc_mad3(int a,int b,int c,int add)915 static void *stbi__malloc_mad3(int a, int b, int c, int add)
916 {
917    if (!stbi__mad3sizes_valid(a, b, c, add)) return NULL;
918    return stbi__malloc(a*b*c + add);
919 }
920 
stbi__malloc_mad4(int a,int b,int c,int d,int add)921 static void *stbi__malloc_mad4(int a, int b, int c, int d, int add)
922 {
923    if (!stbi__mad4sizes_valid(a, b, c, d, add)) return NULL;
924    return stbi__malloc(a*b*c*d + add);
925 }
926 
// stbi__err - error
// stbi__errpf - error returning pointer to float
// stbi__errpuc - error returning pointer to unsigned char
//
// From here on stbi__err is a two-argument macro taking two alternative
// messages (x, y) for the same error. Build flags choose what happens:
//   STBI_NO_FAILURE_STRINGS - nothing is recorded, the macro is just 0
//   STBI_FAILURE_USERMSG    - the second message (y) is recorded
//   otherwise               - the first message (x) is recorded
// A function-like macro is not re-expanded inside its own expansion, so the
// inner stbi__err(...) is a call to the one-argument function of that name.

#ifdef STBI_NO_FAILURE_STRINGS
   #define stbi__err(x,y)  0
#elif defined(STBI_FAILURE_USERMSG)
   #define stbi__err(x,y)  stbi__err(y)
#else
   #define stbi__err(x,y)  stbi__err(x)
#endif

// the "?NULL:NULL" conditional exists only to evaluate stbi__err for its side
// effect; the resulting pointer is NULL either way
#define stbi__errpf(x,y)   ((float *)(size_t) (stbi__err(x,y)?NULL:NULL))
#define stbi__errpuc(x,y)  ((unsigned char *)(size_t) (stbi__err(x,y)?NULL:NULL))
941 
// Releases an image buffer returned by one of the stbi_load* functions by
// handing it to the STBI_FREE macro.
STBIDEF void stbi_image_free(void *retval_from_stbi_load)
{
   STBI_FREE(retval_from_stbi_load);
}
946 
// Forward declarations of the 8-bit <-> float converters defined further down.
// Each is compiled only when its feature is enabled.
#ifndef STBI_NO_LINEAR
static float   *stbi__ldr_to_hdr(stbi_uc *data, int x, int y, int comp);
#endif

#ifndef STBI_NO_HDR
static stbi_uc *stbi__hdr_to_ldr(float   *data, int x, int y, int comp);
#endif
954 
// Global flag (off by default): when nonzero, the post-processing helpers
// flip every decoded image vertically before returning it.
static int stbi__vertically_flip_on_load = 0;

// Sets the global flip flag; it applies to all subsequent loads.
STBIDEF void stbi_set_flip_vertically_on_load(int flag_true_if_should_flip)
{
    stbi__vertically_flip_on_load = flag_true_if_should_flip;
}
961 
// Probes the stream for each compiled-in format in turn and hands it to the
// first decoder whose test succeeds.
//   s           input stream
//   x, y, comp  forwarded to the decoder; also read back on the HDR path
//   req_comp    requested channel count; on the HDR path 0 means "use *comp"
//   ri          out: reset here (8 bits per channel, RGB order, 0 channels)
//               and then passed on to the decoder
//   bpc         forwarded to the PSD decoder only
// Returns whatever the chosen decoder returns. If no test matches, reports an
// "unknown image type" error through stbi__errpuc and returns NULL.
static void *stbi__load_main(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri, int bpc)
{
   memset(ri, 0, sizeof(*ri)); // make sure it's initialized if we add new fields
   ri->bits_per_channel = 8; // default is 8 so most paths don't have to be changed
   ri->channel_order = STBI_ORDER_RGB; // all current input & output are this, but this is here so we can add BGR order
   ri->num_channels = 0;

   #ifndef STBI_NO_JPEG
   if (stbi__jpeg_test(s)) return stbi__jpeg_load(s,x,y,comp,req_comp, ri);
   #endif
   #ifndef STBI_NO_PNG
   if (stbi__png_test(s))  return stbi__png_load(s,x,y,comp,req_comp, ri);
   #endif
   #ifndef STBI_NO_BMP
   if (stbi__bmp_test(s))  return stbi__bmp_load(s,x,y,comp,req_comp, ri);
   #endif
   #ifndef STBI_NO_GIF
   if (stbi__gif_test(s))  return stbi__gif_load(s,x,y,comp,req_comp, ri);
   #endif
   #ifndef STBI_NO_PSD
   if (stbi__psd_test(s))  return stbi__psd_load(s,x,y,comp,req_comp, ri, bpc);
   #endif
   #ifndef STBI_NO_PIC
   if (stbi__pic_test(s))  return stbi__pic_load(s,x,y,comp,req_comp, ri);
   #endif
   #ifndef STBI_NO_PNM
   if (stbi__pnm_test(s))  return stbi__pnm_load(s,x,y,comp,req_comp, ri);
   #endif

   #ifndef STBI_NO_HDR
   if (stbi__hdr_test(s)) {
      // HDR files decode to floats; convert to 8-bit so this path returns the
      // same kind of data as the others (stbi__hdr_to_ldr passes a NULL input through)
      float *hdr = stbi__hdr_load(s, x,y,comp,req_comp, ri);
      return stbi__hdr_to_ldr(hdr, *x, *y, req_comp ? req_comp : *comp);
   }
   #endif

   #ifndef STBI_NO_TGA
   // test tga last because it's a crappy test!
   if (stbi__tga_test(s))
      return stbi__tga_load(s,x,y,comp,req_comp, ri);
   #endif

   return stbi__errpuc("unknown image type", "Image not of any known type, or corrupt");
}
1006 
stbi__convert_16_to_8(stbi__uint16 * orig,int w,int h,int channels)1007 static stbi_uc *stbi__convert_16_to_8(stbi__uint16 *orig, int w, int h, int channels)
1008 {
1009    int i;
1010    int img_len = w * h * channels;
1011    stbi_uc *reduced;
1012 
1013    reduced = (stbi_uc *) stbi__malloc(img_len);
1014    if (reduced == NULL) return stbi__errpuc("outofmem", "Out of memory");
1015 
1016    for (i = 0; i < img_len; ++i)
1017       reduced[i] = (stbi_uc)((orig[i] >> 8) & 0xFF); // top half of each byte is sufficient approx of 16->8 bit scaling
1018 
1019    STBI_FREE(orig);
1020    return reduced;
1021 }
1022 
stbi__convert_8_to_16(stbi_uc * orig,int w,int h,int channels)1023 static stbi__uint16 *stbi__convert_8_to_16(stbi_uc *orig, int w, int h, int channels)
1024 {
1025    int i;
1026    int img_len = w * h * channels;
1027    stbi__uint16 *enlarged;
1028 
1029    enlarged = (stbi__uint16 *) stbi__malloc(img_len*2);
1030    if (enlarged == NULL) return (stbi__uint16 *) stbi__errpuc("outofmem", "Out of memory");
1031 
1032    for (i = 0; i < img_len; ++i)
1033       enlarged[i] = (stbi__uint16)((orig[i] << 8) + orig[i]); // replicate to high and low byte, maps 0->0, 255->0xffff
1034 
1035    STBI_FREE(orig);
1036    return enlarged;
1037 }
1038 
stbi__vertical_flip(void * image,int w,int h,int bytes_per_pixel)1039 static void stbi__vertical_flip(void *image, int w, int h, int bytes_per_pixel)
1040 {
1041    int row;
1042    size_t bytes_per_row = (size_t)w * bytes_per_pixel;
1043    stbi_uc temp[2048];
1044    stbi_uc *bytes = (stbi_uc *)image;
1045 
1046    for (row = 0; row < (h>>1); row++) {
1047       stbi_uc *row0 = bytes + row*bytes_per_row;
1048       stbi_uc *row1 = bytes + (h - row - 1)*bytes_per_row;
1049       // swap row0 with row1
1050       size_t bytes_left = bytes_per_row;
1051       while (bytes_left) {
1052          size_t bytes_copy = (bytes_left < sizeof(temp)) ? bytes_left : sizeof(temp);
1053          memcpy(temp, row0, bytes_copy);
1054          memcpy(row0, row1, bytes_copy);
1055          memcpy(row1, temp, bytes_copy);
1056          row0 += bytes_copy;
1057          row1 += bytes_copy;
1058          bytes_left -= bytes_copy;
1059       }
1060    }
1061 }
1062 
// Loads an image through stbi__load_main and normalizes it to 8 bits per channel.
//   s           input stream
//   x, y, comp  forwarded to the decoder; read back here for the conversion and flip
//   req_comp    requested channel count; 0 means "use *comp"
// 16-bit results are reduced to 8 bits, and the image is flipped vertically
// when the global flip flag is set.
// Returns the pixel data, or NULL if decoding or the bit-depth conversion fails.
static unsigned char *stbi__load_and_postprocess_8bit(stbi__context *s, int *x, int *y, int *comp, int req_comp)
{
   stbi__result_info ri;
   void *result = stbi__load_main(s, x, y, comp, req_comp, &ri, 8);

   if (result == NULL)
      return NULL;

   if (ri.bits_per_channel != 8) {
      STBI_ASSERT(ri.bits_per_channel == 16);
      result = stbi__convert_16_to_8((stbi__uint16 *) result, *x, *y, req_comp == 0 ? *comp : req_comp);
      // the conversion allocates a new buffer and can fail; stop here so the
      // flip below never operates on a NULL image
      if (result == NULL)
         return NULL;
      ri.bits_per_channel = 8;
   }

   // @TODO: move stbi__convert_format to here

   if (stbi__vertically_flip_on_load) {
      int channels = req_comp ? req_comp : *comp;
      stbi__vertical_flip(result, *x, *y, channels * sizeof(stbi_uc));
   }

   return (unsigned char *) result;
}
1086 
stbi__load_and_postprocess_16bit(stbi__context * s,int * x,int * y,int * comp,int req_comp)1087 static stbi__uint16 *stbi__load_and_postprocess_16bit(stbi__context *s, int *x, int *y, int *comp, int req_comp)
1088 {
1089    stbi__result_info ri;
1090    void *result = stbi__load_main(s, x, y, comp, req_comp, &ri, 16);
1091 
1092    if (result == NULL)
1093       return NULL;
1094 
1095    if (ri.bits_per_channel != 16) {
1096       STBI_ASSERT(ri.bits_per_channel == 8);
1097       result = stbi__convert_8_to_16((stbi_uc *) result, *x, *y, req_comp == 0 ? *comp : req_comp);
1098       ri.bits_per_channel = 16;
1099    }
1100 
1101    // @TODO: move stbi__convert_format16 to here
1102    // @TODO: special case RGB-to-Y (and RGBA-to-YA) for 8-bit-to-16-bit case to keep more precision
1103 
1104    if (stbi__vertically_flip_on_load) {
1105       int channels = req_comp ? req_comp : *comp;
1106       stbi__vertical_flip(result, *x, *y, channels * sizeof(stbi__uint16));
1107    }
1108 
1109    return (stbi__uint16 *) result;
1110 }
1111 
1112 #ifndef STBI_NO_HDR
stbi__float_postprocess(float * result,int * x,int * y,int * comp,int req_comp)1113 static void stbi__float_postprocess(float *result, int *x, int *y, int *comp, int req_comp)
1114 {
1115    if (stbi__vertically_flip_on_load && result != NULL) {
1116       int channels = req_comp ? req_comp : *comp;
1117       stbi__vertical_flip(result, *x, *y, channels * sizeof(float));
1118    }
1119 }
1120 #endif
1121 
1122 #ifndef STBI_NO_STDIO
1123 
// Opens a file, using fopen_s on MSVC 2005 and later and plain fopen
// everywhere else. Returns the stream, or a null pointer on failure.
static FILE *stbi__fopen(char const *filename, char const *mode)
{
#if defined(_MSC_VER) && _MSC_VER >= 1400
   FILE *handle;
   if (fopen_s(&handle, filename, mode) != 0)
      return 0;
   return handle;
#else
   return fopen(filename, mode);
#endif
}
1135 
1136 
stbi_load(char const * filename,int * x,int * y,int * comp,int req_comp)1137 STBIDEF stbi_uc *stbi_load(char const *filename, int *x, int *y, int *comp, int req_comp)
1138 {
1139    FILE *f = stbi__fopen(filename, "rb");
1140    unsigned char *result;
1141    if (!f) return stbi__errpuc("can't fopen", "Unable to open file");
1142    result = stbi_load_from_file(f,x,y,comp,req_comp);
1143    fclose(f);
1144    return result;
1145 }
1146 
stbi_load_from_file(FILE * f,int * x,int * y,int * comp,int req_comp)1147 STBIDEF stbi_uc *stbi_load_from_file(FILE *f, int *x, int *y, int *comp, int req_comp)
1148 {
1149    unsigned char *result;
1150    stbi__context s;
1151    stbi__start_file(&s,f);
1152    result = stbi__load_and_postprocess_8bit(&s,x,y,comp,req_comp);
1153    if (result) {
1154       // need to 'unget' all the characters in the IO buffer
1155       fseek(f, - (int) (s.img_buffer_end - s.img_buffer), SEEK_CUR);
1156    }
1157    return result;
1158 }
1159 
stbi_load_from_file_16(FILE * f,int * x,int * y,int * comp,int req_comp)1160 STBIDEF stbi__uint16 *stbi_load_from_file_16(FILE *f, int *x, int *y, int *comp, int req_comp)
1161 {
1162    stbi__uint16 *result;
1163    stbi__context s;
1164    stbi__start_file(&s,f);
1165    result = stbi__load_and_postprocess_16bit(&s,x,y,comp,req_comp);
1166    if (result) {
1167       // need to 'unget' all the characters in the IO buffer
1168       fseek(f, - (int) (s.img_buffer_end - s.img_buffer), SEEK_CUR);
1169    }
1170    return result;
1171 }
1172 
stbi_load_16(char const * filename,int * x,int * y,int * comp,int req_comp)1173 STBIDEF stbi_us *stbi_load_16(char const *filename, int *x, int *y, int *comp, int req_comp)
1174 {
1175    FILE *f = stbi__fopen(filename, "rb");
1176    stbi__uint16 *result;
1177    if (!f) return (stbi_us *) stbi__errpuc("can't fopen", "Unable to open file");
1178    result = stbi_load_from_file_16(f,x,y,comp,req_comp);
1179    fclose(f);
1180    return result;
1181 }
1182 
1183 
1184 #endif //!STBI_NO_STDIO
1185 
// Decodes an image held in memory ('len' bytes starting at 'buffer') through
// the 16-bit-per-channel loading path.
// Returns the pixel data, or NULL on failure.
STBIDEF stbi_us *stbi_load_16_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *channels_in_file, int desired_channels)
{
   stbi__context s;
   stbi__start_mem(&s,buffer,len);
   return stbi__load_and_postprocess_16bit(&s,x,y,channels_in_file,desired_channels);
}
1192 
// Decodes an image supplied through user I/O callbacks ('clbk', with 'user'
// as their context pointer) through the 16-bit-per-channel loading path.
// Returns the pixel data, or NULL on failure.
STBIDEF stbi_us *stbi_load_16_from_callbacks(stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *channels_in_file, int desired_channels)
{
   stbi__context s;
   stbi__start_callbacks(&s, (stbi_io_callbacks *)clbk, user);
   return stbi__load_and_postprocess_16bit(&s,x,y,channels_in_file,desired_channels);
}
1199 
// Decodes an image held in memory ('len' bytes starting at 'buffer') through
// the 8-bit-per-channel loading path.
// Returns the pixel data, or NULL on failure.
STBIDEF stbi_uc *stbi_load_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp)
{
   stbi__context s;
   stbi__start_mem(&s,buffer,len);
   return stbi__load_and_postprocess_8bit(&s,x,y,comp,req_comp);
}
1206 
// Decodes an image supplied through user I/O callbacks ('clbk', with 'user'
// as their context pointer) through the 8-bit-per-channel loading path.
// Returns the pixel data, or NULL on failure.
STBIDEF stbi_uc *stbi_load_from_callbacks(stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *comp, int req_comp)
{
   stbi__context s;
   stbi__start_callbacks(&s, (stbi_io_callbacks *) clbk, user);
   return stbi__load_and_postprocess_8bit(&s,x,y,comp,req_comp);
}
1213 
1214 #ifndef STBI_NO_LINEAR
// Loads an image as float data.
// When HDR support is compiled in and the stream passes the HDR test, the
// HDR decoder's float output is post-processed and returned as is (NULL on
// failure). Everything else goes through the 8-bit loader and is then
// converted with stbi__ldr_to_hdr.
//   req_comp  requested channel count; 0 means "use *comp"
// If the 8-bit loader fails, an "unknown image type" error is reported and
// NULL is returned.
static float *stbi__loadf_main(stbi__context *s, int *x, int *y, int *comp, int req_comp)
{
   unsigned char *ldr;
   #ifndef STBI_NO_HDR
   if (stbi__hdr_test(s)) {
      stbi__result_info info;
      float *pixels = stbi__hdr_load(s,x,y,comp,req_comp, &info);
      if (pixels != NULL)
         stbi__float_postprocess(pixels,x,y,comp,req_comp);
      return pixels;
   }
   #endif
   ldr = stbi__load_and_postprocess_8bit(s, x, y, comp, req_comp);
   if (ldr == NULL)
      return stbi__errpf("unknown image type", "Image not of any known type, or corrupt");
   return stbi__ldr_to_hdr(ldr, *x, *y, req_comp ? req_comp : *comp);
}
1232 
// Decodes an image held in memory ('len' bytes starting at 'buffer') as
// float data via stbi__loadf_main. Returns NULL on failure.
STBIDEF float *stbi_loadf_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp)
{
   stbi__context s;
   stbi__start_mem(&s,buffer,len);
   return stbi__loadf_main(&s,x,y,comp,req_comp);
}
1239 
// Decodes an image supplied through user I/O callbacks ('clbk', with 'user'
// as their context pointer) as float data via stbi__loadf_main.
// Returns NULL on failure.
STBIDEF float *stbi_loadf_from_callbacks(stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *comp, int req_comp)
{
   stbi__context s;
   stbi__start_callbacks(&s, (stbi_io_callbacks *) clbk, user);
   return stbi__loadf_main(&s,x,y,comp,req_comp);
}
1246 
1247 #ifndef STBI_NO_STDIO
stbi_loadf(char const * filename,int * x,int * y,int * comp,int req_comp)1248 STBIDEF float *stbi_loadf(char const *filename, int *x, int *y, int *comp, int req_comp)
1249 {
1250    float *result;
1251    FILE *f = stbi__fopen(filename, "rb");
1252    if (!f) return stbi__errpf("can't fopen", "Unable to open file");
1253    result = stbi_loadf_from_file(f,x,y,comp,req_comp);
1254    fclose(f);
1255    return result;
1256 }
1257 
// Decodes an image from an already opened FILE as float data via
// stbi__loadf_main. Returns NULL on failure.
// Unlike stbi_load_from_file, this function does not seek the file back
// after decoding.
STBIDEF float *stbi_loadf_from_file(FILE *f, int *x, int *y, int *comp, int req_comp)
{
   stbi__context s;
   stbi__start_file(&s,f);
   return stbi__loadf_main(&s,x,y,comp,req_comp);
}
1264 #endif // !STBI_NO_STDIO
1265 
1266 #endif // !STBI_NO_LINEAR
1267 
// the is-hdr-or-not queries are defined independent of whether STBI_NO_LINEAR
// is defined, for API simplicity; if STBI_NO_HDR is defined, they always
// report false!
1271 
// Reports whether the image held in memory ('len' bytes starting at 'buffer')
// passes the HDR format test. Returns the result of stbi__hdr_test, or 0
// unconditionally when HDR support is compiled out (STBI_NO_HDR).
STBIDEF int stbi_is_hdr_from_memory(stbi_uc const *buffer, int len)
{
   #ifndef STBI_NO_HDR
   stbi__context s;
   stbi__start_mem(&s,buffer,len);
   return stbi__hdr_test(&s);
   #else
   // arguments are unused in this configuration; silence compiler warnings
   STBI_NOTUSED(buffer);
   STBI_NOTUSED(len);
   return 0;
   #endif
}
1284 
1285 #ifndef STBI_NO_STDIO
stbi_is_hdr(char const * filename)1286 STBIDEF int      stbi_is_hdr          (char const *filename)
1287 {
1288    FILE *f = stbi__fopen(filename, "rb");
1289    int result=0;
1290    if (f) {
1291       result = stbi_is_hdr_from_file(f);
1292       fclose(f);
1293    }
1294    return result;
1295 }
1296 
stbi_is_hdr_from_file(FILE * f)1297 STBIDEF int      stbi_is_hdr_from_file(FILE *f)
1298 {
1299    #ifndef STBI_NO_HDR
1300    stbi__context s;
1301    stbi__start_file(&s,f);
1302    return stbi__hdr_test(&s);
1303    #else
1304    STBI_NOTUSED(f);
1305    return 0;
1306    #endif
1307 }
1308 #endif // !STBI_NO_STDIO
1309 
// Reports whether the image supplied through user I/O callbacks ('clbk', with
// 'user' as their context pointer) passes the HDR format test. Returns the
// result of stbi__hdr_test, or 0 unconditionally when HDR support is compiled
// out (STBI_NO_HDR).
STBIDEF int      stbi_is_hdr_from_callbacks(stbi_io_callbacks const *clbk, void *user)
{
   #ifndef STBI_NO_HDR
   stbi__context s;
   stbi__start_callbacks(&s, (stbi_io_callbacks *) clbk, user);
   return stbi__hdr_test(&s);
   #else
   // arguments are unused in this configuration; silence compiler warnings
   STBI_NOTUSED(clbk);
   STBI_NOTUSED(user);
   return 0;
   #endif
}
1322 
#ifndef STBI_NO_LINEAR
// Global parameters of the 8-bit -> float conversion in stbi__ldr_to_hdr:
// non-alpha samples become pow(value/255, gamma) * scale.
static float stbi__l2h_gamma=2.2f, stbi__l2h_scale=1.0f;

// Setters for the two conversion parameters; they affect all later conversions.
STBIDEF void   stbi_ldr_to_hdr_gamma(float gamma) { stbi__l2h_gamma = gamma; }
STBIDEF void   stbi_ldr_to_hdr_scale(float scale) { stbi__l2h_scale = scale; }
#endif
1329 
// Global parameters of the float -> 8-bit conversion in stbi__hdr_to_ldr.
// Both are stored as reciprocals (the "_i" suffix) of the values passed to
// the setters.
static float stbi__h2l_gamma_i=1.0f/2.2f, stbi__h2l_scale_i=1.0f;

// Setters for the two conversion parameters; each stores 1/argument, so the
// argument is used as a divisor here.
STBIDEF void   stbi_hdr_to_ldr_gamma(float gamma) { stbi__h2l_gamma_i = 1/gamma; }
STBIDEF void   stbi_hdr_to_ldr_scale(float scale) { stbi__h2l_scale_i = 1/scale; }
1334 
1335 
1336 //////////////////////////////////////////////////////////////////////////////
1337 //
1338 // Common code used by all image loaders
1339 //
1340 
// Scan modes with values 0, 1 and 2.
// NOTE(review): presumably passed to the format parsers to select how much of
// a file to process (full load / type check only / header only) -- confirm
// against the parsers that take them.
enum
{
   STBI__SCAN_load=0,
   STBI__SCAN_type,
   STBI__SCAN_header
};
1347 
// Refills the context's buffer by asking the read callback for up to
// s->buflen bytes, then points the read cursor at the start of the buffer.
// When the callback delivers nothing, the stream is treated as finished:
// callback reading is switched off and the buffer is made to hold a single
// zero byte, so the cursor always points at readable memory.
static void stbi__refill_buffer(stbi__context *s)
{
   int got = (s->io.read)(s->io_user_data,(char*)s->buffer_start,s->buflen);

   s->img_buffer = s->buffer_start;
   if (got != 0) {
      s->img_buffer_end = s->buffer_start + got;
      return;
   }

   // at end of file, treat same as if from memory, but need to handle case
   // where s->img_buffer isn't pointing to safe memory, e.g. 0-byte file
   s->read_from_callbacks = 0;
   s->img_buffer_end = s->buffer_start+1;
   *s->img_buffer = 0;
}
1363 
stbi__get8(stbi__context * s)1364 stbi_inline static stbi_uc stbi__get8(stbi__context *s)
1365 {
1366    if (s->img_buffer < s->img_buffer_end)
1367       return *s->img_buffer++;
1368    if (s->read_from_callbacks) {
1369       stbi__refill_buffer(s);
1370       return *s->img_buffer++;
1371    }
1372    return 0;
1373 }
1374 
// Reports whether the stream has been read to its end (nonzero) or not (0).
// For callback-driven streams the eof callback is consulted first; for
// memory streams only the buffer cursor is examined.
stbi_inline static int stbi__at_eof(stbi__context *s)
{
   if (s->io.read) {
      // the underlying source still has data, so we cannot be at the end
      if (!(s->io.eof)(s->io_user_data)) return 0;
      // if feof() is true, check if buffer = end
      // special case: we've only got the special 0 character at the end
      if (s->read_from_callbacks == 0) return 1;
   }

   return s->img_buffer >= s->img_buffer_end;
}
1386 
// Advances the read position by n bytes.
// A negative n moves the cursor to the end of the buffered data instead.
// For callback-driven streams, when n exceeds the bytes still buffered, the
// buffer is marked fully consumed and the skip callback is asked to skip the
// remainder in the underlying source.
static void stbi__skip(stbi__context *s, int n)
{
   if (n < 0) {
      s->img_buffer = s->img_buffer_end;
      return;
   }
   if (s->io.read) {
      int blen = (int) (s->img_buffer_end - s->img_buffer); // bytes still buffered
      if (blen < n) {
         s->img_buffer = s->img_buffer_end;
         (s->io.skip)(s->io_user_data, n - blen);
         return;
      }
   }
   // the skip is applied to the cursor directly; no bounds check is done here
   s->img_buffer += n;
}
1403 
// Reads exactly n bytes from the stream into 'buffer'.
// Returns 1 when all n bytes were obtained and 0 otherwise.
// For callback-driven streams with fewer than n bytes buffered, the buffered
// bytes are copied first and the remainder is read straight into 'buffer'
// through the read callback, bypassing the context's own buffer.
static int stbi__getn(stbi__context *s, stbi_uc *buffer, int n)
{
   if (s->io.read) {
      int buffered = (int) (s->img_buffer_end - s->img_buffer);
      if (buffered < n) {
         int missing = n - buffered;
         int got;

         memcpy(buffer, s->img_buffer, buffered);

         got = (s->io.read)(s->io_user_data, (char*) buffer + buffered, missing);
         s->img_buffer = s->img_buffer_end; // everything buffered has been consumed
         return got == missing;
      }
   }

   // request is served from the buffer alone, or not at all
   if (s->img_buffer+n > s->img_buffer_end)
      return 0;

   memcpy(buffer, s->img_buffer, n);
   s->img_buffer += n;
   return 1;
}
1427 
// Reads two bytes and combines them as a big-endian 16-bit value
// (first byte is the most significant).
static int stbi__get16be(stbi__context *s)
{
   int hi = stbi__get8(s);
   int lo = stbi__get8(s);
   return (hi << 8) + lo;
}
1433 
stbi__get32be(stbi__context * s)1434 static stbi__uint32 stbi__get32be(stbi__context *s)
1435 {
1436    stbi__uint32 z = stbi__get16be(s);
1437    return (z << 16) + stbi__get16be(s);
1438 }
1439 
1440 #if defined(STBI_NO_BMP) && defined(STBI_NO_TGA) && defined(STBI_NO_GIF)
1441 // nothing
1442 #else
// Reads two bytes and combines them as a little-endian 16-bit value
// (first byte is the least significant).
static int stbi__get16le(stbi__context *s)
{
   int lo = stbi__get8(s);
   int hi = stbi__get8(s);
   return lo + (hi << 8);
}
1448 #endif
1449 
1450 #ifndef STBI_NO_BMP
stbi__get32le(stbi__context * s)1451 static stbi__uint32 stbi__get32le(stbi__context *s)
1452 {
1453    stbi__uint32 z = stbi__get16le(s);
1454    return z + (stbi__get16le(s) << 16);
1455 }
1456 #endif
1457 
// keeps only the low 8 bits of x and converts the result to stbi_uc
#define STBI__BYTECAST(x)  ((stbi_uc) ((x) & 255))  // truncate int to byte without warnings
1459 
1460 
1461 //////////////////////////////////////////////////////////////////////////////
1462 //
1463 //  generic converter from built-in img_n to req_comp
1464 //    individual types do this automatically as much as possible (e.g. jpeg
1465 //    does all cases internally since it needs to colorspace convert anyway,
1466 //    and it never has alpha, so very few cases ). png can automatically
1467 //    interleave an alpha=255 channel, but falls back to this for other cases
1468 //
1469 //  assume data buffer is malloced, so malloc a new one and free that one
1470 //  only failure mode is malloc failing
1471 
stbi__compute_y(int r,int g,int b)1472 static stbi_uc stbi__compute_y(int r, int g, int b)
1473 {
1474    return (stbi_uc) (((r*77) + (g*150) +  (29*b)) >> 8);
1475 }
1476 
// Converts an 8-bit image from img_n components per pixel to req_comp
// components per pixel.
//   data      malloc'd source image (x*y pixels, img_n bytes each)
//   img_n     components per pixel in 'data'
//   req_comp  components per pixel wanted, 1..4
//   x, y      image dimensions in pixels
// If req_comp equals img_n, 'data' is returned unchanged. Otherwise a new
// buffer is allocated, filled and returned, and 'data' is freed -- also when
// the allocation fails, in which case NULL is returned ("outofmem").
// Added alpha is 255; a dropped colour image is reduced with stbi__compute_y;
// grey is replicated into all colour channels.
static unsigned char *stbi__convert_format(unsigned char *data, int img_n, int req_comp, unsigned int x, unsigned int y)
{
   int i,j;
   unsigned char *good;

   if (req_comp == img_n) return data;
   STBI_ASSERT(req_comp >= 1 && req_comp <= 4);

   good = (unsigned char *) stbi__malloc_mad3(req_comp, x, y, 0);
   if (good == NULL) {
      STBI_FREE(data);
      return stbi__errpuc("outofmem", "Out of memory");
   }

   for (j=0; j < (int) y; ++j) {
      unsigned char *src  = data + j * x * img_n   ;
      unsigned char *dest = good + j * x * req_comp;

      // STBI__COMBO packs (source components, wanted components) into one
      // switch value; STBI__CASE emits the case label plus the per-pixel loop
      // that steps src and dest by their respective pixel sizes
      #define STBI__COMBO(a,b)  ((a)*8+(b))
      #define STBI__CASE(a,b)   case STBI__COMBO(a,b): for(i=x-1; i >= 0; --i, src += a, dest += b)
      // convert source image with img_n components to one with req_comp components;
      // avoid switch per pixel, so use switch per scanline and massive macros
      switch (STBI__COMBO(img_n, req_comp)) {
         STBI__CASE(1,2) { dest[0]=src[0], dest[1]=255;                                     } break;
         STBI__CASE(1,3) { dest[0]=dest[1]=dest[2]=src[0];                                  } break;
         STBI__CASE(1,4) { dest[0]=dest[1]=dest[2]=src[0], dest[3]=255;                     } break;
         STBI__CASE(2,1) { dest[0]=src[0];                                                  } break;
         STBI__CASE(2,3) { dest[0]=dest[1]=dest[2]=src[0];                                  } break;
         STBI__CASE(2,4) { dest[0]=dest[1]=dest[2]=src[0], dest[3]=src[1];                  } break;
         STBI__CASE(3,4) { dest[0]=src[0],dest[1]=src[1],dest[2]=src[2],dest[3]=255;        } break;
         STBI__CASE(3,1) { dest[0]=stbi__compute_y(src[0],src[1],src[2]);                   } break;
         STBI__CASE(3,2) { dest[0]=stbi__compute_y(src[0],src[1],src[2]), dest[1] = 255;    } break;
         STBI__CASE(4,1) { dest[0]=stbi__compute_y(src[0],src[1],src[2]);                   } break;
         STBI__CASE(4,2) { dest[0]=stbi__compute_y(src[0],src[1],src[2]), dest[1] = src[3]; } break;
         STBI__CASE(4,3) { dest[0]=src[0],dest[1]=src[1],dest[2]=src[2];                    } break;
         default: STBI_ASSERT(0);
      }
      #undef STBI__CASE
   }

   STBI_FREE(data);
   return good;
}
1520 
stbi__compute_y_16(int r,int g,int b)1521 static stbi__uint16 stbi__compute_y_16(int r, int g, int b)
1522 {
1523    return (stbi__uint16) (((r*77) + (g*150) +  (29*b)) >> 8);
1524 }
1525 
// Converts a 16-bit image from img_n components per pixel to req_comp
// components per pixel.
//   data      malloc'd source image (x*y pixels, img_n 16-bit samples each)
//   img_n     components per pixel in 'data'
//   req_comp  components per pixel wanted, 1..4
//   x, y      image dimensions in pixels
// If req_comp equals img_n, 'data' is returned unchanged. Otherwise a new
// buffer is allocated, filled and returned, and 'data' is freed -- also when
// the size is invalid or the allocation fails, in which case NULL is
// returned ("outofmem").
// Added alpha is 0xffff; a dropped colour image is reduced with
// stbi__compute_y_16; grey is replicated into all colour channels.
static stbi__uint16 *stbi__convert_format16(stbi__uint16 *data, int img_n, int req_comp, unsigned int x, unsigned int y)
{
   int i,j;
   stbi__uint16 *good;

   if (req_comp == img_n) return data;
   STBI_ASSERT(req_comp >= 1 && req_comp <= 4);

   // req_comp * x * y * 2 bytes, computed with overflow checking: an
   // unchecked product could wrap around and yield a buffer that is too
   // small for the loops below
   good = (stbi__uint16 *) stbi__malloc_mad4(req_comp, x, y, 2, 0);
   if (good == NULL) {
      STBI_FREE(data);
      return (stbi__uint16 *) stbi__errpuc("outofmem", "Out of memory");
   }

   for (j=0; j < (int) y; ++j) {
      stbi__uint16 *src  = data + j * x * img_n   ;
      stbi__uint16 *dest = good + j * x * req_comp;

      // STBI__COMBO packs (source components, wanted components) into one
      // switch value; STBI__CASE emits the case label plus the per-pixel loop
      // that steps src and dest by their respective pixel sizes
      #define STBI__COMBO(a,b)  ((a)*8+(b))
      #define STBI__CASE(a,b)   case STBI__COMBO(a,b): for(i=x-1; i >= 0; --i, src += a, dest += b)
      // convert source image with img_n components to one with req_comp components;
      // avoid switch per pixel, so use switch per scanline and massive macros
      switch (STBI__COMBO(img_n, req_comp)) {
         STBI__CASE(1,2) { dest[0]=src[0], dest[1]=0xffff;                                     } break;
         STBI__CASE(1,3) { dest[0]=dest[1]=dest[2]=src[0];                                     } break;
         STBI__CASE(1,4) { dest[0]=dest[1]=dest[2]=src[0], dest[3]=0xffff;                     } break;
         STBI__CASE(2,1) { dest[0]=src[0];                                                     } break;
         STBI__CASE(2,3) { dest[0]=dest[1]=dest[2]=src[0];                                     } break;
         STBI__CASE(2,4) { dest[0]=dest[1]=dest[2]=src[0], dest[3]=src[1];                     } break;
         STBI__CASE(3,4) { dest[0]=src[0],dest[1]=src[1],dest[2]=src[2],dest[3]=0xffff;        } break;
         STBI__CASE(3,1) { dest[0]=stbi__compute_y_16(src[0],src[1],src[2]);                   } break;
         STBI__CASE(3,2) { dest[0]=stbi__compute_y_16(src[0],src[1],src[2]), dest[1] = 0xffff; } break;
         STBI__CASE(4,1) { dest[0]=stbi__compute_y_16(src[0],src[1],src[2]);                   } break;
         STBI__CASE(4,2) { dest[0]=stbi__compute_y_16(src[0],src[1],src[2]), dest[1] = src[3]; } break;
         STBI__CASE(4,3) { dest[0]=src[0],dest[1]=src[1],dest[2]=src[2];                       } break;
         default: STBI_ASSERT(0);
      }
      #undef STBI__CASE
   }

   STBI_FREE(data);
   return good;
}
1569 
1570 #ifndef STBI_NO_LINEAR
// Converts an 8-bit image to float data.
//   data   malloc'd 8-bit image of x*y pixels with 'comp' samples each;
//          freed by this function (a NULL input is passed through as NULL)
//   x, y   image dimensions in pixels
//   comp   samples per pixel; an even count means the last sample is alpha
// Colour samples become pow(v/255, gamma) * scale using the global ldr-to-hdr
// parameters; an alpha sample becomes v/255 with no gamma or scale applied.
// Returns the new float buffer, or NULL ("outofmem") if it cannot be allocated.
static float   *stbi__ldr_to_hdr(stbi_uc *data, int x, int y, int comp)
{
   int pixel, c, color_n, pixel_count;
   float *output;

   if (!data) return NULL;

   output = (float *) stbi__malloc_mad4(x, y, comp, sizeof(float), 0);
   if (output == NULL) {
      STBI_FREE(data);
      return stbi__errpf("outofmem", "Out of memory");
   }

   // compute number of non-alpha components
   color_n = (comp & 1) ? comp : comp-1;
   pixel_count = x*y;

   for (pixel=0; pixel < pixel_count; ++pixel) {
      stbi_uc *src = data + pixel*comp;
      float *dst = output + pixel*comp;
      for (c=0; c < color_n; ++c)
         dst[c] = (float) (pow(src[c]/255.0f, stbi__l2h_gamma) * stbi__l2h_scale);
      // whatever is left after the colour samples is the alpha sample
      if (c < comp)
         dst[c] = src[c]/255.0f;
   }

   STBI_FREE(data);
   return output;
}
1589 #endif
1590 
1591 #ifndef STBI_NO_HDR
1592 #define stbi__float2int(x)   ((int) (x))
stbi__hdr_to_ldr(float * data,int x,int y,int comp)1593 static stbi_uc *stbi__hdr_to_ldr(float   *data, int x, int y, int comp)
1594 {
1595    int i,k,n;
1596    stbi_uc *output;
1597    if (!data) return NULL;
1598    output = (stbi_uc *) stbi__malloc_mad3(x, y, comp, 0);
1599    if (output == NULL) { STBI_FREE(data); return stbi__errpuc("outofmem", "Out of memory"); }
1600    // compute number of non-alpha components
1601    if (comp & 1) n = comp; else n = comp-1;
1602    for (i=0; i < x*y; ++i) {
1603       for (k=0; k < n; ++k) {
1604          float z = (float) pow(data[i*comp+k]*stbi__h2l_scale_i, stbi__h2l_gamma_i) * 255 + 0.5f;
1605          if (z < 0) z = 0;
1606          if (z > 255) z = 255;
1607          output[i*comp + k] = (stbi_uc) stbi__float2int(z);
1608       }
1609       if (k < comp) {
1610          float z = data[i*comp+k] * 255 + 0.5f;
1611          if (z < 0) z = 0;
1612          if (z > 255) z = 255;
1613          output[i*comp + k] = (stbi_uc) stbi__float2int(z);
1614       }
1615    }
1616    STBI_FREE(data);
1617    return output;
1618 }
1619 #endif
1620 
1621 //////////////////////////////////////////////////////////////////////////////
1622 //
1623 //  "baseline" JPEG/JFIF decoder
1624 //
1625 //    simple implementation
1626 //      - doesn't support delayed output of y-dimension
1627 //      - simple interface (only one output format: 8-bit interleaved RGB)
1628 //      - doesn't try to recover corrupt jpegs
1629 //      - doesn't allow partial loading, loading multiple at once
1630 //      - still fast on x86 (copying globals into locals doesn't help x86)
1631 //      - allocates lots of intermediate memory (full size of all components)
1632 //        - non-interleaved case requires this anyway
1633 //        - allows good upsampling (see next)
1634 //    high-quality
1635 //      - upsampled channels are bilinearly interpolated, even across blocks
1636 //      - quality integer IDCT derived from IJG's 'slow'
1637 //    performance
1638 //      - fast huffman; reasonable integer IDCT
1639 //      - some SIMD kernels for common paths on targets with SSE2/NEON
1640 //      - uses a lot of intermediate memory, could cache poorly
1641 
1642 #ifndef STBI_NO_JPEG
1643 
// huffman decoding acceleration
// FAST_BITS is the number of leading bits of the code buffer used to index
// the fast lookup tables; each such table has (1 << FAST_BITS) entries.
#define FAST_BITS   9  // larger handles more cases; smaller stomps less cache

// One JPEG Huffman table together with the lookup data derived from it by
// stbi__build_huffman. "Symbol index" below means the position of a symbol
// in code-length order, i.e. the index used for code[], values[] and size[].
typedef struct
{
   stbi_uc  fast[1 << FAST_BITS];   // symbol index for codes of at most FAST_BITS bits; 255 = not accelerated
   // weirdly, repacking this into AoS is a 10% speed loss, instead of a win
   stbi__uint16 code[256];          // huffman code of each symbol index
   stbi_uc  values[256];            // decoded value returned for each symbol index
   stbi_uc  size[257];              // code length of each symbol index, terminated by a 0 entry
   unsigned int maxcode[18];        // [j]: largest code of length j, plus 1, shifted left to 16 bits; [17] = 0xffffffff sentinel
   int    delta[17];   // old 'firstsymbol' - old 'firstcode'
} stbi__huffman;
1657 
// Complete state of one JPEG decode: input context, huffman and
// dequantization tables, per-component buffers, the entropy-decoder bit
// buffer, the parameters of the current scan, and the kernels in use.
typedef struct
{
   stbi__context *s;                        // input; entropy bytes are pulled from it with stbi__get8
   stbi__huffman huff_dc[4];                // DC huffman tables
   stbi__huffman huff_ac[4];                // AC huffman tables
   stbi__uint16 dequant[4][64];             // dequantization tables
   stbi__int16 fast_ac[4][1 << FAST_BITS];  // tables of the kind filled by stbi__build_fast_ac

// sizes for components, interleaved MCUs
   int img_h_max, img_v_max;
   int img_mcu_x, img_mcu_y;
   int img_mcu_w, img_mcu_h;

// definition of jpeg image component
   struct
   {
      int id;
      int h,v;
      int tq;
      int hd,ha;
      int dc_pred;      // running DC prediction; each decoded DC difference is added to it

      int x,y,w2,h2;
      stbi_uc *data;
      void *raw_data, *raw_coeff;
      stbi_uc *linebuf;
      short   *coeff;   // progressive only
      int      coeff_w, coeff_h; // number of 8x8 coefficient blocks
   } img_comp[4];

   stbi__uint32   code_buffer; // jpeg entropy-coded buffer
   int            code_bits;   // number of valid bits
   unsigned char  marker;      // marker seen while filling entropy buffer
   int            nomore;      // flag if we saw a marker so must stop

   int            progressive;
   int            spec_start;  // first zigzag index handled by the progressive AC decoder; must be nonzero there
   int            spec_end;    // last zigzag index handled by the progressive AC decoder; must be 0 for DC
   int            succ_high;   // 0 selects the first-pass code path, nonzero the refinement path
   int            succ_low;    // bit position: coefficients are scaled by (1 << succ_low)
   int            eob_run;     // progressive AC: number of upcoming blocks still covered by an end-of-band run
   int            jfif;
   int            app14_color_transform; // Adobe APP14 tag
   int            rgb;

   int scan_n, order[4];
   int restart_interval, todo;

// kernels
   void (*idct_block_kernel)(stbi_uc *out, int out_stride, short data[64]);
   void (*YCbCr_to_RGB_kernel)(stbi_uc *out, const stbi_uc *y, const stbi_uc *pcb, const stbi_uc *pcr, int count, int step);
   stbi_uc *(*resample_row_hv_2_kernel)(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs);
} stbi__jpeg;
1711 
// Build the derived decoding tables of a huffman table.
//   h      table to fill: size[], code[], delta[], maxcode[] and fast[] are
//          written here; values[] is not touched
//   count  16 entries; count[i] is the number of codes of length i+1
// Returns 1 on success. If the counts describe more than 256 symbols or an
// impossible set of code lengths, the result of stbi__err is returned.
static int stbi__build_huffman(stbi__huffman *h, int *count)
{
   int i,j,k=0,code;
   // build size list for each symbol (from JPEG spec)
   for (i=0; i < 16; ++i) {
      for (j=0; j < count[i]; ++j) {
         // size[] has room for 256 symbols plus the terminating 0; the
         // counts come straight from the file, so refuse to write past that
         if (k >= 256) return stbi__err("bad size list","Corrupt JPEG");
         h->size[k++] = (stbi_uc) (i+1);
      }
   }
   h->size[k] = 0;

   // compute actual symbols (from jpeg spec)
   code = 0;
   k = 0;
   for(j=1; j <= 16; ++j) {
      // compute delta to add to code to compute symbol id
      h->delta[j] = k - code;
      if (h->size[k] == j) {
         while (h->size[k] == j)
            h->code[k++] = (stbi__uint16) (code++);
         // a code of length j must fit in j bits
         if (code-1 >= (1 << j)) return stbi__err("bad code lengths","Corrupt JPEG");
      }
      // compute largest code + 1 for this size, preshifted as needed later
      h->maxcode[j] = code << (16-j);
      code <<= 1;
   }
   // sentinel: guarantees the length search in the decoder terminates
   h->maxcode[j] = 0xffffffff;

   // build non-spec acceleration table; 255 is flag for not-accelerated
   memset(h->fast, 255, 1 << FAST_BITS);
   for (i=0; i < k; ++i) {
      int s = h->size[i];
      if (s <= FAST_BITS) {
         // every FAST_BITS-bit pattern that starts with this code maps to symbol i
         int c = h->code[i] << (FAST_BITS-s);
         int m = 1 << (FAST_BITS-s);
         for (j=0; j < m; ++j) {
            h->fast[c+j] = (stbi_uc) i;
         }
      }
   }
   return 1;
}
1752 
1753 // build a table that decodes both magnitude and value of small ACs in
1754 // one go.
stbi__build_fast_ac(stbi__int16 * fast_ac,stbi__huffman * h)1755 static void stbi__build_fast_ac(stbi__int16 *fast_ac, stbi__huffman *h)
1756 {
1757    int i;
1758    for (i=0; i < (1 << FAST_BITS); ++i) {
1759       stbi_uc fast = h->fast[i];
1760       fast_ac[i] = 0;
1761       if (fast < 255) {
1762          int rs = h->values[fast];
1763          int run = (rs >> 4) & 15;
1764          int magbits = rs & 15;
1765          int len = h->size[fast];
1766 
1767          if (magbits && len + magbits <= FAST_BITS) {
1768             // magnitude code followed by receive_extend code
1769             int k = ((i << len) & ((1 << FAST_BITS) - 1)) >> (FAST_BITS - magbits);
1770             int m = 1 << (magbits - 1);
1771             if (k < m) k += (~0U << magbits) + 1;
1772             // if the result is small enough, we can fit it in fast_ac table
1773             if (k >= -128 && k <= 127)
1774                fast_ac[i] = (stbi__int16) ((k << 8) + (run << 4) + (len + magbits));
1775          }
1776       }
1777    }
1778 }
1779 
// Refill the entropy bit buffer of j one byte at a time until it holds more
// than 24 valid bits. A 0xff byte followed by a nonzero byte is a marker:
// it is recorded in j->marker, j->nomore is set, and filling stops at once.
// Once nomore is set, zero bytes are fed instead of reading input.
static void stbi__grow_buffer_unsafe(stbi__jpeg *j)
{
   do {
      // unsigned so that placing a byte >= 0x80 in the top bits of the
      // buffer does not overflow a signed int
      unsigned int b = j->nomore ? 0 : stbi__get8(j->s);
      if (b == 0xff) {
         int c = stbi__get8(j->s);
         while (c == 0xff) c = stbi__get8(j->s); // consume fill bytes
         if (c != 0) {
            j->marker = (unsigned char) c;
            j->nomore = 1;
            return;
         }
         // 0xff 0x00 is a stuffed data byte 0xff; fall through and store it
      }
      j->code_buffer |= b << (24 - j->code_bits);
      j->code_bits += 8;
   } while (j->code_bits <= 24);
}
1797 
1798 // (1 << n) - 1
1799 static stbi__uint32 stbi__bmask[17]={0,1,3,7,15,31,63,127,255,511,1023,2047,4095,8191,16383,32767,65535};
1800 
1801 // decode a jpeg huffman value from the bitstream
stbi__jpeg_huff_decode(stbi__jpeg * j,stbi__huffman * h)1802 stbi_inline static int stbi__jpeg_huff_decode(stbi__jpeg *j, stbi__huffman *h)
1803 {
1804    unsigned int temp;
1805    int c,k;
1806 
1807    if (j->code_bits < 16) stbi__grow_buffer_unsafe(j);
1808 
1809    // look at the top FAST_BITS and determine what symbol ID it is,
1810    // if the code is <= FAST_BITS
1811    c = (j->code_buffer >> (32 - FAST_BITS)) & ((1 << FAST_BITS)-1);
1812    k = h->fast[c];
1813    if (k < 255) {
1814       int s = h->size[k];
1815       if (s > j->code_bits)
1816          return -1;
1817       j->code_buffer <<= s;
1818       j->code_bits -= s;
1819       return h->values[k];
1820    }
1821 
1822    // naive test is to shift the code_buffer down so k bits are
1823    // valid, then test against maxcode. To speed this up, we've
1824    // preshifted maxcode left so that it has (16-k) 0s at the
1825    // end; in other words, regardless of the number of bits, it
1826    // wants to be compared against something shifted to have 16;
1827    // that way we don't need to shift inside the loop.
1828    temp = j->code_buffer >> 16;
1829    for (k=FAST_BITS+1 ; ; ++k)
1830       if (temp < h->maxcode[k])
1831          break;
1832    if (k == 17) {
1833       // error! code not found
1834       j->code_bits -= 16;
1835       return -1;
1836    }
1837 
1838    if (k > j->code_bits)
1839       return -1;
1840 
1841    // convert the huffman code to the symbol id
1842    c = ((j->code_buffer >> (32 - k)) & stbi__bmask[k]) + h->delta[k];
1843    STBI_ASSERT((((j->code_buffer) >> (32 - h->size[c])) & stbi__bmask[h->size[c]]) == h->code[c]);
1844 
1845    // convert the id to a symbol
1846    j->code_bits -= k;
1847    j->code_buffer <<= k;
1848    return h->values[c];
1849 }
1850 
// bias[n] = (-1<<n) + 1
// i.e. the smallest value representable by an n-bit JPEG magnitude field
static int const stbi__jbias[16] = {
        0,     -1,     -3,     -7,
      -15,    -31,    -63,   -127,
     -255,   -511,  -1023,  -2047,
    -4095,  -8191, -16383, -32767
};
1853 
1854 // combined JPEG 'receive' and JPEG 'extend', since baseline
1855 // always extends everything it receives.
stbi__extend_receive(stbi__jpeg * j,int n)1856 stbi_inline static int stbi__extend_receive(stbi__jpeg *j, int n)
1857 {
1858    unsigned int k;
1859    int sgn;
1860    if (j->code_bits < n) stbi__grow_buffer_unsafe(j);
1861 
1862    sgn = (stbi__int32)j->code_buffer >> 31; // sign bit is always in MSB
1863    k = stbi_lrot(j->code_buffer, n);
1864    STBI_ASSERT(n >= 0 && n < (int) (sizeof(stbi__bmask)/sizeof(*stbi__bmask)));
1865    j->code_buffer = k & ~stbi__bmask[n];
1866    k &= stbi__bmask[n];
1867    j->code_bits -= n;
1868    return k + (stbi__jbias[n] & ~sgn);
1869 }
1870 
1871 // get some unsigned bits
stbi__jpeg_get_bits(stbi__jpeg * j,int n)1872 stbi_inline static int stbi__jpeg_get_bits(stbi__jpeg *j, int n)
1873 {
1874    unsigned int k;
1875    if (j->code_bits < n) stbi__grow_buffer_unsafe(j);
1876    k = stbi_lrot(j->code_buffer, n);
1877    j->code_buffer = k & ~stbi__bmask[n];
1878    k &= stbi__bmask[n];
1879    j->code_bits -= n;
1880    return k;
1881 }
1882 
stbi__jpeg_get_bit(stbi__jpeg * j)1883 stbi_inline static int stbi__jpeg_get_bit(stbi__jpeg *j)
1884 {
1885    unsigned int k;
1886    if (j->code_bits < 1) stbi__grow_buffer_unsafe(j);
1887    k = j->code_buffer;
1888    j->code_buffer <<= 1;
1889    --j->code_bits;
1890    return k & 0x80000000;
1891 }
1892 
1893 // given a value that's at position X in the zigzag stream,
1894 // where does it appear in the 8x8 matrix coded as row-major?
1895 static stbi_uc stbi__jpeg_dezigzag[64+15] =
1896 {
1897     0,  1,  8, 16,  9,  2,  3, 10,
1898    17, 24, 32, 25, 18, 11,  4,  5,
1899    12, 19, 26, 33, 40, 48, 41, 34,
1900    27, 20, 13,  6,  7, 14, 21, 28,
1901    35, 42, 49, 56, 57, 50, 43, 36,
1902    29, 22, 15, 23, 30, 37, 44, 51,
1903    58, 59, 52, 45, 38, 31, 39, 46,
1904    53, 60, 61, 54, 47, 55, 62, 63,
1905    // let corrupt input sample past end
1906    63, 63, 63, 63, 63, 63, 63, 63,
1907    63, 63, 63, 63, 63, 63, 63
1908 };
1909 
1910 // decode one 64-entry block--
stbi__jpeg_decode_block(stbi__jpeg * j,short data[64],stbi__huffman * hdc,stbi__huffman * hac,stbi__int16 * fac,int b,stbi__uint16 * dequant)1911 static int stbi__jpeg_decode_block(stbi__jpeg *j, short data[64], stbi__huffman *hdc, stbi__huffman *hac, stbi__int16 *fac, int b, stbi__uint16 *dequant)
1912 {
1913    int diff,dc,k;
1914    int t;
1915 
1916    if (j->code_bits < 16) stbi__grow_buffer_unsafe(j);
1917    t = stbi__jpeg_huff_decode(j, hdc);
1918    if (t < 0) return stbi__err("bad huffman code","Corrupt JPEG");
1919 
1920    // 0 all the ac values now so we can do it 32-bits at a time
1921    memset(data,0,64*sizeof(data[0]));
1922 
1923    diff = t ? stbi__extend_receive(j, t) : 0;
1924    dc = j->img_comp[b].dc_pred + diff;
1925    j->img_comp[b].dc_pred = dc;
1926    data[0] = (short) (dc * dequant[0]);
1927 
1928    // decode AC components, see JPEG spec
1929    k = 1;
1930    do {
1931       unsigned int zig;
1932       int c,r,s;
1933       if (j->code_bits < 16) stbi__grow_buffer_unsafe(j);
1934       c = (j->code_buffer >> (32 - FAST_BITS)) & ((1 << FAST_BITS)-1);
1935       r = fac[c];
1936       if (r) { // fast-AC path
1937          k += (r >> 4) & 15; // run
1938          s = r & 15; // combined length
1939          j->code_buffer <<= s;
1940          j->code_bits -= s;
1941          // decode into unzigzag'd location
1942          zig = stbi__jpeg_dezigzag[k++];
1943          data[zig] = (short) ((r >> 8) * dequant[zig]);
1944       } else {
1945          int rs = stbi__jpeg_huff_decode(j, hac);
1946          if (rs < 0) return stbi__err("bad huffman code","Corrupt JPEG");
1947          s = rs & 15;
1948          r = rs >> 4;
1949          if (s == 0) {
1950             if (rs != 0xf0) break; // end block
1951             k += 16;
1952          } else {
1953             k += r;
1954             // decode into unzigzag'd location
1955             zig = stbi__jpeg_dezigzag[k++];
1956             data[zig] = (short) (stbi__extend_receive(j,s) * dequant[zig]);
1957          }
1958       }
1959    } while (k < 64);
1960    return 1;
1961 }
1962 
// Decode the DC coefficient of one block in a progressive scan.
//   j     decoder state; spec_end must be 0 for a DC scan
//   data  the block's 64 coefficients; data[0] is written or refined
//   hdc   DC huffman table
//   b     index into j->img_comp of the component being decoded
// With succ_high == 0 the block is cleared and data[0] is set from the
// decoded DC value scaled by (1 << succ_low); otherwise one refinement
// bit is read and, if set, (1 << succ_low) is added to data[0].
// Returns 1 on success; on corrupt input the result of stbi__err.
static int stbi__jpeg_decode_block_prog_dc(stbi__jpeg *j, short data[64], stbi__huffman *hdc, int b)
{
   int diff,dc;
   int t;
   if (j->spec_end != 0) return stbi__err("can't merge dc and ac", "Corrupt JPEG");

   if (j->code_bits < 16) stbi__grow_buffer_unsafe(j);

   if (j->succ_high == 0) {
      // first scan for DC coefficient, must be first
      memset(data,0,64*sizeof(data[0])); // 0 all the ac values now
      t = stbi__jpeg_huff_decode(j, hdc);
      // -1 signals a decode failure, and t is used as a bit count below,
      // so anything outside 0..15 must be rejected here
      if (t < 0 || t > 15) return stbi__err("bad huffman code", "Corrupt JPEG");
      diff = t ? stbi__extend_receive(j, t) : 0;

      dc = j->img_comp[b].dc_pred + diff;
      j->img_comp[b].dc_pred = dc;
      // multiply rather than shift: dc may be negative
      data[0] = (short) (dc * (1 << j->succ_low));
   } else {
      // refinement scan for DC coefficient
      if (stbi__jpeg_get_bit(j))
         data[0] += (short) (1 << j->succ_low);
   }
   return 1;
}
1987 
1988 // @OPTIMIZE: store non-zigzagged during the decode passes,
1989 // and only de-zigzag when dequantizing
stbi__jpeg_decode_block_prog_ac(stbi__jpeg * j,short data[64],stbi__huffman * hac,stbi__int16 * fac)1990 static int stbi__jpeg_decode_block_prog_ac(stbi__jpeg *j, short data[64], stbi__huffman *hac, stbi__int16 *fac)
1991 {
1992    int k;
1993    if (j->spec_start == 0) return stbi__err("can't merge dc and ac", "Corrupt JPEG");
1994 
1995    if (j->succ_high == 0) {
1996       int shift = j->succ_low;
1997 
1998       if (j->eob_run) {
1999          --j->eob_run;
2000          return 1;
2001       }
2002 
2003       k = j->spec_start;
2004       do {
2005          unsigned int zig;
2006          int c,r,s;
2007          if (j->code_bits < 16) stbi__grow_buffer_unsafe(j);
2008          c = (j->code_buffer >> (32 - FAST_BITS)) & ((1 << FAST_BITS)-1);
2009          r = fac[c];
2010          if (r) { // fast-AC path
2011             k += (r >> 4) & 15; // run
2012             s = r & 15; // combined length
2013             j->code_buffer <<= s;
2014             j->code_bits -= s;
2015             zig = stbi__jpeg_dezigzag[k++];
2016             data[zig] = (short) ((r >> 8) << shift);
2017          } else {
2018             int rs = stbi__jpeg_huff_decode(j, hac);
2019             if (rs < 0) return stbi__err("bad huffman code","Corrupt JPEG");
2020             s = rs & 15;
2021             r = rs >> 4;
2022             if (s == 0) {
2023                if (r < 15) {
2024                   j->eob_run = (1 << r);
2025                   if (r)
2026                      j->eob_run += stbi__jpeg_get_bits(j, r);
2027                   --j->eob_run;
2028                   break;
2029                }
2030                k += 16;
2031             } else {
2032                k += r;
2033                zig = stbi__jpeg_dezigzag[k++];
2034                data[zig] = (short) (stbi__extend_receive(j,s) << shift);
2035             }
2036          }
2037       } while (k <= j->spec_end);
2038    } else {
2039       // refinement scan for these AC coefficients
2040 
2041       short bit = (short) (1 << j->succ_low);
2042 
2043       if (j->eob_run) {
2044          --j->eob_run;
2045          for (k = j->spec_start; k <= j->spec_end; ++k) {
2046             short *p = &data[stbi__jpeg_dezigzag[k]];
2047             if (*p != 0)
2048                if (stbi__jpeg_get_bit(j))
2049                   if ((*p & bit)==0) {
2050                      if (*p > 0)
2051                         *p += bit;
2052                      else
2053                         *p -= bit;
2054                   }
2055          }
2056       } else {
2057          k = j->spec_start;
2058          do {
2059             int r,s;
2060             int rs = stbi__jpeg_huff_decode(j, hac); // @OPTIMIZE see if we can use the fast path here, advance-by-r is so slow, eh
2061             if (rs < 0) return stbi__err("bad huffman code","Corrupt JPEG");
2062             s = rs & 15;
2063             r = rs >> 4;
2064             if (s == 0) {
2065                if (r < 15) {
2066                   j->eob_run = (1 << r) - 1;
2067                   if (r)
2068                      j->eob_run += stbi__jpeg_get_bits(j, r);
2069                   r = 64; // force end of block
2070                } else {
2071                   // r=15 s=0 should write 16 0s, so we just do
2072                   // a run of 15 0s and then write s (which is 0),
2073                   // so we don't have to do anything special here
2074                }
2075             } else {
2076                if (s != 1) return stbi__err("bad huffman code", "Corrupt JPEG");
2077                // sign bit
2078                if (stbi__jpeg_get_bit(j))
2079                   s = bit;
2080                else
2081                   s = -bit;
2082             }
2083 
2084             // advance by r
2085             while (k <= j->spec_end) {
2086                short *p = &data[stbi__jpeg_dezigzag[k++]];
2087                if (*p != 0) {
2088                   if (stbi__jpeg_get_bit(j))
2089                      if ((*p & bit)==0) {
2090                         if (*p > 0)
2091                            *p += bit;
2092                         else
2093                            *p -= bit;
2094                      }
2095                } else {
2096                   if (r == 0) {
2097                      *p = (short) s;
2098                      break;
2099                   }
2100                   --r;
2101                }
2102             }
2103          } while (k <= j->spec_end);
2104       }
2105    }
2106    return 1;
2107 }
2108 
2109 // take a -128..127 value and stbi__clamp it and convert to 0..255
stbi__clamp(int x)2110 stbi_inline static stbi_uc stbi__clamp(int x)
2111 {
2112    // trick to use a single test to catch both cases
2113    if ((unsigned int) x > 255) {
2114       if (x < 0) return 0;
2115       if (x > 255) return 255;
2116    }
2117    return (stbi_uc) x;
2118 }
2119 
// convert a floating-point constant to fixed point with 12 fractional bits
#define stbi__f2f(x)  ((int) (((x) * 4096 + 0.5)))
// scale an integer up to the same 12-bit fixed-point format; written as a
// multiply because the operand may be negative and left-shifting a
// negative value is undefined behavior
#define stbi__fsh(x)  ((x) * 4096)

// derived from jidctint -- DCT_ISLOW
// One 1D 8-point inverse DCT pass over inputs s0..s7. Declares its own
// locals, so it can be used once per scope. On exit x0..x3 hold the even
// part and t0..t3 the odd part, all scaled by 1<<12; the caller forms the
// eight outputs as x0+t3, x1+t2, x2+t1, x3+t0, x3-t0, x2-t1, x1-t2, x0-t3.
#define STBI__IDCT_1D(s0,s1,s2,s3,s4,s5,s6,s7) \
   int t0,t1,t2,t3,p1,p2,p3,p4,p5,x0,x1,x2,x3; \
   p2 = s2;                                    \
   p3 = s6;                                    \
   p1 = (p2+p3) * stbi__f2f(0.5411961f);       \
   t2 = p1 + p3*stbi__f2f(-1.847759065f);      \
   t3 = p1 + p2*stbi__f2f( 0.765366865f);      \
   p2 = s0;                                    \
   p3 = s4;                                    \
   t0 = stbi__fsh(p2+p3);                      \
   t1 = stbi__fsh(p2-p3);                      \
   x0 = t0+t3;                                 \
   x3 = t0-t3;                                 \
   x1 = t1+t2;                                 \
   x2 = t1-t2;                                 \
   t0 = s7;                                    \
   t1 = s5;                                    \
   t2 = s3;                                    \
   t3 = s1;                                    \
   p3 = t0+t2;                                 \
   p4 = t1+t3;                                 \
   p1 = t0+t3;                                 \
   p2 = t1+t2;                                 \
   p5 = (p3+p4)*stbi__f2f( 1.175875602f);      \
   t0 = t0*stbi__f2f( 0.298631336f);           \
   t1 = t1*stbi__f2f( 2.053119869f);           \
   t2 = t2*stbi__f2f( 3.072711026f);           \
   t3 = t3*stbi__f2f( 1.501321110f);           \
   p1 = p5 + p1*stbi__f2f(-0.899976223f);      \
   p2 = p5 + p2*stbi__f2f(-2.562915447f);      \
   p3 = p3*stbi__f2f(-1.961570560f);           \
   p4 = p4*stbi__f2f(-0.390180644f);           \
   t3 += p1+p4;                                \
   t2 += p2+p3;                                \
   t1 += p2+p4;                                \
   t0 += p1+p3;
2160 
// Generic integer 8x8 inverse DCT.
//   out         destination; 8 rows of 8 bytes are written, rows out_stride bytes apart
//   out_stride  distance in bytes between consecutive output rows
//   data        the 64 input coefficients in row-major order
// Runs a column pass into a temporary int block, then a row pass that adds
// the +128 level shift, removes the fixed-point scale and clamps to 0..255.
static void stbi__idct_block(stbi_uc *out, int out_stride, short data[64])
{
   int i,val[64],*v=val;
   stbi_uc *o;
   short *d = data;

   // columns
   for (i=0; i < 8; ++i,++d, ++v) {
      // if all zeroes, shortcut -- this avoids dequantizing 0s and IDCTing
      if (d[ 8]==0 && d[16]==0 && d[24]==0 && d[32]==0
           && d[40]==0 && d[48]==0 && d[56]==0) {
         //    no shortcut                 0     seconds
         //    (1|2|3|4|5|6|7)==0          0     seconds
         //    all separate               -0.047 seconds
         //    1 && 2|3 && 4|5 && 6|7:    -0.047 seconds
         // multiply rather than shift: d[0] may be negative, and
         // left-shifting a negative value is undefined behavior
         int dcterm = d[0] * 4;
         v[0] = v[8] = v[16] = v[24] = v[32] = v[40] = v[48] = v[56] = dcterm;
      } else {
         STBI__IDCT_1D(d[ 0],d[ 8],d[16],d[24],d[32],d[40],d[48],d[56])
         // constants scaled things up by 1<<12; let's bring them back
         // down, but keep 2 extra bits of precision
         x0 += 512; x1 += 512; x2 += 512; x3 += 512;
         v[ 0] = (x0+t3) >> 10;
         v[56] = (x0-t3) >> 10;
         v[ 8] = (x1+t2) >> 10;
         v[48] = (x1-t2) >> 10;
         v[16] = (x2+t1) >> 10;
         v[40] = (x2-t1) >> 10;
         v[24] = (x3+t0) >> 10;
         v[32] = (x3-t0) >> 10;
      }
   }

   for (i=0, v=val, o=out; i < 8; ++i,v+=8,o+=out_stride) {
      // no fast case since the first 1D IDCT spread components out
      STBI__IDCT_1D(v[0],v[1],v[2],v[3],v[4],v[5],v[6],v[7])
      // constants scaled things up by 1<<12, plus we had 1<<2 from first
      // loop, plus horizontal and vertical each scale by sqrt(8) so together
      // we've got an extra 1<<3, so 1<<17 total we need to remove.
      // so we want to round that, which means adding 0.5 * 1<<17,
      // aka 65536. Also, we'll end up with -128 to 127 that we want
      // to encode as 0..255 by adding 128, so we'll add that before the shift
      x0 += 65536 + (128<<17);
      x1 += 65536 + (128<<17);
      x2 += 65536 + (128<<17);
      x3 += 65536 + (128<<17);
      // tried computing the shifts into temps, or'ing the temps to see
      // if any were out of range, but that was slower
      o[0] = stbi__clamp((x0+t3) >> 17);
      o[7] = stbi__clamp((x0-t3) >> 17);
      o[1] = stbi__clamp((x1+t2) >> 17);
      o[6] = stbi__clamp((x1-t2) >> 17);
      o[2] = stbi__clamp((x2+t1) >> 17);
      o[5] = stbi__clamp((x2-t1) >> 17);
      o[3] = stbi__clamp((x3+t0) >> 17);
      o[4] = stbi__clamp((x3-t0) >> 17);
   }
}
2219 
2220 #ifdef STBI_SSE2
// sse2 integer IDCT. not the fastest possible implementation but it
// produces bit-identical results to the generic C version so it's
// fully "transparent".
//   out         destination; 8 rows of 8 bytes are stored, rows out_stride bytes apart
//   out_stride  distance in bytes between consecutive output rows
//   data        the 64 input coefficients; read with _mm_load_si128, which
//               requires the address to be 16-byte aligned
static void stbi__idct_simd(stbi_uc *out, int out_stride, short data[64])
{
   // This is constructed to match our regular (generic) integer IDCT exactly.
   __m128i row0, row1, row2, row3, row4, row5, row6, row7;
   __m128i tmp; // scratch register used by the interleave macros

   // dot product constant: even elems=x, odd elems=y
   #define dct_const(x,y)  _mm_setr_epi16((x),(y),(x),(y),(x),(y),(x),(y))

   // out(0) = c0[even]*x + c0[odd]*y   (c0, x, y 16-bit, out 32-bit)
   // out(1) = c1[even]*x + c1[odd]*y
   // declares out0##_l/_h and out1##_l/_h (low and high four lanes)
   #define dct_rot(out0,out1, x,y,c0,c1) \
      __m128i c0##lo = _mm_unpacklo_epi16((x),(y)); \
      __m128i c0##hi = _mm_unpackhi_epi16((x),(y)); \
      __m128i out0##_l = _mm_madd_epi16(c0##lo, c0); \
      __m128i out0##_h = _mm_madd_epi16(c0##hi, c0); \
      __m128i out1##_l = _mm_madd_epi16(c0##lo, c1); \
      __m128i out1##_h = _mm_madd_epi16(c0##hi, c1)

   // out = in << 12  (in 16-bit, out 32-bit)
   // the value is placed in the upper 16 bits of each lane and then
   // shifted right arithmetically by 4, which keeps the sign
   #define dct_widen(out, in) \
      __m128i out##_l = _mm_srai_epi32(_mm_unpacklo_epi16(_mm_setzero_si128(), (in)), 4); \
      __m128i out##_h = _mm_srai_epi32(_mm_unpackhi_epi16(_mm_setzero_si128(), (in)), 4)

   // wide add
   #define dct_wadd(out, a, b) \
      __m128i out##_l = _mm_add_epi32(a##_l, b##_l); \
      __m128i out##_h = _mm_add_epi32(a##_h, b##_h)

   // wide sub
   #define dct_wsub(out, a, b) \
      __m128i out##_l = _mm_sub_epi32(a##_l, b##_l); \
      __m128i out##_h = _mm_sub_epi32(a##_h, b##_h)

   // butterfly a/b, add bias, then shift by "s" and pack
   #define dct_bfly32o(out0, out1, a,b,bias,s) \
      { \
         __m128i abiased_l = _mm_add_epi32(a##_l, bias); \
         __m128i abiased_h = _mm_add_epi32(a##_h, bias); \
         dct_wadd(sum, abiased, b); \
         dct_wsub(dif, abiased, b); \
         out0 = _mm_packs_epi32(_mm_srai_epi32(sum_l, s), _mm_srai_epi32(sum_h, s)); \
         out1 = _mm_packs_epi32(_mm_srai_epi32(dif_l, s), _mm_srai_epi32(dif_h, s)); \
      }

   // 8-bit interleave step (for transposes)
   #define dct_interleave8(a, b) \
      tmp = a; \
      a = _mm_unpacklo_epi8(a, b); \
      b = _mm_unpackhi_epi8(tmp, b)

   // 16-bit interleave step (for transposes)
   #define dct_interleave16(a, b) \
      tmp = a; \
      a = _mm_unpacklo_epi16(a, b); \
      b = _mm_unpackhi_epi16(tmp, b)

   // one full 1D IDCT over row0..row7 (eight lanes at once); the results
   // are written back into row0..row7 after adding bias and shifting
   #define dct_pass(bias,shift) \
      { \
         /* even part */ \
         dct_rot(t2e,t3e, row2,row6, rot0_0,rot0_1); \
         __m128i sum04 = _mm_add_epi16(row0, row4); \
         __m128i dif04 = _mm_sub_epi16(row0, row4); \
         dct_widen(t0e, sum04); \
         dct_widen(t1e, dif04); \
         dct_wadd(x0, t0e, t3e); \
         dct_wsub(x3, t0e, t3e); \
         dct_wadd(x1, t1e, t2e); \
         dct_wsub(x2, t1e, t2e); \
         /* odd part */ \
         dct_rot(y0o,y2o, row7,row3, rot2_0,rot2_1); \
         dct_rot(y1o,y3o, row5,row1, rot3_0,rot3_1); \
         __m128i sum17 = _mm_add_epi16(row1, row7); \
         __m128i sum35 = _mm_add_epi16(row3, row5); \
         dct_rot(y4o,y5o, sum17,sum35, rot1_0,rot1_1); \
         dct_wadd(x4, y0o, y4o); \
         dct_wadd(x5, y1o, y5o); \
         dct_wadd(x6, y2o, y5o); \
         dct_wadd(x7, y3o, y4o); \
         dct_bfly32o(row0,row7, x0,x7,bias,shift); \
         dct_bfly32o(row1,row6, x1,x6,bias,shift); \
         dct_bfly32o(row2,row5, x2,x5,bias,shift); \
         dct_bfly32o(row3,row4, x3,x4,bias,shift); \
      }

   // rotation constants: pairs of the fixed-point factors used by
   // STBI__IDCT_1D, pre-combined for use with dct_rot
   __m128i rot0_0 = dct_const(stbi__f2f(0.5411961f), stbi__f2f(0.5411961f) + stbi__f2f(-1.847759065f)); // NOLINT
   __m128i rot0_1 = dct_const(stbi__f2f(0.5411961f) + stbi__f2f( 0.765366865f), stbi__f2f(0.5411961f)); // NOLINT
   __m128i rot1_0 = dct_const(stbi__f2f(1.175875602f) + stbi__f2f(-0.899976223f), stbi__f2f(1.175875602f)); // NOLINT
   __m128i rot1_1 = dct_const(stbi__f2f(1.175875602f), stbi__f2f(1.175875602f) + stbi__f2f(-2.562915447f)); // NOLINT
   __m128i rot2_0 = dct_const(stbi__f2f(-1.961570560f) + stbi__f2f( 0.298631336f), stbi__f2f(-1.961570560f)); // NOLINT
   __m128i rot2_1 = dct_const(stbi__f2f(-1.961570560f), stbi__f2f(-1.961570560f) + stbi__f2f( 3.072711026f)); // NOLINT
   __m128i rot3_0 = dct_const(stbi__f2f(-0.390180644f) + stbi__f2f( 2.053119869f), stbi__f2f(-0.390180644f)); // NOLINT
   __m128i rot3_1 = dct_const(stbi__f2f(-0.390180644f), stbi__f2f(-0.390180644f) + stbi__f2f( 1.501321110f)); // NOLINT

   // rounding biases in column/row passes, see stbi__idct_block for explanation.
   __m128i bias_0 = _mm_set1_epi32(512);
   __m128i bias_1 = _mm_set1_epi32(65536 + (128<<17));

   // load
   row0 = _mm_load_si128((const __m128i *) (data + 0*8));
   row1 = _mm_load_si128((const __m128i *) (data + 1*8));
   row2 = _mm_load_si128((const __m128i *) (data + 2*8));
   row3 = _mm_load_si128((const __m128i *) (data + 3*8));
   row4 = _mm_load_si128((const __m128i *) (data + 4*8));
   row5 = _mm_load_si128((const __m128i *) (data + 5*8));
   row6 = _mm_load_si128((const __m128i *) (data + 6*8));
   row7 = _mm_load_si128((const __m128i *) (data + 7*8));

   // column pass
   dct_pass(bias_0, 10);

   {
      // 16bit 8x8 transpose pass 1
      dct_interleave16(row0, row4);
      dct_interleave16(row1, row5);
      dct_interleave16(row2, row6);
      dct_interleave16(row3, row7);

      // transpose pass 2
      dct_interleave16(row0, row2);
      dct_interleave16(row1, row3);
      dct_interleave16(row4, row6);
      dct_interleave16(row5, row7);

      // transpose pass 3
      dct_interleave16(row0, row1);
      dct_interleave16(row2, row3);
      dct_interleave16(row4, row5);
      dct_interleave16(row6, row7);
   }

   // row pass
   dct_pass(bias_1, 17);

   {
      // pack
      // _mm_packus_epi16 saturates each 16-bit value to 0..255
      __m128i p0 = _mm_packus_epi16(row0, row1); // a0a1a2a3...a7b0b1b2b3...b7
      __m128i p1 = _mm_packus_epi16(row2, row3);
      __m128i p2 = _mm_packus_epi16(row4, row5);
      __m128i p3 = _mm_packus_epi16(row6, row7);

      // 8bit 8x8 transpose pass 1
      dct_interleave8(p0, p2); // a0e0a1e1...
      dct_interleave8(p1, p3); // c0g0c1g1...

      // transpose pass 2
      dct_interleave8(p0, p1); // a0c0e0g0...
      dct_interleave8(p2, p3); // b0d0f0h0...

      // transpose pass 3
      dct_interleave8(p0, p2); // a0b0c0d0...
      dct_interleave8(p1, p3); // a4b4c4d4...

      // store
      // each register holds two output rows; the 0x4e shuffle swaps the
      // 64-bit halves so the upper row can be stored with a low-half store
      _mm_storel_epi64((__m128i *) out, p0); out += out_stride;
      _mm_storel_epi64((__m128i *) out, _mm_shuffle_epi32(p0, 0x4e)); out += out_stride;
      _mm_storel_epi64((__m128i *) out, p2); out += out_stride;
      _mm_storel_epi64((__m128i *) out, _mm_shuffle_epi32(p2, 0x4e)); out += out_stride;
      _mm_storel_epi64((__m128i *) out, p1); out += out_stride;
      _mm_storel_epi64((__m128i *) out, _mm_shuffle_epi32(p1, 0x4e)); out += out_stride;
      _mm_storel_epi64((__m128i *) out, p3); out += out_stride;
      _mm_storel_epi64((__m128i *) out, _mm_shuffle_epi32(p3, 0x4e));
   }

#undef dct_const
#undef dct_rot
#undef dct_widen
#undef dct_wadd
#undef dct_wsub
#undef dct_bfly32o
#undef dct_interleave8
#undef dct_interleave16
#undef dct_pass
}
2398 
2399 #endif // STBI_SSE2
2400 
2401 #ifdef STBI_NEON
2402 
// NEON integer IDCT. should produce bit-identical
// results to the generic C version.
//
//   out        - receives the 8x8 block of 8-bit samples; eight bytes are
//                stored at out, out+out_stride, ..., out+7*out_stride
//   out_stride - distance in bytes between consecutive output rows
//   data       - the 64 input coefficients, loaded as eight rows of eight
//                shorts
//
// The work is done as a column pass, a 16-bit 8x8 transpose, a row pass,
// a saturating pack to 8 bits and a final 8-bit 8x8 transpose before the
// stores.
static void stbi__idct_simd(stbi_uc *out, int out_stride, short data[64])
{
   int16x8_t row0, row1, row2, row3, row4, row5, row6, row7;

   // rotation constants, each broadcast to all four lanes so it can be used
   // as the multiplier of the widening multiplies below.
   // NOTE(review): stbi__f2f is defined elsewhere; presumably it converts to
   // the same 12-bit fixed-point scale that dct_widen uses (<<12) - confirm.
   int16x4_t rot0_0 = vdup_n_s16(stbi__f2f(0.5411961f));
   int16x4_t rot0_1 = vdup_n_s16(stbi__f2f(-1.847759065f));
   int16x4_t rot0_2 = vdup_n_s16(stbi__f2f( 0.765366865f));
   int16x4_t rot1_0 = vdup_n_s16(stbi__f2f( 1.175875602f));
   int16x4_t rot1_1 = vdup_n_s16(stbi__f2f(-0.899976223f));
   int16x4_t rot1_2 = vdup_n_s16(stbi__f2f(-2.562915447f));
   int16x4_t rot2_0 = vdup_n_s16(stbi__f2f(-1.961570560f));
   int16x4_t rot2_1 = vdup_n_s16(stbi__f2f(-0.390180644f));
   int16x4_t rot3_0 = vdup_n_s16(stbi__f2f( 0.298631336f));
   int16x4_t rot3_1 = vdup_n_s16(stbi__f2f( 2.053119869f));
   int16x4_t rot3_2 = vdup_n_s16(stbi__f2f( 3.072711026f));
   int16x4_t rot3_3 = vdup_n_s16(stbi__f2f( 1.501321110f));

// All "wide" values below are a pair of int32x4_t variables named <name>_l
// and <name>_h, holding the low and high four lanes of an 8-lane vector.
// Each of these macros DECLARES its output pair, so an output name can be
// used only once per scope.

// wide multiply: out = inq * coeff (16x16 -> 32 bit)
#define dct_long_mul(out, inq, coeff) \
   int32x4_t out##_l = vmull_s16(vget_low_s16(inq), coeff); \
   int32x4_t out##_h = vmull_s16(vget_high_s16(inq), coeff)

// wide multiply-accumulate: out = acc + inq * coeff
#define dct_long_mac(out, acc, inq, coeff) \
   int32x4_t out##_l = vmlal_s16(acc##_l, vget_low_s16(inq), coeff); \
   int32x4_t out##_h = vmlal_s16(acc##_h, vget_high_s16(inq), coeff)

// widen to 32 bits and scale: out = inq << 12
#define dct_widen(out, inq) \
   int32x4_t out##_l = vshll_n_s16(vget_low_s16(inq), 12); \
   int32x4_t out##_h = vshll_n_s16(vget_high_s16(inq), 12)

// wide add
#define dct_wadd(out, a, b) \
   int32x4_t out##_l = vaddq_s32(a##_l, b##_l); \
   int32x4_t out##_h = vaddq_s32(a##_h, b##_h)

// wide sub
#define dct_wsub(out, a, b) \
   int32x4_t out##_l = vsubq_s32(a##_l, b##_l); \
   int32x4_t out##_h = vsubq_s32(a##_h, b##_h)

// butterfly a/b, then shift using "shiftop" by "s" and pack
// (out0 = a+b, out1 = a-b, each narrowed back to eight 16-bit lanes; the
// braces give the sum/dif temporaries their own scope)
#define dct_bfly32o(out0,out1, a,b,shiftop,s) \
   { \
      dct_wadd(sum, a, b); \
      dct_wsub(dif, a, b); \
      out0 = vcombine_s16(shiftop(sum_l, s), shiftop(sum_h, s)); \
      out1 = vcombine_s16(shiftop(dif_l, s), shiftop(dif_h, s)); \
   }

// one 1-D IDCT over row0..row7, in place: reads all eight rows, computes the
// even part (x0..x3) and odd part (x4..x7) in 32 bits, then writes the rows
// back through four butterflies narrowed with shiftop/shift.
#define dct_pass(shiftop, shift) \
   { \
      /* even part */ \
      int16x8_t sum26 = vaddq_s16(row2, row6); \
      dct_long_mul(p1e, sum26, rot0_0); \
      dct_long_mac(t2e, p1e, row6, rot0_1); \
      dct_long_mac(t3e, p1e, row2, rot0_2); \
      int16x8_t sum04 = vaddq_s16(row0, row4); \
      int16x8_t dif04 = vsubq_s16(row0, row4); \
      dct_widen(t0e, sum04); \
      dct_widen(t1e, dif04); \
      dct_wadd(x0, t0e, t3e); \
      dct_wsub(x3, t0e, t3e); \
      dct_wadd(x1, t1e, t2e); \
      dct_wsub(x2, t1e, t2e); \
      /* odd part */ \
      int16x8_t sum15 = vaddq_s16(row1, row5); \
      int16x8_t sum17 = vaddq_s16(row1, row7); \
      int16x8_t sum35 = vaddq_s16(row3, row5); \
      int16x8_t sum37 = vaddq_s16(row3, row7); \
      int16x8_t sumodd = vaddq_s16(sum17, sum35); \
      dct_long_mul(p5o, sumodd, rot1_0); \
      dct_long_mac(p1o, p5o, sum17, rot1_1); \
      dct_long_mac(p2o, p5o, sum35, rot1_2); \
      dct_long_mul(p3o, sum37, rot2_0); \
      dct_long_mul(p4o, sum15, rot2_1); \
      dct_wadd(sump13o, p1o, p3o); \
      dct_wadd(sump24o, p2o, p4o); \
      dct_wadd(sump23o, p2o, p3o); \
      dct_wadd(sump14o, p1o, p4o); \
      dct_long_mac(x4, sump13o, row7, rot3_0); \
      dct_long_mac(x5, sump24o, row5, rot3_1); \
      dct_long_mac(x6, sump23o, row3, rot3_2); \
      dct_long_mac(x7, sump14o, row1, rot3_3); \
      dct_bfly32o(row0,row7, x0,x7,shiftop,shift); \
      dct_bfly32o(row1,row6, x1,x6,shiftop,shift); \
      dct_bfly32o(row2,row5, x2,x5,shiftop,shift); \
      dct_bfly32o(row3,row4, x3,x4,shiftop,shift); \
   }

   // load
   row0 = vld1q_s16(data + 0*8);
   row1 = vld1q_s16(data + 1*8);
   row2 = vld1q_s16(data + 2*8);
   row3 = vld1q_s16(data + 3*8);
   row4 = vld1q_s16(data + 4*8);
   row5 = vld1q_s16(data + 5*8);
   row6 = vld1q_s16(data + 6*8);
   row7 = vld1q_s16(data + 7*8);

   // add DC bias
   // (1024 is added to lane 0 of row0 only, i.e. to data[0]. Following the
   // shifts used here - <<12 in dct_widen, >>10 after the column pass, >>17
   // in total after the row pass - this works out to +128 on every output
   // sample.)
   row0 = vaddq_s16(row0, vsetq_lane_s16(1024, vdupq_n_s16(0), 0));

   // column pass
   dct_pass(vrshrn_n_s32, 10);

   // 16bit 8x8 transpose
   {
// these three map to a single VTRN.16, VTRN.32, and VSWP, respectively.
// whether compilers actually get this is another story, sadly.
#define dct_trn16(x, y) { int16x8x2_t t = vtrnq_s16(x, y); x = t.val[0]; y = t.val[1]; }
#define dct_trn32(x, y) { int32x4x2_t t = vtrnq_s32(vreinterpretq_s32_s16(x), vreinterpretq_s32_s16(y)); x = vreinterpretq_s16_s32(t.val[0]); y = vreinterpretq_s16_s32(t.val[1]); }
#define dct_trn64(x, y) { int16x8_t x0 = x; int16x8_t y0 = y; x = vcombine_s16(vget_low_s16(x0), vget_low_s16(y0)); y = vcombine_s16(vget_high_s16(x0), vget_high_s16(y0)); }

      // pass 1
      dct_trn16(row0, row1); // a0b0a2b2a4b4a6b6
      dct_trn16(row2, row3);
      dct_trn16(row4, row5);
      dct_trn16(row6, row7);

      // pass 2
      dct_trn32(row0, row2); // a0b0c0d0a4b4c4d4
      dct_trn32(row1, row3);
      dct_trn32(row4, row6);
      dct_trn32(row5, row7);

      // pass 3
      dct_trn64(row0, row4); // a0b0c0d0e0f0g0h0
      dct_trn64(row1, row5);
      dct_trn64(row2, row6);
      dct_trn64(row3, row7);

#undef dct_trn16
#undef dct_trn32
#undef dct_trn64
   }

   // row pass
   // vrshrn_n_s32 only supports shifts up to 16, we need
   // 17. so do a non-rounding shift of 16 first then follow
   // up with a rounding shift by 1.
   dct_pass(vshrn_n_s32, 16);

   {
      // pack and round
      // (the rounding shift by 1 completes the >>17; the unsigned saturating
      // narrow clamps each sample to 0..255)
      uint8x8_t p0 = vqrshrun_n_s16(row0, 1);
      uint8x8_t p1 = vqrshrun_n_s16(row1, 1);
      uint8x8_t p2 = vqrshrun_n_s16(row2, 1);
      uint8x8_t p3 = vqrshrun_n_s16(row3, 1);
      uint8x8_t p4 = vqrshrun_n_s16(row4, 1);
      uint8x8_t p5 = vqrshrun_n_s16(row5, 1);
      uint8x8_t p6 = vqrshrun_n_s16(row6, 1);
      uint8x8_t p7 = vqrshrun_n_s16(row7, 1);

      // again, these can translate into one instruction, but often don't.
#define dct_trn8_8(x, y) { uint8x8x2_t t = vtrn_u8(x, y); x = t.val[0]; y = t.val[1]; }
#define dct_trn8_16(x, y) { uint16x4x2_t t = vtrn_u16(vreinterpret_u16_u8(x), vreinterpret_u16_u8(y)); x = vreinterpret_u8_u16(t.val[0]); y = vreinterpret_u8_u16(t.val[1]); }
#define dct_trn8_32(x, y) { uint32x2x2_t t = vtrn_u32(vreinterpret_u32_u8(x), vreinterpret_u32_u8(y)); x = vreinterpret_u8_u32(t.val[0]); y = vreinterpret_u8_u32(t.val[1]); }

      // sadly can't use interleaved stores here since we only write
      // 8 bytes to each scan line!

      // 8x8 8-bit transpose pass 1
      dct_trn8_8(p0, p1);
      dct_trn8_8(p2, p3);
      dct_trn8_8(p4, p5);
      dct_trn8_8(p6, p7);

      // pass 2
      dct_trn8_16(p0, p2);
      dct_trn8_16(p1, p3);
      dct_trn8_16(p4, p6);
      dct_trn8_16(p5, p7);

      // pass 3
      dct_trn8_32(p0, p4);
      dct_trn8_32(p1, p5);
      dct_trn8_32(p2, p6);
      dct_trn8_32(p3, p7);

      // store
      // (one 8-byte row per output scan line; out is not advanced after the
      // last row)
      vst1_u8(out, p0); out += out_stride;
      vst1_u8(out, p1); out += out_stride;
      vst1_u8(out, p2); out += out_stride;
      vst1_u8(out, p3); out += out_stride;
      vst1_u8(out, p4); out += out_stride;
      vst1_u8(out, p5); out += out_stride;
      vst1_u8(out, p6); out += out_stride;
      vst1_u8(out, p7);

#undef dct_trn8_8
#undef dct_trn8_16
#undef dct_trn8_32
   }

#undef dct_long_mul
#undef dct_long_mac
#undef dct_widen
#undef dct_wadd
#undef dct_wsub
#undef dct_bfly32o
#undef dct_pass
}
2606 
2607 #endif // STBI_NEON
2608 
2609 #define STBI__MARKER_none  0xff
2610 // if there's a pending marker from the entropy stream, return that
2611 // otherwise, fetch from the stream and get a marker. if there's no
2612 // marker, return 0xff, which is never a valid marker value
stbi__get_marker(stbi__jpeg * j)2613 static stbi_uc stbi__get_marker(stbi__jpeg *j)
2614 {
2615    stbi_uc x;
2616    if (j->marker != STBI__MARKER_none) { x = j->marker; j->marker = STBI__MARKER_none; return x; }
2617    x = stbi__get8(j->s);
2618    if (x != 0xff) return STBI__MARKER_none;
2619    while (x == 0xff)
2620       x = stbi__get8(j->s); // consume repeated 0xff fill bytes
2621    return x;
2622 }
2623 
2624 // in each scan, we'll have scan_n components, and the order
2625 // of the components is specified by order[]
2626 #define STBI__RESTART(x)     ((x) >= 0xd0 && (x) <= 0xd7)
2627 
2628 // after a restart interval, stbi__jpeg_reset the entropy decoder and
2629 // the dc prediction
stbi__jpeg_reset(stbi__jpeg * j)2630 static void stbi__jpeg_reset(stbi__jpeg *j)
2631 {
2632    j->code_bits = 0;
2633    j->code_buffer = 0;
2634    j->nomore = 0;
2635    j->img_comp[0].dc_pred = j->img_comp[1].dc_pred = j->img_comp[2].dc_pred = j->img_comp[3].dc_pred = 0;
2636    j->marker = STBI__MARKER_none;
2637    j->todo = j->restart_interval ? j->restart_interval : 0x7fffffff;
2638    j->eob_run = 0;
2639    // no more than 1<<31 MCUs if no restart_interal? that's plenty safe,
2640    // since we don't even allow 1<<30 pixels
2641 }
2642 
// Restart-interval bookkeeping shared by every scan layout: one MCU has just
// been decoded, so tick z->todo. Returns 1 when decoding should carry on.
// Returns 0 when the interval has run out but the pending marker is NOT a
// restart marker; the caller then just bails out of the scan, so we get
// corrupt data rather than no data.
static int stbi__jpeg_mcu_countdown(stbi__jpeg *z)
{
   z->todo -= 1;
   if (z->todo > 0) return 1;

   // refill the bit buffer before looking at z->marker
   // NOTE(review): presumably this is what latches a marker that directly
   // follows the entropy data - confirm in stbi__grow_buffer_unsafe
   if (z->code_bits < 24) stbi__grow_buffer_unsafe(z);
   if (!STBI__RESTART(z->marker)) return 0;

   stbi__jpeg_reset(z);
   return 1;
}

// Decode the entropy-coded data of the scan described by the current scan
// header. Returns 0 if a block fails to decode, 1 otherwise (including the
// case where the scan is cut short at a missing restart marker).
//
// Baseline scans are decoded and run through the IDCT block by block,
// straight into the component's sample plane. Progressive scans only
// accumulate coefficients into the component's coeff plane.
static int stbi__parse_entropy_coded_data(stbi__jpeg *z)
{
   stbi__jpeg_reset(z);

   if (z->progressive) {
      if (z->scan_n != 1) {
         // interleaved: walk the MCUs, and within each MCU the scan_n
         // components in scan order
         int mx, my, c, bx, by;
         for (my=0; my < z->img_mcu_y; ++my) {
            for (mx=0; mx < z->img_mcu_x; ++mx) {
               for (c=0; c < z->scan_n; ++c) {
                  int n = z->order[c];
                  // an MCU holds h x v blocks of this component, as given
                  // by its basic H and V factors
                  for (by=0; by < z->img_comp[n].v; ++by) {
                     for (bx=0; bx < z->img_comp[n].h; ++bx) {
                        int col = (mx*z->img_comp[n].h + bx);
                        int row = (my*z->img_comp[n].v + by);
                        short *data = z->img_comp[n].coeff + 64 * (col + row * z->img_comp[n].coeff_w);
                        if (!stbi__jpeg_decode_block_prog_dc(z, data, z->huff_dc + z->img_comp[n].hd, n))
                           return 0;
                     }
                  }
               }
               // all interleaved components done: that's one MCU
               if (!stbi__jpeg_mcu_countdown(z)) return 1;
            }
         }
         return 1;
      } else {
         // non-interleaved: one block at a time in plain scanline order.
         // The block count depends only on how many actual "pixels" the
         // component has, independent of interleaved MCU blocking.
         int bx, by;
         int n = z->order[0];
         int w = (z->img_comp[n].x+7) >> 3;
         int h = (z->img_comp[n].y+7) >> 3;
         for (by=0; by < h; ++by) {
            for (bx=0; bx < w; ++bx) {
               short *data = z->img_comp[n].coeff + 64 * (bx + by * z->img_comp[n].coeff_w);
               int ok;
               if (z->spec_start == 0) {
                  ok = stbi__jpeg_decode_block_prog_dc(z, data, z->huff_dc + z->img_comp[n].hd, n);
               } else {
                  int ha = z->img_comp[n].ha;
                  ok = stbi__jpeg_decode_block_prog_ac(z, data, z->huff_ac + ha, z->fast_ac[ha]);
               }
               if (!ok) return 0;
               // every data block is an MCU here
               if (!stbi__jpeg_mcu_countdown(z)) return 1;
            }
         }
         return 1;
      }
   } else if (z->scan_n != 1) {
      // baseline, interleaved
      int mx, my, c, bx, by;
      STBI_SIMD_ALIGN(short, data[64]);
      for (my=0; my < z->img_mcu_y; ++my) {
         for (mx=0; mx < z->img_mcu_x; ++mx) {
            for (c=0; c < z->scan_n; ++c) {
               int n = z->order[c];
               for (by=0; by < z->img_comp[n].v; ++by) {
                  for (bx=0; bx < z->img_comp[n].h; ++bx) {
                     // top-left sample of this block inside the plane
                     int px = (mx*z->img_comp[n].h + bx)*8;
                     int py = (my*z->img_comp[n].v + by)*8;
                     int ha = z->img_comp[n].ha;
                     if (!stbi__jpeg_decode_block(z, data, &z->huff_dc[z->img_comp[n].hd], &z->huff_ac[ha], z->fast_ac[ha], n, z->dequant[z->img_comp[n].tq]))
                        return 0;
                     z->idct_block_kernel(z->img_comp[n].data+z->img_comp[n].w2*py+px, z->img_comp[n].w2, data);
                  }
               }
            }
            if (!stbi__jpeg_mcu_countdown(z)) return 1;
         }
      }
      return 1;
   } else {
      // baseline, non-interleaved: see the progressive case above for how
      // the block count is chosen
      int bx, by;
      STBI_SIMD_ALIGN(short, data[64]);
      int n = z->order[0];
      int w = (z->img_comp[n].x+7) >> 3;
      int h = (z->img_comp[n].y+7) >> 3;
      for (by=0; by < h; ++by) {
         for (bx=0; bx < w; ++bx) {
            int ha = z->img_comp[n].ha;
            if (!stbi__jpeg_decode_block(z, data, &z->huff_dc[z->img_comp[n].hd], &z->huff_ac[ha], z->fast_ac[ha], n, z->dequant[z->img_comp[n].tq]))
               return 0;
            z->idct_block_kernel(z->img_comp[n].data+z->img_comp[n].w2*by*8+bx*8, z->img_comp[n].w2, data);
            if (!stbi__jpeg_mcu_countdown(z)) return 1;
         }
      }
      return 1;
   }
}
2766 
// Multiply each of the 64 coefficients in `data`, in place, by the entry at
// the same index of `dequant`. The product is stored back as a short.
static void stbi__jpeg_dequantize(short *data, stbi__uint16 *dequant)
{
   short *coef = data;
   short *stop = data + 64;
   stbi__uint16 *scale = dequant;

   while (coef != stop) {
      *coef = (short) (*coef * *scale);
      ++coef;
      ++scale;
   }
}
2773 
// Final step for progressive files (does nothing otherwise): for every
// component, dequantize each accumulated coefficient block in place and run
// it through the IDCT into the component's sample plane.
static void stbi__jpeg_finish(stbi__jpeg *z)
{
   int comp, bx, by;

   if (!z->progressive)
      return;

   for (comp=0; comp < z->s->img_n; ++comp) {
      // number of 8x8 blocks covering the component's effective size
      int blocks_w = (z->img_comp[comp].x+7) >> 3;
      int blocks_h = (z->img_comp[comp].y+7) >> 3;
      for (by=0; by < blocks_h; ++by) {
         for (bx=0; bx < blocks_w; ++bx) {
            short *blk = z->img_comp[comp].coeff + 64 * (bx + by * z->img_comp[comp].coeff_w);
            stbi__jpeg_dequantize(blk, z->dequant[z->img_comp[comp].tq]);
            z->idct_block_kernel(z->img_comp[comp].data+z->img_comp[comp].w2*by*8+bx*8, z->img_comp[comp].w2, blk);
         }
      }
   }
}
2792 
// Process one marker segment given its marker code `m`.
// Handles DRI (0xDD), DQT (0xDB), DHT (0xC4), the APP0..APP15 segments
// (0xE0..0xEF) and COM (0xFE); any other code is an error.
// Returns 1 on success and 0 on failure. Most failures record a reason
// through stbi__err; a DQT/DHT segment whose declared length does not match
// its contents fails with a bare 0 (the `return L==0` paths).
static int stbi__process_marker(stbi__jpeg *z, int m)
{
   int L;
   switch (m) {
      case STBI__MARKER_none: // no marker found
         return stbi__err("expected marker","Corrupt JPEG");

      case 0xDD: // DRI - specify restart interval
         if (stbi__get16be(z->s) != 4) return stbi__err("bad DRI len","Corrupt JPEG");
         z->restart_interval = stbi__get16be(z->s);
         return 1;

      case 0xDB: // DQT - define quantization table
         // L counts the payload bytes still unread; one segment may carry
         // several tables
         L = stbi__get16be(z->s)-2;
         while (L > 0) {
            int q = stbi__get8(z->s);
            // high nibble: entry size (0 = 8-bit, 1 = 16-bit); low: table slot
            int p = q >> 4, sixteen = (p != 0);
            int t = q & 15,i;
            if (p != 0 && p != 1) return stbi__err("bad DQT type","Corrupt JPEG");
            if (t > 3) return stbi__err("bad DQT table","Corrupt JPEG");

            // entries arrive in zigzag order; store them de-zigzagged
            for (i=0; i < 64; ++i)
               z->dequant[t][stbi__jpeg_dezigzag[i]] = (stbi__uint16)(sixteen ? stbi__get16be(z->s) : stbi__get8(z->s));
            // 1 header byte + 64 entries of 1 or 2 bytes each
            L -= (sixteen ? 129 : 65);
         }
         return L==0;

      case 0xC4: // DHT - define huffman table
         L = stbi__get16be(z->s)-2;
         while (L > 0) {
            stbi_uc *v;
            int sizes[16],i,n=0;
            int q = stbi__get8(z->s);
            // high nibble: table class (0 = DC, 1 = AC); low: table slot
            int tc = q >> 4;
            int th = q & 15;
            if (tc > 1 || th > 3) return stbi__err("bad DHT header","Corrupt JPEG");
            // 16 counts: number of codes of each length 1..16; n = total symbols
            for (i=0; i < 16; ++i) {
               sizes[i] = stbi__get8(z->s);
               n += sizes[i];
            }
            // Symbols are single bytes, so a valid table has at most 256 of
            // them. The counts come straight from the file and can sum to
            // 16*255; without this check the copy loop below would write
            // that many bytes through v.
            // NOTE(review): the capacity of the values[] array is declared
            // outside this block - confirm it is 256.
            if (n > 256) return stbi__err("bad DHT header","Corrupt JPEG");
            L -= 17;
            if (tc == 0) {
               if (!stbi__build_huffman(z->huff_dc+th, sizes)) return 0;
               v = z->huff_dc[th].values;
            } else {
               if (!stbi__build_huffman(z->huff_ac+th, sizes)) return 0;
               v = z->huff_ac[th].values;
            }
            for (i=0; i < n; ++i)
               v[i] = stbi__get8(z->s);
            if (tc != 0)
               stbi__build_fast_ac(z->fast_ac[th], z->huff_ac + th);
            L -= n;
         }
         return L==0;

      default: // everything else is handled (or rejected) below
         break;
   }

   // check for comment block or APP blocks
   if ((m >= 0xE0 && m <= 0xEF) || m == 0xFE) {
      L = stbi__get16be(z->s);
      if (L < 2) {
         if (m == 0xFE)
            return stbi__err("bad COM len","Corrupt JPEG");
         else
            return stbi__err("bad APP len","Corrupt JPEG");
      }
      L -= 2;

      // from here on L tracks the unread payload so the remainder can be
      // skipped at the end
      if (m == 0xE0 && L >= 5) { // JFIF APP0 segment
         static const unsigned char tag[5] = {'J','F','I','F','\0'};
         int ok = 1;
         int i;
         // all 5 tag bytes are consumed even after a mismatch
         for (i=0; i < 5; ++i)
            if (stbi__get8(z->s) != tag[i])
               ok = 0;
         L -= 5;
         if (ok)
            z->jfif = 1;
      } else if (m == 0xEE && L >= 12) { // Adobe APP14 segment
         static const unsigned char tag[6] = {'A','d','o','b','e','\0'};
         int ok = 1;
         int i;
         for (i=0; i < 6; ++i)
            if (stbi__get8(z->s) != tag[i])
               ok = 0;
         L -= 6;
         if (ok) {
            stbi__get8(z->s); // version
            stbi__get16be(z->s); // flags0
            stbi__get16be(z->s); // flags1
            z->app14_color_transform = stbi__get8(z->s); // color transform
            L -= 6;
         }
      }

      stbi__skip(z->s, L);
      return 1;
   }

   return stbi__err("unknown marker","Corrupt JPEG");
}
2894 
2895 // after we see SOS
stbi__process_scan_header(stbi__jpeg * z)2896 static int stbi__process_scan_header(stbi__jpeg *z)
2897 {
2898    int i;
2899    int Ls = stbi__get16be(z->s);
2900    z->scan_n = stbi__get8(z->s);
2901    if (z->scan_n < 1 || z->scan_n > 4 || z->scan_n > (int) z->s->img_n) return stbi__err("bad SOS component count","Corrupt JPEG");
2902    if (Ls != 6+2*z->scan_n) return stbi__err("bad SOS len","Corrupt JPEG");
2903    for (i=0; i < z->scan_n; ++i) {
2904       int id = stbi__get8(z->s), which;
2905       int q = stbi__get8(z->s);
2906       for (which = 0; which < z->s->img_n; ++which)
2907          if (z->img_comp[which].id == id)
2908             break;
2909       if (which == z->s->img_n) return 0; // no match
2910       z->img_comp[which].hd = q >> 4;   if (z->img_comp[which].hd > 3) return stbi__err("bad DC huff","Corrupt JPEG");
2911       z->img_comp[which].ha = q & 15;   if (z->img_comp[which].ha > 3) return stbi__err("bad AC huff","Corrupt JPEG");
2912       z->order[i] = which;
2913    }
2914 
2915    {
2916       int aa;
2917       z->spec_start = stbi__get8(z->s);
2918       z->spec_end   = stbi__get8(z->s); // should be 63, but might be 0
2919       aa = stbi__get8(z->s);
2920       z->succ_high = (aa >> 4);
2921       z->succ_low  = (aa & 15);
2922       if (z->progressive) {
2923          if (z->spec_start > 63 || z->spec_end > 63  || z->spec_start > z->spec_end || z->succ_high > 13 || z->succ_low > 13)
2924             return stbi__err("bad SOS", "Corrupt JPEG");
2925       } else {
2926          if (z->spec_start != 0) return stbi__err("bad SOS","Corrupt JPEG");
2927          if (z->succ_high != 0 || z->succ_low != 0) return stbi__err("bad SOS","Corrupt JPEG");
2928          z->spec_end = 63;
2929       }
2930    }
2931 
2932    return 1;
2933 }
2934 
// Release the buffers owned by the first `ncomp` components and clear the
// pointers that referred to them. `why` is passed through unchanged as the
// return value, so a caller can free and return a status in one statement.
static int stbi__free_jpeg_components(stbi__jpeg *z, int ncomp, int why)
{
   int c = 0;
   while (c < ncomp) {
      // sample plane: data points into raw_data, so both are cleared
      if (z->img_comp[c].raw_data != NULL) {
         STBI_FREE(z->img_comp[c].raw_data);
         z->img_comp[c].raw_data = NULL;
         z->img_comp[c].data = NULL;
      }
      // coefficient plane: same pairing of raw_coeff and coeff
      if (z->img_comp[c].raw_coeff != NULL) {
         STBI_FREE(z->img_comp[c].raw_coeff);
         z->img_comp[c].raw_coeff = NULL;
         z->img_comp[c].coeff = NULL;
      }
      if (z->img_comp[c].linebuf != NULL) {
         STBI_FREE(z->img_comp[c].linebuf);
         z->img_comp[c].linebuf = NULL;
      }
      ++c;
   }
   return why;
}
2956 
// Parse the frame header (SOF): sample precision, image height and width,
// component count and, per component, its id, H/V sampling factors and
// quantization table slot.
// Unless `scan` is STBI__SCAN_load the function stops there. Otherwise it
// also derives the MCU layout and allocates each component's sample plane
// (and, for progressive files, its coefficient plane).
// Returns 1 on success; on failure returns 0 after recording a reason, and
// an allocation failure first frees whatever was allocated so far.
static int stbi__process_frame_header(stbi__jpeg *z, int scan)
{
   stbi__context *s = z->s;
   int seg_len, precision, ncomp, hv, i;
   int h_max = 1, v_max = 1;

   seg_len = stbi__get16be(s);
   if (seg_len < 11) return stbi__err("bad SOF len","Corrupt JPEG"); // JPEG

   precision = stbi__get8(s);
   if (precision != 8) return stbi__err("only 8-bit","JPEG format not supported: 8-bit only"); // JPEG baseline

   s->img_y = stbi__get16be(s);
   // Legal, but we don't handle it--but neither does IJG
   if (s->img_y == 0) return stbi__err("no header height", "JPEG format not supported: delayed height");

   s->img_x = stbi__get16be(s);
   if (s->img_x == 0) return stbi__err("0 width","Corrupt JPEG"); // JPEG requires

   ncomp = stbi__get8(s);
   if (!(ncomp == 1 || ncomp == 3 || ncomp == 4)) return stbi__err("bad component count","Corrupt JPEG");
   s->img_n = ncomp;
   for (i=0; i < ncomp; ++i) {
      z->img_comp[i].data = NULL;
      z->img_comp[i].linebuf = NULL;
   }

   // 8 fixed bytes plus 3 bytes per component
   if (seg_len != 8+3*s->img_n) return stbi__err("bad SOF len","Corrupt JPEG");

   // z->rgb counts how many of the three component ids spell 'R','G','B'
   z->rgb = 0;
   for (i=0; i < s->img_n; ++i) {
      static const unsigned char rgb[3] = { 'R', 'G', 'B' };
      z->img_comp[i].id = stbi__get8(s);
      if (s->img_n == 3 && z->img_comp[i].id == rgb[i])
         ++z->rgb;

      hv = stbi__get8(s);
      z->img_comp[i].h = (hv >> 4);
      if (z->img_comp[i].h == 0 || z->img_comp[i].h > 4) return stbi__err("bad H","Corrupt JPEG");
      z->img_comp[i].v = hv & 15;
      if (z->img_comp[i].v == 0 || z->img_comp[i].v > 4) return stbi__err("bad V","Corrupt JPEG");

      z->img_comp[i].tq = stbi__get8(s);
      if (z->img_comp[i].tq > 3) return stbi__err("bad TQ","Corrupt JPEG");
   }

   // anything short of a full load only needed the header fields
   if (scan != STBI__SCAN_load) return 1;

   if (!stbi__mad3sizes_valid(s->img_x, s->img_y, s->img_n, 0)) return stbi__err("too large", "Image too large to decode");

   // largest sampling factors over all components
   for (i=0; i < s->img_n; ++i) {
      if (h_max < z->img_comp[i].h) h_max = z->img_comp[i].h;
      if (v_max < z->img_comp[i].v) v_max = z->img_comp[i].v;
   }

   // compute interleaved mcu info
   z->img_h_max = h_max;
   z->img_v_max = v_max;
   z->img_mcu_w = h_max * 8;
   z->img_mcu_h = v_max * 8;
   // these sizes can't be more than 17 bits
   z->img_mcu_x = (s->img_x + z->img_mcu_w-1) / z->img_mcu_w;
   z->img_mcu_y = (s->img_y + z->img_mcu_h-1) / z->img_mcu_h;

   for (i=0; i < s->img_n; ++i) {
      // number of effective pixels (e.g. for non-interleaved MCU)
      z->img_comp[i].x = (s->img_x * z->img_comp[i].h + h_max-1) / h_max;
      z->img_comp[i].y = (s->img_y * z->img_comp[i].v + v_max-1) / v_max;

      // To keep decoding simple the plane is sized for whole interleaved
      // MCUs, i.e. it includes the bogus oversized data past the image edge
      // (e.g. a 16x16 iMCU on an image of width 33); the extra data is not
      // discarded until colorspace conversion.
      //
      // img_mcu_x, img_mcu_y: <=17 bits; comp[i].h and .v are <=4 (checked earlier)
      // so these muls can't overflow with 32-bit ints (which we require)
      z->img_comp[i].w2 = z->img_mcu_x * z->img_comp[i].h * 8;
      z->img_comp[i].h2 = z->img_mcu_y * z->img_comp[i].v * 8;

      // clear everything the cleanup routine looks at before allocating
      z->img_comp[i].coeff = 0;
      z->img_comp[i].raw_coeff = 0;
      z->img_comp[i].linebuf = NULL;

      z->img_comp[i].raw_data = stbi__malloc_mad2(z->img_comp[i].w2, z->img_comp[i].h2, 15);
      if (z->img_comp[i].raw_data == NULL)
         return stbi__free_jpeg_components(z, i+1, stbi__err("outofmem", "Out of memory"));
      // align blocks for idct using mmx/sse (the 15 extra bytes requested
      // above leave room to round the pointer up to a multiple of 16)
      z->img_comp[i].data = (stbi_uc*) (((size_t) z->img_comp[i].raw_data + 15) & ~15);

      if (z->progressive) {
         // w2, h2 are multiples of 8 (see above)
         z->img_comp[i].coeff_w = z->img_comp[i].w2 / 8;
         z->img_comp[i].coeff_h = z->img_comp[i].h2 / 8;
         z->img_comp[i].raw_coeff = stbi__malloc_mad3(z->img_comp[i].w2, z->img_comp[i].h2, sizeof(short), 15);
         if (z->img_comp[i].raw_coeff == NULL)
            return stbi__free_jpeg_components(z, i+1, stbi__err("outofmem", "Out of memory"));
         z->img_comp[i].coeff = (short*) (((size_t) z->img_comp[i].raw_coeff + 15) & ~15);
      }
   }

   return 1;
}
3039 
3040 // use comparisons since in some cases we handle more than one case (e.g. SOF)
3041 #define stbi__DNL(x)         ((x) == 0xdc)
3042 #define stbi__SOI(x)         ((x) == 0xd8)
3043 #define stbi__EOI(x)         ((x) == 0xd9)
3044 #define stbi__SOF(x)         ((x) == 0xc0 || (x) == 0xc1 || (x) == 0xc2)
3045 #define stbi__SOS(x)         ((x) == 0xda)
3046 
3047 #define stbi__SOF_progressive(x)   ((x) == 0xc2)
3048 
stbi__decode_jpeg_header(stbi__jpeg * z,int scan)3049 static int stbi__decode_jpeg_header(stbi__jpeg *z, int scan)
3050 {
3051    int m;
3052    z->jfif = 0;
3053    z->app14_color_transform = -1; // valid values are 0,1,2
3054    z->marker = STBI__MARKER_none; // initialize cached marker to empty
3055    m = stbi__get_marker(z);
3056    if (!stbi__SOI(m)) return stbi__err("no SOI","Corrupt JPEG");
3057    if (scan == STBI__SCAN_type) return 1;
3058    m = stbi__get_marker(z);
3059    while (!stbi__SOF(m)) {
3060       if (!stbi__process_marker(z,m)) return 0;
3061       m = stbi__get_marker(z);
3062       while (m == STBI__MARKER_none) {
3063          // some files have extra padding after their blocks, so ok, we'll scan
3064          if (stbi__at_eof(z->s)) return stbi__err("no SOF", "Corrupt JPEG");
3065          m = stbi__get_marker(z);
3066       }
3067    }
3068    z->progressive = stbi__SOF_progressive(m);
3069    if (!stbi__process_frame_header(z, scan)) return 0;
3070    return 1;
3071 }
3072 
3073 // decode image to YCbCr format
stbi__decode_jpeg_image(stbi__jpeg * j)3074 static int stbi__decode_jpeg_image(stbi__jpeg *j)
3075 {
3076    int m;
3077    for (m = 0; m < 4; m++) {
3078       j->img_comp[m].raw_data = NULL;
3079       j->img_comp[m].raw_coeff = NULL;
3080    }
3081    j->restart_interval = 0;
3082    if (!stbi__decode_jpeg_header(j, STBI__SCAN_load)) return 0;
3083    m = stbi__get_marker(j);
3084    while (!stbi__EOI(m)) {
3085       if (stbi__SOS(m)) {
3086          if (!stbi__process_scan_header(j)) return 0;
3087          if (!stbi__parse_entropy_coded_data(j)) return 0;
3088          if (j->marker == STBI__MARKER_none ) {
3089             // handle 0s at the end of image data from IP Kamera 9060
3090             while (!stbi__at_eof(j->s)) {
3091                int x = stbi__get8(j->s);
3092                if (x == 255) {
3093                   j->marker = stbi__get8(j->s);
3094                   break;
3095                }
3096             }
3097             // if we reach eof without hitting a marker, stbi__get_marker() below will fail and we'll eventually return 0
3098          }
3099       } else if (stbi__DNL(m)) {
3100          int Ld = stbi__get16be(j->s);
3101          stbi__uint32 NL = stbi__get16be(j->s);
3102          if (Ld != 4) stbi__err("bad DNL len", "Corrupt JPEG");
3103          if (NL != j->s->img_y) stbi__err("bad DNL height", "Corrupt JPEG");
3104       } else {
3105          if (!stbi__process_marker(j, m)) return 0;
3106       }
3107       m = stbi__get_marker(j);
3108    }
3109    if (j->progressive)
3110       stbi__jpeg_finish(j);
3111    return 1;
3112 }
3113 
3114 // static jfif-centered resampling (across block boundaries)
3115 
3116 typedef stbi_uc *(*resample_row_func)(stbi_uc *out, stbi_uc *in0, stbi_uc *in1,
3117                                     int w, int hs);
3118 
3119 #define stbi__div4(x) ((stbi_uc) ((x) >> 2))
3120 
// Pass-through resampler: no expansion is needed, so the near input row is
// returned as-is and `out` is never written.
static stbi_uc *resample_row_1(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs)
{
   stbi_uc *row = in_near;
   STBI_NOTUSED(hs);
   STBI_NOTUSED(w);
   STBI_NOTUSED(in_far);
   STBI_NOTUSED(out);
   return row;
}
3129 
// Vertical 2x resampler: every output sample is a 3:1 blend of the near and
// far input rows (rounded), written to out[0..w-1]. Returns `out`.
static stbi_uc* stbi__resample_row_v_2(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs)
{
   int x = 0;
   STBI_NOTUSED(hs);
   while (x < w) {
      int blended = 3*in_near[x] + in_far[x] + 2;
      out[x] = stbi__div4(blended);
      ++x;
   }
   return out;
}
3139 
// Horizontal 2x resampler: produces two output samples per input sample of
// in_near (2*w bytes in total). Interior outputs are 3:1 blends of a sample
// and its left/right neighbour; the two outermost outputs copy the edge
// samples. in_far is not used. Returns `out`.
static stbi_uc*  stbi__resample_row_h_2(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs)
{
   stbi_uc *src = in_near;
   int x;

   STBI_NOTUSED(in_far);
   STBI_NOTUSED(hs);

   if (w == 1) {
      // a single sample cannot be interpolated, just duplicate it
      out[1] = src[0];
      out[0] = out[1];
      return out;
   }

   // left edge
   out[0] = src[0];
   out[1] = stbi__div4(src[0]*3 + src[1] + 2);

   // interior samples
   for (x = 1; x < w-1; ++x) {
      stbi_uc *dst = out + x*2;
      int center = 3*src[x] + 2;
      dst[0] = stbi__div4(center + src[x-1]);
      dst[1] = stbi__div4(center + src[x+1]);
   }

   // right edge (x is the index of the last input sample here)
   out[x*2+0] = stbi__div4(src[w-2]*3 + src[w-1] + 2);
   out[x*2+1] = src[w-1];

   return out;
}
3167 
3168 #define stbi__div16(x) ((stbi_uc) ((x) >> 4))
3169 
stbi__resample_row_hv_2(stbi_uc * out,stbi_uc * in_near,stbi_uc * in_far,int w,int hs)3170 static stbi_uc *stbi__resample_row_hv_2(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs)
3171 {
3172    // need to generate 2x2 samples for every one in input
3173    int i,t0,t1;
3174    if (w == 1) {
3175       out[0] = out[1] = stbi__div4(3*in_near[0] + in_far[0] + 2);
3176       return out;
3177    }
3178 
3179    t1 = 3*in_near[0] + in_far[0];
3180    out[0] = stbi__div4(t1+2);
3181    for (i=1; i < w; ++i) {
3182       t0 = t1;
3183       t1 = 3*in_near[i]+in_far[i];
3184       out[i*2-1] = stbi__div16(3*t0 + t1 + 8);
3185       out[i*2  ] = stbi__div16(3*t1 + t0 + 8);
3186    }
3187    out[w*2-1] = stbi__div4(t1+2);
3188 
3189    STBI_NOTUSED(hs);
3190 
3191    return out;
3192 }
3193 
3194 #if defined(STBI_SSE2) || defined(STBI_NEON)
// SIMD (SSE2 or NEON) version of stbi__resample_row_hv_2.
//   out     : receives 2*w resampled bytes
//   in_near : input row weighted 3 in the vertical blend
//   in_far  : input row weighted 1 in the vertical blend
//   w       : number of input samples
//   hs      : unused
// The vector loop consumes 8 input samples per iteration and writes 16 output
// bytes at out + i*2; each iteration also reads sample i+8, which the loop
// bound ((w-1) & ~7) keeps within the row. The remaining samples are handled
// by the same scalar arithmetic as stbi__resample_row_hv_2. Returns `out`.
stbi__resample_row_hv_2_simd(stbi_uc * out,stbi_uc * in_near,stbi_uc * in_far,int w,int hs)3195 static stbi_uc *stbi__resample_row_hv_2_simd(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs)
3196 {
3197    // need to generate 2x2 samples for every one in input
3198    int i=0,t0,t1;
3199 
3200    if (w == 1) {
3201       out[0] = out[1] = stbi__div4(3*in_near[0] + in_far[0] + 2);
3202       return out;
3203    }
3204 
3205    t1 = 3*in_near[0] + in_far[0];
3206    // process groups of 8 pixels for as long as we can.
3207    // note we can't handle the last pixel in a row in this loop
3208    // because we need to handle the filter boundary conditions.
3209    for (; i < ((w-1) & ~7); i += 8) {
3210 #if defined(STBI_SSE2)
3211       // load and perform the vertical filtering pass
3212       // this uses 3*x + y = 4*x + (y - x)
3213       __m128i zero  = _mm_setzero_si128();
3214       __m128i farb  = _mm_loadl_epi64((__m128i *) (in_far + i));
3215       __m128i nearb = _mm_loadl_epi64((__m128i *) (in_near + i));
3216       __m128i farw  = _mm_unpacklo_epi8(farb, zero);
3217       __m128i nearw = _mm_unpacklo_epi8(nearb, zero);
3218       __m128i diff  = _mm_sub_epi16(farw, nearw);
3219       __m128i nears = _mm_slli_epi16(nearw, 2);
3220       __m128i curr  = _mm_add_epi16(nears, diff); // current row
3221 
3222       // horizontal filter works the same based on shifted vers of current
3223       // row. "prev" is current row shifted right by 1 pixel; we need to
3224       // insert the previous pixel value (from t1).
3225       // "next" is current row shifted left by 1 pixel, with first pixel
3226       // of next block of 8 pixels added in.
3227       __m128i prv0 = _mm_slli_si128(curr, 2);
3228       __m128i nxt0 = _mm_srli_si128(curr, 2);
3229       __m128i prev = _mm_insert_epi16(prv0, t1, 0);
3230       __m128i next = _mm_insert_epi16(nxt0, 3*in_near[i+8] + in_far[i+8], 7);
3231 
3232       // horizontal filter, polyphase implementation since it's convenient:
3233       // even pixels = 3*cur + prev = cur*4 + (prev - cur)
3234       // odd  pixels = 3*cur + next = cur*4 + (next - cur)
3235       // note the shared term.
3236       __m128i bias  = _mm_set1_epi16(8);
3237       __m128i curs = _mm_slli_epi16(curr, 2);
3238       __m128i prvd = _mm_sub_epi16(prev, curr);
3239       __m128i nxtd = _mm_sub_epi16(next, curr);
3240       __m128i curb = _mm_add_epi16(curs, bias);
3241       __m128i even = _mm_add_epi16(prvd, curb);
3242       __m128i odd  = _mm_add_epi16(nxtd, curb);
3243 
3244       // interleave even and odd pixels, then undo scaling.
3245       __m128i int0 = _mm_unpacklo_epi16(even, odd);
3246       __m128i int1 = _mm_unpackhi_epi16(even, odd);
3247       __m128i de0  = _mm_srli_epi16(int0, 4);
3248       __m128i de1  = _mm_srli_epi16(int1, 4);
3249 
3250       // pack and write output
3251       __m128i outv = _mm_packus_epi16(de0, de1);
3252       _mm_storeu_si128((__m128i *) (out + i*2), outv);
3253 #elif defined(STBI_NEON)
3254       // load and perform the vertical filtering pass
3255       // this uses 3*x + y = 4*x + (y - x)
3256       uint8x8_t farb  = vld1_u8(in_far + i);
3257       uint8x8_t nearb = vld1_u8(in_near + i);
3258       int16x8_t diff  = vreinterpretq_s16_u16(vsubl_u8(farb, nearb));
3259       int16x8_t nears = vreinterpretq_s16_u16(vshll_n_u8(nearb, 2));
3260       int16x8_t curr  = vaddq_s16(nears, diff); // current row
3261 
3262       // horizontal filter works the same based on shifted vers of current
3263       // row. "prev" is current row shifted right by 1 pixel; we need to
3264       // insert the previous pixel value (from t1).
3265       // "next" is current row shifted left by 1 pixel, with first pixel
3266       // of next block of 8 pixels added in.
3267       int16x8_t prv0 = vextq_s16(curr, curr, 7);
3268       int16x8_t nxt0 = vextq_s16(curr, curr, 1);
3269       int16x8_t prev = vsetq_lane_s16(t1, prv0, 0);
3270       int16x8_t next = vsetq_lane_s16(3*in_near[i+8] + in_far[i+8], nxt0, 7);
3271 
3272       // horizontal filter, polyphase implementation since it's convenient:
3273       // even pixels = 3*cur + prev = cur*4 + (prev - cur)
3274       // odd  pixels = 3*cur + next = cur*4 + (next - cur)
3275       // note the shared term.
3276       int16x8_t curs = vshlq_n_s16(curr, 2);
3277       int16x8_t prvd = vsubq_s16(prev, curr);
3278       int16x8_t nxtd = vsubq_s16(next, curr);
3279       int16x8_t even = vaddq_s16(curs, prvd);
3280       int16x8_t odd  = vaddq_s16(curs, nxtd);
3281 
3282       // undo scaling and round, then store with even/odd phases interleaved
3283       uint8x8x2_t o;
3284       o.val[0] = vqrshrun_n_s16(even, 4);
3285       o.val[1] = vqrshrun_n_s16(odd,  4);
3286       vst2_u8(out + i*2, o);
3287 #endif
3288 
3289       // "previous" value for next iter
3290       t1 = 3*in_near[i+7] + in_far[i+7];
3291    }
3292 
// Scalar hand-off: t1 holds the vertical blend of column i-1 (or of column 0
// when the vector loop did not run, in which case the expression below
// reduces to div4(t1+2), the left-edge value). Only the even output of
// column i is written here; its odd partner out[i*2-1] was either produced by
// the vector loop or does not exist (i == 0).
3293    t0 = t1;
3294    t1 = 3*in_near[i] + in_far[i];
3295    out[i*2] = stbi__div16(3*t1 + t0 + 8);
3296 
// remaining columns: same loop body as stbi__resample_row_hv_2
3297    for (++i; i < w; ++i) {
3298       t0 = t1;
3299       t1 = 3*in_near[i]+in_far[i];
3300       out[i*2-1] = stbi__div16(3*t0 + t1 + 8);
3301       out[i*2  ] = stbi__div16(3*t1 + t0 + 8);
3302    }
// right edge: last output uses only the last column's vertical blend
3303    out[w*2-1] = stbi__div4(t1+2);
3304 
3305    STBI_NOTUSED(hs);
3306 
3307    return out;
3308 }
3309 #endif
3310 
// Fallback resampler for any other horizontal factor: nearest-neighbour,
// i.e. each sample of in_near is repeated `hs` times in `out` (w*hs bytes).
// in_far is not used. Returns `out`.
static stbi_uc *stbi__resample_row_generic(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs)
{
   stbi_uc *dst = out;
   int x, rep;
   STBI_NOTUSED(in_far);
   for (x = 0; x < w; ++x) {
      for (rep = 0; rep < hs; ++rep) {
         *dst = in_near[x];
         ++dst;
      }
   }
   return out;
}
3321 
3322 // this is a reduced-precision calculation of YCbCr-to-RGB introduced
3323 // to make sure the code produces the same results in both SIMD and scalar
3324 #define stbi__float2fixed(x)  (((int) ((x) * 4096.0f + 0.5f)) << 8)
stbi__YCbCr_to_RGB_row(stbi_uc * out,const stbi_uc * y,const stbi_uc * pcb,const stbi_uc * pcr,int count,int step)3325 static void stbi__YCbCr_to_RGB_row(stbi_uc *out, const stbi_uc *y, const stbi_uc *pcb, const stbi_uc *pcr, int count, int step)
3326 {
3327    int i;
3328    for (i=0; i < count; ++i) {
3329       int y_fixed = (y[i] << 20) + (1<<19); // rounding
3330       int r,g,b;
3331       int cr = pcr[i] - 128;
3332       int cb = pcb[i] - 128;
3333       r = y_fixed +  cr* stbi__float2fixed(1.40200f);
3334       g = y_fixed + (cr*-stbi__float2fixed(0.71414f)) + ((cb*-stbi__float2fixed(0.34414f)) & 0xffff0000);
3335       b = y_fixed                                     +   cb* stbi__float2fixed(1.77200f);
3336       r >>= 20;
3337       g >>= 20;
3338       b >>= 20;
3339       if ((unsigned) r > 255) { if (r < 0) r = 0; else r = 255; }
3340       if ((unsigned) g > 255) { if (g < 0) g = 0; else g = 255; }
3341       if ((unsigned) b > 255) { if (b < 0) b = 0; else b = 255; }
3342       out[0] = (stbi_uc)r;
3343       out[1] = (stbi_uc)g;
3344       out[2] = (stbi_uc)b;
3345       out[3] = 255;
3346       out += step;
3347    }
3348 }
3349 
3350 #if defined(STBI_SSE2) || defined(STBI_NEON)
// SIMD (SSE2 or NEON) YCbCr -> RGB conversion with the same signature as
// stbi__YCbCr_to_RGB_row.
//   out       : destination, `step` bytes per pixel
//   y/pcb/pcr : one sample per pixel for each plane
//   count     : number of pixels
// The vector paths run only when step == 4 and convert 8 pixels (32 output
// bytes) per iteration; whatever is left over, and every pixel when
// step != 4, goes through the scalar loop at the end, which repeats the
// arithmetic of stbi__YCbCr_to_RGB_row.
stbi__YCbCr_to_RGB_simd(stbi_uc * out,stbi_uc const * y,stbi_uc const * pcb,stbi_uc const * pcr,int count,int step)3351 static void stbi__YCbCr_to_RGB_simd(stbi_uc *out, stbi_uc const *y, stbi_uc const *pcb, stbi_uc const *pcr, int count, int step)
3352 {
3353    int i = 0;
3354 
3355 #ifdef STBI_SSE2
3356    // step == 3 is pretty ugly on the final interleave, and i'm not convinced
3357    // it's useful in practice (you wouldn't use it for textures, for example).
3358    // so just accelerate step == 4 case.
3359    if (step == 4) {
3360       // this is a fairly straightforward implementation and not super-optimized.
3361       __m128i signflip  = _mm_set1_epi8(-0x80);
3362       __m128i cr_const0 = _mm_set1_epi16(   (short) ( 1.40200f*4096.0f+0.5f)); // NOLINT
3363       __m128i cr_const1 = _mm_set1_epi16( - (short) ( 0.71414f*4096.0f+0.5f)); // NOLINT
3364       __m128i cb_const0 = _mm_set1_epi16( - (short) ( 0.34414f*4096.0f+0.5f)); // NOLINT
3365       __m128i cb_const1 = _mm_set1_epi16(   (short) ( 1.77200f*4096.0f+0.5f)); // NOLINT
3366       __m128i y_bias = _mm_set1_epi8((char) (unsigned char) 128);
3367       __m128i xw = _mm_set1_epi16(255); // alpha channel
3368 
3369       for (; i+7 < count; i += 8) {
3370          // load
3371          __m128i y_bytes = _mm_loadl_epi64((__m128i *) (y+i));
3372          __m128i cr_bytes = _mm_loadl_epi64((__m128i *) (pcr+i));
3373          __m128i cb_bytes = _mm_loadl_epi64((__m128i *) (pcb+i));
3374          __m128i cr_biased = _mm_xor_si128(cr_bytes, signflip); // -128
3375          __m128i cb_biased = _mm_xor_si128(cb_bytes, signflip); // -128
3376 
3377          // unpack to short (and left-shift cr, cb by 8)
3378          __m128i yw  = _mm_unpacklo_epi8(y_bias, y_bytes);
3379          __m128i crw = _mm_unpacklo_epi8(_mm_setzero_si128(), cr_biased);
3380          __m128i cbw = _mm_unpacklo_epi8(_mm_setzero_si128(), cb_biased);
3381 
3382          // color transform
3383          __m128i yws = _mm_srli_epi16(yw, 4);
3384          __m128i cr0 = _mm_mulhi_epi16(cr_const0, crw);
3385          __m128i cb0 = _mm_mulhi_epi16(cb_const0, cbw);
3386          __m128i cb1 = _mm_mulhi_epi16(cbw, cb_const1);
3387          __m128i cr1 = _mm_mulhi_epi16(crw, cr_const1);
3388          __m128i rws = _mm_add_epi16(cr0, yws);
3389          __m128i gwt = _mm_add_epi16(cb0, yws);
3390          __m128i bws = _mm_add_epi16(yws, cb1);
3391          __m128i gws = _mm_add_epi16(gwt, cr1);
3392 
3393          // descale
3394          __m128i rw = _mm_srai_epi16(rws, 4);
3395          __m128i bw = _mm_srai_epi16(bws, 4);
3396          __m128i gw = _mm_srai_epi16(gws, 4);
3397 
3398          // back to byte, set up for transpose
3399          __m128i brb = _mm_packus_epi16(rw, bw);
3400          __m128i gxb = _mm_packus_epi16(gw, xw);
3401 
3402          // transpose to interleave channels
3403          __m128i t0 = _mm_unpacklo_epi8(brb, gxb);
3404          __m128i t1 = _mm_unpackhi_epi8(brb, gxb);
3405          __m128i o0 = _mm_unpacklo_epi16(t0, t1);
3406          __m128i o1 = _mm_unpackhi_epi16(t0, t1);
3407 
3408          // store
3409          _mm_storeu_si128((__m128i *) (out + 0), o0);
3410          _mm_storeu_si128((__m128i *) (out + 16), o1);
3411          out += 32;
3412       }
3413    }
3414 #endif
3415 
3416 #ifdef STBI_NEON
3417    // in this version, step=3 support would be easy to add. but is there demand?
3418    if (step == 4) {
3419       // this is a fairly straightforward implementation and not super-optimized.
3420       uint8x8_t signflip = vdup_n_u8(0x80);
3421       int16x8_t cr_const0 = vdupq_n_s16(   (short) ( 1.40200f*4096.0f+0.5f));
3422       int16x8_t cr_const1 = vdupq_n_s16( - (short) ( 0.71414f*4096.0f+0.5f));
3423       int16x8_t cb_const0 = vdupq_n_s16( - (short) ( 0.34414f*4096.0f+0.5f));
3424       int16x8_t cb_const1 = vdupq_n_s16(   (short) ( 1.77200f*4096.0f+0.5f));
3425 
3426       for (; i+7 < count; i += 8) {
3427          // load
3428          uint8x8_t y_bytes  = vld1_u8(y + i);
3429          uint8x8_t cr_bytes = vld1_u8(pcr + i);
3430          uint8x8_t cb_bytes = vld1_u8(pcb + i);
3431          int8x8_t cr_biased = vreinterpret_s8_u8(vsub_u8(cr_bytes, signflip));
3432          int8x8_t cb_biased = vreinterpret_s8_u8(vsub_u8(cb_bytes, signflip));
3433 
3434          // expand to s16
3435          int16x8_t yws = vreinterpretq_s16_u16(vshll_n_u8(y_bytes, 4));
3436          int16x8_t crw = vshll_n_s8(cr_biased, 7);
3437          int16x8_t cbw = vshll_n_s8(cb_biased, 7);
3438 
3439          // color transform
3440          int16x8_t cr0 = vqdmulhq_s16(crw, cr_const0);
3441          int16x8_t cb0 = vqdmulhq_s16(cbw, cb_const0);
3442          int16x8_t cr1 = vqdmulhq_s16(crw, cr_const1);
3443          int16x8_t cb1 = vqdmulhq_s16(cbw, cb_const1);
3444          int16x8_t rws = vaddq_s16(yws, cr0);
3445          int16x8_t gws = vaddq_s16(vaddq_s16(yws, cb0), cr1);
3446          int16x8_t bws = vaddq_s16(yws, cb1);
3447 
3448          // undo scaling, round, convert to byte
3449          uint8x8x4_t o;
3450          o.val[0] = vqrshrun_n_s16(rws, 4);
3451          o.val[1] = vqrshrun_n_s16(gws, 4);
3452          o.val[2] = vqrshrun_n_s16(bws, 4);
3453          o.val[3] = vdup_n_u8(255);
3454 
3455          // store, interleaving r/g/b/a
3456          vst4_u8(out, o);
3457          out += 8*4;
3458       }
3459    }
3460 #endif
3461 
// scalar path: pixels not covered by a vector loop above (all of them when
// step != 4). Note that out[3] is written even when step is 3.
3462    for (; i < count; ++i) {
3463       int y_fixed = (y[i] << 20) + (1<<19); // rounding
3464       int r,g,b;
3465       int cr = pcr[i] - 128;
3466       int cb = pcb[i] - 128;
3467       r = y_fixed + cr* stbi__float2fixed(1.40200f);
3468       g = y_fixed + cr*-stbi__float2fixed(0.71414f) + ((cb*-stbi__float2fixed(0.34414f)) & 0xffff0000);
3469       b = y_fixed                                   +   cb* stbi__float2fixed(1.77200f);
3470       r >>= 20;
3471       g >>= 20;
3472       b >>= 20;
// clamp to 0..255: the unsigned compare catches both negative and >255 values
3473       if ((unsigned) r > 255) { if (r < 0) r = 0; else r = 255; }
3474       if ((unsigned) g > 255) { if (g < 0) g = 0; else g = 255; }
3475       if ((unsigned) b > 255) { if (b < 0) b = 0; else b = 255; }
3476       out[0] = (stbi_uc)r;
3477       out[1] = (stbi_uc)g;
3478       out[2] = (stbi_uc)b;
3479       out[3] = 255;
3480       out += step;
3481    }
3482 }
3483 #endif
3484 
3485 // set up the kernels
stbi__setup_jpeg(stbi__jpeg * j)3486 static void stbi__setup_jpeg(stbi__jpeg *j)
3487 {
3488    j->idct_block_kernel = stbi__idct_block;
3489    j->YCbCr_to_RGB_kernel = stbi__YCbCr_to_RGB_row;
3490    j->resample_row_hv_2_kernel = stbi__resample_row_hv_2;
3491 
3492 #ifdef STBI_SSE2
3493    if (stbi__sse2_available()) {
3494       j->idct_block_kernel = stbi__idct_simd;
3495       j->YCbCr_to_RGB_kernel = stbi__YCbCr_to_RGB_simd;
3496       j->resample_row_hv_2_kernel = stbi__resample_row_hv_2_simd;
3497    }
3498 #endif
3499 
3500 #ifdef STBI_NEON
3501    j->idct_block_kernel = stbi__idct_simd;
3502    j->YCbCr_to_RGB_kernel = stbi__YCbCr_to_RGB_simd;
3503    j->resample_row_hv_2_kernel = stbi__resample_row_hv_2_simd;
3504 #endif
3505 }
3506 
3507 // clean up the temporary component buffers
stbi__cleanup_jpeg(stbi__jpeg * j)3508 static void stbi__cleanup_jpeg(stbi__jpeg *j)
3509 {
3510    stbi__free_jpeg_components(j, j->s->img_n, 0);
3511 }
3512 
3513 typedef struct
3514 {
3515    resample_row_func resample;
3516    stbi_uc *line0,*line1;
3517    int hs,vs;   // expansion factor in each axis
3518    int w_lores; // horizontal pixels pre-expansion
3519    int ystep;   // how far through vertical expansion we are
3520    int ypos;    // which pre-expansion row we're on
3521 } stbi__resample;
3522 
3523 // fast 0..255 * 0..255 => 0..255 rounded multiplication
stbi__blinn_8x8(stbi_uc x,stbi_uc y)3524 static stbi_uc stbi__blinn_8x8(stbi_uc x, stbi_uc y)
3525 {
3526    unsigned int t = x*y + 128;
3527    return (stbi_uc) ((t + (t >>8)) >> 8);
3528 }
3529 
// Decode a complete JPEG from z->s and return it as 8-bit interleaved pixels.
//   out_x, out_y : receive the image width and height on success
//   comp         : if non-NULL, receives 3 or 1 according to the file's
//                  component count (not the number of channels returned)
//   req_comp     : 0 to pick 3 or 1 channels from the file, else 1..4
// Returns the pixel buffer on success; NULL (or the value of stbi__errpuc)
// on failure. The temporary component buffers are released through
// stbi__cleanup_jpeg on every path past the req_comp check.
load_jpeg_image(stbi__jpeg * z,int * out_x,int * out_y,int * comp,int req_comp)3530 static stbi_uc *load_jpeg_image(stbi__jpeg *z, int *out_x, int *out_y, int *comp, int req_comp)
3531 {
3532    int n, decode_n, is_rgb;
3533    z->s->img_n = 0; // make stbi__cleanup_jpeg safe
3534 
3535    // validate req_comp
3536    if (req_comp < 0 || req_comp > 4) return stbi__errpuc("bad req_comp", "Internal error");
3537 
3538    // load a jpeg image from whichever source, but leave in YCbCr format
3539    if (!stbi__decode_jpeg_image(z)) { stbi__cleanup_jpeg(z); return NULL; }
3540 
3541    // determine actual number of components to generate
3542    n = req_comp ? req_comp : z->s->img_n >= 3 ? 3 : 1;
3543 
// is_rgb: a 3-component file whose planes are copied through as R,G,B
// instead of being run through the YCbCr conversion kernel
3544    is_rgb = z->s->img_n == 3 && (z->rgb == 3 || (z->app14_color_transform == 0 && !z->jfif));
3545 
// decode_n: how many components get resampled. Grey output from a
// 3-component non-RGB file only needs the first component.
3546    if (z->s->img_n == 3 && n < 3 && !is_rgb)
3547       decode_n = 1;
3548    else
3549       decode_n = z->s->img_n;
3550 
3551    // resample and color-convert
3552    {
3553       int k;
3554       unsigned int i,j;
3555       stbi_uc *output;
3556       stbi_uc *coutput[4];
3557 
3558       stbi__resample res_comp[4];
3559 
3560       for (k=0; k < decode_n; ++k) {
3561          stbi__resample *r = &res_comp[k];
3562 
3563          // allocate line buffer big enough for upsampling off the edges
3564          // with upsample factor of 4
3565          z->img_comp[k].linebuf = (stbi_uc *) stbi__malloc(z->s->img_x + 3);
3566          if (!z->img_comp[k].linebuf) { stbi__cleanup_jpeg(z); return stbi__errpuc("outofmem", "Out of memory"); }
3567 
3568          r->hs      = z->img_h_max / z->img_comp[k].h;
3569          r->vs      = z->img_v_max / z->img_comp[k].v;
3570          r->ystep   = r->vs >> 1;
3571          r->w_lores = (z->s->img_x + r->hs-1) / r->hs;
3572          r->ypos    = 0;
3573          r->line0   = r->line1 = z->img_comp[k].data;
3574 
// pick the resampler that matches this component's expansion factors
3575          if      (r->hs == 1 && r->vs == 1) r->resample = resample_row_1;
3576          else if (r->hs == 1 && r->vs == 2) r->resample = stbi__resample_row_v_2;
3577          else if (r->hs == 2 && r->vs == 1) r->resample = stbi__resample_row_h_2;
3578          else if (r->hs == 2 && r->vs == 2) r->resample = z->resample_row_hv_2_kernel;
3579          else                               r->resample = stbi__resample_row_generic;
3580       }
3581 
3582       // can't error after this so, this is safe
// NOTE(review): the trailing 1 presumably asks for one extra byte beyond
// n*img_x*img_y, covering the out[3]/out[1] writes on the final pixel when
// n is 3 or 1 -- confirm against stbi__malloc_mad3
3583       output = (stbi_uc *) stbi__malloc_mad3(n, z->s->img_x, z->s->img_y, 1);
3584       if (!output) { stbi__cleanup_jpeg(z); return stbi__errpuc("outofmem", "Out of memory"); }
3585 
3586       // now go ahead and resample
3587       for (j=0; j < z->s->img_y; ++j) {
3588          stbi_uc *out = output + n * z->s->img_x * j;
3589          for (k=0; k < decode_n; ++k) {
3590             stbi__resample *r = &res_comp[k];
// y_bot decides which of line0/line1 is passed as the "near" row
3591             int y_bot = r->ystep >= (r->vs >> 1);
3592             coutput[k] = r->resample(z->img_comp[k].linebuf,
3593                                      y_bot ? r->line1 : r->line0,
3594                                      y_bot ? r->line0 : r->line1,
3595                                      r->w_lores, r->hs);
// after vs output rows, slide the source row pair down by one; line1
// stops advancing once the component's last row has been reached
3596             if (++r->ystep >= r->vs) {
3597                r->ystep = 0;
3598                r->line0 = r->line1;
3599                if (++r->ypos < z->img_comp[k].y)
3600                   r->line1 += z->img_comp[k].w2;
3601             }
3602          }
// colour output (3 or 4 channels per pixel)
3603          if (n >= 3) {
3604             stbi_uc *y = coutput[0];
3605             if (z->s->img_n == 3) {
3606                if (is_rgb) {
3607                   for (i=0; i < z->s->img_x; ++i) {
3608                      out[0] = y[i];
3609                      out[1] = coutput[1][i];
3610                      out[2] = coutput[2][i];
3611                      out[3] = 255;
3612                      out += n;
3613                   }
3614                } else {
3615                   z->YCbCr_to_RGB_kernel(out, y, coutput[1], coutput[2], z->s->img_x, n);
3616                }
3617             } else if (z->s->img_n == 4) {
3618                if (z->app14_color_transform == 0) { // CMYK
3619                   for (i=0; i < z->s->img_x; ++i) {
3620                      stbi_uc m = coutput[3][i];
3621                      out[0] = stbi__blinn_8x8(coutput[0][i], m);
3622                      out[1] = stbi__blinn_8x8(coutput[1][i], m);
3623                      out[2] = stbi__blinn_8x8(coutput[2][i], m);
3624                      out[3] = 255;
3625                      out += n;
3626                   }
3627                } else if (z->app14_color_transform == 2) { // YCCK
// convert the first three planes in place, then invert each channel and
// scale it by the fourth plane
3628                   z->YCbCr_to_RGB_kernel(out, y, coutput[1], coutput[2], z->s->img_x, n);
3629                   for (i=0; i < z->s->img_x; ++i) {
3630                      stbi_uc m = coutput[3][i];
3631                      out[0] = stbi__blinn_8x8(255 - out[0], m);
3632                      out[1] = stbi__blinn_8x8(255 - out[1], m);
3633                      out[2] = stbi__blinn_8x8(255 - out[2], m);
3634                      out += n;
3635                   }
3636                } else { // YCbCr + alpha?  Ignore the fourth channel for now
3637                   z->YCbCr_to_RGB_kernel(out, y, coutput[1], coutput[2], z->s->img_x, n);
3638                }
3639             } else
// single-component file: replicate grey into R, G and B
3640                for (i=0; i < z->s->img_x; ++i) {
3641                   out[0] = out[1] = out[2] = y[i];
3642                   out[3] = 255; // not used if n==3
3643                   out += n;
3644                }
3645          } else {
// grey output (1 or 2 channels per pixel)
3646             if (is_rgb) {
3647                if (n == 1)
3648                   for (i=0; i < z->s->img_x; ++i)
3649                      *out++ = stbi__compute_y(coutput[0][i], coutput[1][i], coutput[2][i]);
3650                else {
3651                   for (i=0; i < z->s->img_x; ++i, out += 2) {
3652                      out[0] = stbi__compute_y(coutput[0][i], coutput[1][i], coutput[2][i]);
3653                      out[1] = 255;
3654                   }
3655                }
3656             } else if (z->s->img_n == 4 && z->app14_color_transform == 0) {
3657                for (i=0; i < z->s->img_x; ++i) {
3658                   stbi_uc m = coutput[3][i];
3659                   stbi_uc r = stbi__blinn_8x8(coutput[0][i], m);
3660                   stbi_uc g = stbi__blinn_8x8(coutput[1][i], m);
3661                   stbi_uc b = stbi__blinn_8x8(coutput[2][i], m);
3662                   out[0] = stbi__compute_y(r, g, b);
// when n == 1 this byte is overwritten by the next pixel's out[0]
3663                   out[1] = 255;
3664                   out += n;
3665                }
3666             } else if (z->s->img_n == 4 && z->app14_color_transform == 2) {
3667                for (i=0; i < z->s->img_x; ++i) {
3668                   out[0] = stbi__blinn_8x8(255 - coutput[0][i], coutput[3][i]);
3669                   out[1] = 255;
3670                   out += n;
3671                }
3672             } else {
3673                stbi_uc *y = coutput[0];
3674                if (n == 1)
3675                   for (i=0; i < z->s->img_x; ++i) out[i] = y[i];
3676                else
3677                   for (i=0; i < z->s->img_x; ++i) *out++ = y[i], *out++ = 255;
3678             }
3679          }
3680       }
3681       stbi__cleanup_jpeg(z);
3682       *out_x = z->s->img_x;
3683       *out_y = z->s->img_y;
3684       if (comp) *comp = z->s->img_n >= 3 ? 3 : 1; // report original components, not output
3685       return output;
3686    }
3687 }
3688 
// Loader entry point for JPEG: allocates a temporary decoder state, installs
// the kernels, decodes through load_jpeg_image and frees the state again.
//   s        : input context
//   x, y     : receive the image dimensions
//   comp     : optionally receives the file's component count (3 or 1)
//   req_comp : requested channel count (0 = decide from the file)
//   ri       : unused
// Returns the pixel buffer from load_jpeg_image; if the decoder state cannot
// be allocated, returns the value of stbi__errpuc("outofmem", ...).
static void *stbi__jpeg_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri)
{
   unsigned char* result;
   stbi__jpeg* j = (stbi__jpeg*) stbi__malloc(sizeof(stbi__jpeg));
   STBI_NOTUSED(ri);
   // the allocation can fail; do not dereference a NULL state
   if (!j) return stbi__errpuc("outofmem", "Out of memory");
   j->s = s;
   stbi__setup_jpeg(j);
   result = load_jpeg_image(j, x,y,comp,req_comp);
   STBI_FREE(j);
   return result;
}
3700 
// Format probe: checks whether the stream in `s` starts like a JPEG by running
// the header parser in STBI__SCAN_type mode, then rewinds `s`.
// Returns the parser's result (non-zero if the stream opens with SOI); if the
// temporary decoder state cannot be allocated, returns the value of
// stbi__err("outofmem", ...) without touching the stream.
static int stbi__jpeg_test(stbi__context *s)
{
   int r;
   stbi__jpeg* j = (stbi__jpeg*)stbi__malloc(sizeof(stbi__jpeg));
   // the allocation can fail; do not dereference a NULL state
   if (!j) return stbi__err("outofmem", "Out of memory");
   j->s = s;
   stbi__setup_jpeg(j);
   r = stbi__decode_jpeg_header(j, STBI__SCAN_type);
   stbi__rewind(s);
   STBI_FREE(j);
   return r;
}
3712 
// Read just the JPEG header (STBI__SCAN_header) and report the image
// properties through whichever of x, y, comp are non-NULL. `comp` is 3 for
// files with three or more components and 1 otherwise.
// Returns 1 on success; on failure rewinds the stream and returns 0.
static int stbi__jpeg_info_raw(stbi__jpeg *j, int *x, int *y, int *comp)
{
   if (stbi__decode_jpeg_header(j, STBI__SCAN_header)) {
      if (x != NULL)
         *x = j->s->img_x;
      if (y != NULL)
         *y = j->s->img_y;
      if (comp != NULL)
         *comp = (j->s->img_n >= 3) ? 3 : 1;
      return 1;
   }

   stbi__rewind( j->s );
   return 0;
}
3724 
// Info entry point for JPEG: allocates a temporary decoder state, lets
// stbi__jpeg_info_raw fill in x / y / comp, and frees the state again.
// Returns the result of stbi__jpeg_info_raw; if the state cannot be
// allocated, returns the value of stbi__err("outofmem", ...).
static int stbi__jpeg_info(stbi__context *s, int *x, int *y, int *comp)
{
   int result;
   stbi__jpeg* j = (stbi__jpeg*) (stbi__malloc(sizeof(stbi__jpeg)));
   // the allocation can fail; do not dereference a NULL state
   if (!j) return stbi__err("outofmem", "Out of memory");
   j->s = s;
   result = stbi__jpeg_info_raw(j, x, y, comp);
   STBI_FREE(j);
   return result;
}
3734 #endif
3735 
3736 // public domain zlib decode    v0.2  Sean Barrett 2006-11-18
3737 //    simple implementation
3738 //      - all input must be provided in an upfront buffer
3739 //      - all output is written to a single output buffer (can malloc/realloc)
3740 //    performance
3741 //      - fast huffman
3742 
3743 #ifndef STBI_NO_ZLIB
3744 
// fast-way is faster to check than jpeg huffman, but slow way is slower
//
// STBI__ZFAST_BITS: number of low input bits used to index the `fast` lookup
//                   table (9 accelerates all cases in default tables)
// STBI__ZFAST_MASK: mask that extracts those bits from the bit buffer
#define STBI__ZFAST_BITS  9
#define STBI__ZFAST_MASK  ((1 << STBI__ZFAST_BITS) - 1)
3748 
3749 // zlib-style huffman encoding
3750 // (jpegs packs from left, zlib from right, so can't share code)
3751 typedef struct
3752 {
3753    stbi__uint16 fast[1 << STBI__ZFAST_BITS];
3754    stbi__uint16 firstcode[16];
3755    int maxcode[17];
3756    stbi__uint16 firstsymbol[16];
3757    stbi_uc  size[288];
3758    stbi__uint16 value[288];
3759 } stbi__zhuffman;
3760 
stbi__bitreverse16(int n)3761 stbi_inline static int stbi__bitreverse16(int n)
3762 {
3763   n = ((n & 0xAAAA) >>  1) | ((n & 0x5555) << 1);
3764   n = ((n & 0xCCCC) >>  2) | ((n & 0x3333) << 2);
3765   n = ((n & 0xF0F0) >>  4) | ((n & 0x0F0F) << 4);
3766   n = ((n & 0xFF00) >>  8) | ((n & 0x00FF) << 8);
3767   return n;
3768 }
3769 
stbi__bit_reverse(int v,int bits)3770 stbi_inline static int stbi__bit_reverse(int v, int bits)
3771 {
3772    STBI_ASSERT(bits <= 16);
3773    // to bit reverse n bits, reverse 16 and shift
3774    // e.g. 11 bits, bit reverse and shift away 5
3775    return stbi__bitreverse16(v) >> (16-bits);
3776 }
3777 
// Build the decoding tables in `z` from a list of code lengths.
//   sizelist : code length of each symbol (0 = symbol unused)
//   num      : number of entries in sizelist
// Returns 1 on success; on an impossible set of lengths returns the value of
// stbi__err().
static int stbi__zbuild_huffman(stbi__zhuffman *z, const stbi_uc *sizelist, int num)
{
   int len, sym;
   int code = 0;
   int symbols_so_far = 0;
   int next_code[16], sizes[17];

   // DEFLATE spec for generating codes
   memset(z->fast, 0, sizeof(z->fast));
   memset(sizes, 0, sizeof(sizes));

   // histogram of code lengths; length 0 does not count
   for (sym = 0; sym < num; ++sym)
      sizes[sizelist[sym]] += 1;
   sizes[0] = 0;

   for (len = 1; len < 16; ++len) {
      if (sizes[len] > (1 << len))
         return stbi__err("bad sizes", "Corrupt PNG");
   }

   // assign the first code / first symbol slot of every length
   for (len = 1; len < 16; ++len) {
      int count = sizes[len];
      next_code[len] = code;
      z->firstcode[len] = (stbi__uint16) code;
      z->firstsymbol[len] = (stbi__uint16) symbols_so_far;
      code += count;
      if (count != 0 && code-1 >= (1 << len))
         return stbi__err("bad codelengths","Corrupt PNG");
      z->maxcode[len] = code << (16-len); // preshift for inner loop
      code <<= 1;
      symbols_so_far += count;
   }
   z->maxcode[16] = 0x10000; // sentinel

   // store every used symbol and, for short codes, fill the fast table
   for (sym = 0; sym < num; ++sym) {
      int s = sizelist[sym];
      int slot, idx;
      stbi__uint16 fastv;

      if (s == 0)
         continue;

      slot  = next_code[s] - z->firstcode[s] + z->firstsymbol[s];
      fastv = (stbi__uint16) ((s << 9) | sym);
      z->size [slot] = (stbi_uc     ) s;
      z->value[slot] = (stbi__uint16) sym;

      if (s <= STBI__ZFAST_BITS) {
         // every table index whose low s bits equal the reversed code
         for (idx = stbi__bit_reverse(next_code[s], s);
              idx < (1 << STBI__ZFAST_BITS);
              idx += (1 << s))
            z->fast[idx] = fastv;
      }
      ++next_code[s];
   }
   return 1;
}
3824 
3825 // zlib-from-memory implementation for PNG reading
3826 //    because PNG allows splitting the zlib stream arbitrarily,
3827 //    and it's annoying structurally to have PNG call ZLIB call PNG,
3828 //    we require PNG read all the IDATs and combine them into a single
3829 //    memory buffer
3830 
3831 typedef struct
3832 {
3833    stbi_uc *zbuffer, *zbuffer_end;
3834    int num_bits;
3835    stbi__uint32 code_buffer;
3836 
3837    char *zout;
3838    char *zout_start;
3839    char *zout_end;
3840    int   z_expandable;
3841 
3842    stbi__zhuffman z_length, z_distance;
3843 } stbi__zbuf;
3844 
stbi__zget8(stbi__zbuf * z)3845 stbi_inline static stbi_uc stbi__zget8(stbi__zbuf *z)
3846 {
3847    if (z->zbuffer >= z->zbuffer_end) return 0;
3848    return *z->zbuffer++;
3849 }
3850 
// Top up the bit reservoir one input byte at a time until it holds more
// than 24 bits. New bytes are placed above the bits already present.
static void stbi__fill_bits(stbi__zbuf *z)
{
   for (;;) {
      unsigned int next_byte;
      STBI_ASSERT(z->code_buffer < (1U << z->num_bits));
      next_byte = stbi__zget8(z);
      z->code_buffer |= next_byte << z->num_bits;
      z->num_bits += 8;
      if (z->num_bits > 24)
         break;
   }
}
3859 
stbi__zreceive(stbi__zbuf * z,int n)3860 stbi_inline static unsigned int stbi__zreceive(stbi__zbuf *z, int n)
3861 {
3862    unsigned int k;
3863    if (z->num_bits < n) stbi__fill_bits(z);
3864    k = z->code_buffer & ((1 << n) - 1);
3865    z->code_buffer >>= n;
3866    z->num_bits -= n;
3867    return k;
3868 }
3869 
// Decode one symbol whose code was not resolved by the fast table.
// Returns the symbol value, or -1 if the bits do not correspond to a valid
// code (callers treat a negative result as a corrupt stream).
static int stbi__zhuffman_decode_slowpath(stbi__zbuf *a, stbi__zhuffman *z)
{
   int b,s,k;
   // not resolved by fast table, so compute it the slow way
   // use jpeg approach, which requires MSbits at top
   k = stbi__bit_reverse(a->code_buffer, 16);
   // maxcode[16] is a sentinel larger than any 16-bit k, so this terminates
   for (s=STBI__ZFAST_BITS+1; ; ++s)
      if (k < z->maxcode[s])
         break;
   if (s >= 16) return -1; // invalid code!
   // code size is s, so:
   b = (k >> (16-s)) - z->firstcode[s] + z->firstsymbol[s];
   // The tables come from untrusted data: with a damaged/incomplete code the
   // computed slot can fall outside the symbol table or belong to a code of a
   // different length. Report that as a decode failure instead of asserting.
   // NOTE(review): relies on stbi__zhuffman.size being an array member so that
   // sizeof yields its element count -- confirm against the struct definition.
   if (b < 0 || b >= (int) (sizeof(z->size) / sizeof(z->size[0]))) return -1;
   if (z->size[b] != s) return -1;
   a->code_buffer >>= s;
   a->num_bits -= s;
   return z->value[b];
}
3887 
stbi__zhuffman_decode(stbi__zbuf * a,stbi__zhuffman * z)3888 stbi_inline static int stbi__zhuffman_decode(stbi__zbuf *a, stbi__zhuffman *z)
3889 {
3890    int b,s;
3891    if (a->num_bits < 16) stbi__fill_bits(a);
3892    b = z->fast[a->code_buffer & STBI__ZFAST_MASK];
3893    if (b) {
3894       s = b >> 9;
3895       a->code_buffer >>= s;
3896       a->num_bits -= s;
3897       return b & 511;
3898    }
3899    return stbi__zhuffman_decode_slowpath(a, z);
3900 }
3901 
// Grow the output buffer so that at least n more bytes fit after `zout`.
// `zout` is the caller's current write position; it is stored into z->zout
// first, and z->zout/z->zout_start/z->zout_end are updated to the
// reallocated buffer on success.
// Returns 1 on success. Returns the result of stbi__err() if the buffer is
// not expandable, the required size cannot be represented, or the
// reallocation fails (the old buffer is left untouched in that case).
static int stbi__zexpand(stbi__zbuf *z, char *zout, int n)  // need to make room for n bytes
{
   char *q;
   unsigned int cur, limit, old_limit;
   const unsigned int umax = ~0u;
   z->zout = zout;
   if (!z->z_expandable) return stbi__err("output buffer limit","Corrupt PNG");
   cur   = (unsigned int) (z->zout     - z->zout_start);
   limit = old_limit = (unsigned int) (z->zout_end - z->zout_start);
   // cur + n must not wrap, otherwise the growth loop below would be meaningless
   if (n < 0 || umax - cur < (unsigned int) n) return stbi__err("outofmem", "Out of memory");
   while (cur + (unsigned int) n > limit) {
      if (limit > umax / 2) return stbi__err("outofmem", "Out of memory");
      // doubling 0 never makes progress, so a zero-sized buffer starts at 1
      limit = (limit == 0) ? 1 : limit * 2;
   }
   q = (char *) STBI_REALLOC_SIZED(z->zout_start, old_limit, limit);
   STBI_NOTUSED(old_limit);
   if (q == NULL) return stbi__err("outofmem", "Out of memory");
   z->zout_start = q;
   z->zout       = q + cur;
   z->zout_end   = q + limit;
   return 1;
}
3920 
// Lookup tables for DEFLATE length and distance symbols. These are never
// written, so they are const. Entries with base 0 belong to symbols that
// have no base value assigned.

// Base match length for length symbol (symbol - 257).
static const int stbi__zlength_base[31] = {
   3,4,5,6,7,8,9,10,11,13,
   15,17,19,23,27,31,35,43,51,59,
   67,83,99,115,131,163,195,227,258,0,0 };

// Number of extra bits to read and add to the base length.
static const int stbi__zlength_extra[31]=
{ 0,0,0,0,0,0,0,0,1,1,1,1,2,2,2,2,3,3,3,3,4,4,4,4,5,5,5,5,0,0,0 };

// Base match distance for each distance symbol.
static const int stbi__zdist_base[32] = { 1,2,3,4,5,7,9,13,17,25,33,49,65,97,129,193,
257,385,513,769,1025,1537,2049,3073,4097,6145,8193,12289,16385,24577,0,0};

// Number of extra bits to read and add to the base distance
// (the last two entries were implicitly zero; now written out).
static const int stbi__zdist_extra[32] =
{ 0,0,0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,8,8,9,9,10,10,11,11,12,12,13,13,0,0};
3934 
// Decode one Huffman-coded block (using a->z_length / a->z_distance) into
// the output buffer, growing it through stbi__zexpand when needed.
// Returns 1 when the end-of-block symbol (256) is reached, with a->zout
// updated to the new write position; returns 0 / the result of stbi__err()
// on corrupt data or allocation failure.
static int stbi__parse_huffman_block(stbi__zbuf *a)
{
   char *zout = a->zout;
   for(;;) {
      int z = stbi__zhuffman_decode(a, &a->z_length);
      if (z < 256) {
         if (z < 0) return stbi__err("bad huffman code","Corrupt PNG"); // error in huffman codes
         if (zout >= a->zout_end) {
            if (!stbi__zexpand(a, zout, 1)) return 0;
            zout = a->zout;
         }
         *zout++ = (char) z;
      } else {
         stbi_uc *p;
         int len,dist;
         if (z == 256) {
            a->zout = zout;
            return 1;
         }
         // per DEFLATE, length codes 286 and 287 must not appear in compressed data
         if (z >= 286) return stbi__err("bad huffman code","Corrupt PNG");
         z -= 257;
         len = stbi__zlength_base[z];
         if (stbi__zlength_extra[z]) len += stbi__zreceive(a, stbi__zlength_extra[z]);
         z = stbi__zhuffman_decode(a, &a->z_distance);
         // per DEFLATE, distance codes 30 and 31 must not appear in compressed data;
         // their table base is 0, which would make the copy below read bytes that
         // have not been written yet
         if (z < 0 || z >= 30) return stbi__err("bad huffman code","Corrupt PNG");
         dist = stbi__zdist_base[z];
         if (stbi__zdist_extra[z]) dist += stbi__zreceive(a, stbi__zdist_extra[z]);
         if (zout - a->zout_start < dist) return stbi__err("bad dist","Corrupt PNG");
         // compare against the remaining room rather than forming zout+len,
         // which could point past the end of the buffer
         if (len > a->zout_end - zout) {
            if (!stbi__zexpand(a, zout, len)) return 0;
            zout = a->zout;
         }
         p = (stbi_uc *) (zout - dist);
         if (dist == 1) { // run of one byte; common in images.
            stbi_uc v = *p;
            if (len) { do *zout++ = v; while (--len); }
         } else {
            // byte-by-byte on purpose: source and destination may overlap
            if (len) { do *zout++ = *p++; while (--len); }
         }
      }
   }
}
3976 
// Read the header of a dynamic-Huffman block: the three counts, the
// code-length code, and then the run-length-encoded code lengths for the
// literal/length and distance alphabets. Builds a->z_length and
// a->z_distance from them.
// Returns 1 on success, 0 / the result of stbi__err() on corrupt data.
static int stbi__compute_huffman_codes(stbi__zbuf *a)
{
   static stbi_uc length_dezigzag[19] = { 16,17,18,0,8,7,9,6,10,5,11,4,12,3,13,2,14,1,15 };
   stbi__zhuffman z_codelength;
   stbi_uc lencodes[286+32+137];//padding for maximum single op
   stbi_uc codelength_sizes[19];
   int idx, filled;

   int hlit  = stbi__zreceive(a,5) + 257;
   int hdist = stbi__zreceive(a,5) + 1;
   int hclen = stbi__zreceive(a,4) + 4;
   int ntot  = hlit + hdist;

   // code lengths of the code-length alphabet arrive in a permuted order
   memset(codelength_sizes, 0, sizeof(codelength_sizes));
   for (idx = 0; idx < hclen; ++idx)
      codelength_sizes[length_dezigzag[idx]] = (stbi_uc) stbi__zreceive(a,3);
   if (!stbi__zbuild_huffman(&z_codelength, codelength_sizes, 19)) return 0;

   filled = 0;
   while (filled < ntot) {
      int run;
      stbi_uc fill;
      int c = stbi__zhuffman_decode(a, &z_codelength);
      if (c < 0 || c >= 19) return stbi__err("bad codelengths", "Corrupt PNG");
      if (c < 16) {
         // literal code length
         lencodes[filled++] = (stbi_uc) c;
         continue;
      }
      fill = 0;
      switch (c) {
         case 16: // repeat the previous length 3..6 times
            run = stbi__zreceive(a,2)+3;
            if (filled == 0) return stbi__err("bad codelengths", "Corrupt PNG");
            fill = lencodes[filled-1];
            break;
         case 17: // 3..10 zeros
            run = stbi__zreceive(a,3)+3;
            break;
         default: // 11..138 zeros
            STBI_ASSERT(c == 18);
            run = stbi__zreceive(a,7)+11;
            break;
      }
      if (ntot - filled < run) return stbi__err("bad codelengths", "Corrupt PNG");
      memset(lencodes+filled, fill, run);
      filled += run;
   }
   if (filled != ntot) return stbi__err("bad codelengths","Corrupt PNG");
   if (!stbi__zbuild_huffman(&a->z_length, lencodes, hlit)) return 0;
   if (!stbi__zbuild_huffman(&a->z_distance, lencodes+hlit, hdist)) return 0;
   return 1;
}
4025 
// Copy one stored (uncompressed) block to the output.
// The 4-byte LEN/NLEN header is assembled from whatever whole bytes are
// still sitting in the bit reservoir, then from the input buffer.
// Returns 1 on success, 0 / the result of stbi__err() on corrupt data or
// if the output cannot be grown.
static int stbi__parse_uncompressed_block(stbi__zbuf *a)
{
   stbi_uc header[4];
   int len, nlen, got;
   int partial = a->num_bits & 7;
   if (partial)
      stbi__zreceive(a, partial); // discard bits up to the next byte boundary
   // drain the bit-packed data into header
   for (got = 0; a->num_bits > 0; a->num_bits -= 8) {
      header[got++] = (stbi_uc) (a->code_buffer & 255); // suppress MSVC run-time check
      a->code_buffer >>= 8;
   }
   STBI_ASSERT(a->num_bits == 0);
   // now fill header the normal way
   for (; got < 4; ++got)
      header[got] = stbi__zget8(a);
   len  = header[1] * 256 + header[0];
   nlen = header[3] * 256 + header[2];
   // NLEN must be the one's complement of LEN
   if ((len ^ 0xffff) != nlen) return stbi__err("zlib corrupt","Corrupt PNG");
   if (a->zbuffer + len > a->zbuffer_end) return stbi__err("read past buffer","Corrupt PNG");
   if (a->zout + len > a->zout_end) {
      if (!stbi__zexpand(a, a->zout, len)) return 0;
   }
   memcpy(a->zout, a->zbuffer, len);
   a->zout    += len;
   a->zbuffer += len;
   return 1;
}
4054 
// Read and validate the two-byte zlib stream header (CMF, FLG).
// Returns 1 if acceptable, otherwise the result of stbi__err().
static int stbi__parse_zlib_header(stbi__zbuf *a)
{
   int cmf = stbi__zget8(a);
   int flg = stbi__zget8(a);
   /* int cinfo = cmf >> 4; */
   // zlib spec: the 16-bit big-endian header value is a multiple of 31
   if (((cmf << 8) + flg) % 31 != 0) return stbi__err("bad zlib header","Corrupt PNG");
   // preset dictionary not allowed in png
   if ((flg & 32) != 0) return stbi__err("no preset dict","Corrupt PNG");
   // DEFLATE (compression method 8, low nibble of CMF) required for png
   if ((cmf & 15) != 8) return stbi__err("bad compression","Corrupt PNG");
   // window = 1 << (8 + cinfo)... but who cares, we fully buffer output
   return 1;
}
4067 
4068 static const stbi_uc stbi__zdefault_length[288] =
4069 {
4070    8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
4071    8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
4072    8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
4073    8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
4074    8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,
4075    9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,
4076    9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,
4077    9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,
4078    7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, 7,7,7,7,7,7,7,7,8,8,8,8,8,8,8,8
4079 };
4080 static const stbi_uc stbi__zdefault_distance[32] =
4081 {
4082    5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5
4083 };
4084 /*
4085 Init algorithm:
4086 {
4087    int i;   // use <= to match clearly with spec
4088    for (i=0; i <= 143; ++i)     stbi__zdefault_length[i]   = 8;
4089    for (   ; i <= 255; ++i)     stbi__zdefault_length[i]   = 9;
4090    for (   ; i <= 279; ++i)     stbi__zdefault_length[i]   = 7;
4091    for (   ; i <= 287; ++i)     stbi__zdefault_length[i]   = 8;
4092 
4093    for (i=0; i <=  31; ++i)     stbi__zdefault_distance[i] = 5;
4094 }
4095 */
4096 
// Inflate the whole stream: optionally validate the zlib header, then
// decode blocks until one with the "final" bit set has been processed.
// Returns 1 on success, 0 on any failure.
static int stbi__parse_zlib(stbi__zbuf *a, int parse_header)
{
   int final, type;
   if (parse_header && !stbi__parse_zlib_header(a))
      return 0;
   a->num_bits = 0;
   a->code_buffer = 0;
   do {
      final = stbi__zreceive(a,1);
      type = stbi__zreceive(a,2);
      switch (type) {
         case 0: // stored block
            if (!stbi__parse_uncompressed_block(a)) return 0;
            break;
         case 3: // not a valid block type
            return 0;
         case 1:
            // use fixed code lengths
            if (!stbi__zbuild_huffman(&a->z_length  , stbi__zdefault_length  , 288)) return 0;
            if (!stbi__zbuild_huffman(&a->z_distance, stbi__zdefault_distance,  32)) return 0;
            if (!stbi__parse_huffman_block(a)) return 0;
            break;
         default: // type 2: code lengths are transmitted in the block
            if (!stbi__compute_huffman_codes(a)) return 0;
            if (!stbi__parse_huffman_block(a)) return 0;
            break;
      }
   } while (!final);
   return 1;
}
4124 
// Point the decoder at output buffer obuf (olen bytes), record whether it
// may be grown (exp), and run the inflate. The caller must already have set
// a->zbuffer / a->zbuffer_end. Returns the result of stbi__parse_zlib.
static int stbi__do_zlib(stbi__zbuf *a, char *obuf, int olen, int exp, int parse_header)
{
   a->z_expandable = exp;
   a->zout_start   = obuf;
   a->zout_end     = obuf + olen;
   a->zout         = a->zout_start;

   return stbi__parse_zlib(a, parse_header);
}
4134 
stbi_zlib_decode_malloc_guesssize(const char * buffer,int len,int initial_size,int * outlen)4135 STBIDEF char *stbi_zlib_decode_malloc_guesssize(const char *buffer, int len, int initial_size, int *outlen)
4136 {
4137    stbi__zbuf a;
4138    char *p = (char *) stbi__malloc(initial_size);
4139    if (p == NULL) return NULL;
4140    a.zbuffer = (stbi_uc *) buffer;
4141    a.zbuffer_end = (stbi_uc *) buffer + len;
4142    if (stbi__do_zlib(&a, p, initial_size, 1, 1)) {
4143       if (outlen) *outlen = (int) (a.zout - a.zout_start);
4144       return a.zout_start;
4145    } else {
4146       STBI_FREE(a.zout_start);
4147       return NULL;
4148    }
4149 }
4150 
stbi_zlib_decode_malloc(char const * buffer,int len,int * outlen)4151 STBIDEF char *stbi_zlib_decode_malloc(char const *buffer, int len, int *outlen)
4152 {
4153    return stbi_zlib_decode_malloc_guesssize(buffer, len, 16384, outlen);
4154 }
4155 
stbi_zlib_decode_malloc_guesssize_headerflag(const char * buffer,int len,int initial_size,int * outlen,int parse_header)4156 STBIDEF char *stbi_zlib_decode_malloc_guesssize_headerflag(const char *buffer, int len, int initial_size, int *outlen, int parse_header)
4157 {
4158    stbi__zbuf a;
4159    char *p = (char *) stbi__malloc(initial_size);
4160    if (p == NULL) return NULL;
4161    a.zbuffer = (stbi_uc *) buffer;
4162    a.zbuffer_end = (stbi_uc *) buffer + len;
4163    if (stbi__do_zlib(&a, p, initial_size, 1, parse_header)) {
4164       if (outlen) *outlen = (int) (a.zout - a.zout_start);
4165       return a.zout_start;
4166    } else {
4167       STBI_FREE(a.zout_start);
4168       return NULL;
4169    }
4170 }
4171 
stbi_zlib_decode_buffer(char * obuffer,int olen,char const * ibuffer,int ilen)4172 STBIDEF int stbi_zlib_decode_buffer(char *obuffer, int olen, char const *ibuffer, int ilen)
4173 {
4174    stbi__zbuf a;
4175    a.zbuffer = (stbi_uc *) ibuffer;
4176    a.zbuffer_end = (stbi_uc *) ibuffer + ilen;
4177    if (stbi__do_zlib(&a, obuffer, olen, 0, 1))
4178       return (int) (a.zout - a.zout_start);
4179    else
4180       return -1;
4181 }
4182 
stbi_zlib_decode_noheader_malloc(char const * buffer,int len,int * outlen)4183 STBIDEF char *stbi_zlib_decode_noheader_malloc(char const *buffer, int len, int *outlen)
4184 {
4185    stbi__zbuf a;
4186    char *p = (char *) stbi__malloc(16384);
4187    if (p == NULL) return NULL;
4188    a.zbuffer = (stbi_uc *) buffer;
4189    a.zbuffer_end = (stbi_uc *) buffer+len;
4190    if (stbi__do_zlib(&a, p, 16384, 1, 0)) {
4191       if (outlen) *outlen = (int) (a.zout - a.zout_start);
4192       return a.zout_start;
4193    } else {
4194       STBI_FREE(a.zout_start);
4195       return NULL;
4196    }
4197 }
4198 
stbi_zlib_decode_noheader_buffer(char * obuffer,int olen,const char * ibuffer,int ilen)4199 STBIDEF int stbi_zlib_decode_noheader_buffer(char *obuffer, int olen, const char *ibuffer, int ilen)
4200 {
4201    stbi__zbuf a;
4202    a.zbuffer = (stbi_uc *) ibuffer;
4203    a.zbuffer_end = (stbi_uc *) ibuffer + ilen;
4204    if (stbi__do_zlib(&a, obuffer, olen, 0, 0))
4205       return (int) (a.zout - a.zout_start);
4206    else
4207       return -1;
4208 }
4209 #endif
4210 
4211 // public domain "baseline" PNG decoder   v0.10  Sean Barrett 2006-11-18
4212 //    simple implementation
4213 //      - only 8-bit samples
4214 //      - no CRC checking
4215 //      - allocates lots of intermediate memory
4216 //        - avoids problem of streaming data between subsystems
4217 //        - avoids explicit window management
4218 //    performance
4219 //      - uses stb_zlib, a PD zlib implementation with fast huffman decoding
4220 
4221 #ifndef STBI_NO_PNG
4222 typedef struct
4223 {
4224    stbi__uint32 length;
4225    stbi__uint32 type;
4226 } stbi__pngchunk;
4227 
stbi__get_chunk_header(stbi__context * s)4228 static stbi__pngchunk stbi__get_chunk_header(stbi__context *s)
4229 {
4230    stbi__pngchunk c;
4231    c.length = stbi__get32be(s);
4232    c.type   = stbi__get32be(s);
4233    return c;
4234 }
4235 
// Consume up to 8 bytes from s and compare them with the PNG signature.
// Returns 1 on a full match; stops at the first mismatching byte and
// returns the result of stbi__err().
static int stbi__check_png_header(stbi__context *s)
{
   static stbi_uc png_sig[8] = { 137,80,78,71,13,10,26,10 };
   int pos = 0;
   while (pos < 8) {
      if (stbi__get8(s) != png_sig[pos])
         return stbi__err("bad png sig","Not a PNG");
      ++pos;
   }
   return 1;
}
4244 
4245 typedef struct
4246 {
4247    stbi__context *s;
4248    stbi_uc *idata, *expanded, *out;
4249    int depth;
4250 } stbi__png;
4251 
4252 
// Row filter identifiers. Values 0..4 are the filter-type bytes found at
// the start of each scanline; the last two exist only inside the decoder.
enum {
   STBI__F_none        = 0,
   STBI__F_sub         = 1,
   STBI__F_up          = 2,
   STBI__F_avg         = 3,
   STBI__F_paeth       = 4,
   // synthetic filters used for first scanline to avoid needing a dummy row of 0s
   STBI__F_avg_first   = 5,
   STBI__F_paeth_first = 6
};
4263 
4264 static stbi_uc first_row_filter[5] =
4265 {
4266    STBI__F_none,
4267    STBI__F_sub,
4268    STBI__F_none,
4269    STBI__F_avg_first,
4270    STBI__F_paeth_first
4271 };
4272 
// Paeth predictor: of a, b and c, return the one closest to a + b - c.
// Ties are resolved in the order a, then b, then c.
static int stbi__paeth(int a, int b, int c)
{
   int estimate = a + b - c;
   int dist_a = abs(estimate - a);
   int dist_b = abs(estimate - b);
   int dist_c = abs(estimate - c);
   if (dist_a <= dist_b && dist_a <= dist_c)
      return a;
   return (dist_b <= dist_c) ? b : c;
}
4283 
4284 static stbi_uc stbi__depth_scale_table[9] = { 0, 0xff, 0x55, 0, 0x11, 0,0,0, 0x01 };
4285 
4286 // create the png data from post-deflated data
// Decodes one image (or one interlace pass) of x by y pixels from `raw`
// (raw_len bytes of inflated data: per row, one filter-type byte followed by
// the packed samples) into a freshly allocated a->out with out_n channels.
//   - out_n is s->img_n or s->img_n+1; in the latter case the extra channel
//     is filled with 255 (both bytes for 16-bit samples).
//   - depth 1/2/4 rows are filtered in place at the right end of each output
//     row and then unpacked to one byte per sample; samples are multiplied by
//     stbi__depth_scale_table[depth] when color == 0.
//   - depth 16 samples are converted from big-endian to native order.
// Returns 1 on success, or the result of stbi__err() on failure; a->out may
// be left allocated when an error is returned after the allocation.
static int stbi__create_png_image_raw(stbi__png *a, stbi_uc *raw, stbi__uint32 raw_len, int out_n, stbi__uint32 x, stbi__uint32 y, int depth, int color)
{
   int bytes = (depth == 16? 2 : 1);
   stbi__context *s = a->s;
   stbi__uint32 i,j,stride = x*out_n*bytes;
   stbi__uint32 img_len, img_width_bytes;
   int k;
   int img_n = s->img_n; // copy it into a local for later

   int output_bytes = out_n*bytes;
   int filter_bytes = img_n*bytes;
   int width = x;

   STBI_ASSERT(out_n == s->img_n || out_n == s->img_n+1);
   a->out = (stbi_uc *) stbi__malloc_mad3(x, y, output_bytes, 0); // extra bytes to write off the end into
   if (!a->out) return stbi__err("outofmem", "Out of memory");

   img_width_bytes = (((img_n * x * depth) + 7) >> 3);
   img_len = (img_width_bytes + 1) * y;
   // we used to check for exact match between raw_len and img_len on non-interlaced PNGs,
   // but issue #276 reported a PNG in the wild that had extra data at the end (all zeros),
   // so just check for raw_len < img_len always.
   if (raw_len < img_len) return stbi__err("not enough pixels","Corrupt PNG");

   for (j=0; j < y; ++j) {
      stbi_uc *cur = a->out + stride*j;
      stbi_uc *prior;
      int filter = *raw++;

      if (filter > 4)
         return stbi__err("invalid filter","Corrupt PNG");

      if (depth < 8) {
         // The packed row must fit inside x bytes for the in-place layout used
         // below. This depends on values taken from the file (e.g. several
         // channels at a sub-byte depth), so report it as corrupt data rather
         // than asserting.
         if (img_width_bytes > x) return stbi__err("invalid width","Corrupt PNG");
         cur += x*out_n - img_width_bytes; // store output to the rightmost img_len bytes, so we can decode in place
         filter_bytes = 1;
         width = img_width_bytes;
      }
      prior = cur - stride; // bugfix: need to compute this after 'cur +=' computation above

      // if first row, use special filter that doesn't sample previous row
      if (j == 0) filter = first_row_filter[filter];

      // handle first byte explicitly
      for (k=0; k < filter_bytes; ++k) {
         switch (filter) {
            case STBI__F_none       : cur[k] = raw[k]; break;
            case STBI__F_sub        : cur[k] = raw[k]; break;
            case STBI__F_up         : cur[k] = STBI__BYTECAST(raw[k] + prior[k]); break;
            case STBI__F_avg        : cur[k] = STBI__BYTECAST(raw[k] + (prior[k]>>1)); break;
            case STBI__F_paeth      : cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(0,prior[k],0)); break;
            case STBI__F_avg_first  : cur[k] = raw[k]; break;
            case STBI__F_paeth_first: cur[k] = raw[k]; break;
         }
      }

      if (depth == 8) {
         if (img_n != out_n)
            cur[img_n] = 255; // first pixel
         raw += img_n;
         cur += out_n;
         prior += out_n;
      } else if (depth == 16) {
         if (img_n != out_n) {
            cur[filter_bytes]   = 255; // first pixel top byte
            cur[filter_bytes+1] = 255; // first pixel bottom byte
         }
         raw += filter_bytes;
         cur += output_bytes;
         prior += output_bytes;
      } else {
         raw += 1;
         cur += 1;
         prior += 1;
      }

      // this is a little gross, so that we don't switch per-pixel or per-component
      if (depth < 8 || img_n == out_n) {
         int nk = (width - 1)*filter_bytes;
         #define STBI__CASE(f) \
             case f:     \
                for (k=0; k < nk; ++k)
         switch (filter) {
            // "none" filter turns into a memcpy here; make that explicit.
            case STBI__F_none:         memcpy(cur, raw, nk); break;
            STBI__CASE(STBI__F_sub)          { cur[k] = STBI__BYTECAST(raw[k] + cur[k-filter_bytes]); } break;
            STBI__CASE(STBI__F_up)           { cur[k] = STBI__BYTECAST(raw[k] + prior[k]); } break;
            STBI__CASE(STBI__F_avg)          { cur[k] = STBI__BYTECAST(raw[k] + ((prior[k] + cur[k-filter_bytes])>>1)); } break;
            STBI__CASE(STBI__F_paeth)        { cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(cur[k-filter_bytes],prior[k],prior[k-filter_bytes])); } break;
            STBI__CASE(STBI__F_avg_first)    { cur[k] = STBI__BYTECAST(raw[k] + (cur[k-filter_bytes] >> 1)); } break;
            STBI__CASE(STBI__F_paeth_first)  { cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(cur[k-filter_bytes],0,0)); } break;
         }
         #undef STBI__CASE
         raw += nk;
      } else {
         STBI_ASSERT(img_n+1 == out_n);
         #define STBI__CASE(f) \
             case f:     \
                for (i=x-1; i >= 1; --i, cur[filter_bytes]=255,raw+=filter_bytes,cur+=output_bytes,prior+=output_bytes) \
                   for (k=0; k < filter_bytes; ++k)
         switch (filter) {
            STBI__CASE(STBI__F_none)         { cur[k] = raw[k]; } break;
            STBI__CASE(STBI__F_sub)          { cur[k] = STBI__BYTECAST(raw[k] + cur[k- output_bytes]); } break;
            STBI__CASE(STBI__F_up)           { cur[k] = STBI__BYTECAST(raw[k] + prior[k]); } break;
            STBI__CASE(STBI__F_avg)          { cur[k] = STBI__BYTECAST(raw[k] + ((prior[k] + cur[k- output_bytes])>>1)); } break;
            STBI__CASE(STBI__F_paeth)        { cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(cur[k- output_bytes],prior[k],prior[k- output_bytes])); } break;
            STBI__CASE(STBI__F_avg_first)    { cur[k] = STBI__BYTECAST(raw[k] + (cur[k- output_bytes] >> 1)); } break;
            STBI__CASE(STBI__F_paeth_first)  { cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(cur[k- output_bytes],0,0)); } break;
         }
         #undef STBI__CASE

         // the loop above sets the high byte of the pixels' alpha, but for
         // 16 bit png files we also need the low byte set. we'll do that here.
         if (depth == 16) {
            cur = a->out + stride*j; // start at the beginning of the row again
            for (i=0; i < x; ++i,cur+=output_bytes) {
               cur[filter_bytes+1] = 255;
            }
         }
      }
   }

   // we make a separate pass to expand bits to pixels; for performance,
   // this could run two scanlines behind the above code, so it won't
   // interfere with filtering but will still be in the cache.
   if (depth < 8) {
      for (j=0; j < y; ++j) {
         stbi_uc *cur = a->out + stride*j;
         stbi_uc *in  = a->out + stride*j + x*out_n - img_width_bytes;
         // unpack 1/2/4-bit into a 8-bit buffer. allows us to keep the common 8-bit path optimal at minimal cost for 1/2/4-bit
         // png guarantees byte alignment; if width is not a multiple of 8/4/2 we'll decode dummy trailing data that will be skipped in the later loop
         stbi_uc scale = (color == 0) ? stbi__depth_scale_table[depth] : 1; // scale grayscale values to 0..255 range

         // note that the final byte might overshoot and write more data than desired.
         // we can allocate enough data that this never writes out of memory, but it
         // could also overwrite the next scanline. can it overwrite non-empty data
         // on the next scanline? yes, consider 1-pixel-wide scanlines with 1-bit-per-pixel.
         // so we need to explicitly clamp the final ones

         if (depth == 4) {
            for (k=x*img_n; k >= 2; k-=2, ++in) {
               *cur++ = scale * ((*in >> 4)       );
               *cur++ = scale * ((*in     ) & 0x0f);
            }
            if (k > 0) *cur++ = scale * ((*in >> 4)       );
         } else if (depth == 2) {
            for (k=x*img_n; k >= 4; k-=4, ++in) {
               *cur++ = scale * ((*in >> 6)       );
               *cur++ = scale * ((*in >> 4) & 0x03);
               *cur++ = scale * ((*in >> 2) & 0x03);
               *cur++ = scale * ((*in     ) & 0x03);
            }
            if (k > 0) *cur++ = scale * ((*in >> 6)       );
            if (k > 1) *cur++ = scale * ((*in >> 4) & 0x03);
            if (k > 2) *cur++ = scale * ((*in >> 2) & 0x03);
         } else if (depth == 1) {
            for (k=x*img_n; k >= 8; k-=8, ++in) {
               *cur++ = scale * ((*in >> 7)       );
               *cur++ = scale * ((*in >> 6) & 0x01);
               *cur++ = scale * ((*in >> 5) & 0x01);
               *cur++ = scale * ((*in >> 4) & 0x01);
               *cur++ = scale * ((*in >> 3) & 0x01);
               *cur++ = scale * ((*in >> 2) & 0x01);
               *cur++ = scale * ((*in >> 1) & 0x01);
               *cur++ = scale * ((*in     ) & 0x01);
            }
            if (k > 0) *cur++ = scale * ((*in >> 7)       );
            if (k > 1) *cur++ = scale * ((*in >> 6) & 0x01);
            if (k > 2) *cur++ = scale * ((*in >> 5) & 0x01);
            if (k > 3) *cur++ = scale * ((*in >> 4) & 0x01);
            if (k > 4) *cur++ = scale * ((*in >> 3) & 0x01);
            if (k > 5) *cur++ = scale * ((*in >> 2) & 0x01);
            if (k > 6) *cur++ = scale * ((*in >> 1) & 0x01);
         }
         if (img_n != out_n) {
            int q;
            // insert alpha = 255
            // (walk backwards so the in-place expansion never overwrites unread samples)
            cur = a->out + stride*j;
            if (img_n == 1) {
               for (q=x-1; q >= 0; --q) {
                  cur[q*2+1] = 255;
                  cur[q*2+0] = cur[q];
               }
            } else {
               STBI_ASSERT(img_n == 3);
               for (q=x-1; q >= 0; --q) {
                  cur[q*4+3] = 255;
                  cur[q*4+2] = cur[q*3+2];
                  cur[q*4+1] = cur[q*3+1];
                  cur[q*4+0] = cur[q*3+0];
               }
            }
         }
      }
   } else if (depth == 16) {
      // force the image data from big-endian to platform-native.
      // this is done in a separate pass due to the decoding relying
      // on the data being untouched, but could probably be done
      // per-line during decode if care is taken.
      stbi_uc *cur = a->out;
      stbi__uint16 *cur16 = (stbi__uint16*)cur;

      for(i=0; i < x*y*out_n; ++i,cur16++,cur+=2) {
         *cur16 = (cur[0] << 8) | cur[1];
      }
   }

   return 1;
}
4496 
// Build the final pixel buffer a->out from inflated image data.
// Non-interlaced images are decoded directly by stbi__create_png_image_raw.
// Interlaced images are decoded one pass at a time (7 passes) and each
// pass's pixels are scattered into a full-size buffer, which becomes a->out.
// Returns 1 on success, 0 / the result of stbi__err() on failure.
static int stbi__create_png_image(stbi__png *a, stbi_uc *image_data, stbi__uint32 image_data_len, int out_n, int depth, int color, int interlaced)
{
   int bytes = (depth == 16 ? 2 : 1);
   int out_bytes = out_n * bytes;
   stbi_uc *final;
   int p;
   if (!interlaced)
      return stbi__create_png_image_raw(a, image_data, image_data_len, out_n, a->s->img_x, a->s->img_y, depth, color);

   // de-interlacing
   final = (stbi_uc *) stbi__malloc_mad3(a->s->img_x, a->s->img_y, out_bytes, 0);
   // the passes below memcpy into `final`, so an allocation failure must be caught here
   if (!final) return stbi__err("outofmem", "Out of memory");
   for (p=0; p < 7; ++p) {
      // origin and spacing, in full-image pixels, of the samples carried by each pass
      int xorig[] = { 0,4,0,2,0,1,0 };
      int yorig[] = { 0,0,4,0,2,0,1 };
      int xspc[]  = { 8,8,4,4,2,2,1 };
      int yspc[]  = { 8,8,8,4,4,2,2 };
      int i,j,x,y;
      // pass1_x[4] = 0, pass1_x[5] = 1, pass1_x[12] = 1
      x = (a->s->img_x - xorig[p] + xspc[p]-1) / xspc[p];
      y = (a->s->img_y - yorig[p] + yspc[p]-1) / yspc[p];
      if (x && y) {
         // bytes this pass occupies in image_data: (filter byte + packed row) per row
         stbi__uint32 img_len = ((((a->s->img_n * x * depth) + 7) >> 3) + 1) * y;
         if (!stbi__create_png_image_raw(a, image_data, image_data_len, out_n, x, y, depth, color)) {
            STBI_FREE(final);
            return 0;
         }
         for (j=0; j < y; ++j) {
            for (i=0; i < x; ++i) {
               int out_y = j*yspc[p]+yorig[p];
               int out_x = i*xspc[p]+xorig[p];
               memcpy(final + out_y*a->s->img_x*out_bytes + out_x*out_bytes,
                      a->out + (j*x+i)*out_bytes, out_bytes);
            }
         }
         STBI_FREE(a->out);
         image_data += img_len;
         image_data_len -= img_len;
      }
   }
   a->out = final;

   return 1;
}
4540 
// Apply color-key transparency to 8-bit output that already has an alpha
// channel: pixels whose color equals tc get alpha 0. For out_n == 2 every
// other pixel's alpha is set to 255; for out_n == 4 non-matching pixels are
// left untouched (their alpha is assumed to be 255 already). Always returns 1.
static int stbi__compute_transparency(stbi__png *z, stbi_uc tc[3], int out_n)
{
   stbi__context *s = z->s;
   stbi__uint32 remaining = s->img_x * s->img_y;
   stbi_uc *px = z->out;

   STBI_ASSERT(out_n == 2 || out_n == 4);

   if (out_n == 2) {
      // gray + alpha
      for (; remaining > 0; --remaining, px += 2)
         px[1] = (px[0] == tc[0]) ? 0 : 255;
   } else {
      // rgb + alpha
      for (; remaining > 0; --remaining, px += 4) {
         if (px[0] == tc[0] && px[1] == tc[1] && px[2] == tc[2])
            px[3] = 0;
      }
   }
   return 1;
}
4565 
// 16-bit counterpart of stbi__compute_transparency: z->out is treated as
// 16-bit samples. Pixels whose color equals tc get alpha 0. For out_n == 2
// every other pixel's alpha is set to 65535; for out_n == 4 non-matching
// pixels are left untouched (alpha assumed to be 65535 already).
// Always returns 1.
static int stbi__compute_transparency16(stbi__png *z, stbi__uint16 tc[3], int out_n)
{
   stbi__context *s = z->s;
   stbi__uint32 remaining = s->img_x * s->img_y;
   stbi__uint16 *px = (stbi__uint16*) z->out;

   STBI_ASSERT(out_n == 2 || out_n == 4);

   if (out_n == 2) {
      // gray + alpha
      for (; remaining > 0; --remaining, px += 2)
         px[1] = (px[0] == tc[0]) ? 0 : 65535;
   } else {
      // rgb + alpha
      for (; remaining > 0; --remaining, px += 4) {
         if (px[0] == tc[0] && px[1] == tc[1] && px[2] == tc[2])
            px[3] = 0;
      }
   }
   return 1;
}
4590 
// Replace the one-byte-per-pixel index image in a->out with a newly
// allocated image of pal_img_n channels, looking each index up in `palette`
// (4 bytes per entry). pal_img_n == 3 copies the first three bytes of an
// entry; any other value copies all four. `len` is unused.
// Returns 1 on success; on allocation failure returns the result of
// stbi__err() and leaves a->out unchanged.
static int stbi__expand_png_palette(stbi__png *a, stbi_uc *palette, int len, int pal_img_n)
{
   stbi__uint32 i, pixel_count = a->s->img_x * a->s->img_y;
   stbi_uc *indices = a->out;
   stbi_uc *expanded, *dst;

   STBI_NOTUSED(len);

   expanded = (stbi_uc *) stbi__malloc_mad2(pixel_count, pal_img_n, 0);
   if (expanded == NULL) return stbi__err("outofmem", "Out of memory");

   // between here and the free of a->out below, exiting would leak
   dst = expanded;

   if (pal_img_n == 3) {
      for (i = 0; i < pixel_count; ++i, dst += 3) {
         const stbi_uc *entry = palette + indices[i]*4;
         dst[0] = entry[0];
         dst[1] = entry[1];
         dst[2] = entry[2];
      }
   } else {
      for (i = 0; i < pixel_count; ++i, dst += 4) {
         const stbi_uc *entry = palette + indices[i]*4;
         dst[0] = entry[0];
         dst[1] = entry[1];
         dst[2] = entry[2];
         dst[3] = entry[3];
      }
   }
   STBI_FREE(a->out);
   a->out = expanded;

   return 1;
}
4627 
// Set by stbi_set_unpremultiply_on_load(); when nonzero, stbi__de_iphone()
// divides the color channels by alpha while converting BGRA to RGBA.
static int stbi__unpremultiply_on_load = 0;
// Set by stbi_convert_iphone_png_to_rgb(); when nonzero, PNGs that contain a
// CgBI chunk are passed through stbi__de_iphone() after decoding.
static int stbi__de_iphone_flag = 0;
4630 
stbi_set_unpremultiply_on_load(int flag_true_if_should_unpremultiply)4631 STBIDEF void stbi_set_unpremultiply_on_load(int flag_true_if_should_unpremultiply)
4632 {
4633    stbi__unpremultiply_on_load = flag_true_if_should_unpremultiply;
4634 }
4635 
stbi_convert_iphone_png_to_rgb(int flag_true_if_should_convert)4636 STBIDEF void stbi_convert_iphone_png_to_rgb(int flag_true_if_should_convert)
4637 {
4638    stbi__de_iphone_flag = flag_true_if_should_convert;
4639 }
4640 
// Converts the decoded image in z->out from BGR(A) to RGB(A) in place.
// With s->img_out_n == 3 the first and third byte of each pixel are swapped.
// Otherwise the image is treated as 4 bytes per pixel; if
// stbi__unpremultiply_on_load is set, each color channel is additionally
// rescaled by 255/alpha (rounded) for pixels whose alpha is nonzero.
static void stbi__de_iphone(stbi__png *z)
{
   stbi__context *s = z->s;
   stbi__uint32 remaining = s->img_x * s->img_y;
   stbi_uc *px = z->out;

   if (s->img_out_n == 3) {
      // plain channel swap, 3 bytes per pixel
      for (; remaining > 0; --remaining, px += 3) {
         stbi_uc blue = px[0];
         px[0] = px[2];
         px[2] = blue;
      }
      return;
   }

   STBI_ASSERT(s->img_out_n == 4);

   if (!stbi__unpremultiply_on_load) {
      // plain channel swap, 4 bytes per pixel; alpha untouched
      for (; remaining > 0; --remaining, px += 4) {
         stbi_uc blue = px[0];
         px[0] = px[2];
         px[2] = blue;
      }
      return;
   }

   // channel swap combined with un-premultiplication
   for (; remaining > 0; --remaining, px += 4) {
      stbi_uc alpha = px[3];
      stbi_uc blue = px[0];
      if (alpha) {
         stbi_uc rounding = alpha / 2;
         px[0] = (px[2] * 255 + rounding) / alpha;
         px[1] = (px[1] * 255 + rounding) / alpha;
         px[2] = (blue  * 255 + rounding) / alpha;
      } else {
         // alpha of zero: nothing to divide by, just swap
         px[0] = px[2];
         px[2] = blue;
      }
   }
}
4683 
// Packs four chunk-type bytes into one big-endian 32-bit tag. The arithmetic
// is done in unsigned so that a first byte >= 0x80 cannot overflow a signed
// int and so that the result has the same signedness as the stbi__uint32
// chunk type it is compared against.
#define STBI__PNG_TYPE(a,b,c,d)  (((unsigned) (a) << 24) + ((unsigned) (b) << 16) + ((unsigned) (c) << 8) + (unsigned) (d))
4685 
// Walks a PNG stream chunk by chunk.
//
//   z        - decoder state; z->s is the input context. z->expanded,
//              z->idata and z->out are reset to NULL on entry.
//   scan     - STBI__SCAN_type:   return right after the header check
//              STBI__SCAN_header: return as soon as dimensions and component
//                                 count (s->img_x, s->img_y, s->img_n) are known
//              STBI__SCAN_load:   decode the whole image into z->out
//   req_comp - component count requested by the caller; only used at IEND to
//              choose s->img_out_n
//
// Returns 1 on success, 0 on failure. Error returns do not release z->idata,
// z->expanded or z->out; stbi__do_png frees them after calling this function.
static int stbi__parse_png_file(stbi__png *z, int scan, int req_comp)
{
   // palette: up to 256 entries of 4 bytes (r,g,b,a).
   // pal_img_n: 0 for non-paletted images, 3 for paletted, 4 for paletted + tRNS.
   stbi_uc palette[1024], pal_img_n=0;
   // has_trans: a color-key tRNS was seen on a non-paletted image; tc/tc16 hold the key.
   stbi_uc has_trans=0, tc[3];
   stbi__uint16 tc16[3];
   // ioff: bytes of IDAT payload collected so far; idata_limit: capacity of z->idata.
   stbi__uint32 ioff=0, idata_limit=0, i, pal_len=0;
   // first: no IHDR seen yet; color: IHDR color type; is_iphone: a CgBI chunk was seen.
   int first=1,k,interlace=0, color=0, is_iphone=0;
   stbi__context *s = z->s;

   z->expanded = NULL;
   z->idata = NULL;
   z->out = NULL;

   if (!stbi__check_png_header(s)) return 0;

   if (scan == STBI__SCAN_type) return 1;

   for (;;) {
      stbi__pngchunk c = stbi__get_chunk_header(s);
      switch (c.type) {
         case STBI__PNG_TYPE('C','g','B','I'):
            // payload is ignored; the flag changes zlib header handling and
            // enables the optional BGR->RGB pass at IEND
            is_iphone = 1;
            stbi__skip(s, c.length);
            break;
         case STBI__PNG_TYPE('I','H','D','R'): {
            int comp,filter;
            if (!first) return stbi__err("multiple IHDR","Corrupt PNG");
            first = 0;
            if (c.length != 13) return stbi__err("bad IHDR len","Corrupt PNG");
            // each dimension is capped at 1<<24
            s->img_x = stbi__get32be(s); if (s->img_x > (1 << 24)) return stbi__err("too large","Very large image (corrupt?)");
            s->img_y = stbi__get32be(s); if (s->img_y > (1 << 24)) return stbi__err("too large","Very large image (corrupt?)");
            z->depth = stbi__get8(s);  if (z->depth != 1 && z->depth != 2 && z->depth != 4 && z->depth != 8 && z->depth != 16)  return stbi__err("1/2/4/8/16-bit only","PNG not supported: 1/2/4/8/16-bit only");
            // color type: 3 = paletted; otherwise bit 1 = color, bit 2 = alpha, and bit 0 must be clear
            color = stbi__get8(s);  if (color > 6)         return stbi__err("bad ctype","Corrupt PNG");
            if (color == 3 && z->depth == 16)                  return stbi__err("bad ctype","Corrupt PNG");
            if (color == 3) pal_img_n = 3; else if (color & 1) return stbi__err("bad ctype","Corrupt PNG");
            comp  = stbi__get8(s);  if (comp) return stbi__err("bad comp method","Corrupt PNG");
            filter= stbi__get8(s);  if (filter) return stbi__err("bad filter method","Corrupt PNG");
            interlace = stbi__get8(s); if (interlace>1) return stbi__err("bad interlace method","Corrupt PNG");
            if (!s->img_x || !s->img_y) return stbi__err("0-pixel image","Corrupt PNG");
            if (!pal_img_n) {
               s->img_n = (color & 2 ? 3 : 1) + (color & 4 ? 1 : 0);
               // reject images whose x * y * components would exceed 1<<30
               if ((1 << 30) / s->img_x / s->img_n < s->img_y) return stbi__err("too large", "Image too large to decode");
               if (scan == STBI__SCAN_header) return 1;
            } else {
               // if paletted, then pal_n is our final components, and
               // img_n is # components to decompress/filter.
               s->img_n = 1;
               if ((1 << 30) / s->img_x / 4 < s->img_y) return stbi__err("too large","Corrupt PNG");
               // if SCAN_header, have to scan to see if we have a tRNS
            }
            break;
         }

         case STBI__PNG_TYPE('P','L','T','E'):  {
            if (first) return stbi__err("first not IHDR", "Corrupt PNG");
            // at most 256 three-byte entries, and the length must be a multiple of 3
            if (c.length > 256*3) return stbi__err("invalid PLTE","Corrupt PNG");
            pal_len = c.length / 3;
            if (pal_len * 3 != c.length) return stbi__err("invalid PLTE","Corrupt PNG");
            // entries are stored 4 bytes apart with alpha preset to opaque
            for (i=0; i < pal_len; ++i) {
               palette[i*4+0] = stbi__get8(s);
               palette[i*4+1] = stbi__get8(s);
               palette[i*4+2] = stbi__get8(s);
               palette[i*4+3] = 255;
            }
            break;
         }

         case STBI__PNG_TYPE('t','R','N','S'): {
            if (first) return stbi__err("first not IHDR", "Corrupt PNG");
            if (z->idata) return stbi__err("tRNS after IDAT","Corrupt PNG");
            if (pal_img_n) {
               // paletted: tRNS supplies one alpha byte per palette entry
               if (scan == STBI__SCAN_header) { s->img_n = 4; return 1; }
               if (pal_len == 0) return stbi__err("tRNS before PLTE","Corrupt PNG");
               if (c.length > pal_len) return stbi__err("bad tRNS len","Corrupt PNG");
               pal_img_n = 4;
               for (i=0; i < c.length; ++i)
                  palette[i*4+3] = stbi__get8(s);
            } else {
               // non-paletted: tRNS supplies one 16-bit key value per component;
               // only allowed when the image has no alpha channel (odd img_n)
               if (!(s->img_n & 1)) return stbi__err("tRNS with alpha","Corrupt PNG");
               if (c.length != (stbi__uint32) s->img_n*2) return stbi__err("bad tRNS len","Corrupt PNG");
               has_trans = 1;
               if (z->depth == 16) {
                  for (k = 0; k < s->img_n; ++k) tc16[k] = (stbi__uint16)stbi__get16be(s); // copy the values as-is
               } else {
                  for (k = 0; k < s->img_n; ++k) tc[k] = (stbi_uc)(stbi__get16be(s) & 255) * stbi__depth_scale_table[z->depth]; // non 8-bit images will be larger
               }
            }
            break;
         }

         case STBI__PNG_TYPE('I','D','A','T'): {
            if (first) return stbi__err("first not IHDR", "Corrupt PNG");
            if (pal_img_n && !pal_len) return stbi__err("no PLTE","Corrupt PNG");
            if (scan == STBI__SCAN_header) { s->img_n = pal_img_n; return 1; }
            // total collected size must stay representable as a non-negative int
            // (note: this path returns 0 without recording an error string)
            if ((int)(ioff + c.length) < (int)ioff) return 0;
            if (ioff + c.length > idata_limit) {
               // grow z->idata: start at max(chunk length, 4096), then double until it fits
               stbi__uint32 idata_limit_old = idata_limit;
               stbi_uc *p;
               if (idata_limit == 0) idata_limit = c.length > 4096 ? c.length : 4096;
               while (ioff + c.length > idata_limit)
                  idata_limit *= 2;
               STBI_NOTUSED(idata_limit_old);
               // on failure z->idata still points at the old buffer
               p = (stbi_uc *) STBI_REALLOC_SIZED(z->idata, idata_limit_old, idata_limit); if (p == NULL) return stbi__err("outofmem", "Out of memory");
               z->idata = p;
            }
            // append this chunk's payload to the data gathered so far
            if (!stbi__getn(s, z->idata+ioff,c.length)) return stbi__err("outofdata","Corrupt PNG");
            ioff += c.length;
            break;
         }

         case STBI__PNG_TYPE('I','E','N','D'): {
            stbi__uint32 raw_len, bpl;
            if (first) return stbi__err("first not IHDR", "Corrupt PNG");
            if (scan != STBI__SCAN_load) return 1;
            if (z->idata == NULL) return stbi__err("no IDAT","Corrupt PNG");
            // initial guess for decoded data size to avoid unnecessary reallocs
            bpl = (s->img_x * z->depth + 7) / 8; // bytes per line, per component
            raw_len = bpl * s->img_y * s->img_n /* pixels */ + s->img_y /* filter mode per row */;
            // raw_len is both the size guess passed in and the actual length written back
            z->expanded = (stbi_uc *) stbi_zlib_decode_malloc_guesssize_headerflag((char *) z->idata, ioff, raw_len, (int *) &raw_len, !is_iphone);
            if (z->expanded == NULL) return 0; // zlib should set error
            STBI_FREE(z->idata); z->idata = NULL;
            // add an alpha channel when the caller asked for exactly one more
            // component (2 or 4) on a non-paletted image, or when a color key exists
            if ((req_comp == s->img_n+1 && req_comp != 3 && !pal_img_n) || has_trans)
               s->img_out_n = s->img_n+1;
            else
               s->img_out_n = s->img_n;
            if (!stbi__create_png_image(z, z->expanded, raw_len, s->img_out_n, z->depth, color, interlace)) return 0;
            if (has_trans) {
               if (z->depth == 16) {
                  if (!stbi__compute_transparency16(z, tc16, s->img_out_n)) return 0;
               } else {
                  if (!stbi__compute_transparency(z, tc, s->img_out_n)) return 0;
               }
            }
            if (is_iphone && stbi__de_iphone_flag && s->img_out_n > 2)
               stbi__de_iphone(z);
            if (pal_img_n) {
               // pal_img_n == 3 or 4
               s->img_n = pal_img_n; // record the actual colors we had
               s->img_out_n = pal_img_n;
               if (req_comp >= 3) s->img_out_n = req_comp;
               if (!stbi__expand_png_palette(z, palette, pal_len, s->img_out_n))
                  return 0;
            } else if (has_trans) {
               // non-paletted image with tRNS -> source image has (constant) alpha
               ++s->img_n;
            }
            STBI_FREE(z->expanded); z->expanded = NULL;
            return 1;
         }

         default:
            // if critical, fail
            if (first) return stbi__err("first not IHDR", "Corrupt PNG");
            // bit 29 of the tag is bit 5 (0x20) of its first byte; clear means
            // the first letter is uppercase, i.e. the chunk is critical
            if ((c.type & (1 << 29)) == 0) {
               #ifndef STBI_NO_FAILURE_STRINGS
               // not threadsafe
               static char invalid_chunk[] = "XXXX PNG chunk not known";
               invalid_chunk[0] = STBI__BYTECAST(c.type >> 24);
               invalid_chunk[1] = STBI__BYTECAST(c.type >> 16);
               invalid_chunk[2] = STBI__BYTECAST(c.type >>  8);
               invalid_chunk[3] = STBI__BYTECAST(c.type >>  0);
               #endif
               return stbi__err(invalid_chunk, "PNG not supported: unknown PNG chunk type");
            }
            // unknown ancillary chunk: skip its payload
            stbi__skip(s, c.length);
            break;
      }
      // end of PNG chunk, read and skip CRC
      stbi__get32be(s);
   }
}
4857 
// Decodes a complete PNG through stbi__parse_png_file and hands the pixel
// buffer over to the caller.
//   p        - decoder state with p->s already set
//   x, y     - receive the image dimensions on success
//   n        - if non-NULL, receives p->s->img_n
//   req_comp - 0 to keep the decoded component count, or 1..4 to convert
//   ri       - receives bits_per_channel (8 for depths below 8, else p->depth)
// Returns the pixel buffer, or NULL on failure. p->out, p->expanded and
// p->idata are released before returning, except on the early return taken
// when format conversion fails (p->out has already been detached by then).
static void *stbi__do_png(stbi__png *p, int *x, int *y, int *n, int req_comp, stbi__result_info *ri)
{
   stbi__context *ctx;
   void *pixels = NULL;

   if (req_comp < 0 || req_comp > 4)
      return stbi__errpuc("bad req_comp", "Internal error");

   if (stbi__parse_png_file(p, STBI__SCAN_load, req_comp)) {
      ctx = p->s;
      ri->bits_per_channel = (p->depth < 8) ? 8 : p->depth;

      // take ownership of the decoded buffer so the cleanup below skips it
      pixels = p->out;
      p->out = NULL;

      if (req_comp != 0 && req_comp != ctx->img_out_n) {
         if (ri->bits_per_channel == 8)
            pixels = stbi__convert_format((unsigned char *) pixels, ctx->img_out_n, req_comp, ctx->img_x, ctx->img_y);
         else
            pixels = stbi__convert_format16((stbi__uint16 *) pixels, ctx->img_out_n, req_comp, ctx->img_x, ctx->img_y);
         ctx->img_out_n = req_comp;
         if (pixels == NULL)
            return pixels;
      }

      *x = ctx->img_x;
      *y = ctx->img_y;
      if (n)
         *n = ctx->img_n;
   }

   STBI_FREE(p->out);
   p->out = NULL;
   STBI_FREE(p->expanded);
   p->expanded = NULL;
   STBI_FREE(p->idata);
   p->idata = NULL;

   return pixels;
}
4887 
// Loader entry point for PNG: wraps the context in a stack-allocated
// stbi__png and forwards every argument to stbi__do_png.
static void *stbi__png_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri)
{
   stbi__png png;

   png.s = s;
   return stbi__do_png(&png, x, y, comp, req_comp, ri);
}
4894 
// Reports whether stbi__check_png_header accepts the stream, then rewinds
// the context so the next reader starts from the beginning again.
static int stbi__png_test(stbi__context *s)
{
   int is_png = stbi__check_png_header(s);

   stbi__rewind(s);
   return is_png;
}
4902 
// Runs a header-only scan of the PNG and reports dimensions and component
// count through whichever of x, y, comp are non-NULL.
// Returns 1 on success. On failure the context is rewound and 0 is returned;
// the output pointers are not written.
static int stbi__png_info_raw(stbi__png *p, int *x, int *y, int *comp)
{
   stbi__context *ctx;

   if (stbi__parse_png_file(p, STBI__SCAN_header, 0)) {
      ctx = p->s;
      if (x)
         *x = ctx->img_x;
      if (y)
         *y = ctx->img_y;
      if (comp)
         *comp = ctx->img_n;
      return 1;
   }

   stbi__rewind( p->s );
   return 0;
}
4914 
// Info entry point for PNG: wraps the context in a stack-allocated stbi__png
// and forwards to stbi__png_info_raw.
static int stbi__png_info(stbi__context *s, int *x, int *y, int *comp)
{
   stbi__png png;

   png.s = s;
   return stbi__png_info_raw(&png, x, y, comp);
}
4921 #endif
4922 
4923 // Microsoft/Windows BMP image
4924 
4925 #ifndef STBI_NO_BMP
// Checks whether the stream starts like a BMP this loader understands:
// the bytes 'B','M', twelve more header bytes that are ignored, and then an
// info-header size of 12, 40, 56, 108 or 124.
// Returns 1 on a match, 0 otherwise. Does not rewind the context.
static int stbi__bmp_test_raw(stbi__context *s)
{
   int header_size;

   if (stbi__get8(s) != 'B') return 0;
   if (stbi__get8(s) != 'M') return 0;

   stbi__get32le(s); // file size, ignored
   stbi__get16le(s); // reserved, ignored
   stbi__get16le(s); // reserved, ignored
   stbi__get32le(s); // data offset, ignored

   header_size = stbi__get32le(s);
   switch (header_size) {
      case 12:
      case 40:
      case 56:
      case 108:
      case 124:
         return 1;
      default:
         return 0;
   }
}
4940 
// Runs stbi__bmp_test_raw and rewinds the context afterwards, whatever the
// outcome. Returns the raw test's result.
static int stbi__bmp_test(stbi__context *s)
{
   int is_bmp;

   is_bmp = stbi__bmp_test_raw(s);
   stbi__rewind(s);
   return is_bmp;
}
4947 
4948 
// Returns the index (0..31) of the highest set bit of z, or -1 when z is 0.
// Works as a binary search: at each step, if anything remains above the
// lower `step` bits, that many positions are added and shifted away.
static int stbi__high_bit(unsigned int z)
{
   int bit = 0;
   int step;

   if (z == 0) return -1;

   for (step = 16; step > 0; step >>= 1) {
      if ((z >> step) != 0) {
         bit += step;
         z >>= step;
      }
   }
   return bit;
}
4961 
// Returns the number of set bits in a (0..32 for a 32-bit value).
// Each iteration clears the lowest set bit, so the loop runs once per 1-bit.
static int stbi__bitcount(unsigned int a)
{
   int count = 0;

   while (a != 0) {
      a &= a - 1;
      ++count;
   }
   return count;
}
4971 
// Moves a masked channel value so that its top bit lands on bit 7 and then
// replicates the `bits`-wide pattern downwards to fill all 8 bits.
//   v     - channel value already masked out of the pixel; its bit pattern
//           is treated as unsigned, so a mask that includes bit 31 is fine
//   shift - positive: shift right by that amount; negative: shift left by
//           -shift
//   bits  - number of significant bits in the channel; values <= 0 skip the
//           replication step
// Returns the expanded value; callers narrow it with STBI__BYTECAST.
static int stbi__shiftsigned(int v, int shift, int bits)
{
   // Do every shift on an unsigned copy: right-shifting a negative int would
   // drag sign bits into the value (and into every replicated copy), and
   // left-shifting a negative int is undefined.
   unsigned int value = (unsigned int) v;
   unsigned int result;
   int z;

   if (shift < 0) value <<= -shift;
   else value >>= shift;
   result = value;

   // A non-positive width would never advance z; there is nothing to
   // replicate in that case.
   if (bits > 0) {
      for (z = bits; z < 8; z += bits)
         result += value >> z;
   }
   return (int) result;
}
4988 
// Header values gathered by stbi__bmp_parse_header for stbi__bmp_load.
typedef struct
{
   int bpp;            // bits per pixel, as read from the info header
   int offset;         // data-offset field read from the file header
   int hsz;            // info-header size field (12, 40, 56, 108 or 124)
   unsigned int mr;    // red channel bit mask (0 when none was set)
   unsigned int mg;    // green channel bit mask
   unsigned int mb;    // blue channel bit mask
   unsigned int ma;    // alpha channel bit mask
   unsigned int all_a; // OR of all alpha values seen; 0 at the end means the alpha channel was entirely 0
} stbi__bmp_data;
4994 
// Reads the BMP file header and the info header that follows it.
//   s    - input context; s->img_x and s->img_y are filled from the header
//   info - receives offset, hsz, bpp and the channel masks mr/mg/mb/ma.
//          info->all_a is written only for uncompressed 32-bpp images with a
//          40- or 56-byte header; otherwise it keeps the caller's value.
// Returns (void *) 1 on success. On failure returns the result of
// stbi__errpuc, which the caller in this file compares against NULL.
static void *stbi__bmp_parse_header(stbi__context *s, stbi__bmp_data *info)
{
   int hsz;
   if (stbi__get8(s) != 'B' || stbi__get8(s) != 'M') return stbi__errpuc("not BMP", "Corrupt BMP");
   stbi__get32le(s); // discard filesize
   stbi__get16le(s); // discard reserved
   stbi__get16le(s); // discard reserved
   info->offset = stbi__get32le(s);
   info->hsz = hsz = stbi__get32le(s);
   // masks default to 0; they are filled in below where the format defines them
   info->mr = info->mg = info->mb = info->ma = 0;

   if (hsz != 12 && hsz != 40 && hsz != 56 && hsz != 108 && hsz != 124) return stbi__errpuc("unknown BMP", "BMP type not supported: unknown");
   // the 12-byte header stores width/height as 16-bit values, all others as 32-bit
   if (hsz == 12) {
      s->img_x = stbi__get16le(s);
      s->img_y = stbi__get16le(s);
   } else {
      s->img_x = stbi__get32le(s);
      s->img_y = stbi__get32le(s);
   }
   // the next 16-bit field must be 1
   if (stbi__get16le(s) != 1) return stbi__errpuc("bad BMP", "bad BMP");
   info->bpp = stbi__get16le(s);
   if (info->bpp == 1) return stbi__errpuc("monochrome", "BMP type not supported: 1-bit");
   if (hsz != 12) {
      // compress: 1 and 2 are rejected as RLE; 0 and 3 are handled below
      int compress = stbi__get32le(s);
      if (compress == 1 || compress == 2) return stbi__errpuc("BMP RLE", "BMP type not supported: RLE");
      stbi__get32le(s); // discard sizeof
      stbi__get32le(s); // discard hres
      stbi__get32le(s); // discard vres
      stbi__get32le(s); // discard colorsused
      stbi__get32le(s); // discard max important
      if (hsz == 40 || hsz == 56) {
         if (hsz == 56) {
            // the 56-byte header has four extra 32-bit fields that are ignored
            stbi__get32le(s);
            stbi__get32le(s);
            stbi__get32le(s);
            stbi__get32le(s);
         }
         if (info->bpp == 16 || info->bpp == 32) {
            if (compress == 0) {
               // no masks in the file: use fixed layouts (8-8-8-8 for 32 bpp, 5-5-5 for 16 bpp)
               if (info->bpp == 32) {
                  info->mr = 0xffu << 16;
                  info->mg = 0xffu <<  8;
                  info->mb = 0xffu <<  0;
                  info->ma = 0xffu << 24;
                  info->all_a = 0; // if all_a is 0 at end, then we loaded alpha channel but it was all 0
               } else {
                  info->mr = 31u << 10;
                  info->mg = 31u <<  5;
                  info->mb = 31u <<  0;
               }
            } else if (compress == 3) {
               // masks follow the header in the stream (no alpha mask here)
               info->mr = stbi__get32le(s);
               info->mg = stbi__get32le(s);
               info->mb = stbi__get32le(s);
               // not documented, but generated by photoshop and handled by mspaint
               if (info->mr == info->mg && info->mg == info->mb) {
                  // ?!?!?
                  return stbi__errpuc("bad BMP", "bad BMP");
               }
            } else
               return stbi__errpuc("bad BMP", "bad BMP");
         }
      } else {
         int i;
         if (hsz != 108 && hsz != 124)
            return stbi__errpuc("bad BMP", "bad BMP");
         // the 108- and 124-byte headers carry all four masks, including alpha
         info->mr = stbi__get32le(s);
         info->mg = stbi__get32le(s);
         info->mb = stbi__get32le(s);
         info->ma = stbi__get32le(s);
         stbi__get32le(s); // discard color space
         for (i=0; i < 12; ++i)
            stbi__get32le(s); // discard color space parameters
         if (hsz == 124) {
            stbi__get32le(s); // discard rendering intent
            stbi__get32le(s); // discard offset of profile data
            stbi__get32le(s); // discard size of profile data
            stbi__get32le(s); // discard reserved
         }
      }
   }
   return (void *) 1;
}
5078 
5079 
// Decodes a BMP image into a newly allocated 8-bit-per-channel buffer.
//   s        - input context
//   x, y     - receive the image width and height
//   comp     - if non-NULL, receives s->img_n (4 when an alpha mask is
//              present, otherwise 3)
//   req_comp - 0 to keep the file's component count, or the desired count
//   ri       - unused
// Returns the pixel buffer, or NULL on failure (via stbi__errpuc or a failed
// stbi__convert_format).
static void *stbi__bmp_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri)
{
   stbi_uc *out;
   unsigned int mr=0,mg=0,mb=0,ma=0, all_a;
   stbi_uc pal[256][4];
   int psize=0,i,j,width;
   int flip_vertically, pad, target;
   stbi__bmp_data info;
   STBI_NOTUSED(ri);

   // default: treat alpha as "seen"; the header parser resets this to 0 for
   // the one format where an all-zero alpha channel has to be detected
   info.all_a = 255;
   if (stbi__bmp_parse_header(s, &info) == NULL)
      return NULL; // error code already set

   // a positive height means the decoded rows are reversed at the end;
   // the height itself is stored as its absolute value
   flip_vertically = ((int) s->img_y) > 0;
   s->img_y = abs((int) s->img_y);

   mr = info.mr;
   mg = info.mg;
   mb = info.mb;
   ma = info.ma;
   all_a = info.all_a;

   // palette entry count, derived from the gap between the headers and the pixel data
   if (info.hsz == 12) {
      // NOTE(review): the header size here is 12, yet 24 is subtracted rather
      // than info.hsz; for 3-byte entries that appears to undercount the
      // palette by 4 entries -- confirm against real 12-byte-header files.
      if (info.bpp < 24)
         psize = (info.offset - 14 - 24) / 3;
   } else {
      if (info.bpp < 16)
         psize = (info.offset - 14 - info.hsz) >> 2;
   }

   s->img_n = ma ? 4 : 3;
   if (req_comp && req_comp >= 3) // we can directly decode 3 or 4
      target = req_comp;
   else
      target = s->img_n; // if they want monochrome, we'll post-convert

   // sanity-check size
   if (!stbi__mad3sizes_valid(target, s->img_x, s->img_y, 0))
      return stbi__errpuc("too large", "Corrupt BMP");

   out = (stbi_uc *) stbi__malloc_mad3(target, s->img_x, s->img_y, 0);
   if (!out) return stbi__errpuc("outofmem", "Out of memory");
   if (info.bpp < 16) {
      // paletted image (4 or 8 bits per pixel)
      int z=0;
      if (psize == 0 || psize > 256) { STBI_FREE(out); return stbi__errpuc("invalid", "Corrupt BMP"); }
      // file order is B,G,R (plus one ignored byte unless the header is 12 bytes); stored as R,G,B,255
      for (i=0; i < psize; ++i) {
         pal[i][2] = stbi__get8(s);
         pal[i][1] = stbi__get8(s);
         pal[i][0] = stbi__get8(s);
         if (info.hsz != 12) stbi__get8(s);
         pal[i][3] = 255;
      }
      // skip whatever lies between the end of the palette and the pixel data
      stbi__skip(s, info.offset - 14 - info.hsz - psize * (info.hsz == 12 ? 3 : 4));
      // width = bytes of pixel data per row
      if (info.bpp == 4) width = (s->img_x + 1) >> 1;
      else if (info.bpp == 8) width = s->img_x;
      else { STBI_FREE(out); return stbi__errpuc("bad bpp", "Corrupt BMP"); }
      // rows are padded to a multiple of 4 bytes
      pad = (-width)&3;
      for (j=0; j < (int) s->img_y; ++j) {
         // two pixels per iteration: at 4 bpp both come from one byte
         // (high nibble first), at 8 bpp a second byte is read
         for (i=0; i < (int) s->img_x; i += 2) {
            int v=stbi__get8(s),v2=0;
            if (info.bpp == 4) {
               v2 = v & 15;
               v >>= 4;
            }
            out[z++] = pal[v][0];
            out[z++] = pal[v][1];
            out[z++] = pal[v][2];
            if (target == 4) out[z++] = 255;
            // odd width: the row ends after the first pixel of the pair
            if (i+1 == (int) s->img_x) break;
            v = (info.bpp == 8) ? stbi__get8(s) : v2;
            out[z++] = pal[v][0];
            out[z++] = pal[v][1];
            out[z++] = pal[v][2];
            if (target == 4) out[z++] = 255;
         }
         stbi__skip(s, pad);
      }
   } else {
      // direct-color image (16, 24 or 32 bits per pixel)
      int rshift=0,gshift=0,bshift=0,ashift=0,rcount=0,gcount=0,bcount=0,acount=0;
      int z = 0;
      // easy: 0 = decode through the masks, 1 = 24-bit BGR, 2 = 32-bit BGRA with the standard masks
      int easy=0;
      stbi__skip(s, info.offset - 14 - info.hsz);
      if (info.bpp == 24) width = 3 * s->img_x;
      else if (info.bpp == 16) width = 2*s->img_x;
      else /* bpp = 32 and pad = 0 */ width=0;
      pad = (-width) & 3;
      if (info.bpp == 24) {
         easy = 1;
      } else if (info.bpp == 32) {
         if (mb == 0xff && mg == 0xff00 && mr == 0x00ff0000 && ma == 0xff000000)
            easy = 2;
      }
      if (!easy) {
         if (!mr || !mg || !mb) { STBI_FREE(out); return stbi__errpuc("bad masks", "Corrupt BMP"); }
         // right shift amt to put high bit in position #7
         rshift = stbi__high_bit(mr)-7; rcount = stbi__bitcount(mr);
         gshift = stbi__high_bit(mg)-7; gcount = stbi__bitcount(mg);
         bshift = stbi__high_bit(mb)-7; bcount = stbi__bitcount(mb);
         ashift = stbi__high_bit(ma)-7; acount = stbi__bitcount(ma);
      }
      for (j=0; j < (int) s->img_y; ++j) {
         if (easy) {
            for (i=0; i < (int) s->img_x; ++i) {
               unsigned char a;
               // bytes arrive as B,G,R and are stored as R,G,B
               out[z+2] = stbi__get8(s);
               out[z+1] = stbi__get8(s);
               out[z+0] = stbi__get8(s);
               z += 3;
               // the alpha byte is always consumed when easy == 2, even if it is not stored
               a = (easy == 2 ? stbi__get8(s) : 255);
               all_a |= a;
               if (target == 4) out[z++] = a;
            }
         } else {
            int bpp = info.bpp;
            for (i=0; i < (int) s->img_x; ++i) {
               // anything that is not 16 bpp is read as a 32-bit pixel here
               stbi__uint32 v = (bpp == 16 ? (stbi__uint32) stbi__get16le(s) : stbi__get32le(s));
               int a;
               out[z++] = STBI__BYTECAST(stbi__shiftsigned(v & mr, rshift, rcount));
               out[z++] = STBI__BYTECAST(stbi__shiftsigned(v & mg, gshift, gcount));
               out[z++] = STBI__BYTECAST(stbi__shiftsigned(v & mb, bshift, bcount));
               a = (ma ? stbi__shiftsigned(v & ma, ashift, acount) : 255);
               all_a |= a;
               if (target == 4) out[z++] = STBI__BYTECAST(a);
            }
         }
         stbi__skip(s, pad);
      }
   }

   // if alpha channel is all 0s, replace with all 255s
   if (target == 4 && all_a == 0)
      for (i=4*s->img_x*s->img_y-1; i >= 0; i -= 4)
         out[i] = 255;

   if (flip_vertically) {
      // swap row j with row (height-1-j), byte by byte
      stbi_uc t;
      for (j=0; j < (int) s->img_y>>1; ++j) {
         stbi_uc *p1 = out +      j     *s->img_x*target;
         stbi_uc *p2 = out + (s->img_y-1-j)*s->img_x*target;
         for (i=0; i < (int) s->img_x*target; ++i) {
            t = p1[i], p1[i] = p2[i], p2[i] = t;
         }
      }
   }

   if (req_comp && req_comp != target) {
      out = stbi__convert_format(out, target, req_comp, s->img_x, s->img_y);
      if (out == NULL) return out; // stbi__convert_format frees input on failure
   }

   *x = s->img_x;
   *y = s->img_y;
   if (comp) *comp = s->img_n;
   return out;
}
5236 #endif
5237 
5238 // Targa Truevision - TGA
5239 // by Jonathan Dummer
5240 #ifndef STBI_NO_TGA
5241 // returns STBI_rgb or whatever, 0 on error
stbi__tga_get_comp(int bits_per_pixel,int is_grey,int * is_rgb16)5242 static int stbi__tga_get_comp(int bits_per_pixel, int is_grey, int* is_rgb16)
5243 {
5244    // only RGB or RGBA (incl. 16bit) or grey allowed
5245    if(is_rgb16) *is_rgb16 = 0;
5246    switch(bits_per_pixel) {
5247       case 8:  return STBI_grey;
5248       case 16: if(is_grey) return STBI_grey_alpha;
5249                FALLTHROUGH;
5250       case 15: if(is_rgb16) *is_rgb16 = 1;
5251             return STBI_rgb;
5252       case 24:
5253       case 32: return bits_per_pixel/8;
5254       default: return 0;
5255    }
5256 }
5257 
// Reads a TGA header and reports width, height and component count through
// whichever of x, y, comp are non-NULL.
// Returns 1 if the header describes a supported image (the context is left
// positioned after the header). Otherwise the context is rewound and 0 is
// returned without writing the outputs.
static int stbi__tga_info(stbi__context *s, int *x, int *y, int *comp)
{
    int width, height, components;
    int colormap_type, image_type, pixel_bits;
    int palette_bits = 0;   // bits per palette entry; stays 0 without a colormap

    stbi__get8(s);                      // discard Offset
    colormap_type = stbi__get8(s);
    if (colormap_type > 1) goto reject; // only RGB or indexed allowed

    image_type = stbi__get8(s);
    if (colormap_type == 1) {
        // colormapped (paletted) image: type must be 1 or 9
        if (image_type != 1 && image_type != 9) goto reject;
        stbi__skip(s,4);                // index of first colormap entry and number of entries
        palette_bits = stbi__get8(s);   // bits per palette color entry
        if (!(palette_bits == 8 || palette_bits == 15 || palette_bits == 16 ||
              palette_bits == 24 || palette_bits == 32))
            goto reject;
        stbi__skip(s,4);                // image x and y origin
    } else {
        // no colormap: only RGB or grey allowed, with or without RLE
        if (!(image_type == 2 || image_type == 3 || image_type == 10 || image_type == 11))
            goto reject;
        stbi__skip(s,9);                // colormap specification and image x/y origin
    }

    width = stbi__get16le(s);
    if (width < 1) goto reject;
    height = stbi__get16le(s);
    if (height < 1) goto reject;

    pixel_bits = stbi__get8(s);
    stbi__get8(s);                      // ignore alpha bits

    if (palette_bits != 0) {
        // with a colormap, pixel_bits is the size of an index;
        // only 8- and 16-bit indexes are accepted
        if (pixel_bits != 8 && pixel_bits != 16) goto reject;
        components = stbi__tga_get_comp(palette_bits, 0, NULL);
    } else {
        components = stbi__tga_get_comp(pixel_bits, (image_type == 3) || (image_type == 11), NULL);
    }
    if (!components) goto reject;

    if (x) *x = width;
    if (y) *y = height;
    if (comp) *comp = components;
    return 1;

reject:
    stbi__rewind(s);
    return 0;
}
5322 
// Checks whether the stream starts with a plausible TGA header that this
// loader supports. The context is always rewound before returning.
// Returns 1 if every header check passes, 0 otherwise.
static int stbi__tga_test(stbi__context *s)
{
   int matched = 0;
   int colormap_type, image_type, entry_bits, pixel_bits;

   stbi__get8(s);                                // discard Offset
   colormap_type = stbi__get8(s);
   if (colormap_type > 1) goto done;             // only RGB or indexed allowed

   image_type = stbi__get8(s);
   if (colormap_type == 1) {
      // colormapped (paletted) image: type must be 1 or 9
      if (image_type != 1 && image_type != 9) goto done;
      stbi__skip(s,4);                           // index of first colormap entry and number of entries
      entry_bits = stbi__get8(s);                // bits per palette color entry
      if (!(entry_bits == 8 || entry_bits == 15 || entry_bits == 16 ||
            entry_bits == 24 || entry_bits == 32))
         goto done;
      stbi__skip(s,4);                           // image x and y origin
   } else {
      // no colormap: only RGB or grey allowed, with or without RLE
      if (!(image_type == 2 || image_type == 3 || image_type == 10 || image_type == 11))
         goto done;
      stbi__skip(s,9);                           // colormap specification and image x/y origin
   }

   if (stbi__get16le(s) < 1) goto done;          // width
   if (stbi__get16le(s) < 1) goto done;          // height

   pixel_bits = stbi__get8(s);
   // for colormapped images this is the size of an index: 8 or 16 only
   if (colormap_type == 1 && pixel_bits != 8 && pixel_bits != 16) goto done;
   if (!(pixel_bits == 8 || pixel_bits == 15 || pixel_bits == 16 ||
         pixel_bits == 24 || pixel_bits == 32))
      goto done;

   matched = 1;

done:
   stbi__rewind(s);
   return matched;
}
5353 
5354 // read 16bit value and convert to 24bit RGB
stbi__tga_read_rgb16(stbi__context * s,stbi_uc * out)5355 static void stbi__tga_read_rgb16(stbi__context *s, stbi_uc* out)
5356 {
5357    stbi__uint16 px = (stbi__uint16)stbi__get16le(s);
5358    stbi__uint16 fiveBitMask = 31;
5359    // we have 3 channels with 5bits each
5360    int r = (px >> 10) & fiveBitMask;
5361    int g = (px >> 5) & fiveBitMask;
5362    int b = px & fiveBitMask;
5363    // Note that this saves the data in RGB(A) order, so it doesn't need to be swapped later
5364    out[0] = (stbi_uc)((r * 255)/31);
5365    out[1] = (stbi_uc)((g * 255)/31);
5366    out[2] = (stbi_uc)((b * 255)/31);
5367 
5368    // some people claim that the most significant bit might be used for alpha
5369    // (possibly if an alpha-bit is set in the "image descriptor byte")
5370    // but that only made 16bit test images completely translucent..
5371    // so let's treat all 15 and 16bit TGAs as RGB with no alpha.
5372 }
5373 
// Decode a TGA image from `s` into a newly allocated 8-bit-per-channel buffer.
//   x, y      receive the image width and height once the pixel format is known
//   comp      if non-NULL, receives the component count found in the file
//   req_comp  if non-zero and different from the file's component count, the
//             decoded data is passed through stbi__convert_format() before return
//   ri        unused by this loader
// Returns the pixel buffer, or the value of stbi__errpuc(...) when the pixel
// format cannot be determined, the dimensions are too large, an indexed image has
// an empty or unreadable palette, or an allocation fails.
static void *stbi__tga_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri)
{
   //   read in the TGA header stuff
   int tga_offset = stbi__get8(s);
   int tga_indexed = stbi__get8(s);
   int tga_image_type = stbi__get8(s);
   int tga_is_RLE = 0;
   int tga_palette_start = stbi__get16le(s);
   int tga_palette_len = stbi__get16le(s);
   int tga_palette_bits = stbi__get8(s);
   int tga_x_origin = stbi__get16le(s);
   int tga_y_origin = stbi__get16le(s);
   int tga_width = stbi__get16le(s);
   int tga_height = stbi__get16le(s);
   int tga_bits_per_pixel = stbi__get8(s);
   int tga_comp, tga_rgb16=0;
   int tga_inverted = stbi__get8(s);
   // int tga_alpha_bits = tga_inverted & 15; // the 4 lowest bits - unused (useless?)
   //   image data
   unsigned char *tga_data;
   unsigned char *tga_palette = NULL;
   int i, j;
   unsigned char raw_data[4] = {0};
   int RLE_count = 0;
   int RLE_repeating = 0;
   int read_next_pixel = 1;
   STBI_NOTUSED(ri);

   //   do a tiny bit of preprocessing
   if ( tga_image_type >= 8 )
   {
      // image types 8 and above are the RLE variants of (type - 8)
      tga_image_type -= 8;
      tga_is_RLE = 1;
   }
   // bit 5 of the descriptor byte clear => rows get flipped while loading
   tga_inverted = 1 - ((tga_inverted >> 5) & 1);

   //   If I'm paletted, then I'll use the number of bits from the palette
   if ( tga_indexed ) tga_comp = stbi__tga_get_comp(tga_palette_bits, 0, &tga_rgb16);
   else tga_comp = stbi__tga_get_comp(tga_bits_per_pixel, (tga_image_type == 3), &tga_rgb16);

   if(!tga_comp) // shouldn't really happen, stbi__tga_test() should have ensured basic consistency
      return stbi__errpuc("bad format", "Can't find out TGA pixelformat");

   //   tga info
   *x = tga_width;
   *y = tga_height;
   if (comp) *comp = tga_comp;

   if (!stbi__mad3sizes_valid(tga_width, tga_height, tga_comp, 0))
      return stbi__errpuc("too large", "Corrupt TGA");

   tga_data = (unsigned char*)stbi__malloc_mad3(tga_width, tga_height, tga_comp, 0);
   if (!tga_data) return stbi__errpuc("outofmem", "Out of memory");

   // skip to the data's starting position (offset usually = 0)
   stbi__skip(s, tga_offset );

   if ( !tga_indexed && !tga_is_RLE && !tga_rgb16 ) {
      // fast path: raw, non-paletted, non-16-bit data is read one whole row at a
      // time, directly into its final (possibly flipped) row position
      for (i=0; i < tga_height; ++i) {
         int row = tga_inverted ? tga_height -i - 1 : i;
         stbi_uc *tga_row = tga_data + row*tga_width*tga_comp;
         stbi__getn(s, tga_row, tga_width * tga_comp);
      }
   } else  {
      //   do I need to load a palette?
      if ( tga_indexed)
      {
         // An indexed image needs at least one palette entry: every out-of-range
         // index is mapped to entry 0 below, so an empty palette would make the
         // lookup read past the end of the allocation.
         if (tga_palette_len == 0) {
            STBI_FREE(tga_data);
            return stbi__errpuc("bad palette", "Corrupt TGA");
         }

         //   any data to skip? (offset usually = 0)
         stbi__skip(s, tga_palette_start );
         //   load the palette
         tga_palette = (unsigned char*)stbi__malloc_mad2(tga_palette_len, tga_comp, 0);
         if (!tga_palette) {
            STBI_FREE(tga_data);
            return stbi__errpuc("outofmem", "Out of memory");
         }
         if (tga_rgb16) {
            // 15/16-bit palette entries are expanded to 3 bytes each
            stbi_uc *pal_entry = tga_palette;
            STBI_ASSERT(tga_comp == STBI_rgb);
            for (i=0; i < tga_palette_len; ++i) {
               stbi__tga_read_rgb16(s, pal_entry);
               pal_entry += tga_comp;
            }
         } else if (!stbi__getn(s, tga_palette, tga_palette_len * tga_comp)) {
            STBI_FREE(tga_data);
            STBI_FREE(tga_palette);
            return stbi__errpuc("bad palette", "Corrupt TGA");
         }
      }
      //   load the data
      for (i=0; i < tga_width * tga_height; ++i)
      {
         //   if I'm in RLE mode, do I need to get a RLE chunk?
         if ( tga_is_RLE )
         {
            if ( RLE_count == 0 )
            {
               //   yep, get the next byte as a RLE command:
               //   low 7 bits = count - 1, top bit = "repeat one pixel" flag
               int RLE_cmd = stbi__get8(s);
               RLE_count = 1 + (RLE_cmd & 127);
               RLE_repeating = RLE_cmd >> 7;
               read_next_pixel = 1;
            } else if ( !RLE_repeating )
            {
               // literal run: every pixel in the run is read from the stream
               read_next_pixel = 1;
            }
         } else
         {
            read_next_pixel = 1;
         }
         //   OK, if I need to read a pixel, do it now
         if ( read_next_pixel )
         {
            //   load however much data we did have
            if ( tga_indexed )
            {
               // read in index, then perform the lookup
               int pal_idx = (tga_bits_per_pixel == 8) ? stbi__get8(s) : stbi__get16le(s);
               if ( pal_idx >= tga_palette_len ) {
                  // invalid index
                  pal_idx = 0;
               }
               pal_idx *= tga_comp;
               for (j = 0; j < tga_comp; ++j) {
                  raw_data[j] = tga_palette[pal_idx+j];
               }
            } else if(tga_rgb16) {
               STBI_ASSERT(tga_comp == STBI_rgb);
               stbi__tga_read_rgb16(s, raw_data);
            } else {
               //   read in the data raw
               for (j = 0; j < tga_comp; ++j) {
                  raw_data[j] = stbi__get8(s);
               }
            }
            //   clear the reading flag for the next pixel
            read_next_pixel = 0;
         } // end of reading a pixel

         // copy data (for a repeating RLE run this re-emits the last pixel read)
         for (j = 0; j < tga_comp; ++j)
            tga_data[i*tga_comp+j] = raw_data[j];

         //   in case we're in RLE mode, keep counting down
         --RLE_count;
      }
      //   do I need to invert the image?
      if ( tga_inverted )
      {
         // swap row j with row (height-1-j), byte by byte
         for (j = 0; j*2 < tga_height; ++j)
         {
            int index1 = j * tga_width * tga_comp;
            int index2 = (tga_height - 1 - j) * tga_width * tga_comp;
            for (i = tga_width * tga_comp; i > 0; --i)
            {
               unsigned char temp = tga_data[index1];
               tga_data[index1] = tga_data[index2];
               tga_data[index2] = temp;
               ++index1;
               ++index2;
            }
         }
      }
      //   clear my palette, if I had one
      if ( tga_palette != NULL )
      {
         STBI_FREE( tga_palette );
      }
   }

   // swap RGB - if the source data was RGB16, it already is in the right order
   if (tga_comp >= 3 && !tga_rgb16)
   {
      unsigned char* tga_pixel = tga_data;
      for (i=0; i < tga_width * tga_height; ++i)
      {
         unsigned char temp = tga_pixel[0];
         tga_pixel[0] = tga_pixel[2];
         tga_pixel[2] = temp;
         tga_pixel += tga_comp;
      }
   }

   // convert to target component count
   if (req_comp && req_comp != tga_comp)
      tga_data = stbi__convert_format(tga_data, tga_comp, req_comp, tga_width, tga_height);

   //   the things I do to get rid of an error message, and yet keep
   //   Microsoft's C compilers happy... [8^(
   tga_palette_start = tga_palette_len = tga_palette_bits =
         tga_x_origin = tga_y_origin = 0;
   //   OK, done
   return tga_data;
}
5567 #endif
5568 
5569 // *************************************************************************************************
5570 // Photoshop PSD loader -- PD by Thatcher Ulrich, integration by Nicolas Schulz, tweaked by STB
5571 
5572 #ifndef STBI_NO_PSD
// Report whether the stream begins with the 32-bit big-endian PSD signature
// 0x38425053 ("8BPS"). The stream is rewound before returning.
static int stbi__psd_test(stbi__context *s)
{
   int has_signature = 0;

   if (stbi__get32be(s) == 0x38425053)
      has_signature = 1;

   stbi__rewind(s);
   return has_signature;
}
5579 
// Decode one RLE-compressed channel of `pixelCount` values from the stream.
// Output bytes are written to p[0], p[4], p[8], ... (a stride of 4, so one
// channel of an interleaved 4-component buffer is filled).
// Each control byte n is interpreted as:
//   n == 128 : no-op
//   n <  128 : copy the next n+1 bytes literally
//   n >  128 : repeat the next byte 257-n times
// Returns 1 on success, 0 if a run would write more than pixelCount values.
static int stbi__psd_decode_rle(stbi__context *s, stbi_uc *p, int pixelCount)
{
   int written = 0;

   for (;;) {
      int remaining = pixelCount - written;
      int run;

      if (remaining <= 0) break;

      run = stbi__get8(s);
      if (run == 128) continue;   // no-op control byte

      if (run < 128) {
         // literal run of run+1 bytes
         run += 1;
         if (run > remaining) return 0; // corrupt data
         written += run;
         for (; run != 0; --run, p += 4)
            *p = stbi__get8(s);
      } else {
         // replicated run: the control byte is a negative 8-bit count
         stbi_uc fill;
         run = 257 - run;
         if (run > remaining) return 0; // corrupt data
         fill = stbi__get8(s);
         written += run;
         for (; run != 0; --run, p += 4)
            *p = fill;
      }
   }

   return 1;
}
5617 
// Decode the image data of an RGB-mode PSD into an interleaved 4-channel buffer.
//   x, y      receive the width and height read from the header
//   comp      if non-NULL, is set to 4
//   req_comp  if non-zero and not 4, the result is converted with
//             stbi__convert_format() / stbi__convert_format16()
//   ri        ri->bits_per_channel is set to 16 when 16-bit output is produced
//   bpc       16-bit output is produced only for uncompressed 16-bit files when bpc == 16
// Channels missing from the file are filled with 0 (or full value for channel 3).
// Error paths return the value of stbi__errpuc(...); a failed format conversion
// returns NULL.
static void *stbi__psd_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri, int bpc)
{
   int pixelCount;
   int channelCount, compression;
   int channel, i;
   int bitdepth;
   int w,h;
   stbi_uc *out;
   // NOTE(review): ri is in fact read and written further down in this function.
   STBI_NOTUSED(ri);

   // Check identifier
   if (stbi__get32be(s) != 0x38425053)   // "8BPS"
      return stbi__errpuc("not PSD", "Corrupt PSD image");

   // Check file type version.
   if (stbi__get16be(s) != 1)
      return stbi__errpuc("wrong version", "Unsupported version of PSD image");

   // Skip 6 reserved bytes.
   stbi__skip(s, 6 );

   // Read the number of channels (R, G, B, A, etc).
   channelCount = stbi__get16be(s);
   if (channelCount < 0 || channelCount > 16)
      return stbi__errpuc("wrong channel count", "Unsupported number of channels in PSD image");

   // Read the rows and columns of the image.
   h = stbi__get32be(s);
   w = stbi__get32be(s);

   // Only 8-bit and 16-bit channel depths are accepted.
   bitdepth = stbi__get16be(s);
   if (bitdepth != 8 && bitdepth != 16)
      return stbi__errpuc("unsupported bit depth", "PSD bit depth is not 8 or 16 bit");

   // Make sure the color mode is RGB.
   // Valid options are:
   //   0: Bitmap
   //   1: Grayscale
   //   2: Indexed color
   //   3: RGB color
   //   4: CMYK color
   //   7: Multichannel
   //   8: Duotone
   //   9: Lab color
   if (stbi__get16be(s) != 3)
      return stbi__errpuc("wrong color format", "PSD is not in RGB color format");

   // Skip the Mode Data.  (It's the palette for indexed color; other info for other modes.)
   // Each of the next three sections is a 32-bit length followed by that many bytes.
   stbi__skip(s,stbi__get32be(s) );

   // Skip the image resources.  (resolution, pen tool paths, etc)
   stbi__skip(s, stbi__get32be(s) );

   // Skip the reserved data.
   stbi__skip(s, stbi__get32be(s) );

   // Find out if the data is compressed.
   // Known values:
   //   0: no compression
   //   1: RLE compressed
   compression = stbi__get16be(s);
   if (compression > 1)
      return stbi__errpuc("bad compression", "PSD has an unknown compression format");

   // Check size (this validates 4*w*h, the size of the 8-bit output buffer)
   if (!stbi__mad3sizes_valid(4, w, h, 0))
      return stbi__errpuc("too large", "Corrupt PSD");

   // Create the destination image.

   // 16-bit output is only produced for uncompressed 16-bit data when the caller asked for it
   if (!compression && bitdepth == 16 && bpc == 16) {
      out = (stbi_uc *) stbi__malloc_mad3(8, w, h, 0);
      ri->bits_per_channel = 16;
   } else
      out = (stbi_uc *) stbi__malloc(4 * w*h);

   if (!out) return stbi__errpuc("outofmem", "Out of memory");
   pixelCount = w*h;

   // Initialize the data to zero.
   //memset( out, 0, pixelCount * 4 );

   // Finally, the image data.
   if (compression) {
      // RLE as used by .PSD and .TIFF
      // Loop until you get the number of unpacked bytes you are expecting:
      //     Read the next source byte into n.
      //     If n is between 0 and 127 inclusive, copy the next n+1 bytes literally.
      //     Else if n is between -127 and -1 inclusive, copy the next byte -n+1 times.
      //     Else if n is 128, noop.
      // Endloop

      // The RLE-compressed data is preceded by a 2-byte data count for each row in the data,
      // which we're going to just skip.
      stbi__skip(s, h * channelCount * 2 );

      // Read the RLE data by channel.
      for (channel = 0; channel < 4; channel++) {
         stbi_uc *p;

         p = out+channel;
         if (channel >= channelCount) {
            // Fill this channel with default data.
            for (i = 0; i < pixelCount; i++, p += 4)
               *p = (channel == 3 ? 255 : 0);
         } else {
            // Read the RLE data.
            if (!stbi__psd_decode_rle(s, p, pixelCount)) {
               STBI_FREE(out);
               return stbi__errpuc("corrupt", "bad RLE data");
            }
         }
      }

   } else {
      // We're at the raw image data.  It's each channel in order (Red, Green, Blue, Alpha, ...)
      // where each channel consists of an 8-bit (or 16-bit) value for each pixel in the image.

      // Read the data by channel.
      for (channel = 0; channel < 4; channel++) {
         if (channel >= channelCount) {
            // Fill this channel with default data.
            if (bitdepth == 16 && bpc == 16) {
               stbi__uint16 *q = ((stbi__uint16 *) out) + channel;
               stbi__uint16 val = channel == 3 ? 65535 : 0;
               for (i = 0; i < pixelCount; i++, q += 4)
                  *q = val;
            } else {
               stbi_uc *p = out+channel;
               stbi_uc val = channel == 3 ? 255 : 0;
               for (i = 0; i < pixelCount; i++, p += 4)
                  *p = val;
            }
         } else {
            // NOTE(review): bits_per_channel is only assigned above in the 16-bit case;
            // presumably the caller initializes it to another value beforehand - confirm.
            if (ri->bits_per_channel == 16) {    // output bpc
               stbi__uint16 *q = ((stbi__uint16 *) out) + channel;
               for (i = 0; i < pixelCount; i++, q += 4)
                  *q = (stbi__uint16) stbi__get16be(s);
            } else {
               stbi_uc *p = out+channel;
               if (bitdepth == 16) {  // input bpc
                  // 16-bit input, 8-bit output: keep only the high byte of each sample
                  for (i = 0; i < pixelCount; i++, p += 4)
                     *p = (stbi_uc) (stbi__get16be(s) >> 8);
               } else {
                  for (i = 0; i < pixelCount; i++, p += 4)
                     *p = stbi__get8(s);
               }
            }
         }
      }
   }

   // remove weird white matte from PSD
   // (applied only to pixels whose alpha is neither 0 nor the maximum value)
   if (channelCount >= 4) {
      if (ri->bits_per_channel == 16) {
         for (i=0; i < w*h; ++i) {
            stbi__uint16 *pixel = (stbi__uint16 *) out + 4*i;
            if (pixel[3] != 0 && pixel[3] != 65535) {
               float a = pixel[3] / 65535.0f;
               float ra = 1.0f / a;
               float inv_a = 65535.0f * (1 - ra);
               pixel[0] = (stbi__uint16) (pixel[0]*ra + inv_a);
               pixel[1] = (stbi__uint16) (pixel[1]*ra + inv_a);
               pixel[2] = (stbi__uint16) (pixel[2]*ra + inv_a);
            }
         }
      } else {
         for (i=0; i < w*h; ++i) {
            unsigned char *pixel = out + 4*i;
            if (pixel[3] != 0 && pixel[3] != 255) {
               float a = pixel[3] / 255.0f;
               float ra = 1.0f / a;
               float inv_a = 255.0f * (1 - ra);
               pixel[0] = (unsigned char) (pixel[0]*ra + inv_a);
               pixel[1] = (unsigned char) (pixel[1]*ra + inv_a);
               pixel[2] = (unsigned char) (pixel[2]*ra + inv_a);
            }
         }
      }
   }

   // convert to desired output format
   if (req_comp && req_comp != 4) {
      if (ri->bits_per_channel == 16)
         out = (stbi_uc *) stbi__convert_format16((stbi__uint16 *) out, 4, req_comp, w, h);
      else
         out = stbi__convert_format(out, 4, req_comp, w, h);
      if (out == NULL) return out; // stbi__convert_format frees input on failure
   }

   // the decoded buffer always has 4 channels before conversion, so 4 is what gets reported
   if (comp) *comp = 4;
   *y = h;
   *x = w;

   return out;
}
5815 #endif
5816 
5817 // *************************************************************************************************
5818 // Softimage PIC loader
5819 // by Tom Seddon
5820 //
5821 // See http://softimage.wiki.softimage.com/index.php/INFO:_PIC_file_format
5822 // See http://ozviz.wasp.uwa.edu.au/~pbourke/dataformats/softimagepic/
5823 
5824 #ifndef STBI_NO_PIC
// Compare the next bytes of the stream against str[0..3], one byte at a time.
// Returns 1 if all four match. Returns 0 at the first mismatch, leaving the
// remaining bytes unread.
static int stbi__pic_is4(stbi__context *s,const char *str)
{
   int k = 0;

   while (k < 4) {
      if (stbi__get8(s) != (stbi_uc)str[k])
         return 0;
      ++k;
   }

   return 1;
}
5834 
// Check for a PIC header: the 4-byte magic 53 80 F6 34, then 84 bytes that are
// skipped, then the 4 characters "PICT". Returns 1 on a match, 0 otherwise.
// Does not rewind the stream.
static int stbi__pic_test_core(stbi__context *s)
{
   int remaining;

   if (!stbi__pic_is4(s,"\x53\x80\xF6\x34"))
      return 0;

   // skip the 84 bytes between the magic number and the "PICT" tag
   for (remaining = 84; remaining > 0; --remaining)
      stbi__get8(s);

   return stbi__pic_is4(s,"PICT") ? 1 : 0;
}
5850 
5851 typedef struct
5852 {
5853    stbi_uc size,type,channel;
5854 } stbi__pic_packet;
5855 
// Read one pixel's worth of channel bytes from the stream into dest[0..3].
// Bits 0x80, 0x40, 0x20, 0x10 of `channel` select bytes 0, 1, 2, 3; a byte
// whose bit is clear is neither read nor written.
// Returns dest, or the value of stbi__errpuc(...) if the stream hits
// end-of-file before a selected byte could be read.
static stbi_uc *stbi__readval(stbi__context *s, int channel, stbi_uc *dest)
{
   int slot;

   for (slot = 0; slot < 4; ++slot) {
      if ((channel & (0x80 >> slot)) == 0)
         continue;   // this channel is not part of the packet
      if (stbi__at_eof(s)) return stbi__errpuc("bad file","PIC file too short");
      dest[slot] = stbi__get8(s);
   }

   return dest;
}
5869 
// Copy the bytes selected by `channel` from src[0..3] to dest[0..3].
// Bits 0x80, 0x40, 0x20, 0x10 select bytes 0, 1, 2, 3; unselected bytes of
// dest are left untouched.
static void stbi__copyval(int channel,stbi_uc *dest,const stbi_uc *src)
{
   int slot;

   for (slot = 0; slot < 4; ++slot) {
      int bit = 0x80 >> slot;
      if (channel & bit)
         dest[slot] = src[slot];
   }
}
5878 
// Decode the packet list and pixel data of a PIC image into `result`.
//   width, height  image dimensions; each scanline occupies width*4 bytes of `result`
//   comp           set to 4 if any packet's channel mask has bit 0x10 set, otherwise 3
//   result         caller-provided buffer, written 4 bytes per pixel
// First the chained packet descriptors are read (at most 10); then, for every
// scanline, each packet is decoded in turn according to its compression type.
// Returns `result` on success. On failure returns the value of stbi__errpuc(...)
// or 0 (when stbi__readval() reported the failure).
static stbi_uc *stbi__pic_load_core(stbi__context *s,int width,int height,int *comp, stbi_uc *result)
{
   int act_comp=0,num_packets=0,y,chained;
   stbi__pic_packet packets[10];

   // this will (should...) cater for even some bizarre stuff like having data
   // for the same channel in multiple packets.
   do {
      stbi__pic_packet *packet;

      // refuse to read more descriptors than the packets[] array can hold
      if (num_packets==sizeof(packets)/sizeof(packets[0]))
         return stbi__errpuc("bad format","too many packets");

      packet = &packets[num_packets++];

      // descriptor layout: chained flag, size, type, channel mask (one byte each)
      chained = stbi__get8(s);
      packet->size    = stbi__get8(s);
      packet->type    = stbi__get8(s);
      packet->channel = stbi__get8(s);

      // accumulate the union of all channel masks seen
      act_comp |= packet->channel;

      if (stbi__at_eof(s))          return stbi__errpuc("bad file","file too short (reading packets)");
      if (packet->size != 8)  return stbi__errpuc("bad format","packet isn't 8bpp");
   } while (chained);

   *comp = (act_comp & 0x10 ? 4 : 3); // has alpha channel?

   for(y=0; y<height; ++y) {
      int packet_idx;

      for(packet_idx=0; packet_idx < num_packets; ++packet_idx) {
         stbi__pic_packet *packet = &packets[packet_idx];
         // every packet starts again at the beginning of the current scanline
         stbi_uc *dest = result+y*width*4;

         switch (packet->type) {
            default:
               return stbi__errpuc("bad format","packet has bad compression type");

            case 0: {//uncompressed
               int x;

               for(x=0;x<width;++x, dest+=4)
                  if (!stbi__readval(s,packet->channel,dest))
                     return 0;
               break;
            }

            case 1://Pure RLE
               {
                  int left=width, i;

                  // each run is a count byte followed by one pixel value
                  while (left>0) {
                     stbi_uc count,value[4];

                     count=stbi__get8(s);
                     if (stbi__at_eof(s))   return stbi__errpuc("bad file","file too short (pure read count)");

                     // a run longer than the rest of the scanline is clamped, not rejected
                     if (count > left)
                        count = (stbi_uc) left;

                     if (!stbi__readval(s,packet->channel,value))  return 0;

                     for(i=0; i<count; ++i,dest+=4)
                        stbi__copyval(packet->channel,dest,value);
                     left -= count;
                  }
               }
               break;

            case 2: {//Mixed RLE
               int left=width;
               while (left>0) {
                  int count = stbi__get8(s), i;
                  if (stbi__at_eof(s))  return stbi__errpuc("bad file","file too short (mixed read count)");

                  if (count >= 128) { // Repeated
                     stbi_uc value[4];

                     // 128 means a 16-bit big-endian count follows; otherwise the run is count-127
                     if (count==128)
                        count = stbi__get16be(s);
                     else
                        count -= 127;
                     if (count > left)
                        return stbi__errpuc("bad file","scanline overrun");

                     if (!stbi__readval(s,packet->channel,value))
                        return 0;

                     for(i=0;i<count;++i, dest += 4)
                        stbi__copyval(packet->channel,dest,value);
                  } else { // Raw
                     // count+1 pixel values follow literally
                     ++count;
                     if (count>left) return stbi__errpuc("bad file","scanline overrun");

                     for(i=0;i<count;++i, dest+=4)
                        if (!stbi__readval(s,packet->channel,dest))
                           return 0;
                  }
                  left-=count;
               }
               break;
            }
         }
      }
   }

   return result;
}
5988 
// Decode a Softimage PIC image.
//   px, py    receive the image width and height on success
//   comp      if non-NULL, receives the component count determined by
//             stbi__pic_load_core() (3 or 4)
//   req_comp  desired component count; 0 means "use the count found in the file"
//   ri        unused by this loader
// The image is decoded into an intermediate RGBA buffer and then passed through
// stbi__convert_format(). Returns the converted buffer. Returns the value of
// stbi__errpuc(...) for a short header, oversized dimensions or allocation
// failure, and NULL when decoding the pixel data fails.
static void *stbi__pic_load(stbi__context *s,int *px,int *py,int *comp,int req_comp, stbi__result_info *ri)
{
   stbi_uc *result;
   int i, x,y, internal_comp;
   STBI_NOTUSED(ri);

   // let the core always have somewhere to store the component count
   if (!comp) comp = &internal_comp;

   // skip the first 92 bytes of the header
   for (i=0; i<92; ++i)
      stbi__get8(s);

   x = stbi__get16be(s);
   y = stbi__get16be(s);
   if (stbi__at_eof(s))  return stbi__errpuc("bad file","file too short (pic header)");
   if (!stbi__mad3sizes_valid(x, y, 4, 0)) return stbi__errpuc("too large", "PIC image too large to decode");

   stbi__get32be(s); //skip `ratio'
   stbi__get16be(s); //skip `fields'
   stbi__get16be(s); //skip `pad'

   // intermediate buffer is RGBA
   result = (stbi_uc *) stbi__malloc_mad3(x, y, 4, 0);
   // the allocation must be checked before memset() writes through it
   if (!result) return stbi__errpuc("outofmem", "Out of memory");
   memset(result, 0xff, x*y*4);

   if (!stbi__pic_load_core(s,x,y,comp, result)) {
      // Stop here: the buffer is gone and *comp may never have been written,
      // so neither may be handed on to stbi__convert_format().
      STBI_FREE(result);
      return NULL;
   }
   *px = x;
   *py = y;
   if (req_comp == 0) req_comp = *comp;
   result=stbi__convert_format(result,4,req_comp,x,y);

   return result;
}
6024 
// Report whether the stream holds a PIC header (see stbi__pic_test_core).
// The stream is rewound before returning.
static int stbi__pic_test(stbi__context *s)
{
   int looks_like_pic;

   looks_like_pic = stbi__pic_test_core(s);
   stbi__rewind(s);

   return looks_like_pic;
}
6031 #endif
6032 
6033 // *************************************************************************************************
6034 // GIF loader -- public domain by Jean-Marc Lienher -- simplified/shrunk by stb
6035 
6036 #ifndef STBI_NO_GIF
// One entry of the GIF LZW code table (see stbi__gif.codes).
typedef struct
{
   stbi__int16 prefix;   // code that precedes this one in the chain, or a negative value if none
   stbi_uc first;        // first byte of the sequence this code expands to
   stbi_uc suffix;       // last byte of the sequence; used as the palette index when output
} stbi__gif_lzw;
6043 
6044 typedef struct
6045 {
6046    int w,h;
6047    stbi_uc *out, *old_out;             // output buffer (always 4 components)
6048    int flags, bgindex, ratio, transparent, eflags, delay;
6049    stbi_uc  pal[256][4];
6050    stbi_uc lpal[256][4];
6051    stbi__gif_lzw codes[4096];
6052    stbi_uc *color_table;
6053    int parse, step;
6054    int lflags;
6055    int start_x, start_y;
6056    int max_x, max_y;
6057    int cur_x, cur_y;
6058    int line_size;
6059 } stbi__gif;
6060 
// Check the six signature bytes of a GIF: "GIF8", then '7' or '9', then 'a'.
// Returns 1 on a match, 0 at the first byte that does not match.
// Does not rewind the stream.
static int stbi__gif_test_raw(stbi__context *s)
{
   int version;

   if (stbi__get8(s) != 'G') return 0;
   if (stbi__get8(s) != 'I') return 0;
   if (stbi__get8(s) != 'F') return 0;
   if (stbi__get8(s) != '8') return 0;

   version = stbi__get8(s);
   if (!(version == '9' || version == '7')) return 0;

   if (stbi__get8(s) != 'a') return 0;
   return 1;
}
6070 
// Report whether the stream starts with a GIF signature (see stbi__gif_test_raw).
// The stream is rewound before returning.
static int stbi__gif_test(stbi__context *s)
{
   int is_gif;

   is_gif = stbi__gif_test_raw(s);
   stbi__rewind(s);

   return is_gif;
}
6077 
// Read `num_entries` 3-byte color entries from the stream into `pal`.
// The three bytes of each entry are stored in reverse order (the first byte read
// goes to index 2, the third to index 0). Index 3 is set to 0 for the entry whose
// number equals `transp` and to 255 for every other entry.
static void stbi__gif_parse_colortable(stbi__context *s, stbi_uc pal[256][4], int num_entries, int transp)
{
   int idx = 0;

   while (idx < num_entries) {
      stbi_uc *entry = pal[idx];

      entry[2] = stbi__get8(s);
      entry[1] = stbi__get8(s);
      entry[0] = stbi__get8(s);

      if (transp == idx)
         entry[3] = 0;
      else
         entry[3] = 255;

      ++idx;
   }
}
6088 
// Parse the GIF signature and the header fields that follow it into `g`.
//   comp     if non-NULL, is set to 4
//   is_info  if non-zero, return right after the fixed header fields without
//            reading the color table
// Returns 1 on success, or the value of stbi__err("not GIF", ...) if the
// signature is not "GIF87a" / "GIF89a".
static int stbi__gif_header(stbi__context *s, stbi__gif *g, int *comp, int is_info)
{
   stbi_uc ver;

   // signature: 'G' 'I' 'F' '8', then '7' or '9', then 'a'
   if (stbi__get8(s) != 'G') return stbi__err("not GIF", "Corrupt GIF");
   if (stbi__get8(s) != 'I') return stbi__err("not GIF", "Corrupt GIF");
   if (stbi__get8(s) != 'F') return stbi__err("not GIF", "Corrupt GIF");
   if (stbi__get8(s) != '8') return stbi__err("not GIF", "Corrupt GIF");

   ver = stbi__get8(s);
   if (!(ver == '7' || ver == '9'))         return stbi__err("not GIF", "Corrupt GIF");
   if (stbi__get8(s) != 'a')                return stbi__err("not GIF", "Corrupt GIF");

   stbi__g_failure_reason = "";

   // fixed header fields, in file order
   g->w = stbi__get16le(s);
   g->h = stbi__get16le(s);
   g->flags = stbi__get8(s);
   g->bgindex = stbi__get8(s);
   g->ratio = stbi__get8(s);
   g->transparent = -1;

   // can't actually tell whether it's 3 or 4 until we parse the comments
   if (comp != 0)
      *comp = 4;

   // bit 0x80 of flags announces a color table with 2 << (flags & 7) entries
   if (!is_info && (g->flags & 0x80))
      stbi__gif_parse_colortable(s,g->pal, 2 << (g->flags & 7), -1);

   return 1;
}
6116 
// Read only the GIF header in order to report the image dimensions.
//   x, y   if non-NULL, receive the width and height from the header
//   comp   forwarded to stbi__gif_header(), which sets it to 4 when non-NULL
// Returns 1 on success. If the header is rejected the stream is rewound and 0 is
// returned. If the temporary decoder state cannot be allocated, returns the
// value of stbi__err("outofmem", ...).
static int stbi__gif_info_raw(stbi__context *s, int *x, int *y, int *comp)
{
   stbi__gif* g = (stbi__gif*) stbi__malloc(sizeof(stbi__gif));
   // stbi__gif_header() writes through g, so a failed allocation must be caught here
   if (!g) return stbi__err("outofmem", "Out of memory");
   if (!stbi__gif_header(s, g, comp, 1)) {
      STBI_FREE(g);
      stbi__rewind( s );
      return 0;
   }
   if (x) *x = g->w;
   if (y) *y = g->h;
   STBI_FREE(g);
   return 1;
}
6130 
// Emit the pixels that LZW code `code` expands to.
// The code table is a backwards linked list, so the prefix chain is expanded
// first by recursion; this call then writes the single pixel for this entry's
// suffix and advances the output position. Nothing is written once cur_y has
// reached max_y, and a color whose fourth byte is below 128 leaves the
// destination pixel unchanged.
static void stbi__out_gif_code(stbi__gif *g, stbi__uint16 code)
{
   const stbi__gif_lzw *entry = &g->codes[code];
   const stbi_uc *color;
   stbi_uc *dst;

   // recurse to decode the prefixes, since the linked-list is backwards,
   // and working backwards through an interleaved image would be nasty
   if (entry->prefix >= 0)
      stbi__out_gif_code(g, entry->prefix);

   if (g->cur_y >= g->max_y)
      return;

   dst   = g->out + (g->cur_x + g->cur_y);
   color = g->color_table + entry->suffix * 4;

   if (color[3] >= 128) {
      // color table entries are stored reversed, so swap bytes 0 and 2 on output
      dst[0] = color[2];
      dst[1] = color[1];
      dst[2] = color[0];
      dst[3] = color[3];
   }

   g->cur_x += 4;
   if (g->cur_x < g->max_x)
      return;   // still inside the current row

   // end of row: return to the left edge and move down by one step
   g->cur_x = g->start_x;
   g->cur_y += g->step;

   // past the bottom with passes remaining: restart from the top with a smaller step
   while (g->cur_y >= g->max_y && g->parse > 0) {
      g->step = (1 << g->parse) * g->line_size;
      g->cur_y = g->start_y + (g->step >> 1);
      --g->parse;
   }
}
6164 
// Decode the LZW-compressed raster data of one GIF image into g->out.
// Reads the initial code-size byte, then a chain of length-prefixed data
// sub-blocks, passing each decoded data code to stbi__out_gif_code().
// Returns g->out when the data ends (a zero-length sub-block, or the
// end-of-stream code); returns NULL if the code-size byte exceeds 12 and the
// result of stbi__errpuc() when the code stream is inconsistent.
static stbi_uc *stbi__process_gif_raster(stbi__context *s, stbi__gif *g)
{
   stbi_uc lzw_cs;               // code-size byte read from the stream
   stbi__int32 len, init_code;   // len: bytes still unread in the current sub-block
   stbi__uint32 first;           // stays 1 until a clear code has been seen
   stbi__int32 codesize, codemask, avail, oldcode, bits, valid_bits, clear;
   stbi__gif_lzw *p;

   lzw_cs = stbi__get8(s);
   if (lzw_cs > 12) return NULL;
   clear = 1 << lzw_cs;              // the clear code; clear+1 is the end-of-stream code
   first = 1;
   codesize = lzw_cs + 1;            // current width of a code, in bits
   codemask = (1 << codesize) - 1;
   bits = 0;                         // bit accumulator, filled from the low end
   valid_bits = 0;                   // number of unread bits currently in `bits`
   // every code below `clear` is a root entry standing for one literal value
   for (init_code = 0; init_code < clear; init_code++) {
      g->codes[init_code].prefix = -1;
      g->codes[init_code].first = (stbi_uc) init_code;
      g->codes[init_code].suffix = (stbi_uc) init_code;
   }

   // support no starting clear code
   // NOTE(review): despite the comment above, a data code that arrives before
   // any clear code is rejected below with "no clear code".
   avail = clear+2;                  // next free dictionary slot
   oldcode = -1;                     // previously decoded code, -1 if none

   len = 0;
   for(;;) {
      if (valid_bits < codesize) {
         // not enough bits for a full code: pull in one more byte
         if (len == 0) {
            len = stbi__get8(s); // start new block
            if (len == 0)
               return g->out;
         }
         --len;
         bits |= (stbi__int32) stbi__get8(s) << valid_bits;
         valid_bits += 8;
      } else {
         // extract the next code from the low bits of the accumulator
         stbi__int32 code = bits & codemask;
         bits >>= codesize;
         valid_bits -= codesize;
         // @OPTIMIZE: is there some way we can accelerate the non-clear path?
         if (code == clear) {  // clear code
            // reset the dictionary to just the root entries
            codesize = lzw_cs + 1;
            codemask = (1 << codesize) - 1;
            avail = clear + 2;
            oldcode = -1;
            first = 0;
         } else if (code == clear + 1) { // end of stream code
            // discard the rest of this sub-block and all following ones
            stbi__skip(s, len);
            while ((len = stbi__get8(s)) > 0)
               stbi__skip(s,len);
            return g->out;
         } else if (code <= avail) {
            if (first) return stbi__errpuc("no clear code", "Corrupt GIF");

            if (oldcode >= 0) {
               // add a new entry: the previous string plus one more value
               p = &g->codes[avail++];
               if (avail > 4096)        return stbi__errpuc("too many codes", "Corrupt GIF");
               p->prefix = (stbi__int16) oldcode;
               p->first = g->codes[oldcode].first;
               // code == avail refers to the entry being created right now, whose
               // last value is then the first value of the previous string
               p->suffix = (code == avail) ? p->first : g->codes[code].first;
            } else if (code == avail)
               return stbi__errpuc("illegal code in raster", "Corrupt GIF");

            stbi__out_gif_code(g, (stbi__uint16) code);

            // widen the code once the dictionary fills the current width
            // (not beyond the point where avail exceeds 0x0FFF)
            if ((avail & codemask) == 0 && avail <= 0x0FFF) {
               codesize++;
               codemask = (1 << codesize) - 1;
            }

            oldcode = code;
         } else {
            // code refers to an entry that does not exist yet
            return stbi__errpuc("illegal code in raster", "Corrupt GIF");
         }
      }
   }
}
6244 
// Fill part of g->out with palette entry g->bgindex and a 4th byte of 0.
// All four bounds are byte offsets into g->out rather than pixel coordinates:
// x0/x1 are offsets within a row (advanced 4 bytes at a time) and y0/y1 are
// offsets of row starts (advanced 4 * g->w at a time). x1 and y1 are exclusive.
static void stbi__fill_gif_background(stbi__gif *g, int x0, int y0, int x1, int y1)
{
   stbi_uc *bg = g->pal[g->bgindex];
   int row_off = y0;

   while (row_off < y1) {
      int col_off;
      for (col_off = x0; col_off < x1; col_off += 4) {
         stbi_uc *px = g->out + (row_off + col_off);
         // bytes 0 and 2 of the palette entry are swapped on output
         px[0] = bg[2];
         px[1] = bg[1];
         px[2] = bg[0];
         px[3] = 0;
      }
      row_off += 4 * g->w;
   }
}
6259 
6260 // this function is designed to support animated gifs, although stb_image doesn't support it
// Decode the next image of a GIF stream into a freshly allocated canvas.
//   s         input stream
//   g         decoder state; g->out == 0 means the header has not been read yet
//   comp      forwarded to stbi__gif_header()
//   req_comp  not used by this function
// Returns the canvas (also stored in g->out) once an image descriptor has been
// decoded, the sentinel (stbi_uc *) s when the stream terminator 0x3B is
// reached, or a failure value (0 / NULL / the result of stbi__errpuc()).
// On failure g->out may still hold the canvas allocated here.
static stbi_uc *stbi__gif_load_next(stbi__context *s, stbi__gif *g, int *comp, int req_comp)
{
   int i;
   stbi_uc *prev_out = 0;

   if (g->out == 0 && !stbi__gif_header(s, g, comp,0))
      return 0; // stbi__g_failure_reason set by stbi__gif_header

   if (!stbi__mad3sizes_valid(g->w, g->h, 4, 0))
      return stbi__errpuc("too large", "GIF too large");

   // keep the previous canvas around for the disposal handling below
   // NOTE(review): prev_out is never freed in this function; it is only kept
   // (as g->old_out) in case 1 -- confirm ownership if used for multiple frames.
   prev_out = g->out;
   g->out = (stbi_uc *) stbi__malloc_mad3(4, g->w, g->h, 0);
   if (g->out == 0) return stbi__errpuc("outofmem", "Out of memory");

   // bits 2..4 of eflags select how the new canvas is initialised
   switch ((g->eflags & 0x1C) >> 2) {
      case 0: // unspecified (also always used on 1st frame)
         stbi__fill_gif_background(g, 0, 0, 4 * g->w, 4 * g->w * g->h);
         break;
      case 1: // do not dispose
         if (prev_out) memcpy(g->out, prev_out, 4 * g->w * g->h);
         g->old_out = prev_out;
         break;
      case 2: // dispose to background
         // copy the previous canvas, then clear the previous image's rectangle
         if (prev_out) memcpy(g->out, prev_out, 4 * g->w * g->h);
         stbi__fill_gif_background(g, g->start_x, g->start_y, g->max_x, g->max_y);
         break;
      case 3: // dispose to previous
         // restore the previous image's rectangle row by row from old_out
         if (g->old_out) {
            for (i = g->start_y; i < g->max_y; i += 4 * g->w)
               memcpy(&g->out[i + g->start_x], &g->old_out[i + g->start_x], g->max_x - g->start_x);
         }
         break;
   }

   // walk the stream's blocks until an image has been decoded or the stream ends
   for (;;) {
      switch (stbi__get8(s)) {
         case 0x2C: /* Image Descriptor */
         {
            int prev_trans = -1;   // saved 4th byte of the transparent palette entry, -1 if untouched
            stbi__int32 x, y, w, h;
            stbi_uc *o;

            x = stbi__get16le(s);
            y = stbi__get16le(s);
            w = stbi__get16le(s);
            h = stbi__get16le(s);
            // the image rectangle must lie inside the canvas
            if (((x + w) > (g->w)) || ((y + h) > (g->h)))
               return stbi__errpuc("bad Image Descriptor", "Corrupt GIF");

            // all cursor fields are byte offsets into g->out: x values within a
            // row (4 bytes per pixel), y values as offsets of row starts
            g->line_size = g->w * 4;
            g->start_x = x * 4;
            g->start_y = y * g->line_size;
            g->max_x   = g->start_x + w * 4;
            g->max_y   = g->start_y + h * g->line_size;
            g->cur_x   = g->start_x;
            g->cur_y   = g->start_y;

            g->lflags = stbi__get8(s);

            if (g->lflags & 0x40) {
               g->step = 8 * g->line_size; // first interlaced spacing
               g->parse = 3;
            } else {
               g->step = g->line_size;
               g->parse = 0;
            }

            if (g->lflags & 0x80) {
               // this image carries its own color table
               stbi__gif_parse_colortable(s,g->lpal, 2 << (g->lflags & 7), g->eflags & 0x01 ? g->transparent : -1);
               g->color_table = (stbi_uc *) g->lpal;
            } else if (g->flags & 0x80) {
               // use the stream-wide table; temporarily zero the 4th byte of the
               // transparent entry (restored after decoding)
               if (g->transparent >= 0 && (g->eflags & 0x01)) {
                  prev_trans = g->pal[g->transparent][3];
                  g->pal[g->transparent][3] = 0;
               }
               g->color_table = (stbi_uc *) g->pal;
            } else
               return stbi__errpuc("missing color table", "Corrupt GIF");

            o = stbi__process_gif_raster(s, g);
            if (o == NULL) return NULL;

            if (prev_trans != -1)
               g->pal[g->transparent][3] = (stbi_uc) prev_trans;

            return o;
         }

         case 0x21: // Extension block; only the Graphic Control Extension (0xF9) is interpreted
         {
            int len;
            if (stbi__get8(s) == 0xF9) { // Graphic Control Extension.
               len = stbi__get8(s);
               if (len == 4) {
                  g->eflags = stbi__get8(s);
                  g->delay = stbi__get16le(s);
                  g->transparent = stbi__get8(s);
               } else {
                  // unexpected size: skip that many bytes and go back to block dispatch
                  stbi__skip(s, len);
                  break;
               }
            }
            // skip the remaining sub-blocks up to the zero-length terminator
            while ((len = stbi__get8(s)) != 0)
               stbi__skip(s, len);
            break;
         }

         case 0x3B: // gif stream termination code
            return (stbi_uc *) s; // using '1' causes warning on some compilers

         default:
            return stbi__errpuc("unknown code", "Corrupt GIF");
      }
   }

   // placed after the endless loop, so never executed; presumably only here to
   // mark req_comp as used
   STBI_NOTUSED(req_comp);
}
6379 
// Load the first image of a GIF stream.
//   s         input stream
//   x, y      receive the canvas width and height on success
//   comp      forwarded to the header parser
//   req_comp  requested channels per pixel; 0 or 4 returns the 4-channel data
//             as decoded, any other value converts via stbi__convert_format()
//   ri        unused
// Returns the pixel buffer, or a null/failure value when decoding fails, when
// the stream ends before any image, or when the decoder state cannot be
// allocated.
static void *stbi__gif_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri)
{
   stbi_uc *u = 0;
   stbi__gif* g = (stbi__gif*) stbi__malloc(sizeof(stbi__gif));
   STBI_NOTUSED(ri);

   // the decoder state is heap-allocated; fail cleanly instead of handing a
   // null pointer to memset()
   if (g == NULL) return stbi__errpuc("outofmem", "Out of memory");
   memset(g, 0, sizeof(*g));

   u = stbi__gif_load_next(s, g, comp, req_comp);
   if (u == (stbi_uc *) s) u = 0;  // end of animated gif marker
   if (u) {
      *x = g->w;
      *y = g->h;
      if (req_comp && req_comp != 4)
         u = stbi__convert_format(u, 4, req_comp, g->w, g->h);
   }
   else if (g->out)
      STBI_FREE(g->out);   // canvas was allocated but no image came out of it
   STBI_FREE(g);
   return u;
}
6400 
// Report GIF dimensions/channel count; simply forwards to stbi__gif_info_raw()
// and returns its result unchanged.
static int stbi__gif_info(stbi__context *s, int *x, int *y, int *comp)
{
   int found = stbi__gif_info_raw(s, x, y, comp);
   return found;
}
6405 #endif
6406 
6407 // *************************************************************************************************
6408 // Radiance RGBE HDR loader
6409 // originally by Nicolas Schulz
6410 #ifndef STBI_NO_HDR
// Compare the next bytes of the stream with `signature` (up to its NUL).
// Returns 1 and rewinds the stream when all bytes match; returns 0 at the
// first mismatch WITHOUT rewinding, so the caller has to rewind itself.
static int stbi__hdr_test_core(stbi__context *s, const char *signature)
{
   const char *expected = signature;

   while (*expected != 0) {
      if (stbi__get8(s) != *expected)
         return 0;
      ++expected;
   }
   stbi__rewind(s);
   return 1;
}
6420 
// Returns nonzero if the stream starts with either accepted HDR signature
// line ("#?RADIANCE\n" or "#?RGBE\n"). The stream is rewound after each probe.
static int stbi__hdr_test(stbi__context* s)
{
   int matched;

   matched = stbi__hdr_test_core(s, "#?RADIANCE\n");
   stbi__rewind(s);
   if (matched)
      return matched;

   matched = stbi__hdr_test_core(s, "#?RGBE\n");
   stbi__rewind(s);
   return matched;
}
6431 
6432 #define STBI__HDR_BUFLEN  1024
// Read one line of the stream into `buffer` (which must hold at least
// STBI__HDR_BUFLEN bytes) and NUL-terminate it; the '\n' is not stored.
// A line longer than STBI__HDR_BUFLEN-1 characters is truncated and the rest
// of it is discarded. Reading also stops once the stream reports end of file.
// Returns `buffer`.
static char *stbi__hdr_gettoken(stbi__context *z, char *buffer)
{
   int used = 0;
   char ch = (char) stbi__get8(z);

   for (;;) {
      if (stbi__at_eof(z) || ch == '\n')
         break;

      buffer[used++] = ch;
      if (used == STBI__HDR_BUFLEN-1) {
         // buffer is full: flush to end of line
         for (;;) {
            if (stbi__at_eof(z)) break;
            if (stbi__get8(z) == '\n') break;
         }
         break;
      }
      ch = (char) stbi__get8(z);
   }

   buffer[used] = 0;
   return buffer;
}
6454 
// Convert one 4-byte RGBE pixel (`input`: three mantissas plus a shared
// exponent byte) into `req_comp` floats at `output`.
//   req_comp 1: average of the three channels
//   req_comp 2: average, then 1
//   req_comp 3: the three channels
//   req_comp 4: the three channels, then 1
// An exponent byte of 0 produces zeros for the color channel(s).
static void stbi__hdr_convert(float *output, stbi_uc *input, int req_comp)
{
   if (input[3] == 0) {
      // zero exponent: color is 0, the extra channel (if any) is still 1
      if (req_comp == 4 || req_comp == 3) {
         if (req_comp == 4) output[3] = 1;
         output[0] = output[1] = output[2] = 0;
      } else if (req_comp == 2 || req_comp == 1) {
         if (req_comp == 2) output[1] = 1;
         output[0] = 0;
      }
      return;
   }

   {
      // common scale factor: 2^(exponent - 136)
      float scale = (float) ldexp(1.0f, input[3] - (int)(128 + 8));

      if (req_comp > 2) {
         output[0] = input[0] * scale;
         output[1] = input[1] * scale;
         output[2] = input[2] * scale;
      } else {
         output[0] = (input[0] + input[1] + input[2]) * scale / 3;
      }

      if (req_comp == 2) output[1] = 1;
      if (req_comp == 4) output[3] = 1;
   }
}
6483 
// Decode a Radiance RGBE image into a newly allocated float buffer.
//   s         input stream, positioned at the start of the file
//   x, y      receive the width and height parsed from the header
//   comp      if non-NULL, receives 3
//   req_comp  floats per output pixel; 0 selects 3
//   ri        unused
// Returns width*height*req_comp floats, or the result of stbi__errpf() when
// the header is not recognised, the size is rejected, an allocation fails or
// the run-length data is inconsistent.
static float *stbi__hdr_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri)
{
   char buffer[STBI__HDR_BUFLEN];
   char *token;
   int valid = 0;
   int width, height;
   stbi_uc *scanline;
   float *hdr_data;
   int len;
   unsigned char count, value;
   int i, j, k, c1,c2, z;
   const char *headerToken;
   STBI_NOTUSED(ri);

   // Check identifier
   headerToken = stbi__hdr_gettoken(s,buffer);
   if (strcmp(headerToken, "#?RADIANCE") != 0 && strcmp(headerToken, "#?RGBE") != 0)
      return stbi__errpf("not HDR", "Corrupt HDR image");

   // Parse header: read lines up to the first empty one, looking for the
   // only supported FORMAT line
   for(;;) {
      token = stbi__hdr_gettoken(s,buffer);
      if (token[0] == 0) break;
      if (strcmp(token, "FORMAT=32-bit_rle_rgbe") == 0) valid = 1;
   }

   if (!valid)    return stbi__errpf("unsupported format", "Unsupported HDR format");

   // Parse width and height ("-Y <height> +X <width>")
   // can't use sscanf() if we're not using stdio!
   token = stbi__hdr_gettoken(s,buffer);
   if (strncmp(token, "-Y ", 3) != 0)  return stbi__errpf("unsupported data layout", "Unsupported HDR format");
   token += 3;
   height = (int) strtol(token, &token, 10);
   while (*token == ' ') ++token;
   if (strncmp(token, "+X ", 3) != 0)  return stbi__errpf("unsupported data layout", "Unsupported HDR format");
   token += 3;
   width = (int) strtol(token, NULL, 10);

   *x = width;
   *y = height;

   if (comp) *comp = 3;
   if (req_comp == 0) req_comp = 3;

   if (!stbi__mad4sizes_valid(width, height, req_comp, sizeof(float), 0))
      return stbi__errpf("too large", "HDR image is too large");

   // Read data
   hdr_data = (float *) stbi__malloc_mad4(width, height, req_comp, sizeof(float), 0);
   if (!hdr_data)
      return stbi__errpf("outofmem", "Out of memory");

   // Load image data
   // image data is stored as some number of scanlines
   if ( width < 8 || width >= 32768) {
      // Read flat data
      for (j=0; j < height; ++j) {
         for (i=0; i < width; ++i) {
            stbi_uc rgbe[4];
           main_decode_loop:
            stbi__getn(s, rgbe, 4);
            stbi__hdr_convert(hdr_data + j * width * req_comp + i * req_comp, rgbe, req_comp);
         }
      }
   } else {
      // Read RLE-encoded data
      scanline = NULL;

      for (j = 0; j < height; ++j) {
         c1 = stbi__get8(s);
         c2 = stbi__get8(s);
         len = stbi__get8(s);
         if (c1 != 2 || c2 != 2 || (len & 0x80)) {
            // not run-length encoded, so we have to actually use THIS data as a decoded
            // pixel (note this can't be a valid pixel--one of RGB must be >= 128)
            stbi_uc rgbe[4];
            rgbe[0] = (stbi_uc) c1;
            rgbe[1] = (stbi_uc) c2;
            rgbe[2] = (stbi_uc) len;
            rgbe[3] = (stbi_uc) stbi__get8(s);
            stbi__hdr_convert(hdr_data, rgbe, req_comp);
            // continue in the flat-data loop above, starting at pixel 1 of row 0
            i = 1;
            j = 0;
            STBI_FREE(scanline);
            goto main_decode_loop; // yes, this makes no sense
         }
         // the remaining two header bytes give the scanline length, high byte first
         len <<= 8;
         len |= stbi__get8(s);
         if (len != width) { STBI_FREE(hdr_data); STBI_FREE(scanline); return stbi__errpf("invalid decoded scanline length", "corrupt HDR"); }
         if (scanline == NULL) {
            scanline = (stbi_uc *) stbi__malloc_mad2(width, 4, 0);
            if (!scanline) {
               STBI_FREE(hdr_data);
               return stbi__errpf("outofmem", "Out of memory");
            }
         }

         // each of the 4 byte planes is stored separately; de-interleave into scanline
         for (k = 0; k < 4; ++k) {
            int nleft;
            i = 0;
            while ((nleft = width - i) > 0) {
               count = stbi__get8(s);
               if (count > 128) {
                  // Run
                  value = stbi__get8(s);
                  count -= 128;
                  if (count > nleft) { STBI_FREE(hdr_data); STBI_FREE(scanline); return stbi__errpf("corrupt", "bad RLE data in HDR"); }
                  for (z = 0; z < count; ++z)
                     scanline[i++ * 4 + k] = value;
               } else {
                  // Dump
                  // a zero count consumes no pixels, so without rejecting it a stream
                  // that keeps producing zero bytes would keep this loop spinning forever
                  if (count == 0 || count > nleft) { STBI_FREE(hdr_data); STBI_FREE(scanline); return stbi__errpf("corrupt", "bad RLE data in HDR"); }
                  for (z = 0; z < count; ++z)
                     scanline[i++ * 4 + k] = stbi__get8(s);
               }
            }
         }
         for (i=0; i < width; ++i)
            stbi__hdr_convert(hdr_data+(j*width + i)*req_comp, scanline + i*4, req_comp);
      }
      if (scanline)
         STBI_FREE(scanline);
   }

   return hdr_data;
}
6611 
// Read width, height and channel count from a Radiance HDR header.
// Any of x, y, comp may be NULL. Returns 1 on success (with *comp set to 3);
// on every failure the stream is rewound and 0 is returned.
static int stbi__hdr_info(stbi__context *s, int *x, int *y, int *comp)
{
   char line[STBI__HDR_BUFLEN];
   char *cursor;
   int format_ok = 0;
   int unused;

   // let the code below write unconditionally
   if (x == NULL) x = &unused;
   if (y == NULL) y = &unused;
   if (comp == NULL) comp = &unused;

   if (stbi__hdr_test(s) == 0)
      goto reject;

   // header lines run up to the first empty line
   while (*(cursor = stbi__hdr_gettoken(s, line)) != 0) {
      if (strcmp(cursor, "FORMAT=32-bit_rle_rgbe") == 0)
         format_ok = 1;
   }
   if (!format_ok)
      goto reject;

   // resolution line: "-Y <height> +X <width>"
   cursor = stbi__hdr_gettoken(s, line);
   if (strncmp(cursor, "-Y ", 3) != 0)
      goto reject;
   cursor += 3;
   *y = (int) strtol(cursor, &cursor, 10);
   while (*cursor == ' ')
      ++cursor;
   if (strncmp(cursor, "+X ", 3) != 0)
      goto reject;
   cursor += 3;
   *x = (int) strtol(cursor, NULL, 10);
   *comp = 3;
   return 1;

reject:
   stbi__rewind( s );
   return 0;
}
6655 #endif // STBI_NO_HDR
6656 
6657 #ifndef STBI_NO_BMP
// Report BMP dimensions and channel count by running the shared header parser.
// The stream is always rewound afterwards. Returns 0 if the header parser
// fails, otherwise 1 after filling whichever of x, y, comp are non-NULL
// (comp is 4 when the parsed header has a nonzero `ma` field, else 3).
static int stbi__bmp_info(stbi__context *s, int *x, int *y, int *comp)
{
   stbi__bmp_data hdr;
   void *parsed;

   hdr.all_a = 255;
   parsed = stbi__bmp_parse_header(s, &hdr);
   stbi__rewind( s );

   if (parsed != NULL) {
      if (x != NULL) *x = s->img_x;
      if (y != NULL) *y = s->img_y;
      if (comp != NULL) {
         if (hdr.ma)
            *comp = 4;
         else
            *comp = 3;
      }
      return 1;
   }
   return 0;
}
6673 #endif
6674 
6675 #ifndef STBI_NO_PSD
// Read width, height and channel count from a PSD header.
// Any of x, y, comp may be NULL. Returns 1 on success (with *comp set to 4);
// on every failure the stream is rewound and 0 is returned.
// Both 8-bit and 16-bit-per-channel files are accepted, matching the formats
// the library documents as loadable.
static int stbi__psd_info(stbi__context *s, int *x, int *y, int *comp)
{
   int channelCount, depth, dummy;
   if (!x) x = &dummy;
   if (!y) y = &dummy;
   if (!comp) comp = &dummy;
   // signature: the four bytes "8BPS"
   if (stbi__get32be(s) != 0x38425053) {
       stbi__rewind( s );
       return 0;
   }
   // next 16-bit field must be 1 (presumably the format version)
   if (stbi__get16be(s) != 1) {
       stbi__rewind( s );
       return 0;
   }
   stbi__skip(s, 6);
   channelCount = stbi__get16be(s);
   if (channelCount < 0 || channelCount > 16) {
       stbi__rewind( s );
       return 0;
   }
   *y = stbi__get32be(s);
   *x = stbi__get32be(s);
   // bits per channel: 8 and 16 are both supported
   depth = stbi__get16be(s);
   if (depth != 8 && depth != 16) {
       stbi__rewind( s );
       return 0;
   }
   // next 16-bit field must be 3 (presumably the RGB color mode)
   if (stbi__get16be(s) != 3) {
       stbi__rewind( s );
       return 0;
   }
   *comp = 4;
   return 1;
}
6709 #endif
6710 
6711 #ifndef STBI_NO_PIC
// Read width, height and channel count from a Softimage PIC header.
// Any of x, y, comp may be NULL. Returns 1 on success; on every failure the
// stream is rewound and 0 is returned. *comp is 4 when any packet's channel
// mask has bit 0x10 set, otherwise 3.
static int stbi__pic_info(stbi__context *s, int *x, int *y, int *comp)
{
   int act_comp=0,num_packets=0,chained,dummy;
   stbi__pic_packet packets[10];

   if (!x) x = &dummy;
   if (!y) y = &dummy;
   if (!comp) comp = &dummy;

   // 4-byte magic number
   if (!stbi__pic_is4(s,"\x53\x80\xF6\x34")) {
      stbi__rewind(s);
      return 0;
   }

   stbi__skip(s, 88);

   *x = stbi__get16be(s);
   *y = stbi__get16be(s);
   if (stbi__at_eof(s)) {
      stbi__rewind( s);
      return 0;
   }
   // reject images with more than 2^28 pixels
   if ( (*x) != 0 && (1 << 28) / (*x) < (*y)) {
      stbi__rewind( s );
      return 0;
   }

   stbi__skip(s, 8);

   // walk the chain of packet descriptors, collecting their channel masks
   do {
      stbi__pic_packet *packet;

      // too many packets: rewind like every other failure path so that the
      // next format probe starts from the beginning of the stream
      if (num_packets==sizeof(packets)/sizeof(packets[0])) {
         stbi__rewind( s );
         return 0;
      }

      packet = &packets[num_packets++];
      chained = stbi__get8(s);
      packet->size    = stbi__get8(s);
      packet->type    = stbi__get8(s);
      packet->channel = stbi__get8(s);
      act_comp |= packet->channel;

      if (stbi__at_eof(s)) {
          stbi__rewind( s );
          return 0;
      }
      if (packet->size != 8) {
          stbi__rewind( s );
          return 0;
      }
   } while (chained);

   *comp = (act_comp & 0x10 ? 4 : 3);

   return 1;
}
6768 #endif
6769 
6770 // *************************************************************************************************
6771 // Portable Gray Map and Portable Pixel Map loader
6772 // by Ken Miller
6773 //
6774 // PGM: http://netpbm.sourceforge.net/doc/pgm.html
6775 // PPM: http://netpbm.sourceforge.net/doc/ppm.html
6776 //
6777 // Known limitations:
6778 //    Does not support comments in the header section
6779 //    Does not support ASCII image data (formats P2 and P3)
6780 //    Does not support 16-bit-per-channel
6781 
6782 #ifndef STBI_NO_PNM
6783 
// Returns 1 if the stream starts with "P5" or "P6" (leaving the stream just
// past those two bytes); otherwise rewinds the stream and returns 0.
static int      stbi__pnm_test(stbi__context *s)
{
   char magic0 = (char) stbi__get8(s);
   char magic1 = (char) stbi__get8(s);

   if (magic0 == 'P' && (magic1 == '5' || magic1 == '6'))
      return 1;

   stbi__rewind( s );
   return 0;
}
6795 
// Load a binary PGM/PPM image.
//   s         input stream; its img_x/img_y/img_n fields are filled from the header
//   x, y      receive the width and height
//   comp      if non-NULL, receives the channel count found in the file
//   req_comp  requested channels per pixel; 0 keeps the file's own count
//   ri        unused
// Returns the pixel buffer, 0 if the header is rejected, or the result of
// stbi__errpuc() / stbi__convert_format() on the other paths.
static void *stbi__pnm_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri)
{
   stbi_uc *pixels;
   STBI_NOTUSED(ri);

   if (stbi__pnm_info(s, (int *)&s->img_x, (int *)&s->img_y, (int *)&s->img_n) == 0)
      return 0;

   *x = s->img_x;
   *y = s->img_y;
   if (comp != NULL) *comp = s->img_n;

   if (!stbi__mad3sizes_valid(s->img_n, s->img_x, s->img_y, 0))
      return stbi__errpuc("too large", "PNM too large");

   pixels = (stbi_uc *) stbi__malloc_mad3(s->img_n, s->img_x, s->img_y, 0);
   if (pixels == NULL)
      return stbi__errpuc("outofmem", "Out of memory");

   // the raster follows the header as raw bytes
   stbi__getn(s, pixels, s->img_n * s->img_x * s->img_y);

   // no conversion needed when the caller accepts the file's channel count
   if (req_comp == 0 || req_comp == s->img_n)
      return pixels;

   // stbi__convert_format frees input on failure
   return stbi__convert_format(pixels, s->img_n, req_comp, s->img_x, s->img_y);
}
6821 
// Returns 1 for the six whitespace characters recognised in a PNM header
// (space, tab, newline, vertical tab, form feed, carriage return), else 0.
static int      stbi__pnm_isspace(char c)
{
   switch (c) {
      case ' ':
      case '\t':
      case '\n':
      case '\v':
      case '\f':
      case '\r':
         return 1;
      default:
         return 0;
   }
}
6826 
// Advance past whitespace and '#' comments in a PNM header.
// *c holds the current (already read) character on entry; on return it holds
// the first character that is neither whitespace nor part of a comment, unless
// the stream reported end of file first. A comment runs from '#' up to a
// '\n' or '\r'.
static void     stbi__pnm_skip_whitespace(stbi__context *s, char *c)
{
   for (;;) {
      // eat a run of whitespace
      for (;;) {
         if (stbi__at_eof(s)) break;
         if (!stbi__pnm_isspace(*c)) break;
         *c = (char) stbi__get8(s);
      }

      if (stbi__at_eof(s)) return;
      if (*c != '#') return;

      // eat the comment up to its line terminator; the terminator itself is
      // whitespace and is handled by the next pass
      for (;;) {
         if (stbi__at_eof(s)) break;
         if (*c == '\n' || *c == '\r') break;
         *c = (char) stbi__get8(s);
      }
   }
}
6840 
// Returns 1 if c is one of the characters '0'..'9', else 0.
static int      stbi__pnm_isdigit(char c)
{
   if (c < '0') return 0;
   if (c > '9') return 0;
   return 1;
}
6845 
// Parse an unsigned decimal number from a PNM header.
// *c holds the current (already read) character on entry and the first
// non-digit character on return (unless the stream reported end of file).
// Returns the parsed value, or 0 if *c was not a digit. A number too large
// for an int is clamped to the largest int value instead of overflowing; all
// of its digits are still consumed so the stream position stays in step.
static int      stbi__pnm_getinteger(stbi__context *s, char *c)
{
   const int limit = (int) (~0u >> 1);   // largest int, without needing <limits.h>
   int value = 0;

   while (!stbi__at_eof(s) && stbi__pnm_isdigit(*c)) {
      int digit = *c - '0';
      // value*10 + digit would exceed `limit` exactly when this holds; once
      // clamped, the test stays true for every further digit
      if (value > (limit - digit) / 10)
         value = limit;
      else
         value = value*10 + digit;
      *c = (char) stbi__get8(s);
   }

   return value;
}
6857 
// Parse a binary PGM ("P5") / PPM ("P6") header.
// Any of x, y, comp may be NULL. The stream is rewound before parsing starts.
// On success returns 1 with width, height and channel count (1 for P5, 3 for
// P6) stored, leaving the stream positioned after the header. On failure the
// stream is rewound and 0 (or the result of stbi__err()) is returned.
static int      stbi__pnm_info(stbi__context *s, int *x, int *y, int *comp)
{
   int maxv, dummy;
   char c, p, t;

   if (!x) x = &dummy;
   if (!y) y = &dummy;
   if (!comp) comp = &dummy;

   stbi__rewind(s);

   // Get identifier
   p = (char) stbi__get8(s);
   t = (char) stbi__get8(s);
   if (p != 'P' || (t != '5' && t != '6')) {
       stbi__rewind(s);
       return 0;
   }

   *comp = (t == '6') ? 3 : 1;  // '5' is 1-component .pgm; '6' is 3-component .ppm

   c = (char) stbi__get8(s);
   stbi__pnm_skip_whitespace(s, &c);

   *x = stbi__pnm_getinteger(s, &c); // read width
   stbi__pnm_skip_whitespace(s, &c);

   *y = stbi__pnm_getinteger(s, &c); // read height
   stbi__pnm_skip_whitespace(s, &c);

   maxv = stbi__pnm_getinteger(s, &c);  // read max value

   if (maxv > 255) {
      // rewind like the other failure path, so a following format probe
      // starts from the beginning of the stream
      stbi__rewind(s);
      return stbi__err("max value > 255", "PPM image not 8-bit");
   }
   return 1;
}
6895 #endif
6896 
// Try each compiled-in format's info routine in a fixed order and return 1 as
// soon as one recognises the stream (x, y, comp are filled by that routine).
// If none does, returns the result of stbi__err() for an unknown image type.
static int stbi__info_main(stbi__context *s, int *x, int *y, int *comp)
{
#ifndef STBI_NO_JPEG
   if (stbi__jpeg_info(s, x, y, comp) != 0)
      return 1;
#endif

#ifndef STBI_NO_PNG
   if (stbi__png_info(s, x, y, comp) != 0)
      return 1;
#endif

#ifndef STBI_NO_GIF
   if (stbi__gif_info(s, x, y, comp) != 0)
      return 1;
#endif

#ifndef STBI_NO_BMP
   if (stbi__bmp_info(s, x, y, comp) != 0)
      return 1;
#endif

#ifndef STBI_NO_PSD
   if (stbi__psd_info(s, x, y, comp) != 0)
      return 1;
#endif

#ifndef STBI_NO_PIC
   if (stbi__pic_info(s, x, y, comp) != 0)
      return 1;
#endif

#ifndef STBI_NO_PNM
   if (stbi__pnm_info(s, x, y, comp) != 0)
      return 1;
#endif

#ifndef STBI_NO_HDR
   if (stbi__hdr_info(s, x, y, comp) != 0)
      return 1;
#endif

   // test tga last because it's a crappy test!
#ifndef STBI_NO_TGA
   if (stbi__tga_info(s, x, y, comp) != 0)
      return 1;
#endif

   return stbi__err("unknown image type", "Image not of any known type, or corrupt");
}
6938 
6939 #ifndef STBI_NO_STDIO
stbi_info(char const * filename,int * x,int * y,int * comp)6940 STBIDEF int stbi_info(char const *filename, int *x, int *y, int *comp)
6941 {
6942     FILE *f = stbi__fopen(filename, "rb");
6943     int result;
6944     if (!f) return stbi__err("can't fopen", "Unable to open file");
6945     result = stbi_info_from_file(f, x, y, comp);
6946     fclose(f);
6947     return result;
6948 }
6949 
stbi_info_from_file(FILE * f,int * x,int * y,int * comp)6950 STBIDEF int stbi_info_from_file(FILE *f, int *x, int *y, int *comp)
6951 {
6952    int r;
6953    stbi__context s;
6954    long pos = ftell(f);
6955    stbi__start_file(&s, f);
6956    r = stbi__info_main(&s,x,y,comp);
6957    fseek(f,pos,SEEK_SET);
6958    return r;
6959 }
6960 #endif // !STBI_NO_STDIO
6961 
stbi_info_from_memory(stbi_uc const * buffer,int len,int * x,int * y,int * comp)6962 STBIDEF int stbi_info_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp)
6963 {
6964    stbi__context s;
6965    stbi__start_mem(&s,buffer,len);
6966    return stbi__info_main(&s,x,y,comp);
6967 }
6968 
stbi_info_from_callbacks(stbi_io_callbacks const * c,void * user,int * x,int * y,int * comp)6969 STBIDEF int stbi_info_from_callbacks(stbi_io_callbacks const *c, void *user, int *x, int *y, int *comp)
6970 {
6971    stbi__context s;
6972    stbi__start_callbacks(&s, (stbi_io_callbacks *) c, user);
6973    return stbi__info_main(&s,x,y,comp);
6974 }
6975 
6976 #endif // STB_IMAGE_IMPLEMENTATION
6977 
6978 /*
6979    revision history:
6980       2.16  (2017-07-23) all functions have 16-bit variants;
6981                          STBI_NO_STDIO works again;
6982                          compilation fixes;
6983                          fix rounding in unpremultiply;
6984                          optimize vertical flip;
6985                          disable raw_len validation;
6986                          documentation fixes
6987       2.15  (2017-03-18) fix png-1,2,4 bug; now all Imagenet JPGs decode;
6988                          warning fixes; disable run-time SSE detection on gcc;
6989                          uniform handling of optional "return" values;
6990                          thread-safe initialization of zlib tables
6991       2.14  (2017-03-03) remove deprecated STBI_JPEG_OLD; fixes for Imagenet JPGs
6992       2.13  (2016-11-29) add 16-bit API, only supported for PNG right now
6993       2.12  (2016-04-02) fix typo in 2.11 PSD fix that caused crashes
6994       2.11  (2016-04-02) allocate large structures on the stack
6995                          remove white matting for transparent PSD
6996                          fix reported channel count for PNG & BMP
6997                          re-enable SSE2 in non-gcc 64-bit
6998                          support RGB-formatted JPEG
6999                          read 16-bit PNGs (only as 8-bit)
7000       2.10  (2016-01-22) avoid warning introduced in 2.09 by STBI_REALLOC_SIZED
7001       2.09  (2016-01-16) allow comments in PNM files
7002                          16-bit-per-pixel TGA (not bit-per-component)
7003                          info() for TGA could break due to .hdr handling
                         info() for BMP now shares code instead of sloppy parse
7005                          can use STBI_REALLOC_SIZED if allocator doesn't support realloc
7006                          code cleanup
7007       2.08  (2015-09-13) fix to 2.07 cleanup, reading RGB PSD as RGBA
7008       2.07  (2015-09-13) fix compiler warnings
7009                          partial animated GIF support
7010                          limited 16-bpc PSD support
7011                          #ifdef unused functions
7012                          bug with < 92 byte PIC,PNM,HDR,TGA
7013       2.06  (2015-04-19) fix bug where PSD returns wrong '*comp' value
7014       2.05  (2015-04-19) fix bug in progressive JPEG handling, fix warning
7015       2.04  (2015-04-15) try to re-enable SIMD on MinGW 64-bit
7016       2.03  (2015-04-12) extra corruption checking (mmozeiko)
7017                          stbi_set_flip_vertically_on_load (nguillemot)
7018                          fix NEON support; fix mingw support
7019       2.02  (2015-01-19) fix incorrect assert, fix warning
7020       2.01  (2015-01-17) fix various warnings; suppress SIMD on gcc 32-bit without -msse2
7021       2.00b (2014-12-25) fix STBI_MALLOC in progressive JPEG
7022       2.00  (2014-12-25) optimize JPG, including x86 SSE2 & NEON SIMD (ryg)
7023                          progressive JPEG (stb)
7024                          PGM/PPM support (Ken Miller)
7025                          STBI_MALLOC,STBI_REALLOC,STBI_FREE
7026                          GIF bugfix -- seemingly never worked
7027                          STBI_NO_*, STBI_ONLY_*
7028       1.48  (2014-12-14) fix incorrectly-named assert()
7029       1.47  (2014-12-14) 1/2/4-bit PNG support, both direct and paletted (Omar Cornut & stb)
7030                          optimize PNG (ryg)
7031                          fix bug in interlaced PNG with user-specified channel count (stb)
7032       1.46  (2014-08-26)
7033               fix broken tRNS chunk (colorkey-style transparency) in non-paletted PNG
7034       1.45  (2014-08-16)
7035               fix MSVC-ARM internal compiler error by wrapping malloc
7036       1.44  (2014-08-07)
7037               various warning fixes from Ronny Chevalier
7038       1.43  (2014-07-15)
7039               fix MSVC-only compiler problem in code changed in 1.42
7040       1.42  (2014-07-09)
7041               don't define _CRT_SECURE_NO_WARNINGS (affects user code)
7042               fixes to stbi__cleanup_jpeg path
7043               added STBI_ASSERT to avoid requiring assert.h
7044       1.41  (2014-06-25)
7045               fix search&replace from 1.36 that messed up comments/error messages
7046       1.40  (2014-06-22)
7047               fix gcc struct-initialization warning
7048       1.39  (2014-06-15)
7049               fix to TGA optimization when req_comp != number of components in TGA;
7050               fix to GIF loading because BMP wasn't rewinding (whoops, no GIFs in my test suite)
7051               add support for BMP version 5 (more ignored fields)
7052       1.38  (2014-06-06)
7053               suppress MSVC warnings on integer casts truncating values
7054               fix accidental rename of 'skip' field of I/O
7055       1.37  (2014-06-04)
7056               remove duplicate typedef
7057       1.36  (2014-06-03)
7058               convert to header file single-file library
7059               if de-iphone isn't set, load iphone images color-swapped instead of returning NULL
7060       1.35  (2014-05-27)
7061               various warnings
7062               fix broken STBI_SIMD path
7063               fix bug where stbi_load_from_file no longer left file pointer in correct place
7064               fix broken non-easy path for 32-bit BMP (possibly never used)
7065               TGA optimization by Arseny Kapoulkine
7066       1.34  (unknown)
7067               use STBI_NOTUSED in stbi__resample_row_generic(), fix one more leak in tga failure case
7068       1.33  (2011-07-14)
7069               make stbi_is_hdr work in STBI_NO_HDR (as specified), minor compiler-friendly improvements
7070       1.32  (2011-07-13)
7071               support for "info" function for all supported filetypes (SpartanJ)
7072       1.31  (2011-06-20)
7073               a few more leak fixes, bug in PNG handling (SpartanJ)
7074       1.30  (2011-06-11)
7075               added ability to load files via callbacks to accommodate custom input streams (Ben Wenger)
7076               removed deprecated format-specific test/load functions
7077               removed support for installable file formats (stbi_loader) -- would have been broken for IO callbacks anyway
7078               error cases in bmp and tga give messages and don't leak (Raymond Barbiero, grisha)
7079               fix inefficiency in decoding 32-bit BMP (David Woo)
7080       1.29  (2010-08-16)
7081               various warning fixes from Aurelien Pocheville
7082       1.28  (2010-08-01)
7083               fix bug in GIF palette transparency (SpartanJ)
7084       1.27  (2010-08-01)
7085               cast-to-stbi_uc to fix warnings
7086       1.26  (2010-07-24)
7087               fix bug in file buffering for PNG reported by SpartanJ
7088       1.25  (2010-07-17)
7089               refix trans_data warning (Won Chun)
7090       1.24  (2010-07-12)
7091               perf improvements reading from files on platforms with lock-heavy fgetc()
7092               minor perf improvements for jpeg
7093               deprecated type-specific functions so we'll get feedback if they're needed
7094               attempt to fix trans_data warning (Won Chun)
7095       1.23    fixed bug in iPhone support
7096       1.22  (2010-07-10)
7097               removed image *writing* support
7098               stbi_info support from Jetro Lauha
7099               GIF support from Jean-Marc Lienher
7100               iPhone PNG-extensions from James Brown
7101               warning-fixes from Nicolas Schulz and Janez Zemva (i.e. Janez (U+017D)emva)
7102       1.21    fix use of 'stbi_uc' in header (reported by jon blow)
7103       1.20    added support for Softimage PIC, by Tom Seddon
7104       1.19    bug in interlaced PNG corruption check (found by ryg)
7105       1.18  (2008-08-02)
7106               fix a threading bug (local mutable static)
7107       1.17    support interlaced PNG
7108       1.16    major bugfix - stbi__convert_format converted one too many pixels
7109       1.15    initialize some fields for thread safety
7110       1.14    fix threadsafe conversion bug
7111               header-file-only version (#define STBI_HEADER_FILE_ONLY before including)
7112       1.13    threadsafe
7113       1.12    const qualifiers in the API
7114       1.11    Support installable IDCT, colorspace conversion routines
7115       1.10    Fixes for 64-bit (don't use "unsigned long")
7116               optimized upsampling by Fabian "ryg" Giesen
7117       1.09    Fix format-conversion for PSD code (bad global variables!)
7118       1.08    Thatcher Ulrich's PSD code integrated by Nicolas Schulz
7119       1.07    attempt to fix C++ warning/errors again
7120       1.06    attempt to fix C++ warning/errors again
7121       1.05    fix TGA loading to return correct *comp and use good luminance calc
7122       1.04    default float alpha is 1, not 255; use 'void *' for stbi_image_free
7123       1.03    bugfixes to STBI_NO_STDIO, STBI_NO_HDR
7124       1.02    support for (subset of) HDR files, float interface for preferred access to them
7125       1.01    fix bug: possible bug in handling right-side up bmps... not sure
7126               fix bug: the stbi__bmp_load() and stbi__tga_load() functions didn't work at all
7127       1.00    interface to zlib that skips zlib header
7128       0.99    correct handling of alpha in palette
7129       0.98    TGA loader by lonesock; dynamically add loaders (untested)
7130       0.97    jpeg errors on too large a file; also catch another malloc failure
7131       0.96    fix detection of invalid v value - particleman@mollyrocket forum
7132       0.95    during header scan, seek to markers in case of padding
7133       0.94    STBI_NO_STDIO to disable stdio usage; rename all #defines the same
7134       0.93    handle jpegtran output; verbose errors
7135       0.92    read 4,8,16,24,32-bit BMP files of several formats
7136       0.91    output 24-bit Windows 3.0 BMP files
7137       0.90    fix a few more warnings; bump version number to approach 1.0
7138       0.61    bugfixes due to Marc LeBlanc, Christopher Lloyd
7139       0.60    fix compiling as c++
7140       0.59    fix warnings: merge Dave Moore's -Wall fixes
7141       0.58    fix bug: zlib uncompressed mode len/nlen was wrong endian
7142       0.57    fix bug: jpg last huffman symbol before marker was >9 bits but less than 16 available
7143       0.56    fix bug: zlib uncompressed mode len vs. nlen
7144       0.55    fix bug: restart_interval not initialized to 0
7145       0.54    allow NULL for 'int *comp'
7146       0.53    fix bug in png 3->4; speedup png decoding
7147       0.52    png handles req_comp=3,4 directly; minor cleanup; jpeg comments
7148       0.51    obey req_comp requests, 1-component jpegs return as 1-component,
7149               on 'test' only check type, not whether we support this variant
7150       0.50  (2006-11-19)
7151               first released version
7152 */
7153 
7154 
7155 /*
7156 ------------------------------------------------------------------------------
7157 This software is available under 2 licenses -- choose whichever you prefer.
7158 ------------------------------------------------------------------------------
7159 ALTERNATIVE A - MIT License
7160 Copyright (c) 2017 Sean Barrett
7161 Permission is hereby granted, free of charge, to any person obtaining a copy of
7162 this software and associated documentation files (the "Software"), to deal in
7163 the Software without restriction, including without limitation the rights to
7164 use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
7165 of the Software, and to permit persons to whom the Software is furnished to do
7166 so, subject to the following conditions:
7167 The above copyright notice and this permission notice shall be included in all
7168 copies or substantial portions of the Software.
7169 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
7170 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
7171 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
7172 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
7173 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
7174 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
7175 SOFTWARE.
7176 ------------------------------------------------------------------------------
7177 ALTERNATIVE B - Public Domain (www.unlicense.org)
7178 This is free and unencumbered software released into the public domain.
7179 Anyone is free to copy, modify, publish, use, compile, sell, or distribute this
7180 software, either in source code form or as a compiled binary, for any purpose,
7181 commercial or non-commercial, and by any means.
7182 In jurisdictions that recognize copyright laws, the author or authors of this
7183 software dedicate any and all copyright interest in the software to the public
7184 domain. We make this dedication for the benefit of the public at large and to
7185 the detriment of our heirs and successors. We intend this dedication to be an
7186 overt act of relinquishment in perpetuity of all present and future rights to
7187 this software under copyright law.
7188 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
7189 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
7190 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
7191 AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
7192 ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
7193 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
7194 ------------------------------------------------------------------------------
7195 */
7196