1 /* stb_image - v2.16 - public domain image loader - http://nothings.org/stb_image.h
2                                      no warranty implied; use at your own risk
3 
4    Do this:
5       #define STB_IMAGE_IMPLEMENTATION
6    before you include this file in *one* C or C++ file to create the implementation.
7 
8    // i.e. it should look like this:
9    #include ...
10    #include ...
11    #include ...
12    #define STB_IMAGE_IMPLEMENTATION
13    #include "stb_image.h"
14 
15    You can #define STBI_ASSERT(x) before the #include to avoid using assert.h.
16    And #define STBI_MALLOC, STBI_REALLOC, and STBI_FREE to avoid using malloc,realloc,free
17 
18 
19    QUICK NOTES:
20       Primarily of interest to game developers and other people who can
21           avoid problematic images and only need the trivial interface
22 
23       JPEG baseline & progressive (12 bpc/arithmetic not supported, same as stock IJG lib)
24       PNG 1/2/4/8/16-bit-per-channel
25 
26       TGA (not sure what subset, if a subset)
27       BMP non-1bpp, non-RLE
28       PSD (composited view only, no extra channels, 8/16 bit-per-channel)
29 
30       GIF (*comp always reports as 4-channel)
31       HDR (radiance rgbE format)
32       PIC (Softimage PIC)
33       PNM (PPM and PGM binary only)
34 
35       Animated GIF still needs a proper API, but here's one way to do it:
36           http://gist.github.com/urraka/685d9a6340b26b830d49
37 
38       - decode from memory or through FILE (define STBI_NO_STDIO to remove code)
39       - decode from arbitrary I/O callbacks
40       - SIMD acceleration on x86/x64 (SSE2) and ARM (NEON)
41 
42    Full documentation under "DOCUMENTATION" below.
43 
44 
45 LICENSE
46 
47   See end of file for license information.
48 
49 RECENT REVISION HISTORY:
50 
51       2.16  (2017-07-23) all functions have 16-bit variants; optimizations; bugfixes
52       2.15  (2017-03-18) fix png-1,2,4; all Imagenet JPGs; no runtime SSE detection on GCC
53       2.14  (2017-03-03) remove deprecated STBI_JPEG_OLD; fixes for Imagenet JPGs
54       2.13  (2016-12-04) experimental 16-bit API, only for PNG so far; fixes
55       2.12  (2016-04-02) fix typo in 2.11 PSD fix that caused crashes
56       2.11  (2016-04-02) 16-bit PNGS; enable SSE2 in non-gcc x64
57                          RGB-format JPEG; remove white matting in PSD;
58                          allocate large structures on the stack;
59                          correct channel count for PNG & BMP
60       2.10  (2016-01-22) avoid warning introduced in 2.09
61       2.09  (2016-01-16) 16-bit TGA; comments in PNM files; STBI_REALLOC_SIZED
62 
63    See end of file for full revision history.
64 
65 
66  ============================    Contributors    =========================
67 
68  Image formats                          Extensions, features
69     Sean Barrett (jpeg, png, bmp)          Jetro Lauha (stbi_info)
70     Nicolas Schulz (hdr, psd)              Martin "SpartanJ" Golini (stbi_info)
71     Jonathan Dummer (tga)                  James "moose2000" Brown (iPhone PNG)
72     Jean-Marc Lienher (gif)                Ben "Disch" Wenger (io callbacks)
73     Tom Seddon (pic)                       Omar Cornut (1/2/4-bit PNG)
74     Thatcher Ulrich (psd)                  Nicolas Guillemot (vertical flip)
75     Ken Miller (pgm, ppm)                  Richard Mitton (16-bit PSD)
76     github:urraka (animated gif)           Junggon Kim (PNM comments)
77                                            Daniel Gibson (16-bit TGA)
78                                            socks-the-fox (16-bit PNG)
79                                            Jeremy Sawicki (handle all ImageNet JPGs)
80  Optimizations & bugfixes
81     Fabian "ryg" Giesen
82     Arseny Kapoulkine
83     John-Mark Allen
84 
85  Bug & warning fixes
86     Marc LeBlanc            David Woo          Guillaume George   Martins Mozeiko
87     Christpher Lloyd        Jerry Jansson      Joseph Thomson     Phil Jordan
88     Dave Moore              Roy Eltham         Hayaki Saito       Nathan Reed
89     Won Chun                Luke Graham        Johan Duparc       Nick Verigakis
90     the Horde3D community   Thomas Ruf         Ronny Chevalier    Baldur Karlsson
91     Janez Zemva             John Bartholomew   Michal Cichon      github:rlyeh
92     Jonathan Blow           Ken Hamada         Tero Hanninen      github:romigrou
93     Laurent Gomila          Cort Stratton      Sergio Gonzalez    github:svdijk
94     Aruelien Pocheville     Thibault Reuille   Cass Everitt       github:snagar
95     Ryamond Barbiero        Paul Du Bois       Engin Manap        github:Zelex
96     Michaelangel007@github  Philipp Wiesemann  Dale Weiler        github:grim210
97     Oriol Ferrer Mesia      Josh Tobin         Matthew Gregan     github:sammyhw
98     Blazej Dariusz Roszkowski                  Gregory Mullen     github:phprus
99     Christian Floisand      Kevin Schmidt                         github:poppolopoppo
100 */
101 
102 #ifndef STBI_INCLUDE_STB_IMAGE_H
103 #define STBI_INCLUDE_STB_IMAGE_H
104 
105 // DOCUMENTATION
106 //
107 // Limitations:
108 //    - 16-bit-per-channel PNG keeps its full depth only through the stbi_load_16 functions
109 //    - no 12-bit-per-channel JPEG
110 //    - no JPEGs with arithmetic coding
111 //    - no 1-bit BMP
112 //    - GIF always returns *comp=4
113 //
114 // Basic usage (see HDR discussion below for HDR usage):
115 //    int x,y,n;
116 //    unsigned char *data = stbi_load(filename, &x, &y, &n, 0);
117 //    // ... process data if not NULL ...
118 //    // ... x = width, y = height, n = # 8-bit components per pixel ...
119 //    // ... replace '0' with '1'..'4' to force that many components per pixel
120 //    // ... but 'n' will always be the number that it would have been if you said 0
121 //    stbi_image_free(data)
122 //
123 // Standard parameters:
124 //    int *x                 -- outputs image width in pixels
125 //    int *y                 -- outputs image height in pixels
126 //    int *channels_in_file  -- outputs # of image components in image file
127 //    int desired_channels   -- if non-zero, # of image components requested in result
128 //
129 // The return value from an image loader is an 'unsigned char *' which points
130 // to the pixel data, or NULL on an allocation failure or if the image is
131 // corrupt or invalid. The pixel data consists of *y scanlines of *x pixels,
132 // with each pixel consisting of N interleaved 8-bit components; the first
133 // pixel pointed to is top-left-most in the image. There is no padding between
134 // image scanlines or between pixels, regardless of format. The number of
135 // components N is 'desired_channels' if desired_channels is non-zero, or
136 // *channels_in_file otherwise. If desired_channels is non-zero,
137 // *channels_in_file has the number of components that _would_ have been
138 // output otherwise. E.g. if you set desired_channels to 4, you will always
139 // get RGBA output, but you can check *channels_in_file to see if it's trivially
140 // opaque because e.g. there were only 3 channels in the source image.
141 //
142 // An output image with N components has the following components interleaved
143 // in this order in each pixel:
144 //
145 //     N=#comp     components
146 //       1           grey
147 //       2           grey, alpha
148 //       3           red, green, blue
149 //       4           red, green, blue, alpha
150 //
151 // If image loading fails for any reason, the return value will be NULL,
152 // and *x, *y, *channels_in_file will be unchanged. The function
153 // stbi_failure_reason() can be queried for an extremely brief, end-user
154 // unfriendly explanation of why the load failed. Define STBI_NO_FAILURE_STRINGS
155 // to avoid compiling these strings at all, and STBI_FAILURE_USERMSG to get slightly
156 // more user-friendly ones.
157 //
158 // Paletted PNG, BMP, GIF, and PIC images are automatically depalettized.
159 //
160 // ===========================================================================
161 //
162 // Philosophy
163 //
164 // stb libraries are designed with the following priorities:
165 //
166 //    1. easy to use
167 //    2. easy to maintain
168 //    3. good performance
169 //
170 // Sometimes I let "good performance" creep up in priority over "easy to maintain",
171 // and for best performance I may provide less-easy-to-use APIs that give higher
172 // performance, in addition to the easy to use ones. Nevertheless, it's important
173 // to keep in mind that from the standpoint of you, a client of this library,
174 // all you care about is #1 and #3, and stb libraries DO NOT emphasize #3 above all.
175 //
176 // Some secondary priorities arise directly from the first two, some of which
177 // make more explicit reasons why performance can't be emphasized.
178 //
179 //    - Portable ("ease of use")
180 //    - Small source code footprint ("easy to maintain")
181 //    - No dependencies ("ease of use")
182 //
183 // ===========================================================================
184 //
185 // I/O callbacks
186 //
187 // I/O callbacks allow you to read from arbitrary sources, like packaged
188 // files or some other source. Data read from callbacks are processed
189 // through a small internal buffer (currently 128 bytes) to try to reduce
190 // overhead.
191 //
192 // The three functions you must define are "read" (reads some bytes of data),
193 // "skip" (skips some bytes of data), "eof" (reports if the stream is at the end).
194 //
195 // ===========================================================================
196 //
197 // SIMD support
198 //
199 // The JPEG decoder will try to automatically use SIMD kernels on x86 when
200 // supported by the compiler. For ARM Neon support, you must explicitly
201 // request it.
202 //
203 // (The old do-it-yourself SIMD API is no longer supported in the current
204 // code.)
205 //
206 // On x86, SSE2 is used automatically when available (run-time test with MSVC; with GCC/Clang
207 // only when compiled with -msse2); otherwise the generic C versions are used. On ARM targets,
208 // the typical path is to have separate builds for NEON and non-NEON devices
209 // (at least this is true for iOS and Android). Therefore, the NEON support is
210 // toggled by a build flag: define STBI_NEON to get NEON loops.
211 //
212 // If for some reason you do not want to use any of SIMD code, or if
213 // you have issues compiling it, you can disable it entirely by
214 // defining STBI_NO_SIMD.
215 //
216 // ===========================================================================
217 //
218 // HDR image support   (disable by defining STBI_NO_HDR)
219 //
220 // stb_image now supports loading HDR images in general, and currently
221 // the Radiance .HDR file format, although the support is provided
222 // generically. You can still load any file through the existing interface;
223 // if you attempt to load an HDR file, it will be automatically remapped to
224 // LDR, assuming gamma 2.2 and an arbitrary scale factor defaulting to 1;
225 // both of these constants can be reconfigured through this interface:
226 //
227 //     stbi_hdr_to_ldr_gamma(2.2f);
228 //     stbi_hdr_to_ldr_scale(1.0f);
229 //
230 // (note, do not use _inverse_ constants; stb_image will invert them
231 // appropriately).
232 //
233 // Additionally, there is a new, parallel interface for loading files as
234 // (linear) floats to preserve the full dynamic range:
235 //
236 //    float *data = stbi_loadf(filename, &x, &y, &n, 0);
237 //
238 // If you load LDR images through this interface, those images will
239 // be promoted to floating point values, run through the inverse of
240 // constants corresponding to the above:
241 //
242 //     stbi_ldr_to_hdr_scale(1.0f);
243 //     stbi_ldr_to_hdr_gamma(2.2f);
244 //
245 // Finally, given a filename (or an open file or memory block--see header
246 // file for details) containing image data, you can query for the "most
247 // appropriate" interface to use (that is, whether the image is HDR or
248 // not), using:
249 //
250 //     stbi_is_hdr(char const *filename);
251 //
252 // ===========================================================================
253 //
254 // iPhone PNG support:
255 //
256 // By default we convert iphone-formatted PNGs back to RGB, even though
257 // they are internally encoded differently. You can disable this conversion
258 // by calling stbi_convert_iphone_png_to_rgb(0), in which case
259 // you will always just get the native iphone "format" through (which
260 // is BGR stored in RGB).
261 //
262 // Call stbi_set_unpremultiply_on_load(1) as well to force a divide per
263 // pixel to remove any premultiplied alpha *only* if the image file explicitly
264 // says there's premultiplied data (currently only happens in iPhone images,
265 // and only if iPhone convert-to-rgb processing is on).
266 //
267 // ===========================================================================
268 //
269 // ADDITIONAL CONFIGURATION
270 //
271 //  - You can suppress implementation of any of the decoders to reduce
272 //    your code footprint by #defining one or more of the following
273 //    symbols before creating the implementation.
274 //
275 //        STBI_NO_JPEG
276 //        STBI_NO_PNG
277 //        STBI_NO_BMP
278 //        STBI_NO_PSD
279 //        STBI_NO_TGA
280 //        STBI_NO_GIF
281 //        STBI_NO_HDR
282 //        STBI_NO_PIC
283 //        STBI_NO_PNM   (.ppm and .pgm)
284 //
285 //  - You can request *only* certain decoders and suppress all other ones
286 //    (this will be more forward-compatible, as addition of new decoders
287 //    doesn't require you to disable them explicitly):
288 //
289 //        STBI_ONLY_JPEG
290 //        STBI_ONLY_PNG
291 //        STBI_ONLY_BMP
292 //        STBI_ONLY_PSD
293 //        STBI_ONLY_TGA
294 //        STBI_ONLY_GIF
295 //        STBI_ONLY_HDR
296 //        STBI_ONLY_PIC
297 //        STBI_ONLY_PNM   (.ppm and .pgm)
298 //
299 //   - If you use STBI_NO_PNG (or _ONLY_ without PNG), and you still
300 //     want the zlib decoder to be available, #define STBI_SUPPORT_ZLIB
301 //
302 
303 
304 #ifndef STBI_NO_STDIO
305 #include <stdio.h>
306 #endif // STBI_NO_STDIO
307 
308 #define STBI_VERSION 1
309 
// Component-count constants. Each non-default value equals the number of
// interleaved components per pixel (see the N=#comp table above).
enum
{
   // "no preference" -- only used for desired_channels
   STBI_default    = 0,

   // concrete pixel layouts
   STBI_grey       = 1,
   STBI_grey_alpha = 2,
   STBI_rgb        = 3,
   STBI_rgb_alpha  = 4
};
319 
320 typedef unsigned char stbi_uc;
321 typedef unsigned short stbi_us;
322 
323 #ifdef __cplusplus
324 extern "C" {
325 #endif
326 
327 #ifdef STB_IMAGE_STATIC
328 #define STBIDEF static
329 #else
330 #define STBIDEF extern
331 #endif
332 
333 //////////////////////////////////////////////////////////////////////////////
334 //
335 // PRIMARY API - works on images of any type
336 //
337 
338 //
339 // load image by filename, open file, or memory buffer
340 //
341 
// Table of user-supplied I/O functions. 'user' is the opaque pointer that
// was handed to the *_from_callbacks entry point.
typedef struct
{
   // fill 'data' with 'size' bytes.  return number of bytes actually read
   int      (*read)  (void *user,char *data,int size);

   // skip the next 'n' bytes, or 'unget' the last -n bytes if negative
   void     (*skip)  (void *user,int n);

   // returns nonzero if we are at end of file/data
   int      (*eof)   (void *user);
} stbi_io_callbacks;
348 
349 ////////////////////////////////////
350 //
351 // 8-bits-per-channel interface
352 //
353 
354 STBIDEF stbi_uc *stbi_load_from_memory   (stbi_uc           const *buffer, int len   , int *x, int *y, int *channels_in_file, int desired_channels);
355 STBIDEF stbi_uc *stbi_load_from_callbacks(stbi_io_callbacks const *clbk  , void *user, int *x, int *y, int *channels_in_file, int desired_channels);
356 
357 #ifndef STBI_NO_STDIO
358 STBIDEF stbi_uc *stbi_load            (char const *filename, int *x, int *y, int *channels_in_file, int desired_channels);
359 STBIDEF stbi_uc *stbi_load_from_file  (FILE *f, int *x, int *y, int *channels_in_file, int desired_channels);
360 // for stbi_load_from_file, file pointer is left pointing immediately after image
361 #endif
362 
363 ////////////////////////////////////
364 //
365 // 16-bits-per-channel interface
366 //
367 
368 STBIDEF stbi_us *stbi_load_16_from_memory   (stbi_uc const *buffer, int len, int *x, int *y, int *channels_in_file, int desired_channels);
369 STBIDEF stbi_us *stbi_load_16_from_callbacks(stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *channels_in_file, int desired_channels);
370 
371 #ifndef STBI_NO_STDIO
372 STBIDEF stbi_us *stbi_load_16          (char const *filename, int *x, int *y, int *channels_in_file, int desired_channels);
373 STBIDEF stbi_us *stbi_load_from_file_16(FILE *f, int *x, int *y, int *channels_in_file, int desired_channels);
374 #endif
375 
376 ////////////////////////////////////
377 //
378 // float-per-channel interface
379 //
380 #ifndef STBI_NO_LINEAR
381    STBIDEF float *stbi_loadf_from_memory     (stbi_uc const *buffer, int len, int *x, int *y, int *channels_in_file, int desired_channels);
382    STBIDEF float *stbi_loadf_from_callbacks  (stbi_io_callbacks const *clbk, void *user, int *x, int *y,  int *channels_in_file, int desired_channels);
383 
384    #ifndef STBI_NO_STDIO
385    STBIDEF float *stbi_loadf            (char const *filename, int *x, int *y, int *channels_in_file, int desired_channels);
386    STBIDEF float *stbi_loadf_from_file  (FILE *f, int *x, int *y, int *channels_in_file, int desired_channels);
387    #endif
388 #endif
389 
390 #ifndef STBI_NO_HDR
391    STBIDEF void   stbi_hdr_to_ldr_gamma(float gamma);
392    STBIDEF void   stbi_hdr_to_ldr_scale(float scale);
393 #endif // STBI_NO_HDR
394 
395 #ifndef STBI_NO_LINEAR
396    STBIDEF void   stbi_ldr_to_hdr_gamma(float gamma);
397    STBIDEF void   stbi_ldr_to_hdr_scale(float scale);
398 #endif // STBI_NO_LINEAR
399 
400 // stbi_is_hdr is always defined, but always returns false if STBI_NO_HDR
401 STBIDEF int    stbi_is_hdr_from_callbacks(stbi_io_callbacks const *clbk, void *user);
402 STBIDEF int    stbi_is_hdr_from_memory(stbi_uc const *buffer, int len);
403 #ifndef STBI_NO_STDIO
404 STBIDEF int      stbi_is_hdr          (char const *filename);
405 STBIDEF int      stbi_is_hdr_from_file(FILE *f);
406 #endif // STBI_NO_STDIO
407 
408 
409 // get a VERY brief reason for failure
410 // NOT THREADSAFE
411 STBIDEF const char *stbi_failure_reason  (void);
412 
413 // free the loaded image -- this is just free()
414 STBIDEF void     stbi_image_free      (void *retval_from_stbi_load);
415 
416 // get image dimensions & components without fully decoding
417 STBIDEF int      stbi_info_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp);
418 STBIDEF int      stbi_info_from_callbacks(stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *comp);
419 
420 #ifndef STBI_NO_STDIO
421 STBIDEF int      stbi_info            (char const *filename,     int *x, int *y, int *comp);
422 STBIDEF int      stbi_info_from_file  (FILE *f,                  int *x, int *y, int *comp);
423 
424 #endif
425 
426 
427 
428 // for image formats that explicitly notate that they have premultiplied alpha,
429 // we just return the colors as stored in the file. set this flag to force
430 // unpremultiplication. results are undefined if the unpremultiply overflows.
431 STBIDEF void stbi_set_unpremultiply_on_load(int flag_true_if_should_unpremultiply);
432 
433 // indicate whether we should process iphone images back to canonical format,
434 // or just pass them through "as-is"
435 STBIDEF void stbi_convert_iphone_png_to_rgb(int flag_true_if_should_convert);
436 
437 // flip the image vertically, so the first pixel in the output array is the bottom left
438 STBIDEF void stbi_set_flip_vertically_on_load(int flag_true_if_should_flip);
439 
440 // ZLIB client - used by PNG, available for other purposes
441 
442 STBIDEF char *stbi_zlib_decode_malloc_guesssize(const char *buffer, int len, int initial_size, int *outlen);
443 STBIDEF char *stbi_zlib_decode_malloc_guesssize_headerflag(const char *buffer, int len, int initial_size, int *outlen, int parse_header);
444 STBIDEF char *stbi_zlib_decode_malloc(const char *buffer, int len, int *outlen);
445 STBIDEF int   stbi_zlib_decode_buffer(char *obuffer, int olen, const char *ibuffer, int ilen);
446 
447 STBIDEF char *stbi_zlib_decode_noheader_malloc(const char *buffer, int len, int *outlen);
448 STBIDEF int   stbi_zlib_decode_noheader_buffer(char *obuffer, int olen, const char *ibuffer, int ilen);
449 
450 
451 #ifdef __cplusplus
452 }
453 #endif
454 
455 //
456 //
457 ////   end header file   /////////////////////////////////////////////////////
458 #endif // STBI_INCLUDE_STB_IMAGE_H
459 
460 #ifdef STB_IMAGE_IMPLEMENTATION
461 
462 #if defined(STBI_ONLY_JPEG) || defined(STBI_ONLY_PNG) || defined(STBI_ONLY_BMP) \
463   || defined(STBI_ONLY_TGA) || defined(STBI_ONLY_GIF) || defined(STBI_ONLY_PSD) \
464   || defined(STBI_ONLY_HDR) || defined(STBI_ONLY_PIC) || defined(STBI_ONLY_PNM) \
465   || defined(STBI_ONLY_ZLIB)
466    #ifndef STBI_ONLY_JPEG
467    #define STBI_NO_JPEG
468    #endif
469    #ifndef STBI_ONLY_PNG
470    #define STBI_NO_PNG
471    #endif
472    #ifndef STBI_ONLY_BMP
473    #define STBI_NO_BMP
474    #endif
475    #ifndef STBI_ONLY_PSD
476    #define STBI_NO_PSD
477    #endif
478    #ifndef STBI_ONLY_TGA
479    #define STBI_NO_TGA
480    #endif
481    #ifndef STBI_ONLY_GIF
482    #define STBI_NO_GIF
483    #endif
484    #ifndef STBI_ONLY_HDR
485    #define STBI_NO_HDR
486    #endif
487    #ifndef STBI_ONLY_PIC
488    #define STBI_NO_PIC
489    #endif
490    #ifndef STBI_ONLY_PNM
491    #define STBI_NO_PNM
492    #endif
493 #endif
494 
495 #if defined(STBI_NO_PNG) && !defined(STBI_SUPPORT_ZLIB) && !defined(STBI_NO_ZLIB)
496 #define STBI_NO_ZLIB
497 #endif
498 
499 
500 #include <stdarg.h>
501 #include <stddef.h> // ptrdiff_t on osx
502 #include <stdlib.h>
503 #include <string.h>
504 #include <limits.h>
505 
506 #if !defined(STBI_NO_LINEAR) || !defined(STBI_NO_HDR)
507 #include <math.h>  // ldexp
508 #endif
509 
510 #ifndef STBI_NO_STDIO
511 #include <stdio.h>
512 #endif
513 
514 #ifndef STBI_ASSERT
515 #include <assert.h>
516 #define STBI_ASSERT(x) assert(x)
517 #endif
518 
519 
520 #ifndef _MSC_VER
521    #ifdef __cplusplus
522    #define stbi_inline inline
523    #else
524    #define stbi_inline
525    #endif
526 #else
527    #define stbi_inline __forceinline
528 #endif
529 
530 
531 #ifdef _MSC_VER
532 typedef unsigned short stbi__uint16;
533 typedef   signed short stbi__int16;
534 typedef unsigned int   stbi__uint32;
535 typedef   signed int   stbi__int32;
536 #else
537 #include <stdint.h>
538 typedef uint16_t stbi__uint16;
539 typedef int16_t  stbi__int16;
540 typedef uint32_t stbi__uint32;
541 typedef int32_t  stbi__int32;
542 #endif
543 
544 // should produce compiler error if size is wrong
545 typedef unsigned char validate_uint32[sizeof(stbi__uint32)==4 ? 1 : -1];
546 
547 #ifdef _MSC_VER
548 #define STBI_NOTUSED(v)  (void)(v)
549 #else
550 #define STBI_NOTUSED(v)  (void)sizeof(v)
551 #endif
552 
553 #ifdef _MSC_VER
554 #define STBI_HAS_LROTL
555 #endif
556 
// stbi_lrot(x,y): rotate x left by y bits. x is expected to be a 32-bit
// unsigned value. Note that the portable form evaluates both arguments twice.
#ifdef STBI_HAS_LROTL
   #define stbi_lrot(x,y)  _lrotl(x,y)
#else
   // Both shift counts are masked to 0..31. Without the mask a rotate by 0
   // would shift right by 32, which is undefined for a 32-bit operand.
   // For y in 1..31 the result is unchanged from the unmasked form.
   #define stbi_lrot(x,y)  (((x) << ((y) & 31)) | ((x) >> ((32 - (y)) & 31)))
#endif
562 
563 #if defined(STBI_MALLOC) && defined(STBI_FREE) && (defined(STBI_REALLOC) || defined(STBI_REALLOC_SIZED))
564 // ok
565 #elif !defined(STBI_MALLOC) && !defined(STBI_FREE) && !defined(STBI_REALLOC) && !defined(STBI_REALLOC_SIZED)
566 // ok
567 #else
568 #error "Must define all or none of STBI_MALLOC, STBI_FREE, and STBI_REALLOC (or STBI_REALLOC_SIZED)."
569 #endif
570 
571 #ifndef STBI_MALLOC
572 #define STBI_MALLOC(sz)           malloc(sz)
573 #define STBI_REALLOC(p,newsz)     realloc(p,newsz)
574 #define STBI_FREE(p)              free(p)
575 #endif
576 
577 #ifndef STBI_REALLOC_SIZED
578 #define STBI_REALLOC_SIZED(p,oldsz,newsz) STBI_REALLOC(p,newsz)
579 #endif
580 
581 // x86/x64 detection
582 #if defined(__x86_64__) || defined(_M_X64)
583 #define STBI__X64_TARGET
584 #elif defined(__i386) || defined(_M_IX86)
585 #define STBI__X86_TARGET
586 #endif
587 
588 #if defined(__GNUC__) && defined(STBI__X86_TARGET) && !defined(__SSE2__) && !defined(STBI_NO_SIMD)
589 // gcc doesn't support sse2 intrinsics unless you compile with -msse2,
590 // which in turn means it gets to use SSE2 everywhere. This is unfortunate,
591 // but previous attempts to provide the SSE2 functions with runtime
592 // detection caused numerous issues. The way architecture extensions are
593 // exposed in GCC/Clang is, sadly, not really suited for one-file libs.
594 // New behavior: if compiled with -msse2, we use SSE2 without any
595 // detection; if not, we don't use it at all.
596 #define STBI_NO_SIMD
597 #endif
598 
599 #if defined(__MINGW32__) && defined(STBI__X86_TARGET) && !defined(STBI_MINGW_ENABLE_SSE2) && !defined(STBI_NO_SIMD)
600 // Note that __MINGW32__ doesn't actually mean 32-bit, so we have to avoid STBI__X64_TARGET
601 //
602 // 32-bit MinGW wants ESP to be 16-byte aligned, but this is not in the
603 // Windows ABI and VC++ as well as Windows DLLs don't maintain that invariant.
604 // As a result, enabling SSE2 on 32-bit MinGW is dangerous when not
605 // simultaneously enabling "-mstackrealign".
606 //
607 // See https://github.com/nothings/stb/issues/81 for more information.
608 //
609 // So default to no SSE2 on 32-bit MinGW. If you've read this far and added
610 // -mstackrealign to your build settings, feel free to #define STBI_MINGW_ENABLE_SSE2.
611 #define STBI_NO_SIMD
612 #endif
613 
614 #if !defined(STBI_NO_SIMD) && (defined(STBI__X86_TARGET) || defined(STBI__X64_TARGET))
615 #define STBI_SSE2
616 #include <emmintrin.h>
617 
618 #ifdef _MSC_VER
619 
620 #if _MSC_VER >= 1400  // not VC6
621 #include <intrin.h> // __cpuid
// Runs CPUID with function id 1 through the MSVC intrinsic and returns the
// fourth register slot of the result.
static int stbi__cpuid3(void)
{
   int regs[4];
   __cpuid(regs, 1);
   return regs[3];
}
628 #else
stbi__cpuid3(void)629 static int stbi__cpuid3(void)
630 {
631    int res;
632    __asm {
633       mov  eax,1
634       cpuid
635       mov  res,edx
636    }
637    return res;
638 }
639 #endif
640 
641 #define STBI_SIMD_ALIGN(type, name) __declspec(align(16)) type name
642 
// Run-time SSE2 check for MSVC builds: tests bit 26 of the value returned
// by stbi__cpuid3(). Returns 1 if the bit is set, 0 otherwise.
static int stbi__sse2_available(void)
{
   int features = stbi__cpuid3();
   int sse2_bit = (features >> 26) & 1;
   return sse2_bit != 0;
}
648 #else // assume GCC-style if not VC++
649 #define STBI_SIMD_ALIGN(type, name) type name __attribute__((aligned(16)))
650 
// GCC/Clang flavour: no run-time detection is performed.
static int stbi__sse2_available(void)
{
   // Reaching this code on GCC/Clang implies the build used -msse2, so the
   // compiler is already free to emit SSE2 instructions anywhere -- and
   // therefore so are we.
   const int available = 1;
   return available;
}
658 #endif
659 #endif
660 
661 // ARM NEON
662 #if defined(STBI_NO_SIMD) && defined(STBI_NEON)
663 #undef STBI_NEON
664 #endif
665 
666 #ifdef STBI_NEON
667 #include <arm_neon.h>
668 // assume GCC or Clang on ARM targets
669 #define STBI_SIMD_ALIGN(type, name) type name __attribute__((aligned(16)))
670 #endif
671 
672 #ifndef STBI_SIMD_ALIGN
673 #define STBI_SIMD_ALIGN(type, name) type name
674 #endif
675 
676 ///////////////////////////////////////////////
677 //
678 //  stbi__context struct and start_xxx functions
679 
680 // stbi__context structure is our basic context used by all images, so it
681 // contains all the IO context, plus some basic image information
682 typedef struct
683 {
684    stbi__uint32 img_x, img_y;
685    int img_n, img_out_n;
686 
687    stbi_io_callbacks io;
688    void *io_user_data;
689 
690    int read_from_callbacks;
691    int buflen;
692    stbi_uc buffer_start[128];
693 
694    stbi_uc *img_buffer, *img_buffer_end;
695    stbi_uc *img_buffer_original, *img_buffer_original_end;
696 } stbi__context;
697 
698 
699 static void stbi__refill_buffer(stbi__context *s);
700 
701 // initialize a memory-decode context
stbi__start_mem(stbi__context * s,stbi_uc const * buffer,int len)702 static void stbi__start_mem(stbi__context *s, stbi_uc const *buffer, int len)
703 {
704    s->io.read = NULL;
705    s->read_from_callbacks = 0;
706    s->img_buffer = s->img_buffer_original = (stbi_uc *) buffer;
707    s->img_buffer_end = s->img_buffer_original_end = (stbi_uc *) buffer+len;
708 }
709 
710 // initialize a callback-based context
stbi__start_callbacks(stbi__context * s,stbi_io_callbacks * c,void * user)711 static void stbi__start_callbacks(stbi__context *s, stbi_io_callbacks *c, void *user)
712 {
713    s->io = *c;
714    s->io_user_data = user;
715    s->buflen = sizeof(s->buffer_start);
716    s->read_from_callbacks = 1;
717    s->img_buffer_original = s->buffer_start;
718    stbi__refill_buffer(s);
719    s->img_buffer_original_end = s->img_buffer_end;
720 }
721 
722 #ifndef STBI_NO_STDIO
723 
// stdio 'read' callback: 'user' is the FILE* to read from. Reads up to
// 'size' bytes into 'data' and returns the number of bytes actually read.
static int stbi__stdio_read(void *user, char *data, int size)
{
   FILE *stream = (FILE *) user;
   size_t got = fread(data, 1, size, stream);
   return (int) got;
}
728 
// stdio 'skip' callback: 'user' is the FILE*. Moves the file position by
// 'n' bytes relative to the current position (negative n moves backwards).
static void stbi__stdio_skip(void *user, int n)
{
   FILE *f = (FILE *) user;
   int ch;

   fseek(f, n, SEEK_CUR);

   // A successful fseek clears the end-of-file indicator and never sets it,
   // so after skipping past the end feof() would still report "not at end".
   // Reading one byte makes the indicator reflect the new position.
   ch = fgetc(f);
   if (ch != EOF)
      ungetc(ch, f);   // still inside the file: push the byte back
}
733 
// stdio 'eof' callback: 'user' is the FILE*. Returns nonzero when no more
// data can be obtained from the stream.
static int stbi__stdio_eof(void *user)
{
   FILE *f = (FILE *) user;

   // A read error does not set the end-of-file indicator, so a stream in the
   // error state is also treated as finished.
   return feof(f) || ferror(f);
}
738 
739 static stbi_io_callbacks stbi__stdio_callbacks =
740 {
741    stbi__stdio_read,
742    stbi__stdio_skip,
743    stbi__stdio_eof,
744 };
745 
// Set up a context that reads from an already-open FILE* by routing it
// through the stdio callback table.
static void stbi__start_file(stbi__context *s, FILE *f)
{
   void *user = (void *) f;
   stbi__start_callbacks(s, &stbi__stdio_callbacks, user);
}
750 
751 //static void stop_file(stbi__context *s) { }
752 
753 #endif // !STBI_NO_STDIO
754 
// Restore the read window to the one recorded when the context was started.
static void stbi__rewind(stbi__context *s)
{
   // A true rewind would go back to the beginning of the stream. Restoring
   // the initial buffer window is enough here, because rewinding only
   // happens after a 'test', which only ever looks at at most 92 bytes.
   s->img_buffer_end = s->img_buffer_original_end;
   s->img_buffer     = s->img_buffer_original;
}
763 
// values for the channel_order field of stbi__result_info
enum
{
   STBI_ORDER_RGB = 0,
   STBI_ORDER_BGR = 1
};
769 
// Description of a decoded result, passed to each format's *_load function
// through its stbi__result_info* parameter.
typedef struct
{
   int bits_per_channel;   // bits per channel of the decoded data
   int num_channels;       // number of channels
   int channel_order;      // presumably STBI_ORDER_RGB or STBI_ORDER_BGR -- confirm at the writers
} stbi__result_info;
776 
777 #ifndef STBI_NO_JPEG
778 static int      stbi__jpeg_test(stbi__context *s);
779 static void    *stbi__jpeg_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri);
780 static int      stbi__jpeg_info(stbi__context *s, int *x, int *y, int *comp);
781 #endif
782 
783 #ifndef STBI_NO_PNG
784 static int      stbi__png_test(stbi__context *s);
785 static void    *stbi__png_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri);
786 static int      stbi__png_info(stbi__context *s, int *x, int *y, int *comp);
787 #endif
788 
789 #ifndef STBI_NO_BMP
790 static int      stbi__bmp_test(stbi__context *s);
791 static void    *stbi__bmp_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri);
792 static int      stbi__bmp_info(stbi__context *s, int *x, int *y, int *comp);
793 #endif
794 
795 #ifndef STBI_NO_TGA
796 static int      stbi__tga_test(stbi__context *s);
797 static void    *stbi__tga_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri);
798 static int      stbi__tga_info(stbi__context *s, int *x, int *y, int *comp);
799 #endif
800 
801 #ifndef STBI_NO_PSD
802 static int      stbi__psd_test(stbi__context *s);
803 static void    *stbi__psd_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri, int bpc);
804 static int      stbi__psd_info(stbi__context *s, int *x, int *y, int *comp);
805 #endif
806 
807 #ifndef STBI_NO_HDR
808 static int      stbi__hdr_test(stbi__context *s);
809 static float   *stbi__hdr_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri);
810 static int      stbi__hdr_info(stbi__context *s, int *x, int *y, int *comp);
811 #endif
812 
813 #ifndef STBI_NO_PIC
814 static int      stbi__pic_test(stbi__context *s);
815 static void    *stbi__pic_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri);
816 static int      stbi__pic_info(stbi__context *s, int *x, int *y, int *comp);
817 #endif
818 
819 #ifndef STBI_NO_GIF
820 static int      stbi__gif_test(stbi__context *s);
821 static void    *stbi__gif_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri);
822 static int      stbi__gif_info(stbi__context *s, int *x, int *y, int *comp);
823 #endif
824 
825 #ifndef STBI_NO_PNM
826 static int      stbi__pnm_test(stbi__context *s);
827 static void    *stbi__pnm_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri);
828 static int      stbi__pnm_info(stbi__context *s, int *x, int *y, int *comp);
829 #endif
830 
831 // this is not threadsafe
832 static const char *stbi__g_failure_reason;
833 
stbi_failure_reason(void)834 STBIDEF const char *stbi_failure_reason(void)
835 {
836    return stbi__g_failure_reason;
837 }
838 
stbi__err(const char * str)839 static int stbi__err(const char *str)
840 {
841    stbi__g_failure_reason = str;
842    return 0;
843 }
844 
// Allocation wrapper: hands 'size' to the STBI_MALLOC hook and returns its
// result unchanged.
static void *stbi__malloc(size_t size)
{
   void *block = STBI_MALLOC(size);
   return block;
}
849 
850 // stb_image uses ints pervasively, including for offset calculations.
851 // therefore the largest decoded image size we can support with the
852 // current code, even on 64-bit targets, is INT_MAX. this is not a
853 // significant limitation for the intended use case.
854 //
855 // we do, however, need to make sure our size calculations don't
856 // overflow. hence a few helper functions for size calculations that
857 // multiply integers together, making sure that they're non-negative
858 // and no overflow occurs.
859 
// return 1 if the sum is valid, 0 on overflow.
// negative terms are considered invalid.
//   a, b: the two terms of the prospective sum "a + b"
static int stbi__addsizes_valid(int a, int b)
{
   // enforce the "no negative terms" contract for BOTH operands: a negative
   // 'a' cannot overflow the sum, but it is not a valid size term.
   if (a < 0 || b < 0) return 0;
   // now 0 <= b <= INT_MAX, hence also
   // 0 <= INT_MAX - b <= INT_MAX.
   // And "a + b <= INT_MAX" (which might overflow) is the
   // same as a <= INT_MAX - b (no overflow)
   return a <= INT_MAX - b;
}
871 
// returns 1 if "a*b" can be computed as an int, 0 if it would overflow.
// a negative factor makes the product invalid.
static int stbi__mul2sizes_valid(int a, int b)
{
   if (a < 0) return 0;
   if (b < 0) return 0;
   // a zero factor can never overflow (and must not reach the division below)
   if (b == 0) return 1;
   // for positive b, "a*b <= INT_MAX" is the same as "a <= INT_MAX/b",
   // and the division itself cannot overflow
   return (INT_MAX/b >= a) ? 1 : 0;
}
881 
// returns 1 if "a*b + add" has no negative terms/factors and doesn't overflow
static int stbi__mad2sizes_valid(int a, int b, int add)
{
   // the product is only evaluated once it is known not to overflow
   if (!stbi__mul2sizes_valid(a, b)) return 0;
   return stbi__addsizes_valid(a*b, add);
}
887 
// returns 1 if "a*b*c + add" has no negative terms/factors and doesn't overflow
static int stbi__mad3sizes_valid(int a, int b, int c, int add)
{
   // validate each partial product before it is actually multiplied out
   if (!stbi__mul2sizes_valid(a, b)) return 0;
   if (!stbi__mul2sizes_valid(a*b, c)) return 0;
   return stbi__addsizes_valid(a*b*c, add);
}
894 
// returns 1 if "a*b*c*d + add" has no negative terms/factors and doesn't overflow
static int stbi__mad4sizes_valid(int a, int b, int c, int d, int add)
{
   // validate each partial product before it is actually multiplied out
   if (!stbi__mul2sizes_valid(a, b)) return 0;
   if (!stbi__mul2sizes_valid(a*b, c)) return 0;
   if (!stbi__mul2sizes_valid(a*b*c, d)) return 0;
   return stbi__addsizes_valid(a*b*c*d, add);
}
901 
// mallocs with size overflow checking

// allocates "a*b + add" bytes, or returns NULL when that size is invalid
static void *stbi__malloc_mad2(int a, int b, int add)
{
   if (stbi__mad2sizes_valid(a, b, add))
      return stbi__malloc(a*b + add);
   return NULL;
}
908 
// allocates "a*b*c + add" bytes, or returns NULL when that size is invalid
static void *stbi__malloc_mad3(int a, int b, int c, int add)
{
   if (stbi__mad3sizes_valid(a, b, c, add))
      return stbi__malloc(a*b*c + add);
   return NULL;
}
914 
// allocates "a*b*c*d + add" bytes, or returns NULL when that size is invalid
static void *stbi__malloc_mad4(int a, int b, int c, int d, int add)
{
   if (stbi__mad4sizes_valid(a, b, c, d, add))
      return stbi__malloc(a*b*c*d + add);
   return NULL;
}
920 
// stbi__err    - record an error; the expression evaluates to 0
// stbi__errpf  - same, but the expression is a NULL pointer to float
// stbi__errpuc - same, but the expression is a NULL pointer to unsigned char
//
// The two-argument macro form selects which message reaches the function:
// neither (STBI_NO_FAILURE_STRINGS), the second (STBI_FAILURE_USERMSG),
// or the first (default).

#if defined(STBI_NO_FAILURE_STRINGS)
   #define stbi__err(x,y)  0
#elif defined(STBI_FAILURE_USERMSG)
   #define stbi__err(x,y)  stbi__err(y)
#else
   #define stbi__err(x,y)  stbi__err(x)
#endif

// both arms of the ?: are NULL; the condition exists only to evaluate stbi__err
#define stbi__errpf(x,y)   ((float *)(size_t) (stbi__err(x,y)?NULL:NULL))
#define stbi__errpuc(x,y)  ((unsigned char *)(size_t) (stbi__err(x,y)?NULL:NULL))
935 
stbi_image_free(void * retval_from_stbi_load)936 STBIDEF void stbi_image_free(void *retval_from_stbi_load)
937 {
938    STBI_FREE(retval_from_stbi_load);
939 }
940 
941 #ifndef STBI_NO_LINEAR
942 static float   *stbi__ldr_to_hdr(stbi_uc *data, int x, int y, int comp);
943 #endif
944 
945 #ifndef STBI_NO_HDR
946 static stbi_uc *stbi__hdr_to_ldr(float   *data, int x, int y, int comp);
947 #endif
948 
949 static int stbi__vertically_flip_on_load = 0;
950 
stbi_set_flip_vertically_on_load(int flag_true_if_should_flip)951 STBIDEF void stbi_set_flip_vertically_on_load(int flag_true_if_should_flip)
952 {
953     stbi__vertically_flip_on_load = flag_true_if_should_flip;
954 }
955 
// Probes the stream with each compiled-in decoder's test function, in a fixed
// order, and returns the result of the first decoder that accepts it.
//   s             input stream
//   x, y, comp    forwarded to the chosen decoder
//   req_comp      requested channel count, 0 meaning "use *comp"
//   ri            reset here, then handed to the decoder
//   bpc           forwarded to the PSD decoder only
// Returns the decoder's buffer, or NULL via stbi__errpuc when no test matches.
static void *stbi__load_main(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri, int bpc)
{
   // zero the whole record first so any field added later starts initialized
   memset(ri, 0, sizeof(*ri));
   ri->num_channels = 0;
   ri->channel_order = STBI_ORDER_RGB; // all current input & output are this, but this is here so we can add BGR order
   ri->bits_per_channel = 8;           // default is 8 so most paths don't have to be changed

   #ifndef STBI_NO_JPEG
   if (stbi__jpeg_test(s))
      return stbi__jpeg_load(s, x, y, comp, req_comp, ri);
   #endif
   #ifndef STBI_NO_PNG
   if (stbi__png_test(s))
      return stbi__png_load(s, x, y, comp, req_comp, ri);
   #endif
   #ifndef STBI_NO_BMP
   if (stbi__bmp_test(s))
      return stbi__bmp_load(s, x, y, comp, req_comp, ri);
   #endif
   #ifndef STBI_NO_GIF
   if (stbi__gif_test(s))
      return stbi__gif_load(s, x, y, comp, req_comp, ri);
   #endif
   #ifndef STBI_NO_PSD
   if (stbi__psd_test(s))
      return stbi__psd_load(s, x, y, comp, req_comp, ri, bpc);
   #endif
   #ifndef STBI_NO_PIC
   if (stbi__pic_test(s))
      return stbi__pic_load(s, x, y, comp, req_comp, ri);
   #endif
   #ifndef STBI_NO_PNM
   if (stbi__pnm_test(s))
      return stbi__pnm_load(s, x, y, comp, req_comp, ri);
   #endif

   #ifndef STBI_NO_HDR
   if (stbi__hdr_test(s)) {
      // HDR decodes to floats; this path converts them down to bytes
      float *linear = stbi__hdr_load(s, x, y, comp, req_comp, ri);
      int channels = req_comp ? req_comp : *comp;
      return stbi__hdr_to_ldr(linear, *x, *y, channels);
   }
   #endif

   #ifndef STBI_NO_TGA
   // test tga last because it's a crappy test!
   if (stbi__tga_test(s))
      return stbi__tga_load(s, x, y, comp, req_comp, ri);
   #endif

   return stbi__errpuc("unknown image type", "Image not of any known type, or corrupt");
}
1000 
stbi__convert_16_to_8(stbi__uint16 * orig,int w,int h,int channels)1001 static stbi_uc *stbi__convert_16_to_8(stbi__uint16 *orig, int w, int h, int channels)
1002 {
1003    int i;
1004    int img_len = w * h * channels;
1005    stbi_uc *reduced;
1006 
1007    reduced = (stbi_uc *) stbi__malloc(img_len);
1008    if (reduced == NULL) return stbi__errpuc("outofmem", "Out of memory");
1009 
1010    for (i = 0; i < img_len; ++i)
1011       reduced[i] = (stbi_uc)((orig[i] >> 8) & 0xFF); // top half of each byte is sufficient approx of 16->8 bit scaling
1012 
1013    STBI_FREE(orig);
1014    return reduced;
1015 }
1016 
stbi__convert_8_to_16(stbi_uc * orig,int w,int h,int channels)1017 static stbi__uint16 *stbi__convert_8_to_16(stbi_uc *orig, int w, int h, int channels)
1018 {
1019    int i;
1020    int img_len = w * h * channels;
1021    stbi__uint16 *enlarged;
1022 
1023    enlarged = (stbi__uint16 *) stbi__malloc(img_len*2);
1024    if (enlarged == NULL) return (stbi__uint16 *) stbi__errpuc("outofmem", "Out of memory");
1025 
1026    for (i = 0; i < img_len; ++i)
1027       enlarged[i] = (stbi__uint16)((orig[i] << 8) + orig[i]); // replicate to high and low byte, maps 0->0, 255->0xffff
1028 
1029    STBI_FREE(orig);
1030    return enlarged;
1031 }
1032 
stbi__vertical_flip(void * image,int w,int h,int bytes_per_pixel)1033 static void stbi__vertical_flip(void *image, int w, int h, int bytes_per_pixel)
1034 {
1035    int row;
1036    size_t bytes_per_row = (size_t)w * bytes_per_pixel;
1037    stbi_uc temp[2048];
1038    stbi_uc *bytes = (stbi_uc *)image;
1039 
1040    for (row = 0; row < (h>>1); row++) {
1041       stbi_uc *row0 = bytes + row*bytes_per_row;
1042       stbi_uc *row1 = bytes + (h - row - 1)*bytes_per_row;
1043       // swap row0 with row1
1044       size_t bytes_left = bytes_per_row;
1045       while (bytes_left) {
1046          size_t bytes_copy = (bytes_left < sizeof(temp)) ? bytes_left : sizeof(temp);
1047          memcpy(temp, row0, bytes_copy);
1048          memcpy(row0, row1, bytes_copy);
1049          memcpy(row1, temp, bytes_copy);
1050          row0 += bytes_copy;
1051          row1 += bytes_copy;
1052          bytes_left -= bytes_copy;
1053       }
1054    }
1055 }
1056 
// Decodes an image and normalizes the result to 8 bits per channel.
//   s          input stream
//   x, y       receive the image dimensions
//   comp       receives the channel count found in the file
//   req_comp   requested channel count, 0 meaning "use *comp"
// Returns the pixel buffer, or NULL if decoding or the 16->8 bit conversion
// fails. The image is flipped vertically when stbi__vertically_flip_on_load
// is set.
static unsigned char *stbi__load_and_postprocess_8bit(stbi__context *s, int *x, int *y, int *comp, int req_comp)
{
   stbi__result_info ri;
   void *result = stbi__load_main(s, x, y, comp, req_comp, &ri, 8);

   if (result == NULL)
      return NULL;

   if (ri.bits_per_channel != 8) {
      STBI_ASSERT(ri.bits_per_channel == 16);
      result = stbi__convert_16_to_8((stbi__uint16 *) result, *x, *y, req_comp == 0 ? *comp : req_comp);
      // the conversion allocates and can fail; stop here rather than hand a
      // null buffer to the flip below
      if (result == NULL)
         return NULL;
      ri.bits_per_channel = 8;
   }

   // @TODO: move stbi__convert_format to here

   if (stbi__vertically_flip_on_load) {
      int channels = req_comp ? req_comp : *comp;
      stbi__vertical_flip(result, *x, *y, channels * sizeof(stbi_uc));
   }

   return (unsigned char *) result;
}
1080 
stbi__load_and_postprocess_16bit(stbi__context * s,int * x,int * y,int * comp,int req_comp)1081 static stbi__uint16 *stbi__load_and_postprocess_16bit(stbi__context *s, int *x, int *y, int *comp, int req_comp)
1082 {
1083    stbi__result_info ri;
1084    void *result = stbi__load_main(s, x, y, comp, req_comp, &ri, 16);
1085 
1086    if (result == NULL)
1087       return NULL;
1088 
1089    if (ri.bits_per_channel != 16) {
1090       STBI_ASSERT(ri.bits_per_channel == 8);
1091       result = stbi__convert_8_to_16((stbi_uc *) result, *x, *y, req_comp == 0 ? *comp : req_comp);
1092       ri.bits_per_channel = 16;
1093    }
1094 
1095    // @TODO: move stbi__convert_format16 to here
1096    // @TODO: special case RGB-to-Y (and RGBA-to-YA) for 8-bit-to-16-bit case to keep more precision
1097 
1098    if (stbi__vertically_flip_on_load) {
1099       int channels = req_comp ? req_comp : *comp;
1100       stbi__vertical_flip(result, *x, *y, channels * sizeof(stbi__uint16));
1101    }
1102 
1103    return (stbi__uint16 *) result;
1104 }
1105 
1106 #ifndef STBI_NO_HDR
stbi__float_postprocess(float * result,int * x,int * y,int * comp,int req_comp)1107 static void stbi__float_postprocess(float *result, int *x, int *y, int *comp, int req_comp)
1108 {
1109    if (stbi__vertically_flip_on_load && result != NULL) {
1110       int channels = req_comp ? req_comp : *comp;
1111       stbi__vertical_flip(result, *x, *y, channels * sizeof(float));
1112    }
1113 }
1114 #endif
1115 
1116 #ifndef STBI_NO_STDIO
1117 
// Opens 'filename' with 'mode'. Uses fopen_s on MSVC 2005 and later, and plain
// fopen everywhere else. Returns a null FILE pointer on failure.
static FILE *stbi__fopen(char const *filename, char const *mode)
{
#if defined(_MSC_VER) && _MSC_VER >= 1400
   FILE *handle;
   if (fopen_s(&handle, filename, mode) != 0)
      return 0;
   return handle;
#else
   return fopen(filename, mode);
#endif
}
1129 
1130 
stbi_load(char const * filename,int * x,int * y,int * comp,int req_comp)1131 STBIDEF stbi_uc *stbi_load(char const *filename, int *x, int *y, int *comp, int req_comp)
1132 {
1133    FILE *f = stbi__fopen(filename, "rb");
1134    unsigned char *result;
1135    if (!f) return stbi__errpuc("can't fopen", "Unable to open file");
1136    result = stbi_load_from_file(f,x,y,comp,req_comp);
1137    fclose(f);
1138    return result;
1139 }
1140 
stbi_load_from_file(FILE * f,int * x,int * y,int * comp,int req_comp)1141 STBIDEF stbi_uc *stbi_load_from_file(FILE *f, int *x, int *y, int *comp, int req_comp)
1142 {
1143    unsigned char *result;
1144    stbi__context s;
1145    stbi__start_file(&s,f);
1146    result = stbi__load_and_postprocess_8bit(&s,x,y,comp,req_comp);
1147    if (result) {
1148       // need to 'unget' all the characters in the IO buffer
1149       fseek(f, - (int) (s.img_buffer_end - s.img_buffer), SEEK_CUR);
1150    }
1151    return result;
1152 }
1153 
stbi_load_from_file_16(FILE * f,int * x,int * y,int * comp,int req_comp)1154 STBIDEF stbi__uint16 *stbi_load_from_file_16(FILE *f, int *x, int *y, int *comp, int req_comp)
1155 {
1156    stbi__uint16 *result;
1157    stbi__context s;
1158    stbi__start_file(&s,f);
1159    result = stbi__load_and_postprocess_16bit(&s,x,y,comp,req_comp);
1160    if (result) {
1161       // need to 'unget' all the characters in the IO buffer
1162       fseek(f, - (int) (s.img_buffer_end - s.img_buffer), SEEK_CUR);
1163    }
1164    return result;
1165 }
1166 
stbi_load_16(char const * filename,int * x,int * y,int * comp,int req_comp)1167 STBIDEF stbi_us *stbi_load_16(char const *filename, int *x, int *y, int *comp, int req_comp)
1168 {
1169    FILE *f = stbi__fopen(filename, "rb");
1170    stbi__uint16 *result;
1171    if (!f) return (stbi_us *) stbi__errpuc("can't fopen", "Unable to open file");
1172    result = stbi_load_from_file_16(f,x,y,comp,req_comp);
1173    fclose(f);
1174    return result;
1175 }
1176 
1177 
1178 #endif //!STBI_NO_STDIO
1179 
stbi_load_16_from_memory(stbi_uc const * buffer,int len,int * x,int * y,int * channels_in_file,int desired_channels)1180 STBIDEF stbi_us *stbi_load_16_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *channels_in_file, int desired_channels)
1181 {
1182    stbi__context s;
1183    stbi__start_mem(&s,buffer,len);
1184    return stbi__load_and_postprocess_16bit(&s,x,y,channels_in_file,desired_channels);
1185 }
1186 
stbi_load_16_from_callbacks(stbi_io_callbacks const * clbk,void * user,int * x,int * y,int * channels_in_file,int desired_channels)1187 STBIDEF stbi_us *stbi_load_16_from_callbacks(stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *channels_in_file, int desired_channels)
1188 {
1189    stbi__context s;
1190    stbi__start_callbacks(&s, (stbi_io_callbacks *)clbk, user);
1191    return stbi__load_and_postprocess_16bit(&s,x,y,channels_in_file,desired_channels);
1192 }
1193 
stbi_load_from_memory(stbi_uc const * buffer,int len,int * x,int * y,int * comp,int req_comp)1194 STBIDEF stbi_uc *stbi_load_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp)
1195 {
1196    stbi__context s;
1197    stbi__start_mem(&s,buffer,len);
1198    return stbi__load_and_postprocess_8bit(&s,x,y,comp,req_comp);
1199 }
1200 
stbi_load_from_callbacks(stbi_io_callbacks const * clbk,void * user,int * x,int * y,int * comp,int req_comp)1201 STBIDEF stbi_uc *stbi_load_from_callbacks(stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *comp, int req_comp)
1202 {
1203    stbi__context s;
1204    stbi__start_callbacks(&s, (stbi_io_callbacks *) clbk, user);
1205    return stbi__load_and_postprocess_8bit(&s,x,y,comp,req_comp);
1206 }
1207 
1208 #ifndef STBI_NO_LINEAR
// Loads an image as floats. A stream accepted by stbi__hdr_test is decoded
// directly by the HDR loader; anything else is decoded to 8 bits and then
// converted with stbi__ldr_to_hdr.
// Returns NULL on failure; when the 8-bit path fails the failure reason is
// set to "unknown image type".
static float *stbi__loadf_main(stbi__context *s, int *x, int *y, int *comp, int req_comp)
{
   unsigned char *ldr;

   #ifndef STBI_NO_HDR
   if (stbi__hdr_test(s)) {
      stbi__result_info ri;
      float *hdr = stbi__hdr_load(s, x, y, comp, req_comp, &ri);
      if (hdr != NULL)
         stbi__float_postprocess(hdr, x, y, comp, req_comp);
      return hdr;
   }
   #endif

   ldr = stbi__load_and_postprocess_8bit(s, x, y, comp, req_comp);
   if (ldr == NULL)
      return stbi__errpf("unknown image type", "Image not of any known type, or corrupt");
   return stbi__ldr_to_hdr(ldr, *x, *y, req_comp ? req_comp : *comp);
}
1226 
stbi_loadf_from_memory(stbi_uc const * buffer,int len,int * x,int * y,int * comp,int req_comp)1227 STBIDEF float *stbi_loadf_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp)
1228 {
1229    stbi__context s;
1230    stbi__start_mem(&s,buffer,len);
1231    return stbi__loadf_main(&s,x,y,comp,req_comp);
1232 }
1233 
stbi_loadf_from_callbacks(stbi_io_callbacks const * clbk,void * user,int * x,int * y,int * comp,int req_comp)1234 STBIDEF float *stbi_loadf_from_callbacks(stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *comp, int req_comp)
1235 {
1236    stbi__context s;
1237    stbi__start_callbacks(&s, (stbi_io_callbacks *) clbk, user);
1238    return stbi__loadf_main(&s,x,y,comp,req_comp);
1239 }
1240 
1241 #ifndef STBI_NO_STDIO
stbi_loadf(char const * filename,int * x,int * y,int * comp,int req_comp)1242 STBIDEF float *stbi_loadf(char const *filename, int *x, int *y, int *comp, int req_comp)
1243 {
1244    float *result;
1245    FILE *f = stbi__fopen(filename, "rb");
1246    if (!f) return stbi__errpf("can't fopen", "Unable to open file");
1247    result = stbi_loadf_from_file(f,x,y,comp,req_comp);
1248    fclose(f);
1249    return result;
1250 }
1251 
stbi_loadf_from_file(FILE * f,int * x,int * y,int * comp,int req_comp)1252 STBIDEF float *stbi_loadf_from_file(FILE *f, int *x, int *y, int *comp, int req_comp)
1253 {
1254    stbi__context s;
1255    stbi__start_file(&s,f);
1256    return stbi__loadf_main(&s,x,y,comp,req_comp);
1257 }
1258 #endif // !STBI_NO_STDIO
1259 
1260 #endif // !STBI_NO_LINEAR
1261 
// these is-hdr-or-not functions are defined regardless of whether
// STBI_NO_LINEAR or STBI_NO_HDR is defined, for API simplicity; when
// STBI_NO_HDR is defined, they always report false!
1265 
stbi_is_hdr_from_memory(stbi_uc const * buffer,int len)1266 STBIDEF int stbi_is_hdr_from_memory(stbi_uc const *buffer, int len)
1267 {
1268    #ifndef STBI_NO_HDR
1269    stbi__context s;
1270    stbi__start_mem(&s,buffer,len);
1271    return stbi__hdr_test(&s);
1272    #else
1273    STBI_NOTUSED(buffer);
1274    STBI_NOTUSED(len);
1275    return 0;
1276    #endif
1277 }
1278 
1279 #ifndef STBI_NO_STDIO
stbi_is_hdr(char const * filename)1280 STBIDEF int      stbi_is_hdr          (char const *filename)
1281 {
1282    FILE *f = stbi__fopen(filename, "rb");
1283    int result=0;
1284    if (f) {
1285       result = stbi_is_hdr_from_file(f);
1286       fclose(f);
1287    }
1288    return result;
1289 }
1290 
stbi_is_hdr_from_file(FILE * f)1291 STBIDEF int      stbi_is_hdr_from_file(FILE *f)
1292 {
1293    #ifndef STBI_NO_HDR
1294    stbi__context s;
1295    stbi__start_file(&s,f);
1296    return stbi__hdr_test(&s);
1297    #else
1298    STBI_NOTUSED(f);
1299    return 0;
1300    #endif
1301 }
1302 #endif // !STBI_NO_STDIO
1303 
stbi_is_hdr_from_callbacks(stbi_io_callbacks const * clbk,void * user)1304 STBIDEF int      stbi_is_hdr_from_callbacks(stbi_io_callbacks const *clbk, void *user)
1305 {
1306    #ifndef STBI_NO_HDR
1307    stbi__context s;
1308    stbi__start_callbacks(&s, (stbi_io_callbacks *) clbk, user);
1309    return stbi__hdr_test(&s);
1310    #else
1311    STBI_NOTUSED(clbk);
1312    STBI_NOTUSED(user);
1313    return 0;
1314    #endif
1315 }
1316 
1317 #ifndef STBI_NO_LINEAR
1318 static float stbi__l2h_gamma=2.2f, stbi__l2h_scale=1.0f;
1319 
stbi_ldr_to_hdr_gamma(float gamma)1320 STBIDEF void   stbi_ldr_to_hdr_gamma(float gamma) { stbi__l2h_gamma = gamma; }
stbi_ldr_to_hdr_scale(float scale)1321 STBIDEF void   stbi_ldr_to_hdr_scale(float scale) { stbi__l2h_scale = scale; }
1322 #endif
1323 
1324 static float stbi__h2l_gamma_i=1.0f/2.2f, stbi__h2l_scale_i=1.0f;
1325 
stbi_hdr_to_ldr_gamma(float gamma)1326 STBIDEF void   stbi_hdr_to_ldr_gamma(float gamma) { stbi__h2l_gamma_i = 1/gamma; }
stbi_hdr_to_ldr_scale(float scale)1327 STBIDEF void   stbi_hdr_to_ldr_scale(float scale) { stbi__h2l_scale_i = 1/scale; }
1328 
1329 
1330 //////////////////////////////////////////////////////////////////////////////
1331 //
1332 // Common code used by all image loaders
1333 //
1334 
// Scan modes; presumably they tell a decoder how far to parse (the users of
// these constants are not visible here -- verify).
enum
{
   STBI__SCAN_load   = 0,
   STBI__SCAN_type   = 1,
   STBI__SCAN_header = 2
};
1341 
// Refills the context's buffer from the read callback and points the read
// cursor at its start.
// When the callback delivers 0 bytes, callback reading is switched off and the
// buffer is made to hold one zero byte, so the cursor still points at
// readable memory.
static void stbi__refill_buffer(stbi__context *s)
{
   int got = (s->io.read)(s->io_user_data,(char*)s->buffer_start,s->buflen);

   s->img_buffer = s->buffer_start;
   if (got != 0) {
      s->img_buffer_end = s->buffer_start + got;
      return;
   }

   // at end of file, treat same as if from memory, but need to handle case
   // where s->img_buffer isn't pointing to safe memory, e.g. 0-byte file
   s->read_from_callbacks = 0;
   s->img_buffer_end = s->buffer_start+1;
   *s->img_buffer = 0;
}
1357 
stbi__get8(stbi__context * s)1358 stbi_inline static stbi_uc stbi__get8(stbi__context *s)
1359 {
1360    if (s->img_buffer < s->img_buffer_end)
1361       return *s->img_buffer++;
1362    if (s->read_from_callbacks) {
1363       stbi__refill_buffer(s);
1364       return *s->img_buffer++;
1365    }
1366    return 0;
1367 }
1368 
stbi__at_eof(stbi__context * s)1369 stbi_inline static int stbi__at_eof(stbi__context *s)
1370 {
1371    if (s->io.read) {
1372       if (!(s->io.eof)(s->io_user_data)) return 0;
1373       // if feof() is true, check if buffer = end
1374       // special case: we've only got the special 0 character at the end
1375       if (s->read_from_callbacks == 0) return 1;
1376    }
1377 
1378    return s->img_buffer >= s->img_buffer_end;
1379 }
1380 
// Advances the stream by n bytes.
// A negative n moves the cursor to the end of the current buffer. With a read
// callback installed, any part of the skip beyond the buffered bytes is
// delegated to the skip callback.
static void stbi__skip(stbi__context *s, int n)
{
   if (n < 0) {
      s->img_buffer = s->img_buffer_end;
      return;
   }

   if (s->io.read) {
      int buffered = (int) (s->img_buffer_end - s->img_buffer);
      if (n > buffered) {
         // use up the buffer, then let the callback skip the remainder
         s->img_buffer = s->img_buffer_end;
         (s->io.skip)(s->io_user_data, n - buffered);
         return;
      }
   }

   s->img_buffer += n;
}
1397 
// Copies the next n bytes of the stream into 'buffer'.
// Returns 1 when all n bytes were obtained, 0 otherwise. With a read callback
// installed and fewer than n bytes buffered, the buffered bytes are copied
// first and the rest is read directly into 'buffer'.
static int stbi__getn(stbi__context *s, stbi_uc *buffer, int n)
{
   if (s->io.read) {
      int buffered = (int) (s->img_buffer_end - s->img_buffer);
      if (buffered < n) {
         int wanted = n - buffered;
         int got;

         memcpy(buffer, s->img_buffer, buffered);

         got = (s->io.read)(s->io_user_data, (char*) buffer + buffered, wanted);
         s->img_buffer = s->img_buffer_end;
         return got == wanted;
      }
   }

   if (s->img_buffer+n > s->img_buffer_end)
      return 0;

   memcpy(buffer, s->img_buffer, n);
   s->img_buffer += n;
   return 1;
}
1421 
// Reads a 16-bit big-endian value: first byte is the high byte.
static int stbi__get16be(stbi__context *s)
{
   int hi = stbi__get8(s);
   int lo = stbi__get8(s);
   return (hi << 8) + lo;
}
1427 
stbi__get32be(stbi__context * s)1428 static stbi__uint32 stbi__get32be(stbi__context *s)
1429 {
1430    stbi__uint32 z = stbi__get16be(s);
1431    return (z << 16) + stbi__get16be(s);
1432 }
1433 
1434 #if defined(STBI_NO_BMP) && defined(STBI_NO_TGA) && defined(STBI_NO_GIF)
1435 // nothing
1436 #else
// Reads a 16-bit little-endian value: first byte is the low byte.
static int stbi__get16le(stbi__context *s)
{
   int lo = stbi__get8(s);
   int hi = stbi__get8(s);
   return lo + (hi << 8);
}
1442 #endif
1443 
1444 #ifndef STBI_NO_BMP
stbi__get32le(stbi__context * s)1445 static stbi__uint32 stbi__get32le(stbi__context *s)
1446 {
1447    stbi__uint32 z = stbi__get16le(s);
1448    return z + (stbi__get16le(s) << 16);
1449 }
1450 #endif
1451 
1452 #define STBI__BYTECAST(x)  ((stbi_uc) ((x) & 255))  // truncate int to byte without warnings
1453 
1454 
1455 //////////////////////////////////////////////////////////////////////////////
1456 //
1457 //  generic converter from built-in img_n to req_comp
1458 //    individual types do this automatically as much as possible (e.g. jpeg
1459 //    does all cases internally since it needs to colorspace convert anyway,
1460 //    and it never has alpha, so very few cases ). png can automatically
1461 //    interleave an alpha=255 channel, but falls back to this for other cases
1462 //
1463 //  assume data buffer is malloced, so malloc a new one and free that one
1464 //  only failure mode is malloc failing
1465 
stbi__compute_y(int r,int g,int b)1466 static stbi_uc stbi__compute_y(int r, int g, int b)
1467 {
1468    return (stbi_uc) (((r*77) + (g*150) +  (29*b)) >> 8);
1469 }
1470 
// Converts an 8-bit image from img_n to req_comp components per pixel.
//   data      source pixels (malloced); ownership passes to this function
//             unless req_comp == img_n, in which case it is returned as is
//   img_n     components per pixel in 'data'
//   req_comp  components per pixel wanted, 1..4
//   x, y      image dimensions in pixels
// Returns a newly allocated buffer and frees 'data'. On allocation failure or
// an unsupported img_n/req_comp pair, frees what it owns, sets the failure
// reason and returns NULL.
static unsigned char *stbi__convert_format(unsigned char *data, int img_n, int req_comp, unsigned int x, unsigned int y)
{
   int i,j;
   unsigned char *good;

   if (req_comp == img_n) return data;
   STBI_ASSERT(req_comp >= 1 && req_comp <= 4);

   good = (unsigned char *) stbi__malloc_mad3(req_comp, x, y, 0);
   if (good == NULL) {
      STBI_FREE(data);
      return stbi__errpuc("outofmem", "Out of memory");
   }

   for (j=0; j < (int) y; ++j) {
      unsigned char *src  = data + j * x * img_n   ;
      unsigned char *dest = good + j * x * req_comp;

      #define STBI__COMBO(a,b)  ((a)*8+(b))
      #define STBI__CASE(a,b)   case STBI__COMBO(a,b): for(i=x-1; i >= 0; --i, src += a, dest += b)
      // convert source image with img_n components to one with req_comp components;
      // avoid switch per pixel, so use switch per scanline and massive macros
      switch (STBI__COMBO(img_n, req_comp)) {
         STBI__CASE(1,2) { dest[0]=src[0], dest[1]=255;                                     } break;
         STBI__CASE(1,3) { dest[0]=dest[1]=dest[2]=src[0];                                  } break;
         STBI__CASE(1,4) { dest[0]=dest[1]=dest[2]=src[0], dest[3]=255;                     } break;
         STBI__CASE(2,1) { dest[0]=src[0];                                                  } break;
         STBI__CASE(2,3) { dest[0]=dest[1]=dest[2]=src[0];                                  } break;
         STBI__CASE(2,4) { dest[0]=dest[1]=dest[2]=src[0], dest[3]=src[1];                  } break;
         STBI__CASE(3,4) { dest[0]=src[0],dest[1]=src[1],dest[2]=src[2],dest[3]=255;        } break;
         STBI__CASE(3,1) { dest[0]=stbi__compute_y(src[0],src[1],src[2]);                   } break;
         STBI__CASE(3,2) { dest[0]=stbi__compute_y(src[0],src[1],src[2]), dest[1] = 255;    } break;
         STBI__CASE(4,1) { dest[0]=stbi__compute_y(src[0],src[1],src[2]);                   } break;
         STBI__CASE(4,2) { dest[0]=stbi__compute_y(src[0],src[1],src[2]), dest[1] = src[3]; } break;
         STBI__CASE(4,3) { dest[0]=src[0],dest[1]=src[1],dest[2]=src[2];                    } break;
         default:
            // with assertions compiled out, do not hand back a buffer that
            // was never written: fail cleanly instead
            STBI_ASSERT(0);
            STBI_FREE(data);
            STBI_FREE(good);
            return stbi__errpuc("unsupported", "Unsupported format conversion");
      }
      #undef STBI__CASE
   }

   STBI_FREE(data);
   return good;
}
1514 
stbi__compute_y_16(int r,int g,int b)1515 static stbi__uint16 stbi__compute_y_16(int r, int g, int b)
1516 {
1517    return (stbi__uint16) (((r*77) + (g*150) +  (29*b)) >> 8);
1518 }
1519 
// Converts a 16-bit image from img_n to req_comp components per pixel.
//   data      source samples (malloced); ownership passes to this function
//             unless req_comp == img_n, in which case it is returned as is
//   img_n     components per pixel in 'data'
//   req_comp  components per pixel wanted, 1..4
//   x, y      image dimensions in pixels
// Returns a newly allocated buffer and frees 'data'. On size overflow,
// allocation failure or an unsupported img_n/req_comp pair, frees what it
// owns, sets the failure reason and returns NULL.
static stbi__uint16 *stbi__convert_format16(stbi__uint16 *data, int img_n, int req_comp, unsigned int x, unsigned int y)
{
   int i,j;
   stbi__uint16 *good;

   if (req_comp == img_n) return data;
   STBI_ASSERT(req_comp >= 1 && req_comp <= 4);

   // size is req_comp * x * y samples of 2 bytes each; use the checked
   // allocator (as the 8-bit converter does) so the product cannot wrap
   good = (stbi__uint16 *) stbi__malloc_mad4(req_comp, x, y, 2, 0);
   if (good == NULL) {
      STBI_FREE(data);
      return (stbi__uint16 *) stbi__errpuc("outofmem", "Out of memory");
   }

   for (j=0; j < (int) y; ++j) {
      stbi__uint16 *src  = data + j * x * img_n   ;
      stbi__uint16 *dest = good + j * x * req_comp;

      #define STBI__COMBO(a,b)  ((a)*8+(b))
      #define STBI__CASE(a,b)   case STBI__COMBO(a,b): for(i=x-1; i >= 0; --i, src += a, dest += b)
      // convert source image with img_n components to one with req_comp components;
      // avoid switch per pixel, so use switch per scanline and massive macros
      switch (STBI__COMBO(img_n, req_comp)) {
         STBI__CASE(1,2) { dest[0]=src[0], dest[1]=0xffff;                                     } break;
         STBI__CASE(1,3) { dest[0]=dest[1]=dest[2]=src[0];                                     } break;
         STBI__CASE(1,4) { dest[0]=dest[1]=dest[2]=src[0], dest[3]=0xffff;                     } break;
         STBI__CASE(2,1) { dest[0]=src[0];                                                     } break;
         STBI__CASE(2,3) { dest[0]=dest[1]=dest[2]=src[0];                                     } break;
         STBI__CASE(2,4) { dest[0]=dest[1]=dest[2]=src[0], dest[3]=src[1];                     } break;
         STBI__CASE(3,4) { dest[0]=src[0],dest[1]=src[1],dest[2]=src[2],dest[3]=0xffff;        } break;
         STBI__CASE(3,1) { dest[0]=stbi__compute_y_16(src[0],src[1],src[2]);                   } break;
         STBI__CASE(3,2) { dest[0]=stbi__compute_y_16(src[0],src[1],src[2]), dest[1] = 0xffff; } break;
         STBI__CASE(4,1) { dest[0]=stbi__compute_y_16(src[0],src[1],src[2]);                   } break;
         STBI__CASE(4,2) { dest[0]=stbi__compute_y_16(src[0],src[1],src[2]), dest[1] = src[3]; } break;
         STBI__CASE(4,3) { dest[0]=src[0],dest[1]=src[1],dest[2]=src[2];                       } break;
         default:
            // with assertions compiled out, do not hand back a buffer that
            // was never written: fail cleanly instead
            STBI_ASSERT(0);
            STBI_FREE(data);
            STBI_FREE(good);
            return (stbi__uint16 *) stbi__errpuc("unsupported", "Unsupported format conversion");
      }
      #undef STBI__CASE
   }

   STBI_FREE(data);
   return good;
}
1563 
1564 #ifndef STBI_NO_LINEAR
// Converts an 8-bit image to floats.
//   data   source pixels; freed before returning (NULL is passed through)
//   x, y   image dimensions in pixels
//   comp   components per pixel
// Colour components are normalized to 0..1, raised to stbi__l2h_gamma and
// multiplied by stbi__l2h_scale. When comp is even, the last component is
// treated as alpha and is only normalized.
// Returns the new float buffer, or NULL with the failure reason set when the
// allocation fails.
static float   *stbi__ldr_to_hdr(stbi_uc *data, int x, int y, int comp)
{
   int i,k,n;
   int pixel_count;
   float *output;

   if (data == NULL) return NULL;

   output = (float *) stbi__malloc_mad4(x, y, comp, sizeof(float), 0);
   if (output == NULL) {
      STBI_FREE(data);
      return stbi__errpf("outofmem", "Out of memory");
   }

   // compute number of non-alpha components
   n = (comp & 1) ? comp : comp-1;

   pixel_count = x*y;
   for (i=0; i < pixel_count; ++i) {
      stbi_uc *src = data + i*comp;
      float *dst = output + i*comp;
      for (k=0; k < n; ++k)
         dst[k] = (float) (pow(src[k]/255.0f, stbi__l2h_gamma) * stbi__l2h_scale);
      // k now indexes the alpha component, if there is one
      if (k < comp)
         dst[k] = src[k]/255.0f;
   }

   STBI_FREE(data);
   return output;
}
1583 #endif
1584 
1585 #ifndef STBI_NO_HDR
1586 #define stbi__float2int(x)   ((int) (x))
stbi__hdr_to_ldr(float * data,int x,int y,int comp)1587 static stbi_uc *stbi__hdr_to_ldr(float   *data, int x, int y, int comp)
1588 {
1589    int i,k,n;
1590    stbi_uc *output;
1591    if (!data) return NULL;
1592    output = (stbi_uc *) stbi__malloc_mad3(x, y, comp, 0);
1593    if (output == NULL) { STBI_FREE(data); return stbi__errpuc("outofmem", "Out of memory"); }
1594    // compute number of non-alpha components
1595    if (comp & 1) n = comp; else n = comp-1;
1596    for (i=0; i < x*y; ++i) {
1597       for (k=0; k < n; ++k) {
1598          float z = (float) pow(data[i*comp+k]*stbi__h2l_scale_i, stbi__h2l_gamma_i) * 255 + 0.5f;
1599          if (z < 0) z = 0;
1600          if (z > 255) z = 255;
1601          output[i*comp + k] = (stbi_uc) stbi__float2int(z);
1602       }
1603       if (k < comp) {
1604          float z = data[i*comp+k] * 255 + 0.5f;
1605          if (z < 0) z = 0;
1606          if (z > 255) z = 255;
1607          output[i*comp + k] = (stbi_uc) stbi__float2int(z);
1608       }
1609    }
1610    STBI_FREE(data);
1611    return output;
1612 }
1613 #endif
1614 
1615 //////////////////////////////////////////////////////////////////////////////
1616 //
1617 //  "baseline" JPEG/JFIF decoder
1618 //
1619 //    simple implementation
1620 //      - doesn't support delayed output of y-dimension
1621 //      - simple interface (only one output format: 8-bit interleaved RGB)
1622 //      - doesn't try to recover corrupt jpegs
1623 //      - doesn't allow partial loading, loading multiple at once
1624 //      - still fast on x86 (copying globals into locals doesn't help x86)
1625 //      - allocates lots of intermediate memory (full size of all components)
1626 //        - non-interleaved case requires this anyway
1627 //        - allows good upsampling (see next)
1628 //    high-quality
1629 //      - upsampled channels are bilinearly interpolated, even across blocks
1630 //      - quality integer IDCT derived from IJG's 'slow'
1631 //    performance
1632 //      - fast huffman; reasonable integer IDCT
1633 //      - some SIMD kernels for common paths on targets with SSE2/NEON
1634 //      - uses a lot of intermediate memory, could cache poorly
1635 
1636 #ifndef STBI_NO_JPEG
1637 
1638 // huffman decoding acceleration
1639 #define FAST_BITS   9  // larger handles more cases; smaller stomps less cache
1640 
1641 typedef struct
1642 {
1643    stbi_uc  fast[1 << FAST_BITS];
1644    // weirdly, repacking this into AoS is a 10% speed loss, instead of a win
1645    stbi__uint16 code[256];
1646    stbi_uc  values[256];
1647    stbi_uc  size[257];
1648    unsigned int maxcode[18];
1649    int    delta[17];   // old 'firstsymbol' - old 'firstcode'
1650 } stbi__huffman;
1651 
1652 typedef struct
1653 {
1654    stbi__context *s;
1655    stbi__huffman huff_dc[4];
1656    stbi__huffman huff_ac[4];
1657    stbi__uint16 dequant[4][64];
1658    stbi__int16 fast_ac[4][1 << FAST_BITS];
1659 
1660 // sizes for components, interleaved MCUs
1661    int img_h_max, img_v_max;
1662    int img_mcu_x, img_mcu_y;
1663    int img_mcu_w, img_mcu_h;
1664 
1665 // definition of jpeg image component
1666    struct
1667    {
1668       int id;
1669       int h,v;
1670       int tq;
1671       int hd,ha;
1672       int dc_pred;
1673 
1674       int x,y,w2,h2;
1675       stbi_uc *data;
1676       void *raw_data, *raw_coeff;
1677       stbi_uc *linebuf;
1678       short   *coeff;   // progressive only
1679       int      coeff_w, coeff_h; // number of 8x8 coefficient blocks
1680    } img_comp[4];
1681 
1682    stbi__uint32   code_buffer; // jpeg entropy-coded buffer
1683    int            code_bits;   // number of valid bits
1684    unsigned char  marker;      // marker seen while filling entropy buffer
1685    int            nomore;      // flag if we saw a marker so must stop
1686 
1687    int            progressive;
1688    int            spec_start;
1689    int            spec_end;
1690    int            succ_high;
1691    int            succ_low;
1692    int            eob_run;
1693    int            jfif;
1694    int            app14_color_transform; // Adobe APP14 tag
1695    int            rgb;
1696 
1697    int scan_n, order[4];
1698    int restart_interval, todo;
1699 
1700 // kernels
1701    void (*idct_block_kernel)(stbi_uc *out, int out_stride, short data[64]);
1702    void (*YCbCr_to_RGB_kernel)(stbi_uc *out, const stbi_uc *y, const stbi_uc *pcb, const stbi_uc *pcr, int count, int step);
1703    stbi_uc *(*resample_row_hv_2_kernel)(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs);
1704 } stbi__jpeg;
1705 
// Build the decoding tables of one huffman table.
//
//   h      table to fill in; h->values is not touched here
//   count  16 entries; count[i] is the number of codes that are i+1 bits long
//
// Fills h->size, h->code, h->delta, h->maxcode and the h->fast acceleration
// table. Returns 1 on success. When the counts describe more than 256 codes,
// or more codes of one length than fit in that many bits, the result of
// stbi__err is returned instead.
static int stbi__build_huffman(stbi__huffman *h, int *count)
{
   int i,j,k=0,code;
   // build size list for each symbol (from JPEG spec)
   for (i=0; i < 16; ++i) {
      for (j=0; j < count[i]; ++j) {
         // size[] has room for 256 code lengths plus the 0 terminator;
         // corrupt counts must not be allowed to write past it
         if (k >= 256) return stbi__err("bad size list","Corrupt JPEG");
         h->size[k++] = (stbi_uc) (i+1);
      }
   }
   h->size[k] = 0;

   // compute actual symbols (from jpeg spec)
   code = 0;
   k = 0;
   for(j=1; j <= 16; ++j) {
      // compute delta to add to code to compute symbol id
      h->delta[j] = k - code;
      if (h->size[k] == j) {
         while (h->size[k] == j)
            h->code[k++] = (stbi__uint16) (code++);
         // the last code handed out must still fit in j bits
         if (code-1 >= (1 << j)) return stbi__err("bad code lengths","Corrupt JPEG");
      }
      // compute largest code + 1 for this size, preshifted as needed later
      h->maxcode[j] = code << (16-j);
      code <<= 1;
   }
   // j is 17 here: sentinel that terminates the search in the decoder
   h->maxcode[j] = 0xffffffff;

   // build non-spec acceleration table; 255 is flag for not-accelerated
   memset(h->fast, 255, 1 << FAST_BITS);
   for (i=0; i < k; ++i) {
      int s = h->size[i];
      if (s <= FAST_BITS) {
         // every FAST_BITS-wide bit pattern that starts with this code
         int c = h->code[i] << (FAST_BITS-s);
         int m = 1 << (FAST_BITS-s);
         for (j=0; j < m; ++j) {
            h->fast[c+j] = (stbi_uc) i;
         }
      }
   }
   return 1;
}
1746 
1747 // build a table that decodes both magnitude and value of small ACs in
1748 // one go.
stbi__build_fast_ac(stbi__int16 * fast_ac,stbi__huffman * h)1749 static void stbi__build_fast_ac(stbi__int16 *fast_ac, stbi__huffman *h)
1750 {
1751    int i;
1752    for (i=0; i < (1 << FAST_BITS); ++i) {
1753       stbi_uc fast = h->fast[i];
1754       fast_ac[i] = 0;
1755       if (fast < 255) {
1756          int rs = h->values[fast];
1757          int run = (rs >> 4) & 15;
1758          int magbits = rs & 15;
1759          int len = h->size[fast];
1760 
1761          if (magbits && len + magbits <= FAST_BITS) {
1762             // magnitude code followed by receive_extend code
1763             int k = ((i << len) & ((1 << FAST_BITS) - 1)) >> (FAST_BITS - magbits);
1764             int m = 1 << (magbits - 1);
1765             if (k < m) k += (~0U << magbits) + 1;
1766             // if the result is small enough, we can fit it in fast_ac table
1767             if (k >= -128 && k <= 127)
1768                fast_ac[i] = (stbi__int16) ((k << 8) + (run << 4) + (len + magbits));
1769          }
1770       }
1771    }
1772 }
1773 
// Refill the entropy-decoder bit buffer one byte at a time until it holds
// more than 24 bits.
//
// A 0xff byte followed by 0x00 is taken as a literal 0xff data byte. A 0xff
// followed by any other non-0xff byte is a marker: it is stored in
// j->marker, j->nomore is set, and the function returns without adding
// bits. Once j->nomore is set, zero bytes are fed in instead of reading
// further input.
static void stbi__grow_buffer_unsafe(stbi__jpeg *j)
{
   do {
      // unsigned: the byte is shifted left by up to 24 bits below, which
      // would overflow a signed int for values >= 0x80
      unsigned int b = j->nomore ? 0 : stbi__get8(j->s);
      if (b == 0xff) {
         int c = stbi__get8(j->s);
         while (c == 0xff) c = stbi__get8(j->s); // consume fill bytes
         if (c != 0) {
            j->marker = (unsigned char) c;
            j->nomore = 1;
            return;
         }
      }
      j->code_buffer |= b << (24 - j->code_bits);
      j->code_bits += 8;
   } while (j->code_bits <= 24);
}
1791 
1792 // (1 << n) - 1
1793 static stbi__uint32 stbi__bmask[17]={0,1,3,7,15,31,63,127,255,511,1023,2047,4095,8191,16383,32767,65535};
1794 
1795 // decode a jpeg huffman value from the bitstream
stbi__jpeg_huff_decode(stbi__jpeg * j,stbi__huffman * h)1796 stbi_inline static int stbi__jpeg_huff_decode(stbi__jpeg *j, stbi__huffman *h)
1797 {
1798    unsigned int temp;
1799    int c,k;
1800 
1801    if (j->code_bits < 16) stbi__grow_buffer_unsafe(j);
1802 
1803    // look at the top FAST_BITS and determine what symbol ID it is,
1804    // if the code is <= FAST_BITS
1805    c = (j->code_buffer >> (32 - FAST_BITS)) & ((1 << FAST_BITS)-1);
1806    k = h->fast[c];
1807    if (k < 255) {
1808       int s = h->size[k];
1809       if (s > j->code_bits)
1810          return -1;
1811       j->code_buffer <<= s;
1812       j->code_bits -= s;
1813       return h->values[k];
1814    }
1815 
1816    // naive test is to shift the code_buffer down so k bits are
1817    // valid, then test against maxcode. To speed this up, we've
1818    // preshifted maxcode left so that it has (16-k) 0s at the
1819    // end; in other words, regardless of the number of bits, it
1820    // wants to be compared against something shifted to have 16;
1821    // that way we don't need to shift inside the loop.
1822    temp = j->code_buffer >> 16;
1823    for (k=FAST_BITS+1 ; ; ++k)
1824       if (temp < h->maxcode[k])
1825          break;
1826    if (k == 17) {
1827       // error! code not found
1828       j->code_bits -= 16;
1829       return -1;
1830    }
1831 
1832    if (k > j->code_bits)
1833       return -1;
1834 
1835    // convert the huffman code to the symbol id
1836    c = ((j->code_buffer >> (32 - k)) & stbi__bmask[k]) + h->delta[k];
1837    STBI_ASSERT((((j->code_buffer) >> (32 - h->size[c])) & stbi__bmask[h->size[c]]) == h->code[c]);
1838 
1839    // convert the id to a symbol
1840    j->code_bits -= k;
1841    j->code_buffer <<= k;
1842    return h->values[c];
1843 }
1844 
// stbi__jbias[n] = 1 - 2^n: the offset that maps an n-bit field whose
// leading bit is 0 onto the negative half of its value range
static const int stbi__jbias[16] = {
        0,     -1,     -3,     -7,
      -15,    -31,    -63,   -127,
     -255,   -511,  -1023,  -2047,
    -4095,  -8191, -16383, -32767
};
1847 
1848 // combined JPEG 'receive' and JPEG 'extend', since baseline
1849 // always extends everything it receives.
stbi__extend_receive(stbi__jpeg * j,int n)1850 stbi_inline static int stbi__extend_receive(stbi__jpeg *j, int n)
1851 {
1852    unsigned int k;
1853    int sgn;
1854    if (j->code_bits < n) stbi__grow_buffer_unsafe(j);
1855 
1856    sgn = (stbi__int32)j->code_buffer >> 31; // sign bit is always in MSB
1857    k = stbi_lrot(j->code_buffer, n);
1858    STBI_ASSERT(n >= 0 && n < (int) (sizeof(stbi__bmask)/sizeof(*stbi__bmask)));
1859    j->code_buffer = k & ~stbi__bmask[n];
1860    k &= stbi__bmask[n];
1861    j->code_bits -= n;
1862    return k + (stbi__jbias[n] & ~sgn);
1863 }
1864 
1865 // get some unsigned bits
stbi__jpeg_get_bits(stbi__jpeg * j,int n)1866 stbi_inline static int stbi__jpeg_get_bits(stbi__jpeg *j, int n)
1867 {
1868    unsigned int k;
1869    if (j->code_bits < n) stbi__grow_buffer_unsafe(j);
1870    k = stbi_lrot(j->code_buffer, n);
1871    j->code_buffer = k & ~stbi__bmask[n];
1872    k &= stbi__bmask[n];
1873    j->code_bits -= n;
1874    return k;
1875 }
1876 
stbi__jpeg_get_bit(stbi__jpeg * j)1877 stbi_inline static int stbi__jpeg_get_bit(stbi__jpeg *j)
1878 {
1879    unsigned int k;
1880    if (j->code_bits < 1) stbi__grow_buffer_unsafe(j);
1881    k = j->code_buffer;
1882    j->code_buffer <<= 1;
1883    --j->code_bits;
1884    return k & 0x80000000;
1885 }
1886 
1887 // given a value that's at position X in the zigzag stream,
1888 // where does it appear in the 8x8 matrix coded as row-major?
1889 static stbi_uc stbi__jpeg_dezigzag[64+15] =
1890 {
1891     0,  1,  8, 16,  9,  2,  3, 10,
1892    17, 24, 32, 25, 18, 11,  4,  5,
1893    12, 19, 26, 33, 40, 48, 41, 34,
1894    27, 20, 13,  6,  7, 14, 21, 28,
1895    35, 42, 49, 56, 57, 50, 43, 36,
1896    29, 22, 15, 23, 30, 37, 44, 51,
1897    58, 59, 52, 45, 38, 31, 39, 46,
1898    53, 60, 61, 54, 47, 55, 62, 63,
1899    // let corrupt input sample past end
1900    63, 63, 63, 63, 63, 63, 63, 63,
1901    63, 63, 63, 63, 63, 63, 63
1902 };
1903 
1904 // decode one 64-entry block--
stbi__jpeg_decode_block(stbi__jpeg * j,short data[64],stbi__huffman * hdc,stbi__huffman * hac,stbi__int16 * fac,int b,stbi__uint16 * dequant)1905 static int stbi__jpeg_decode_block(stbi__jpeg *j, short data[64], stbi__huffman *hdc, stbi__huffman *hac, stbi__int16 *fac, int b, stbi__uint16 *dequant)
1906 {
1907    int diff,dc,k;
1908    int t;
1909 
1910    if (j->code_bits < 16) stbi__grow_buffer_unsafe(j);
1911    t = stbi__jpeg_huff_decode(j, hdc);
1912    if (t < 0) return stbi__err("bad huffman code","Corrupt JPEG");
1913 
1914    // 0 all the ac values now so we can do it 32-bits at a time
1915    memset(data,0,64*sizeof(data[0]));
1916 
1917    diff = t ? stbi__extend_receive(j, t) : 0;
1918    dc = j->img_comp[b].dc_pred + diff;
1919    j->img_comp[b].dc_pred = dc;
1920    data[0] = (short) (dc * dequant[0]);
1921 
1922    // decode AC components, see JPEG spec
1923    k = 1;
1924    do {
1925       unsigned int zig;
1926       int c,r,s;
1927       if (j->code_bits < 16) stbi__grow_buffer_unsafe(j);
1928       c = (j->code_buffer >> (32 - FAST_BITS)) & ((1 << FAST_BITS)-1);
1929       r = fac[c];
1930       if (r) { // fast-AC path
1931          k += (r >> 4) & 15; // run
1932          s = r & 15; // combined length
1933          j->code_buffer <<= s;
1934          j->code_bits -= s;
1935          // decode into unzigzag'd location
1936          zig = stbi__jpeg_dezigzag[k++];
1937          data[zig] = (short) ((r >> 8) * dequant[zig]);
1938       } else {
1939          int rs = stbi__jpeg_huff_decode(j, hac);
1940          if (rs < 0) return stbi__err("bad huffman code","Corrupt JPEG");
1941          s = rs & 15;
1942          r = rs >> 4;
1943          if (s == 0) {
1944             if (rs != 0xf0) break; // end block
1945             k += 16;
1946          } else {
1947             k += r;
1948             // decode into unzigzag'd location
1949             zig = stbi__jpeg_dezigzag[k++];
1950             data[zig] = (short) (stbi__extend_receive(j,s) * dequant[zig]);
1951          }
1952       }
1953    } while (k < 64);
1954    return 1;
1955 }
1956 
// Decode the DC coefficient of one block for one scan of a progressive JPEG.
//
//   j     decoder state; spec_end, succ_high and succ_low describe the scan
//   data  the block's 64 coefficients; only data[0] is updated, except on
//         the first DC scan, which also zeroes the whole block
//   hdc   huffman table for the DC coefficient
//   b     index into j->img_comp of the component being decoded
//
// Returns 1 on success, or the result of stbi__err on corrupt input.
static int stbi__jpeg_decode_block_prog_dc(stbi__jpeg *j, short data[64], stbi__huffman *hdc, int b)
{
   int diff,dc;
   int t;
   if (j->spec_end != 0) return stbi__err("can't merge dc and ac", "Corrupt JPEG");

   if (j->code_bits < 16) stbi__grow_buffer_unsafe(j);

   if (j->succ_high == 0) {
      // first scan for DC coefficient, must be first
      memset(data,0,64*sizeof(data[0])); // 0 all the ac values now
      t = stbi__jpeg_huff_decode(j, hdc);
      // t is -1 on a decode failure; otherwise it is the bit count of the
      // DC difference and indexes the 16-entry stbi__jbias table in
      // stbi__extend_receive, so it must stay within 0..15
      if (t < 0 || t > 15) return stbi__err("bad huffman code","Corrupt JPEG");
      diff = t ? stbi__extend_receive(j, t) : 0;

      dc = j->img_comp[b].dc_pred + diff;
      j->img_comp[b].dc_pred = dc;
      // multiply instead of shifting: dc may be negative
      data[0] = (short) (dc * (1 << j->succ_low));
   } else {
      // refinement scan for DC coefficient
      if (stbi__jpeg_get_bit(j))
         data[0] += (short) (1 << j->succ_low);
   }
   return 1;
}
1981 
1982 // @OPTIMIZE: store non-zigzagged during the decode passes,
1983 // and only de-zigzag when dequantizing
stbi__jpeg_decode_block_prog_ac(stbi__jpeg * j,short data[64],stbi__huffman * hac,stbi__int16 * fac)1984 static int stbi__jpeg_decode_block_prog_ac(stbi__jpeg *j, short data[64], stbi__huffman *hac, stbi__int16 *fac)
1985 {
1986    int k;
1987    if (j->spec_start == 0) return stbi__err("can't merge dc and ac", "Corrupt JPEG");
1988 
1989    if (j->succ_high == 0) {
1990       int shift = j->succ_low;
1991 
1992       if (j->eob_run) {
1993          --j->eob_run;
1994          return 1;
1995       }
1996 
1997       k = j->spec_start;
1998       do {
1999          unsigned int zig;
2000          int c,r,s;
2001          if (j->code_bits < 16) stbi__grow_buffer_unsafe(j);
2002          c = (j->code_buffer >> (32 - FAST_BITS)) & ((1 << FAST_BITS)-1);
2003          r = fac[c];
2004          if (r) { // fast-AC path
2005             k += (r >> 4) & 15; // run
2006             s = r & 15; // combined length
2007             j->code_buffer <<= s;
2008             j->code_bits -= s;
2009             zig = stbi__jpeg_dezigzag[k++];
2010             data[zig] = (short) ((r >> 8) << shift);
2011          } else {
2012             int rs = stbi__jpeg_huff_decode(j, hac);
2013             if (rs < 0) return stbi__err("bad huffman code","Corrupt JPEG");
2014             s = rs & 15;
2015             r = rs >> 4;
2016             if (s == 0) {
2017                if (r < 15) {
2018                   j->eob_run = (1 << r);
2019                   if (r)
2020                      j->eob_run += stbi__jpeg_get_bits(j, r);
2021                   --j->eob_run;
2022                   break;
2023                }
2024                k += 16;
2025             } else {
2026                k += r;
2027                zig = stbi__jpeg_dezigzag[k++];
2028                data[zig] = (short) (stbi__extend_receive(j,s) << shift);
2029             }
2030          }
2031       } while (k <= j->spec_end);
2032    } else {
2033       // refinement scan for these AC coefficients
2034 
2035       short bit = (short) (1 << j->succ_low);
2036 
2037       if (j->eob_run) {
2038          --j->eob_run;
2039          for (k = j->spec_start; k <= j->spec_end; ++k) {
2040             short *p = &data[stbi__jpeg_dezigzag[k]];
2041             if (*p != 0)
2042                if (stbi__jpeg_get_bit(j))
2043                   if ((*p & bit)==0) {
2044                      if (*p > 0)
2045                         *p += bit;
2046                      else
2047                         *p -= bit;
2048                   }
2049          }
2050       } else {
2051          k = j->spec_start;
2052          do {
2053             int r,s;
2054             int rs = stbi__jpeg_huff_decode(j, hac); // @OPTIMIZE see if we can use the fast path here, advance-by-r is so slow, eh
2055             if (rs < 0) return stbi__err("bad huffman code","Corrupt JPEG");
2056             s = rs & 15;
2057             r = rs >> 4;
2058             if (s == 0) {
2059                if (r < 15) {
2060                   j->eob_run = (1 << r) - 1;
2061                   if (r)
2062                      j->eob_run += stbi__jpeg_get_bits(j, r);
2063                   r = 64; // force end of block
2064                } else {
2065                   // r=15 s=0 should write 16 0s, so we just do
2066                   // a run of 15 0s and then write s (which is 0),
2067                   // so we don't have to do anything special here
2068                }
2069             } else {
2070                if (s != 1) return stbi__err("bad huffman code", "Corrupt JPEG");
2071                // sign bit
2072                if (stbi__jpeg_get_bit(j))
2073                   s = bit;
2074                else
2075                   s = -bit;
2076             }
2077 
2078             // advance by r
2079             while (k <= j->spec_end) {
2080                short *p = &data[stbi__jpeg_dezigzag[k++]];
2081                if (*p != 0) {
2082                   if (stbi__jpeg_get_bit(j))
2083                      if ((*p & bit)==0) {
2084                         if (*p > 0)
2085                            *p += bit;
2086                         else
2087                            *p -= bit;
2088                      }
2089                } else {
2090                   if (r == 0) {
2091                      *p = (short) s;
2092                      break;
2093                   }
2094                   --r;
2095                }
2096             }
2097          } while (k <= j->spec_end);
2098       }
2099    }
2100    return 1;
2101 }
2102 
2103 // take a -128..127 value and stbi__clamp it and convert to 0..255
stbi__clamp(int x)2104 stbi_inline static stbi_uc stbi__clamp(int x)
2105 {
2106    // trick to use a single test to catch both cases
2107    if ((unsigned int) x > 255) {
2108       if (x < 0) return 0;
2109       if (x > 255) return 255;
2110    }
2111    return (stbi_uc) x;
2112 }
2113 
// Fixed-point helpers for the integer IDCT; values carry 12 fractional bits.
//   stbi__f2f(x)  converts a floating-point constant to fixed point
//   stbi__fsh(x)  scales an integer by 1<<12; written as a multiply because
//                 x may be negative and left-shifting a negative value is
//                 undefined
#define stbi__f2f(x)  ((int) (((x) * 4096 + 0.5)))
#define stbi__fsh(x)  ((x) * 4096)

// derived from jidctint -- DCT_ISLOW
//
// One 8-point 1D IDCT pass over the inputs s0..s7. The macro declares its
// own locals in the enclosing scope and leaves the result there: x0..x3
// hold the even part and t0..t3 the odd part, both scaled by 1<<12. The
// caller forms the outputs as x+t / x-t pairs and shifts the scale away.
#define STBI__IDCT_1D(s0,s1,s2,s3,s4,s5,s6,s7) \
   int t0,t1,t2,t3,p1,p2,p3,p4,p5,x0,x1,x2,x3; \
   p2 = s2;                                    \
   p3 = s6;                                    \
   p1 = (p2+p3) * stbi__f2f(0.5411961f);       \
   t2 = p1 + p3*stbi__f2f(-1.847759065f);      \
   t3 = p1 + p2*stbi__f2f( 0.765366865f);      \
   p2 = s0;                                    \
   p3 = s4;                                    \
   t0 = stbi__fsh(p2+p3);                      \
   t1 = stbi__fsh(p2-p3);                      \
   x0 = t0+t3;                                 \
   x3 = t0-t3;                                 \
   x1 = t1+t2;                                 \
   x2 = t1-t2;                                 \
   t0 = s7;                                    \
   t1 = s5;                                    \
   t2 = s3;                                    \
   t3 = s1;                                    \
   p3 = t0+t2;                                 \
   p4 = t1+t3;                                 \
   p1 = t0+t3;                                 \
   p2 = t1+t2;                                 \
   p5 = (p3+p4)*stbi__f2f( 1.175875602f);      \
   t0 = t0*stbi__f2f( 0.298631336f);           \
   t1 = t1*stbi__f2f( 2.053119869f);           \
   t2 = t2*stbi__f2f( 3.072711026f);           \
   t3 = t3*stbi__f2f( 1.501321110f);           \
   p1 = p5 + p1*stbi__f2f(-0.899976223f);      \
   p2 = p5 + p2*stbi__f2f(-2.562915447f);      \
   p3 = p3*stbi__f2f(-1.961570560f);           \
   p4 = p4*stbi__f2f(-0.390180644f);           \
   t3 += p1+p4;                                \
   t2 += p2+p3;                                \
   t1 += p2+p4;                                \
   t0 += p1+p3;
2154 
// Generic C integer IDCT of one 8x8 block.
//
//   out         destination; 8 rows of 8 bytes are written
//   out_stride  distance in bytes between consecutive output rows
//   data        64 dequantized coefficients in row-major order
//
// Runs a column pass into a temporary int block, then a row pass that
// adds the 128 level shift, rounds and clamps each sample to 0..255.
static void stbi__idct_block(stbi_uc *out, int out_stride, short data[64])
{
   int i,val[64],*v=val;
   stbi_uc *o;
   short *d = data;

   // columns
   for (i=0; i < 8; ++i,++d, ++v) {
      // if all zeroes, shortcut -- this avoids dequantizing 0s and IDCTing
      if (d[ 8]==0 && d[16]==0 && d[24]==0 && d[32]==0
           && d[40]==0 && d[48]==0 && d[56]==0) {
         //    no shortcut                 0     seconds
         //    (1|2|3|4|5|6|7)==0          0     seconds
         //    all separate               -0.047 seconds
         //    1 && 2|3 && 4|5 && 6|7:    -0.047 seconds
         // multiply instead of shifting: d[0] may be negative
         int dcterm = d[0] * 4;
         v[0] = v[8] = v[16] = v[24] = v[32] = v[40] = v[48] = v[56] = dcterm;
      } else {
         STBI__IDCT_1D(d[ 0],d[ 8],d[16],d[24],d[32],d[40],d[48],d[56])
         // constants scaled things up by 1<<12; let's bring them back
         // down, but keep 2 extra bits of precision
         x0 += 512; x1 += 512; x2 += 512; x3 += 512;
         v[ 0] = (x0+t3) >> 10;
         v[56] = (x0-t3) >> 10;
         v[ 8] = (x1+t2) >> 10;
         v[48] = (x1-t2) >> 10;
         v[16] = (x2+t1) >> 10;
         v[40] = (x2-t1) >> 10;
         v[24] = (x3+t0) >> 10;
         v[32] = (x3-t0) >> 10;
      }
   }

   for (i=0, v=val, o=out; i < 8; ++i,v+=8,o+=out_stride) {
      // no fast case since the first 1D IDCT spread components out
      STBI__IDCT_1D(v[0],v[1],v[2],v[3],v[4],v[5],v[6],v[7])
      // constants scaled things up by 1<<12, plus we had 1<<2 from first
      // loop, plus horizontal and vertical each scale by sqrt(8) so together
      // we've got an extra 1<<3, so 1<<17 total we need to remove.
      // so we want to round that, which means adding 0.5 * 1<<17,
      // aka 65536. Also, we'll end up with -128 to 127 that we want
      // to encode as 0..255 by adding 128, so we'll add that before the shift
      x0 += 65536 + (128<<17);
      x1 += 65536 + (128<<17);
      x2 += 65536 + (128<<17);
      x3 += 65536 + (128<<17);
      // tried computing the shifts into temps, or'ing the temps to see
      // if any were out of range, but that was slower
      o[0] = stbi__clamp((x0+t3) >> 17);
      o[7] = stbi__clamp((x0-t3) >> 17);
      o[1] = stbi__clamp((x1+t2) >> 17);
      o[6] = stbi__clamp((x1-t2) >> 17);
      o[2] = stbi__clamp((x2+t1) >> 17);
      o[5] = stbi__clamp((x2-t1) >> 17);
      o[3] = stbi__clamp((x3+t0) >> 17);
      o[4] = stbi__clamp((x3-t0) >> 17);
   }
}
2213 
#ifdef STBI_SSE2
// sse2 integer IDCT. not the fastest possible implementation but it
// produces bit-identical results to the generic C version so it's
// fully "transparent".
//
//   out         destination; 8 rows of 8 bytes are written
//   out_stride  distance in bytes between consecutive output rows
//   data        64 coefficients in row-major order; read with aligned
//               128-bit loads, so the buffer must be 16-byte aligned
static void stbi__idct_simd(stbi_uc *out, int out_stride, short data[64])
{
   // This is constructed to match our regular (generic) integer IDCT exactly.
   __m128i row0, row1, row2, row3, row4, row5, row6, row7;
   __m128i swap_tmp;   // scratch register of the interleave macros

   // dot product constant: even elems=x, odd elems=y
   #define dct_const(x,y)  _mm_setr_epi16((x),(y),(x),(y),(x),(y),(x),(y))

   // load one row of 8 coefficients
   #define dct_load_row(n)  _mm_load_si128((const __m128i *) (data + (n)*8))

   // out(0) = c0[even]*x + c0[odd]*y   (c0, x, y 16-bit, out 32-bit)
   // out(1) = c1[even]*x + c1[odd]*y
   #define dct_rot(out0,out1, x,y,c0,c1) \
      __m128i c0##lo = _mm_unpacklo_epi16((x),(y)); \
      __m128i c0##hi = _mm_unpackhi_epi16((x),(y)); \
      __m128i out0##_l = _mm_madd_epi16(c0##lo, c0); \
      __m128i out0##_h = _mm_madd_epi16(c0##hi, c0); \
      __m128i out1##_l = _mm_madd_epi16(c0##lo, c1); \
      __m128i out1##_h = _mm_madd_epi16(c0##hi, c1)

   // out = in << 12  (in 16-bit, out 32-bit)
   #define dct_widen(out, in) \
      __m128i out##_l = _mm_srai_epi32(_mm_unpacklo_epi16(_mm_setzero_si128(), (in)), 4); \
      __m128i out##_h = _mm_srai_epi32(_mm_unpackhi_epi16(_mm_setzero_si128(), (in)), 4)

   // wide add
   #define dct_wadd(out, a, b) \
      __m128i out##_l = _mm_add_epi32(a##_l, b##_l); \
      __m128i out##_h = _mm_add_epi32(a##_h, b##_h)

   // wide sub
   #define dct_wsub(out, a, b) \
      __m128i out##_l = _mm_sub_epi32(a##_l, b##_l); \
      __m128i out##_h = _mm_sub_epi32(a##_h, b##_h)

   // butterfly a/b, add bias, then shift by "s" and pack
   #define dct_bfly32o(out0, out1, a,b,bias,s) \
      { \
         __m128i abiased_l = _mm_add_epi32(a##_l, bias); \
         __m128i abiased_h = _mm_add_epi32(a##_h, bias); \
         dct_wadd(sum, abiased, b); \
         dct_wsub(dif, abiased, b); \
         out0 = _mm_packs_epi32(_mm_srai_epi32(sum_l, s), _mm_srai_epi32(sum_h, s)); \
         out1 = _mm_packs_epi32(_mm_srai_epi32(dif_l, s), _mm_srai_epi32(dif_h, s)); \
      }

   // 8-bit interleave step (for transposes)
   #define dct_interleave8(a, b) \
      swap_tmp = a; \
      a = _mm_unpacklo_epi8(a, b); \
      b = _mm_unpackhi_epi8(swap_tmp, b)

   // 16-bit interleave step (for transposes)
   #define dct_interleave16(a, b) \
      swap_tmp = a; \
      a = _mm_unpacklo_epi16(a, b); \
      b = _mm_unpackhi_epi16(swap_tmp, b)

   // one full 1D pass over row0..row7, in place
   #define dct_pass(bias,shift) \
      { \
         /* even part */ \
         dct_rot(t2e,t3e, row2,row6, rot0_0,rot0_1); \
         __m128i sum04 = _mm_add_epi16(row0, row4); \
         __m128i dif04 = _mm_sub_epi16(row0, row4); \
         dct_widen(t0e, sum04); \
         dct_widen(t1e, dif04); \
         dct_wadd(x0, t0e, t3e); \
         dct_wsub(x3, t0e, t3e); \
         dct_wadd(x1, t1e, t2e); \
         dct_wsub(x2, t1e, t2e); \
         /* odd part */ \
         dct_rot(y0o,y2o, row7,row3, rot2_0,rot2_1); \
         dct_rot(y1o,y3o, row5,row1, rot3_0,rot3_1); \
         __m128i sum17 = _mm_add_epi16(row1, row7); \
         __m128i sum35 = _mm_add_epi16(row3, row5); \
         dct_rot(y4o,y5o, sum17,sum35, rot1_0,rot1_1); \
         dct_wadd(x4, y0o, y4o); \
         dct_wadd(x5, y1o, y5o); \
         dct_wadd(x6, y2o, y5o); \
         dct_wadd(x7, y3o, y4o); \
         dct_bfly32o(row0,row7, x0,x7,bias,shift); \
         dct_bfly32o(row1,row6, x1,x6,bias,shift); \
         dct_bfly32o(row2,row5, x2,x5,bias,shift); \
         dct_bfly32o(row3,row4, x3,x4,bias,shift); \
      }

   // rotation constants, pre-combined into pairs for the madd-based dct_rot
   __m128i rot0_0 = dct_const(stbi__f2f(0.5411961f), stbi__f2f(0.5411961f) + stbi__f2f(-1.847759065f));
   __m128i rot0_1 = dct_const(stbi__f2f(0.5411961f) + stbi__f2f( 0.765366865f), stbi__f2f(0.5411961f));
   __m128i rot1_0 = dct_const(stbi__f2f(1.175875602f) + stbi__f2f(-0.899976223f), stbi__f2f(1.175875602f));
   __m128i rot1_1 = dct_const(stbi__f2f(1.175875602f), stbi__f2f(1.175875602f) + stbi__f2f(-2.562915447f));
   __m128i rot2_0 = dct_const(stbi__f2f(-1.961570560f) + stbi__f2f( 0.298631336f), stbi__f2f(-1.961570560f));
   __m128i rot2_1 = dct_const(stbi__f2f(-1.961570560f), stbi__f2f(-1.961570560f) + stbi__f2f( 3.072711026f));
   __m128i rot3_0 = dct_const(stbi__f2f(-0.390180644f) + stbi__f2f( 2.053119869f), stbi__f2f(-0.390180644f));
   __m128i rot3_1 = dct_const(stbi__f2f(-0.390180644f), stbi__f2f(-0.390180644f) + stbi__f2f( 1.501321110f));

   // rounding biases in column/row passes, see stbi__idct_block for explanation.
   __m128i bias_col = _mm_set1_epi32(512);
   __m128i bias_row = _mm_set1_epi32(65536 + (128<<17));

   // load
   row0 = dct_load_row(0);
   row1 = dct_load_row(1);
   row2 = dct_load_row(2);
   row3 = dct_load_row(3);
   row4 = dct_load_row(4);
   row5 = dct_load_row(5);
   row6 = dct_load_row(6);
   row7 = dct_load_row(7);

   // column pass
   dct_pass(bias_col, 10);

   {
      // 16bit 8x8 transpose pass 1
      dct_interleave16(row0, row4);
      dct_interleave16(row1, row5);
      dct_interleave16(row2, row6);
      dct_interleave16(row3, row7);

      // transpose pass 2
      dct_interleave16(row0, row2);
      dct_interleave16(row1, row3);
      dct_interleave16(row4, row6);
      dct_interleave16(row5, row7);

      // transpose pass 3
      dct_interleave16(row0, row1);
      dct_interleave16(row2, row3);
      dct_interleave16(row4, row5);
      dct_interleave16(row6, row7);
   }

   // row pass
   dct_pass(bias_row, 17);

   {
      // pack to bytes with unsigned saturation
      __m128i pk0 = _mm_packus_epi16(row0, row1); // a0a1a2a3...a7b0b1b2b3...b7
      __m128i pk1 = _mm_packus_epi16(row2, row3);
      __m128i pk2 = _mm_packus_epi16(row4, row5);
      __m128i pk3 = _mm_packus_epi16(row6, row7);

      // 8bit 8x8 transpose pass 1
      dct_interleave8(pk0, pk2); // a0e0a1e1...
      dct_interleave8(pk1, pk3); // c0g0c1g1...

      // transpose pass 2
      dct_interleave8(pk0, pk1); // a0c0e0g0...
      dct_interleave8(pk2, pk3); // b0d0f0h0...

      // transpose pass 3
      dct_interleave8(pk0, pk2); // a0b0c0d0...
      dct_interleave8(pk1, pk3); // a4b4c4d4...

      // store: each register holds two output rows; the shuffle swaps
      // the 64-bit halves so the second row can be stored from the low half
      _mm_storel_epi64((__m128i *) out, pk0); out += out_stride;
      _mm_storel_epi64((__m128i *) out, _mm_shuffle_epi32(pk0, 0x4e)); out += out_stride;
      _mm_storel_epi64((__m128i *) out, pk2); out += out_stride;
      _mm_storel_epi64((__m128i *) out, _mm_shuffle_epi32(pk2, 0x4e)); out += out_stride;
      _mm_storel_epi64((__m128i *) out, pk1); out += out_stride;
      _mm_storel_epi64((__m128i *) out, _mm_shuffle_epi32(pk1, 0x4e)); out += out_stride;
      _mm_storel_epi64((__m128i *) out, pk3); out += out_stride;
      _mm_storel_epi64((__m128i *) out, _mm_shuffle_epi32(pk3, 0x4e));
   }

#undef dct_const
#undef dct_load_row
#undef dct_rot
#undef dct_widen
#undef dct_wadd
#undef dct_wsub
#undef dct_bfly32o
#undef dct_interleave8
#undef dct_interleave16
#undef dct_pass
}

#endif // STBI_SSE2
2394 
2395 #ifdef STBI_NEON
2396 
// NEON integer IDCT. should produce bit-identical
// results to the generic C version.
//
// Reads one 8x8 block of coefficients from data[0..63] (eight rows of eight
// shorts) and writes eight rows of eight bytes starting at 'out', advancing
// by 'out_stride' bytes between rows. The transform is done as two 1D passes
// (dct_pass) with a transpose after each one.
static void stbi__idct_simd(stbi_uc *out, int out_stride, short data[64])
{
   int16x8_t row0, row1, row2, row3, row4, row5, row6, row7;

   // rotation constants, converted by stbi__f2f (defined elsewhere in this
   // file) and broadcast to all four lanes so they can feed vmull/vmlal.
   int16x4_t rot0_0 = vdup_n_s16(stbi__f2f(0.5411961f));
   int16x4_t rot0_1 = vdup_n_s16(stbi__f2f(-1.847759065f));
   int16x4_t rot0_2 = vdup_n_s16(stbi__f2f( 0.765366865f));
   int16x4_t rot1_0 = vdup_n_s16(stbi__f2f( 1.175875602f));
   int16x4_t rot1_1 = vdup_n_s16(stbi__f2f(-0.899976223f));
   int16x4_t rot1_2 = vdup_n_s16(stbi__f2f(-2.562915447f));
   int16x4_t rot2_0 = vdup_n_s16(stbi__f2f(-1.961570560f));
   int16x4_t rot2_1 = vdup_n_s16(stbi__f2f(-0.390180644f));
   int16x4_t rot3_0 = vdup_n_s16(stbi__f2f( 0.298631336f));
   int16x4_t rot3_1 = vdup_n_s16(stbi__f2f( 2.053119869f));
   int16x4_t rot3_2 = vdup_n_s16(stbi__f2f( 3.072711026f));
   int16x4_t rot3_3 = vdup_n_s16(stbi__f2f( 1.501321110f));

// All "wide" helpers below declare a pair of 32-bit vectors, <name>_l and
// <name>_h, holding the low and high four lanes of an 8-lane value.

// widening multiply: out = inq * coeff
#define dct_long_mul(out, inq, coeff) \
   int32x4_t out##_l = vmull_s16(vget_low_s16(inq), coeff); \
   int32x4_t out##_h = vmull_s16(vget_high_s16(inq), coeff)

// widening multiply-accumulate: out = acc + inq * coeff
#define dct_long_mac(out, acc, inq, coeff) \
   int32x4_t out##_l = vmlal_s16(acc##_l, vget_low_s16(inq), coeff); \
   int32x4_t out##_h = vmlal_s16(acc##_h, vget_high_s16(inq), coeff)

// widen to 32 bits and scale by 1<<12
// (presumably the same fixed-point scale stbi__f2f uses -- TODO confirm)
#define dct_widen(out, inq) \
   int32x4_t out##_l = vshll_n_s16(vget_low_s16(inq), 12); \
   int32x4_t out##_h = vshll_n_s16(vget_high_s16(inq), 12)

// wide add
#define dct_wadd(out, a, b) \
   int32x4_t out##_l = vaddq_s32(a##_l, b##_l); \
   int32x4_t out##_h = vaddq_s32(a##_h, b##_h)

// wide sub
#define dct_wsub(out, a, b) \
   int32x4_t out##_l = vsubq_s32(a##_l, b##_l); \
   int32x4_t out##_h = vsubq_s32(a##_h, b##_h)

// butterfly a/b, then shift using "shiftop" by "s" and pack
#define dct_bfly32o(out0,out1, a,b,shiftop,s) \
   { \
      dct_wadd(sum, a, b); \
      dct_wsub(dif, a, b); \
      out0 = vcombine_s16(shiftop(sum_l, s), shiftop(sum_h, s)); \
      out1 = vcombine_s16(shiftop(dif_l, s), shiftop(dif_h, s)); \
   }

// one 1D IDCT pass over row0..row7 (eight independent lanes at once).
// reads and overwrites the row registers in place; "shiftop"/"shift" pick
// the narrowing shift used when packing results back to 16 bits.
#define dct_pass(shiftop, shift) \
   { \
      /* even part */ \
      int16x8_t sum26 = vaddq_s16(row2, row6); \
      dct_long_mul(p1e, sum26, rot0_0); \
      dct_long_mac(t2e, p1e, row6, rot0_1); \
      dct_long_mac(t3e, p1e, row2, rot0_2); \
      int16x8_t sum04 = vaddq_s16(row0, row4); \
      int16x8_t dif04 = vsubq_s16(row0, row4); \
      dct_widen(t0e, sum04); \
      dct_widen(t1e, dif04); \
      dct_wadd(x0, t0e, t3e); \
      dct_wsub(x3, t0e, t3e); \
      dct_wadd(x1, t1e, t2e); \
      dct_wsub(x2, t1e, t2e); \
      /* odd part */ \
      int16x8_t sum15 = vaddq_s16(row1, row5); \
      int16x8_t sum17 = vaddq_s16(row1, row7); \
      int16x8_t sum35 = vaddq_s16(row3, row5); \
      int16x8_t sum37 = vaddq_s16(row3, row7); \
      int16x8_t sumodd = vaddq_s16(sum17, sum35); \
      dct_long_mul(p5o, sumodd, rot1_0); \
      dct_long_mac(p1o, p5o, sum17, rot1_1); \
      dct_long_mac(p2o, p5o, sum35, rot1_2); \
      dct_long_mul(p3o, sum37, rot2_0); \
      dct_long_mul(p4o, sum15, rot2_1); \
      dct_wadd(sump13o, p1o, p3o); \
      dct_wadd(sump24o, p2o, p4o); \
      dct_wadd(sump23o, p2o, p3o); \
      dct_wadd(sump14o, p1o, p4o); \
      dct_long_mac(x4, sump13o, row7, rot3_0); \
      dct_long_mac(x5, sump24o, row5, rot3_1); \
      dct_long_mac(x6, sump23o, row3, rot3_2); \
      dct_long_mac(x7, sump14o, row1, rot3_3); \
      dct_bfly32o(row0,row7, x0,x7,shiftop,shift); \
      dct_bfly32o(row1,row6, x1,x6,shiftop,shift); \
      dct_bfly32o(row2,row5, x2,x5,shiftop,shift); \
      dct_bfly32o(row3,row4, x3,x4,shiftop,shift); \
   }

   // load
   row0 = vld1q_s16(data + 0*8);
   row1 = vld1q_s16(data + 1*8);
   row2 = vld1q_s16(data + 2*8);
   row3 = vld1q_s16(data + 3*8);
   row4 = vld1q_s16(data + 4*8);
   row5 = vld1q_s16(data + 5*8);
   row6 = vld1q_s16(data + 6*8);
   row7 = vld1q_s16(data + 7*8);

   // add DC bias
   // only lane 0 of row0 (the DC coefficient) is touched. following the
   // shifts used below (<<12 then >>10 in the column pass, <<12 then >>17
   // in total for the row pass) the 1024 ends up as +128 in every output.
   row0 = vaddq_s16(row0, vsetq_lane_s16(1024, vdupq_n_s16(0), 0));

   // column pass
   dct_pass(vrshrn_n_s32, 10);

   // 16bit 8x8 transpose
   {
// these three map to a single VTRN.16, VTRN.32, and VSWP, respectively.
// whether compilers actually get this is another story, sadly.
#define dct_trn16(x, y) { int16x8x2_t t = vtrnq_s16(x, y); x = t.val[0]; y = t.val[1]; }
#define dct_trn32(x, y) { int32x4x2_t t = vtrnq_s32(vreinterpretq_s32_s16(x), vreinterpretq_s32_s16(y)); x = vreinterpretq_s16_s32(t.val[0]); y = vreinterpretq_s16_s32(t.val[1]); }
#define dct_trn64(x, y) { int16x8_t x0 = x; int16x8_t y0 = y; x = vcombine_s16(vget_low_s16(x0), vget_low_s16(y0)); y = vcombine_s16(vget_high_s16(x0), vget_high_s16(y0)); }

      // pass 1
      dct_trn16(row0, row1); // a0b0a2b2a4b4a6b6
      dct_trn16(row2, row3);
      dct_trn16(row4, row5);
      dct_trn16(row6, row7);

      // pass 2
      dct_trn32(row0, row2); // a0b0c0d0a4b4c4d4
      dct_trn32(row1, row3);
      dct_trn32(row4, row6);
      dct_trn32(row5, row7);

      // pass 3
      dct_trn64(row0, row4); // a0b0c0d0e0f0g0h0
      dct_trn64(row1, row5);
      dct_trn64(row2, row6);
      dct_trn64(row3, row7);

#undef dct_trn16
#undef dct_trn32
#undef dct_trn64
   }

   // row pass
   // vrshrn_n_s32 only supports shifts up to 16, we need
   // 17. so do a non-rounding shift of 16 first then follow
   // up with a rounding shift by 1.
   dct_pass(vshrn_n_s32, 16);

   {
      // pack and round
      // (vqrshrun also saturates each result to the unsigned 8-bit range)
      uint8x8_t p0 = vqrshrun_n_s16(row0, 1);
      uint8x8_t p1 = vqrshrun_n_s16(row1, 1);
      uint8x8_t p2 = vqrshrun_n_s16(row2, 1);
      uint8x8_t p3 = vqrshrun_n_s16(row3, 1);
      uint8x8_t p4 = vqrshrun_n_s16(row4, 1);
      uint8x8_t p5 = vqrshrun_n_s16(row5, 1);
      uint8x8_t p6 = vqrshrun_n_s16(row6, 1);
      uint8x8_t p7 = vqrshrun_n_s16(row7, 1);

      // again, these can translate into one instruction, but often don't.
#define dct_trn8_8(x, y) { uint8x8x2_t t = vtrn_u8(x, y); x = t.val[0]; y = t.val[1]; }
#define dct_trn8_16(x, y) { uint16x4x2_t t = vtrn_u16(vreinterpret_u16_u8(x), vreinterpret_u16_u8(y)); x = vreinterpret_u8_u16(t.val[0]); y = vreinterpret_u8_u16(t.val[1]); }
#define dct_trn8_32(x, y) { uint32x2x2_t t = vtrn_u32(vreinterpret_u32_u8(x), vreinterpret_u32_u8(y)); x = vreinterpret_u8_u32(t.val[0]); y = vreinterpret_u8_u32(t.val[1]); }

      // sadly can't use interleaved stores here since we only write
      // 8 bytes to each scan line!

      // 8x8 8-bit transpose pass 1
      dct_trn8_8(p0, p1);
      dct_trn8_8(p2, p3);
      dct_trn8_8(p4, p5);
      dct_trn8_8(p6, p7);

      // pass 2
      dct_trn8_16(p0, p2);
      dct_trn8_16(p1, p3);
      dct_trn8_16(p4, p6);
      dct_trn8_16(p5, p7);

      // pass 3
      dct_trn8_32(p0, p4);
      dct_trn8_32(p1, p5);
      dct_trn8_32(p2, p6);
      dct_trn8_32(p3, p7);

      // store
      // one 8-byte row per scan line; 'out' is not advanced after the last row
      vst1_u8(out, p0); out += out_stride;
      vst1_u8(out, p1); out += out_stride;
      vst1_u8(out, p2); out += out_stride;
      vst1_u8(out, p3); out += out_stride;
      vst1_u8(out, p4); out += out_stride;
      vst1_u8(out, p5); out += out_stride;
      vst1_u8(out, p6); out += out_stride;
      vst1_u8(out, p7);

#undef dct_trn8_8
#undef dct_trn8_16
#undef dct_trn8_32
   }

#undef dct_long_mul
#undef dct_long_mac
#undef dct_widen
#undef dct_wadd
#undef dct_wsub
#undef dct_bfly32o
#undef dct_pass
}
2600 
2601 #endif // STBI_NEON
2602 
2603 #define STBI__MARKER_none  0xff
2604 // if there's a pending marker from the entropy stream, return that
2605 // otherwise, fetch from the stream and get a marker. if there's no
2606 // marker, return 0xff, which is never a valid marker value
stbi__get_marker(stbi__jpeg * j)2607 static stbi_uc stbi__get_marker(stbi__jpeg *j)
2608 {
2609    stbi_uc x;
2610    if (j->marker != STBI__MARKER_none) { x = j->marker; j->marker = STBI__MARKER_none; return x; }
2611    x = stbi__get8(j->s);
2612    if (x != 0xff) return STBI__MARKER_none;
2613    while (x == 0xff)
2614       x = stbi__get8(j->s); // consume repeated 0xff fill bytes
2615    return x;
2616 }
2617 
2618 // in each scan, we'll have scan_n components, and the order
2619 // of the components is specified by order[]
2620 #define STBI__RESTART(x)     ((x) >= 0xd0 && (x) <= 0xd7)
2621 
2622 // after a restart interval, stbi__jpeg_reset the entropy decoder and
2623 // the dc prediction
stbi__jpeg_reset(stbi__jpeg * j)2624 static void stbi__jpeg_reset(stbi__jpeg *j)
2625 {
2626    j->code_bits = 0;
2627    j->code_buffer = 0;
2628    j->nomore = 0;
2629    j->img_comp[0].dc_pred = j->img_comp[1].dc_pred = j->img_comp[2].dc_pred = j->img_comp[3].dc_pred = 0;
2630    j->marker = STBI__MARKER_none;
2631    j->todo = j->restart_interval ? j->restart_interval : 0x7fffffff;
2632    j->eob_run = 0;
2633    // no more than 1<<31 MCUs if no restart_interal? that's plenty safe,
2634    // since we don't even allow 1<<30 pixels
2635 }
2636 
// Restart-interval bookkeeping, run once after every decoded MCU.
// Returns 1 if decoding of the current scan should go on, 0 if the scan
// has to stop here (the restart interval ran out but no RSTn marker is
// pending).
static int stbi__jpeg_mcu_done(stbi__jpeg *z)
{
   if (--z->todo > 0)
      return 1;
   if (z->code_bits < 24) stbi__grow_buffer_unsafe(z);
   // if it's NOT a restart, then just bail, so we get corrupt data
   // rather than no data
   if (!STBI__RESTART(z->marker))
      return 0;
   stbi__jpeg_reset(z);
   return 1;
}

// Decode the entropy-coded data of one scan.
// Baseline scans are decoded and inverse-transformed block by block straight
// into each component's pixel buffer; progressive scans only accumulate
// coefficients into the component's coeff buffer.
// Returns 0 if a block fails to decode, 1 otherwise (including the case
// where an expected restart marker is missing and the scan is cut short).
static int stbi__parse_entropy_coded_data(stbi__jpeg *z)
{
   stbi__jpeg_reset(z);

   if (!z->progressive && z->scan_n == 1) {
      // baseline, non-interleaved: one block at a time in trivial scanline
      // order. the number of blocks depends only on how many actual
      // "pixels" this component has, independent of interleaved MCU blocking
      int bx,by;
      STBI_SIMD_ALIGN(short, data[64]);
      int n = z->order[0];
      int blocks_w = (z->img_comp[n].x+7) >> 3;
      int blocks_h = (z->img_comp[n].y+7) >> 3;
      for (by=0; by < blocks_h; ++by) {
         for (bx=0; bx < blocks_w; ++bx) {
            int ha = z->img_comp[n].ha;
            if (!stbi__jpeg_decode_block(z, data, z->huff_dc+z->img_comp[n].hd, z->huff_ac+ha, z->fast_ac[ha], n, z->dequant[z->img_comp[n].tq])) return 0;
            z->idct_block_kernel(z->img_comp[n].data+z->img_comp[n].w2*by*8+bx*8, z->img_comp[n].w2, data);
            // every data block is an MCU here
            if (!stbi__jpeg_mcu_done(z)) return 1;
         }
      }
      return 1;
   } else if (!z->progressive) {
      // baseline, interleaved
      int mx,my,k,bx,by;
      STBI_SIMD_ALIGN(short, data[64]);
      for (my=0; my < z->img_mcu_y; ++my) {
         for (mx=0; mx < z->img_mcu_x; ++mx) {
            // one interleaved MCU: scan_n components in scan order, each
            // contributing h*v blocks as given by its sampling factors
            for (k=0; k < z->scan_n; ++k) {
               int n = z->order[k];
               for (by=0; by < z->img_comp[n].v; ++by) {
                  for (bx=0; bx < z->img_comp[n].h; ++bx) {
                     int x2 = (mx*z->img_comp[n].h + bx)*8;
                     int y2 = (my*z->img_comp[n].v + by)*8;
                     int ha = z->img_comp[n].ha;
                     if (!stbi__jpeg_decode_block(z, data, z->huff_dc+z->img_comp[n].hd, z->huff_ac+ha, z->fast_ac[ha], n, z->dequant[z->img_comp[n].tq])) return 0;
                     z->idct_block_kernel(z->img_comp[n].data+z->img_comp[n].w2*y2+x2, z->img_comp[n].w2, data);
                  }
               }
            }
            // all components done, that's one interleaved MCU
            if (!stbi__jpeg_mcu_done(z)) return 1;
         }
      }
      return 1;
   } else if (z->scan_n == 1) {
      // progressive, non-interleaved: same block walk as the baseline
      // case, but coefficients are only stored, not transformed
      int bx,by;
      int n = z->order[0];
      int blocks_w = (z->img_comp[n].x+7) >> 3;
      int blocks_h = (z->img_comp[n].y+7) >> 3;
      for (by=0; by < blocks_h; ++by) {
         for (bx=0; bx < blocks_w; ++bx) {
            short *block = z->img_comp[n].coeff + 64 * (bx + by * z->img_comp[n].coeff_w);
            if (z->spec_start == 0) {
               if (!stbi__jpeg_decode_block_prog_dc(z, block, &z->huff_dc[z->img_comp[n].hd], n))
                  return 0;
            } else {
               int ha = z->img_comp[n].ha;
               if (!stbi__jpeg_decode_block_prog_ac(z, block, &z->huff_ac[ha], z->fast_ac[ha]))
                  return 0;
            }
            // every data block is an MCU here
            if (!stbi__jpeg_mcu_done(z)) return 1;
         }
      }
      return 1;
   } else {
      // progressive, interleaved: only the DC decoder is used on this path
      int mx,my,k,bx,by;
      for (my=0; my < z->img_mcu_y; ++my) {
         for (mx=0; mx < z->img_mcu_x; ++mx) {
            for (k=0; k < z->scan_n; ++k) {
               int n = z->order[k];
               for (by=0; by < z->img_comp[n].v; ++by) {
                  for (bx=0; bx < z->img_comp[n].h; ++bx) {
                     int x2 = (mx*z->img_comp[n].h + bx);
                     int y2 = (my*z->img_comp[n].v + by);
                     short *block = z->img_comp[n].coeff + 64 * (x2 + y2 * z->img_comp[n].coeff_w);
                     if (!stbi__jpeg_decode_block_prog_dc(z, block, &z->huff_dc[z->img_comp[n].hd], n))
                        return 0;
                  }
               }
            }
            // all components done, that's one interleaved MCU
            if (!stbi__jpeg_mcu_done(z)) return 1;
         }
      }
      return 1;
   }
}
2760 
// Multiply each of the 64 coefficients in 'data' by the matching entry of
// the quantization table 'dequant', in place.
static void stbi__jpeg_dequantize(short *data, stbi__uint16 *dequant)
{
   short *coef = data;
   short *stop = data + 64;
   while (coef != stop) {
      *coef *= *dequant;
      ++coef;
      ++dequant;
   }
}
2767 
// For progressive images, turn the accumulated coefficients into pixels:
// every block of every component is dequantized in place and then run
// through the IDCT kernel into the component's pixel buffer.
// Does nothing for non-progressive images.
static void stbi__jpeg_finish(stbi__jpeg *z)
{
   int comp,bx,by;

   if (!z->progressive)
      return;

   for (comp=0; comp < z->s->img_n; ++comp) {
      int blocks_w = (z->img_comp[comp].x+7) >> 3;
      int blocks_h = (z->img_comp[comp].y+7) >> 3;
      for (by=0; by < blocks_h; ++by) {
         for (bx=0; bx < blocks_w; ++bx) {
            short *block = z->img_comp[comp].coeff + 64 * (bx + by * z->img_comp[comp].coeff_w);
            stbi__jpeg_dequantize(block, z->dequant[z->img_comp[comp].tq]);
            z->idct_block_kernel(z->img_comp[comp].data+z->img_comp[comp].w2*by*8+bx*8, z->img_comp[comp].w2, block);
         }
      }
   }
}
2786 
// Parse the segment that follows marker 'm'. Handles DRI (restart interval),
// DQT (quantization tables), DHT (huffman tables) and the APPn / COM
// segments; APP0 "JFIF" and APP14 "Adobe" are inspected, everything else in
// an APPn/COM segment is skipped.
// Returns 1 on success, 0 on failure (unknown marker, malformed segment).
static int stbi__process_marker(stbi__jpeg *z, int m)
{
   int L;
   switch (m) {
      case STBI__MARKER_none: // no marker found
         return stbi__err("expected marker","Corrupt JPEG");

      case 0xDD: // DRI - specify restart interval
         if (stbi__get16be(z->s) != 4) return stbi__err("bad DRI len","Corrupt JPEG");
         z->restart_interval = stbi__get16be(z->s);
         return 1;

      case 0xDB: // DQT - define quantization table
         L = stbi__get16be(z->s)-2;
         // a single segment may carry several tables back to back
         while (L > 0) {
            int q = stbi__get8(z->s);
            int p = q >> 4, sixteen = (p != 0);
            int t = q & 15,i;
            if (p != 0 && p != 1) return stbi__err("bad DQT type","Corrupt JPEG");
            if (t > 3) return stbi__err("bad DQT table","Corrupt JPEG");

            // entries arrive in zigzag order; store them de-zigzagged
            for (i=0; i < 64; ++i)
               z->dequant[t][stbi__jpeg_dezigzag[i]] = (stbi__uint16)(sixteen ? stbi__get16be(z->s) : stbi__get8(z->s));
            L -= (sixteen ? 129 : 65);
         }
         // the tables must account for the segment length exactly
         return L==0;

      case 0xC4: // DHT - define huffman table
         L = stbi__get16be(z->s)-2;
         while (L > 0) {
            stbi_uc *v;
            int sizes[16],i,n=0;
            int q = stbi__get8(z->s);
            int tc = q >> 4;
            int th = q & 15;
            if (tc > 1 || th > 3) return stbi__err("bad DHT header","Corrupt JPEG");
            for (i=0; i < 16; ++i) {
               sizes[i] = stbi__get8(z->s);
               n += sizes[i];
            }
            // symbols are single bytes, so a table can hold at most 256 of
            // them. without this check a crafted file (n up to 16*255) would
            // make the table builder and the loop over v[] below write far
            // past the end of the huffman table storage.
            if (n > 256) return stbi__err("bad DHT header","Corrupt JPEG");
            L -= 17;
            if (tc == 0) {
               if (!stbi__build_huffman(z->huff_dc+th, sizes)) return 0;
               v = z->huff_dc[th].values;
            } else {
               if (!stbi__build_huffman(z->huff_ac+th, sizes)) return 0;
               v = z->huff_ac[th].values;
            }
            for (i=0; i < n; ++i)
               v[i] = stbi__get8(z->s);
            if (tc != 0)
               stbi__build_fast_ac(z->fast_ac[th], z->huff_ac + th);
            L -= n;
         }
         return L==0;

      default:
         break;
   }

   // check for comment block or APP blocks
   if ((m >= 0xE0 && m <= 0xEF) || m == 0xFE) {
      L = stbi__get16be(z->s);
      if (L < 2) {
         if (m == 0xFE)
            return stbi__err("bad COM len","Corrupt JPEG");
         else
            return stbi__err("bad APP len","Corrupt JPEG");
      }
      L -= 2;

      if (m == 0xE0 && L >= 5) { // JFIF APP0 segment
         static const unsigned char tag[5] = {'J','F','I','F','\0'};
         int ok = 1;
         int i;
         // always consume all 5 tag bytes so L stays in sync with the stream
         for (i=0; i < 5; ++i)
            if (stbi__get8(z->s) != tag[i])
               ok = 0;
         L -= 5;
         if (ok)
            z->jfif = 1;
      } else if (m == 0xEE && L >= 12) { // Adobe APP14 segment
         static const unsigned char tag[6] = {'A','d','o','b','e','\0'};
         int ok = 1;
         int i;
         for (i=0; i < 6; ++i)
            if (stbi__get8(z->s) != tag[i])
               ok = 0;
         L -= 6;
         if (ok) {
            stbi__get8(z->s); // version
            stbi__get16be(z->s); // flags0
            stbi__get16be(z->s); // flags1
            z->app14_color_transform = stbi__get8(z->s); // color transform
            L -= 6;
         }
      }

      // skip whatever is left of the segment
      stbi__skip(z->s, L);
      return 1;
   }

   return stbi__err("unknown marker","Corrupt JPEG");
}
2888 
2889 // after we see SOS
// Parse the SOS (start of scan) header: the list of components taking part
// in this scan with their DC/AC huffman table selectors, followed by the
// spectral selection and successive approximation parameters.
// Fills z->scan_n, z->order[], the hd/ha fields of the referenced
// components, z->spec_start/spec_end and z->succ_high/succ_low.
// Returns 1 on success, 0 on failure (with the failure reason set).
static int stbi__process_scan_header(stbi__jpeg *z)
{
   int i;
   int Ls = stbi__get16be(z->s);
   z->scan_n = stbi__get8(z->s);
   if (z->scan_n < 1 || z->scan_n > 4 || z->scan_n > (int) z->s->img_n) return stbi__err("bad SOS component count","Corrupt JPEG");
   if (Ls != 6+2*z->scan_n) return stbi__err("bad SOS len","Corrupt JPEG");
   for (i=0; i < z->scan_n; ++i) {
      int id = stbi__get8(z->s), which;
      int q = stbi__get8(z->s);
      // map the component id used in the scan to its index in img_comp[]
      for (which = 0; which < z->s->img_n; ++which)
         if (z->img_comp[which].id == id)
            break;
      // no match: report it like every other failure here, instead of
      // returning 0 with a stale failure reason
      if (which == z->s->img_n) return stbi__err("bad SOS component id","Corrupt JPEG");
      z->img_comp[which].hd = q >> 4;   if (z->img_comp[which].hd > 3) return stbi__err("bad DC huff","Corrupt JPEG");
      z->img_comp[which].ha = q & 15;   if (z->img_comp[which].ha > 3) return stbi__err("bad AC huff","Corrupt JPEG");
      z->order[i] = which;
   }

   {
      int aa;
      z->spec_start = stbi__get8(z->s);
      z->spec_end   = stbi__get8(z->s); // should be 63, but might be 0
      aa = stbi__get8(z->s);
      z->succ_high = (aa >> 4);
      z->succ_low  = (aa & 15);
      if (z->progressive) {
         if (z->spec_start > 63 || z->spec_end > 63  || z->spec_start > z->spec_end || z->succ_high > 13 || z->succ_low > 13)
            return stbi__err("bad SOS", "Corrupt JPEG");
      } else {
         // baseline scans always cover the full coefficient range
         if (z->spec_start != 0) return stbi__err("bad SOS","Corrupt JPEG");
         if (z->succ_high != 0 || z->succ_low != 0) return stbi__err("bad SOS","Corrupt JPEG");
         z->spec_end = 63;
      }
   }

   return 1;
}
2928 
// Release the per-component buffers (pixel data, coefficient data and
// line buffer) of the first 'ncomp' components and clear the pointers.
// 'why' is passed through unchanged as the return value so the call can be
// used directly in a return statement.
static int stbi__free_jpeg_components(stbi__jpeg *z, int ncomp, int why)
{
   int c = 0;
   while (c < ncomp) {
      if (z->img_comp[c].raw_data != NULL) {
         STBI_FREE(z->img_comp[c].raw_data);
         z->img_comp[c].data = NULL;
         z->img_comp[c].raw_data = NULL;
      }
      if (z->img_comp[c].raw_coeff != NULL) {
         STBI_FREE(z->img_comp[c].raw_coeff);
         z->img_comp[c].coeff = NULL;
         z->img_comp[c].raw_coeff = NULL;
      }
      if (z->img_comp[c].linebuf != NULL) {
         STBI_FREE(z->img_comp[c].linebuf);
         z->img_comp[c].linebuf = NULL;
      }
      ++c;
   }
   return why;
}
2950 
// Parse the SOF (start of frame) header: sample precision, image size,
// component count and each component's id, sampling factors and
// quantization table index. Unless 'scan' is STBI__SCAN_load the function
// stops after validating the header; otherwise it also derives the MCU
// layout and allocates the per-component pixel (and, for progressive
// images, coefficient) buffers.
// Returns 1 on success, 0 on failure.
static int stbi__process_frame_header(stbi__jpeg *z, int scan)
{
   stbi__context *s = z->s;
   int seg_len,precision,ncomp,packed,i;
   int h_max=1,v_max=1;

   seg_len = stbi__get16be(s);
   if (seg_len < 11) return stbi__err("bad SOF len","Corrupt JPEG"); // JPEG

   precision = stbi__get8(s);
   if (precision != 8) return stbi__err("only 8-bit","JPEG format not supported: 8-bit only"); // JPEG baseline

   // Legal, but we don't handle it--but neither does IJG
   s->img_y = stbi__get16be(s);
   if (s->img_y == 0) return stbi__err("no header height", "JPEG format not supported: delayed height");

   // JPEG requires a non-zero width
   s->img_x = stbi__get16be(s);
   if (s->img_x == 0) return stbi__err("0 width","Corrupt JPEG");

   ncomp = stbi__get8(s);
   if (!(ncomp == 1 || ncomp == 3 || ncomp == 4)) return stbi__err("bad component count","Corrupt JPEG");
   s->img_n = ncomp;
   for (i=0; i < ncomp; ++i) {
      z->img_comp[i].linebuf = NULL;
      z->img_comp[i].data = NULL;
   }

   if (seg_len != 8+3*s->img_n) return stbi__err("bad SOF len","Corrupt JPEG");

   // per-component records; also count how many ids spell out 'R','G','B'
   z->rgb = 0;
   for (i=0; i < s->img_n; ++i) {
      static unsigned char rgb[3] = { 'R', 'G', 'B' };
      z->img_comp[i].id = stbi__get8(s);
      if (s->img_n == 3 && z->img_comp[i].id == rgb[i])
         ++z->rgb;

      packed = stbi__get8(s);
      z->img_comp[i].h = (packed >> 4);
      if (z->img_comp[i].h == 0 || z->img_comp[i].h > 4) return stbi__err("bad H","Corrupt JPEG");
      z->img_comp[i].v = packed & 15;
      if (z->img_comp[i].v == 0 || z->img_comp[i].v > 4) return stbi__err("bad V","Corrupt JPEG");

      z->img_comp[i].tq = stbi__get8(s);
      if (z->img_comp[i].tq > 3) return stbi__err("bad TQ","Corrupt JPEG");
   }

   // header-only callers are done here
   if (scan != STBI__SCAN_load) return 1;

   if (!stbi__mad3sizes_valid(s->img_x, s->img_y, s->img_n, 0)) return stbi__err("too large", "Image too large to decode");

   // largest sampling factors over all components
   for (i=0; i < s->img_n; ++i) {
      if (h_max < z->img_comp[i].h) h_max = z->img_comp[i].h;
      if (v_max < z->img_comp[i].v) v_max = z->img_comp[i].v;
   }

   // compute interleaved mcu info
   z->img_h_max = h_max;
   z->img_v_max = v_max;
   z->img_mcu_w = h_max * 8;
   z->img_mcu_h = v_max * 8;
   // these sizes can't be more than 17 bits
   z->img_mcu_x = (s->img_x + z->img_mcu_w-1) / z->img_mcu_w;
   z->img_mcu_y = (s->img_y + z->img_mcu_h-1) / z->img_mcu_h;

   for (i=0; i < s->img_n; ++i) {
      // number of effective pixels (e.g. for non-interleaved MCU)
      z->img_comp[i].x = (s->img_x * z->img_comp[i].h + h_max-1) / h_max;
      z->img_comp[i].y = (s->img_y * z->img_comp[i].v + v_max-1) / v_max;

      // the buffers are sized for whole interleaved MCUs (e.g. a 16x16
      // iMCU on an image of width 33), which keeps decoding simple; the
      // surplus is not discarded until colorspace conversion.
      //
      // img_mcu_x, img_mcu_y: <=17 bits; comp[i].h and .v are <=4 (checked earlier)
      // so these muls can't overflow with 32-bit ints (which we require)
      z->img_comp[i].w2 = z->img_mcu_x * z->img_comp[i].h * 8;
      z->img_comp[i].h2 = z->img_mcu_y * z->img_comp[i].v * 8;

      z->img_comp[i].linebuf = NULL;
      z->img_comp[i].raw_coeff = 0;
      z->img_comp[i].coeff = 0;

      z->img_comp[i].raw_data = stbi__malloc_mad2(z->img_comp[i].w2, z->img_comp[i].h2, 15);
      if (!z->img_comp[i].raw_data)
         return stbi__free_jpeg_components(z, i+1, stbi__err("outofmem", "Out of memory"));
      // align blocks for idct using mmx/sse
      z->img_comp[i].data = (stbi_uc*) (((size_t) z->img_comp[i].raw_data + 15) & ~15);

      if (!z->progressive)
         continue;

      // progressive images also keep all coefficients around;
      // w2, h2 are multiples of 8 (see above)
      z->img_comp[i].coeff_w = z->img_comp[i].w2 / 8;
      z->img_comp[i].coeff_h = z->img_comp[i].h2 / 8;
      z->img_comp[i].raw_coeff = stbi__malloc_mad3(z->img_comp[i].w2, z->img_comp[i].h2, sizeof(short), 15);
      if (!z->img_comp[i].raw_coeff)
         return stbi__free_jpeg_components(z, i+1, stbi__err("outofmem", "Out of memory"));
      z->img_comp[i].coeff = (short*) (((size_t) z->img_comp[i].raw_coeff + 15) & ~15);
   }

   return 1;
}
3033 
3034 // use comparisons since in some cases we handle more than one case (e.g. SOF)
3035 #define stbi__DNL(x)         ((x) == 0xdc)
3036 #define stbi__SOI(x)         ((x) == 0xd8)
3037 #define stbi__EOI(x)         ((x) == 0xd9)
3038 #define stbi__SOF(x)         ((x) == 0xc0 || (x) == 0xc1 || (x) == 0xc2)
3039 #define stbi__SOS(x)         ((x) == 0xda)
3040 
3041 #define stbi__SOF_progressive(x)   ((x) == 0xc2)
3042 
stbi__decode_jpeg_header(stbi__jpeg * z,int scan)3043 static int stbi__decode_jpeg_header(stbi__jpeg *z, int scan)
3044 {
3045    int m;
3046    z->jfif = 0;
3047    z->app14_color_transform = -1; // valid values are 0,1,2
3048    z->marker = STBI__MARKER_none; // initialize cached marker to empty
3049    m = stbi__get_marker(z);
3050    if (!stbi__SOI(m)) return stbi__err("no SOI","Corrupt JPEG");
3051    if (scan == STBI__SCAN_type) return 1;
3052    m = stbi__get_marker(z);
3053    while (!stbi__SOF(m)) {
3054       if (!stbi__process_marker(z,m)) return 0;
3055       m = stbi__get_marker(z);
3056       while (m == STBI__MARKER_none) {
3057          // some files have extra padding after their blocks, so ok, we'll scan
3058          if (stbi__at_eof(z->s)) return stbi__err("no SOF", "Corrupt JPEG");
3059          m = stbi__get_marker(z);
3060       }
3061    }
3062    z->progressive = stbi__SOF_progressive(m);
3063    if (!stbi__process_frame_header(z, scan)) return 0;
3064    return 1;
3065 }
3066 
3067 // decode image to YCbCr format
// Decode a whole JPEG stream into per-component planes (YCbCr or whatever
// the file stores): parses the header, then processes marker segments and
// scans until EOI. For progressive files the accumulated coefficients are
// converted to pixels at the end.
// Returns 1 on success, 0 on failure. Buffers already allocated for the
// components are not released here on failure.
static int stbi__decode_jpeg_image(stbi__jpeg *j)
{
   int m;
   // clear the buffer pointers of all four slots up front so that cleanup
   // code never sees an uninitialized pointer
   for (m = 0; m < 4; m++) {
      j->img_comp[m].raw_data = NULL;
      j->img_comp[m].raw_coeff = NULL;
   }
   j->restart_interval = 0;
   if (!stbi__decode_jpeg_header(j, STBI__SCAN_load)) return 0;
   m = stbi__get_marker(j);
   while (!stbi__EOI(m)) {
      if (stbi__SOS(m)) {
         if (!stbi__process_scan_header(j)) return 0;
         if (!stbi__parse_entropy_coded_data(j)) return 0;
         if (j->marker == STBI__MARKER_none ) {
            // handle 0s at the end of image data from IP Kamera 9060
            while (!stbi__at_eof(j->s)) {
               int x = stbi__get8(j->s);
               if (x == 255) {
                  j->marker = stbi__get8(j->s);
                  break;
               }
            }
            // if we reach eof without hitting a marker, stbi__get_marker() below will fail and we'll eventually return 0
         }
      } else if (stbi__DNL(m)) {
         int Ld = stbi__get16be(j->s);
         stbi__uint32 NL = stbi__get16be(j->s);
         // a DNL segment is only accepted when it is well-formed and agrees
         // with the height from the frame header; anything else is an error
         // and must actually stop decoding, not just record a message
         if (Ld != 4) return stbi__err("bad DNL len", "Corrupt JPEG");
         if (NL != j->s->img_y) return stbi__err("bad DNL height", "Corrupt JPEG");
      } else {
         if (!stbi__process_marker(j, m)) return 0;
      }
      m = stbi__get_marker(j);
   }
   if (j->progressive)
      stbi__jpeg_finish(j);
   return 1;
}
3107 
3108 // static jfif-centered resampling (across block boundaries)
3109 
// Signature shared by all row resamplers. `out` is scratch space for the
// expanded row, in0/in1 are the two source rows (the implementations name
// them in_near and in_far), w is the number of source samples and hs the
// horizontal expansion factor. The returned pointer is the row to use; it is
// not always `out` (resample_row_1 hands back its input row).
typedef stbi_uc *(*resample_row_func)(stbi_uc *out, stbi_uc *in0, stbi_uc *in1,
                                    int w, int hs);

// shift right by 2 (divide by 4) and narrow to a byte; callers add the
// rounding bias themselves
#define stbi__div4(x) ((stbi_uc) ((x) >> 2))
3114 
// Resampler for components that need no expansion in either axis: the
// nearer source row is returned as-is and nothing is written to `out`.
static stbi_uc *resample_row_1(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs)
{
   // every argument except in_near is deliberately ignored
   STBI_NOTUSED(hs);
   STBI_NOTUSED(w);
   STBI_NOTUSED(in_far);
   STBI_NOTUSED(out);
   return in_near;
}
3123 
// Vertical 2x upsampling: each output sample is a 3:1 blend of the nearer
// and the farther source row, rounded. Writes w samples to `out` and
// returns `out`.
static stbi_uc* stbi__resample_row_v_2(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs)
{
   int x = 0;
   STBI_NOTUSED(hs);
   while (x < w) {
      // (3*near + far + 2) / 4; the +2 rounds to nearest
      int blended = 3*in_near[x] + in_far[x] + 2;
      out[x] = stbi__div4(blended);
      ++x;
   }
   return out;
}
3133 
// Horizontal 2x upsampling: w source samples from in_near become 2*w
// samples in `out`. Interior outputs are 3:1 blends of a sample with its
// left or right neighbour; the two outermost outputs copy the edge samples.
// in_far is not read. Returns `out`.
static stbi_uc*  stbi__resample_row_h_2(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs)
{
   stbi_uc *src = in_near;
   int i;

   STBI_NOTUSED(in_far);
   STBI_NOTUSED(hs);

   if (w == 1) {
      // a lone sample has no neighbour to interpolate with
      out[0] = out[1] = src[0];
      return out;
   }

   // left edge
   out[0] = src[0];
   out[1] = stbi__div4(src[0]*3 + src[1] + 2);

   // interior: the 3*sample + rounding term is shared by both outputs
   for (i = 1; i < w-1; ++i) {
      int base = 3*src[i] + 2;
      out[i*2+0] = stbi__div4(base + src[i-1]);
      out[i*2+1] = stbi__div4(base + src[i+1]);
   }

   // right edge (i is left at the index of the last sample)
   out[i*2+0] = stbi__div4(src[w-2]*3 + src[w-1] + 2);
   out[i*2+1] = src[w-1];

   return out;
}
3161 
// shift right by 4 (divide by 16) and narrow to a byte; callers add the
// rounding bias (8) themselves
#define stbi__div16(x) ((stbi_uc) ((x) >> 4))
3163 
// 2x upsampling in both axes for one output row. Each source column is
// first blended vertically (3*near + far); neighbouring column values are
// then blended 3:1 horizontally, so interior results carry a scale of 16.
// Writes 2*w samples to `out` and returns `out`.
static stbi_uc *stbi__resample_row_hv_2(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs)
{
   int x, prev, cur;
   STBI_NOTUSED(hs);

   if (w == 1) {
      // single column: only the vertical blend applies
      out[0] = out[1] = stbi__div4(3*in_near[0] + in_far[0] + 2);
      return out;
   }

   cur = 3*in_near[0] + in_far[0];
   out[0] = stbi__div4(cur+2);          // left edge: vertical blend only
   for (x = 1; x < w; ++x) {
      prev = cur;
      cur  = 3*in_near[x] + in_far[x];
      out[x*2-1] = stbi__div16(3*prev + cur + 8);
      out[x*2  ] = stbi__div16(3*cur + prev + 8);
   }
   out[w*2-1] = stbi__div4(cur+2);      // right edge: vertical blend only

   return out;
}
3187 
#if defined(STBI_SSE2) || defined(STBI_NEON)
// SIMD variant of stbi__resample_row_hv_2 (2x upsampling in both axes).
// Same parameters and return value: reads w samples from in_near/in_far,
// writes 2*w samples to `out`, returns `out`; hs is unused.
//
// The vector loop consumes 8 source samples per iteration and never covers
// the last sample of the row; the scalar code after the loop finishes
// whatever remains, including the right-hand edge.
static stbi_uc *stbi__resample_row_hv_2_simd(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs)
{
   // need to generate 2x2 samples for every one in input
   int i=0,t0,t1;

   if (w == 1) {
      out[0] = out[1] = stbi__div4(3*in_near[0] + in_far[0] + 2);
      return out;
   }

   // t1 carries the vertically filtered value of the previous column
   t1 = 3*in_near[0] + in_far[0];
   // process groups of 8 pixels for as long as we can.
   // note we can't handle the last pixel in a row in this loop
   // because we need to handle the filter boundary conditions.
   for (; i < ((w-1) & ~7); i += 8) {
#if defined(STBI_SSE2)
      // load and perform the vertical filtering pass
      // this uses 3*x + y = 4*x + (y - x)
      __m128i zero  = _mm_setzero_si128();
      __m128i farb  = _mm_loadl_epi64((__m128i *) (in_far + i));
      __m128i nearb = _mm_loadl_epi64((__m128i *) (in_near + i));
      __m128i farw  = _mm_unpacklo_epi8(farb, zero);
      __m128i nearw = _mm_unpacklo_epi8(nearb, zero);
      __m128i diff  = _mm_sub_epi16(farw, nearw);
      __m128i nears = _mm_slli_epi16(nearw, 2);
      __m128i curr  = _mm_add_epi16(nears, diff); // current row

      // horizontal filter works the same based on shifted vers of current
      // row. "prev" is current row shifted right by 1 pixel; we need to
      // insert the previous pixel value (from t1).
      // "next" is current row shifted left by 1 pixel, with first pixel
      // of next block of 8 pixels added in.
      __m128i prv0 = _mm_slli_si128(curr, 2);
      __m128i nxt0 = _mm_srli_si128(curr, 2);
      __m128i prev = _mm_insert_epi16(prv0, t1, 0);
      // index i+8 is in range: the loop bound guarantees i+8 <= w-1
      __m128i next = _mm_insert_epi16(nxt0, 3*in_near[i+8] + in_far[i+8], 7);

      // horizontal filter, polyphase implementation since it's convenient:
      // even pixels = 3*cur + prev = cur*4 + (prev - cur)
      // odd  pixels = 3*cur + next = cur*4 + (next - cur)
      // note the shared term.
      __m128i bias  = _mm_set1_epi16(8);
      __m128i curs = _mm_slli_epi16(curr, 2);
      __m128i prvd = _mm_sub_epi16(prev, curr);
      __m128i nxtd = _mm_sub_epi16(next, curr);
      __m128i curb = _mm_add_epi16(curs, bias);
      __m128i even = _mm_add_epi16(prvd, curb);
      __m128i odd  = _mm_add_epi16(nxtd, curb);

      // interleave even and odd pixels, then undo scaling.
      __m128i int0 = _mm_unpacklo_epi16(even, odd);
      __m128i int1 = _mm_unpackhi_epi16(even, odd);
      __m128i de0  = _mm_srli_epi16(int0, 4);
      __m128i de1  = _mm_srli_epi16(int1, 4);

      // pack and write output
      __m128i outv = _mm_packus_epi16(de0, de1);
      _mm_storeu_si128((__m128i *) (out + i*2), outv);
#elif defined(STBI_NEON)
      // load and perform the vertical filtering pass
      // this uses 3*x + y = 4*x + (y - x)
      uint8x8_t farb  = vld1_u8(in_far + i);
      uint8x8_t nearb = vld1_u8(in_near + i);
      int16x8_t diff  = vreinterpretq_s16_u16(vsubl_u8(farb, nearb));
      int16x8_t nears = vreinterpretq_s16_u16(vshll_n_u8(nearb, 2));
      int16x8_t curr  = vaddq_s16(nears, diff); // current row

      // horizontal filter works the same based on shifted vers of current
      // row. "prev" is current row shifted right by 1 pixel; we need to
      // insert the previous pixel value (from t1).
      // "next" is current row shifted left by 1 pixel, with first pixel
      // of next block of 8 pixels added in.
      int16x8_t prv0 = vextq_s16(curr, curr, 7);
      int16x8_t nxt0 = vextq_s16(curr, curr, 1);
      int16x8_t prev = vsetq_lane_s16(t1, prv0, 0);
      int16x8_t next = vsetq_lane_s16(3*in_near[i+8] + in_far[i+8], nxt0, 7);

      // horizontal filter, polyphase implementation since it's convenient:
      // even pixels = 3*cur + prev = cur*4 + (prev - cur)
      // odd  pixels = 3*cur + next = cur*4 + (next - cur)
      // note the shared term.
      int16x8_t curs = vshlq_n_s16(curr, 2);
      int16x8_t prvd = vsubq_s16(prev, curr);
      int16x8_t nxtd = vsubq_s16(next, curr);
      int16x8_t even = vaddq_s16(curs, prvd);
      int16x8_t odd  = vaddq_s16(curs, nxtd);

      // undo scaling and round, then store with even/odd phases interleaved
      uint8x8x2_t o;
      o.val[0] = vqrshrun_n_s16(even, 4);
      o.val[1] = vqrshrun_n_s16(odd,  4);
      vst2_u8(out + i*2, o);
#endif

      // "previous" value for next iter
      t1 = 3*in_near[i+7] + in_far[i+7];
   }

   // first column after the vector loop: its left-hand output was already
   // stored by the loop (or, when i == 0, there is none and t0 == t1 makes
   // this the plain vertical blend), so only out[i*2] is written here
   t0 = t1;
   t1 = 3*in_near[i] + in_far[i];
   out[i*2] = stbi__div16(3*t1 + t0 + 8);

   // remaining columns, same arithmetic as the scalar implementation
   for (++i; i < w; ++i) {
      t0 = t1;
      t1 = 3*in_near[i]+in_far[i];
      out[i*2-1] = stbi__div16(3*t0 + t1 + 8);
      out[i*2  ] = stbi__div16(3*t1 + t0 + 8);
   }
   // right edge: vertical blend only
   out[w*2-1] = stbi__div4(t1+2);

   STBI_NOTUSED(hs);

   return out;
}
#endif
3304 
// Fallback resampler for expansion factors without a dedicated kernel:
// nearest-neighbour replication, each of the w samples of in_near repeated
// hs times in `out`. in_far is not read. Returns `out`.
static stbi_uc *stbi__resample_row_generic(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs)
{
   stbi_uc *dst = out;
   int x, rep;
   STBI_NOTUSED(in_far);
   for (x = 0; x < w; ++x) {
      for (rep = 0; rep < hs; ++rep)
         *dst++ = in_near[x];
   }
   return out;
}
3315 
// this is a reduced-precision calculation of YCbCr-to-RGB introduced
// to make sure the code produces the same results in both SIMD and scalar
//
// stbi__float2fixed turns a coefficient into fixed point with 20 fractional
// bits but only 12 bits of precision: the value is rounded at a scale of
// 4096 and then shifted left by 8, so its low 8 bits are always zero.
#define stbi__float2fixed(x)  (((int) ((x) * 4096.0f + 0.5f)) << 8)
// Scalar YCbCr -> RGB conversion for one row.
//
//   out    destination; 4 bytes (r, g, b, 255) are stored per pixel and the
//          pointer then advances by `step`, so with step == 3 the fourth
//          byte is overwritten by the next pixel
//   y, pcb, pcr  source planes, `count` samples each
//
// Arithmetic is fixed point with 20 fractional bits (see stbi__float2fixed).
static void stbi__YCbCr_to_RGB_row(stbi_uc *out, const stbi_uc *y, const stbi_uc *pcb, const stbi_uc *pcr, int count, int step)
{
   stbi_uc *dst = out;
   int px;
   for (px = 0; px < count; ++px, dst += step) {
      int luma = (y[px] << 20) + (1<<19); // + one half, for rounding
      int cr = pcr[px] - 128;
      int cb = pcb[px] - 128;
      int r = luma +  cr* stbi__float2fixed(1.40200f);
      int g = luma + (cr*-stbi__float2fixed(0.71414f)) + ((cb*-stbi__float2fixed(0.34414f)) & 0xffff0000);
      int b = luma                                     +   cb* stbi__float2fixed(1.77200f);

      // drop the fractional bits
      r >>= 20;
      g >>= 20;
      b >>= 20;

      // clamp to 0..255
      if (r < 0) r = 0; else if (r > 255) r = 255;
      if (g < 0) g = 0; else if (g > 255) g = 255;
      if (b < 0) b = 0; else if (b > 255) b = 255;

      dst[0] = (stbi_uc)r;
      dst[1] = (stbi_uc)g;
      dst[2] = (stbi_uc)b;
      dst[3] = 255;
   }
}
3343 
#if defined(STBI_SSE2) || defined(STBI_NEON)
// SIMD YCbCr -> RGB conversion for one row; same parameters as
// stbi__YCbCr_to_RGB_row. Only step == 4 is vectorized, 8 pixels per
// iteration. Whatever the vector loops leave over (the last count % 8
// pixels, or the whole row when step != 4) is converted by the scalar loop
// at the end, which repeats the arithmetic of stbi__YCbCr_to_RGB_row.
static void stbi__YCbCr_to_RGB_simd(stbi_uc *out, stbi_uc const *y, stbi_uc const *pcb, stbi_uc const *pcr, int count, int step)
{
   int i = 0;

#ifdef STBI_SSE2
   // step == 3 is pretty ugly on the final interleave, and i'm not convinced
   // it's useful in practice (you wouldn't use it for textures, for example).
   // so just accelerate step == 4 case.
   if (step == 4) {
      // this is a fairly straightforward implementation and not super-optimized.
      __m128i signflip  = _mm_set1_epi8(-0x80);
      __m128i cr_const0 = _mm_set1_epi16(   (short) ( 1.40200f*4096.0f+0.5f));
      __m128i cr_const1 = _mm_set1_epi16( - (short) ( 0.71414f*4096.0f+0.5f));
      __m128i cb_const0 = _mm_set1_epi16( - (short) ( 0.34414f*4096.0f+0.5f));
      __m128i cb_const1 = _mm_set1_epi16(   (short) ( 1.77200f*4096.0f+0.5f));
      __m128i y_bias = _mm_set1_epi8((char) (unsigned char) 128);
      __m128i xw = _mm_set1_epi16(255); // alpha channel

      for (; i+7 < count; i += 8) {
         // load
         __m128i y_bytes = _mm_loadl_epi64((__m128i *) (y+i));
         __m128i cr_bytes = _mm_loadl_epi64((__m128i *) (pcr+i));
         __m128i cb_bytes = _mm_loadl_epi64((__m128i *) (pcb+i));
         __m128i cr_biased = _mm_xor_si128(cr_bytes, signflip); // -128
         __m128i cb_biased = _mm_xor_si128(cb_bytes, signflip); // -128

         // unpack to short (and left-shift cr, cb by 8)
         __m128i yw  = _mm_unpacklo_epi8(y_bias, y_bytes);
         __m128i crw = _mm_unpacklo_epi8(_mm_setzero_si128(), cr_biased);
         __m128i cbw = _mm_unpacklo_epi8(_mm_setzero_si128(), cb_biased);

         // color transform
         __m128i yws = _mm_srli_epi16(yw, 4);
         __m128i cr0 = _mm_mulhi_epi16(cr_const0, crw);
         __m128i cb0 = _mm_mulhi_epi16(cb_const0, cbw);
         __m128i cb1 = _mm_mulhi_epi16(cbw, cb_const1);
         __m128i cr1 = _mm_mulhi_epi16(crw, cr_const1);
         __m128i rws = _mm_add_epi16(cr0, yws);
         __m128i gwt = _mm_add_epi16(cb0, yws);
         __m128i bws = _mm_add_epi16(yws, cb1);
         __m128i gws = _mm_add_epi16(gwt, cr1);

         // descale
         __m128i rw = _mm_srai_epi16(rws, 4);
         __m128i bw = _mm_srai_epi16(bws, 4);
         __m128i gw = _mm_srai_epi16(gws, 4);

         // back to byte, set up for transpose
         __m128i brb = _mm_packus_epi16(rw, bw);
         __m128i gxb = _mm_packus_epi16(gw, xw);

         // transpose to interleave channels
         __m128i t0 = _mm_unpacklo_epi8(brb, gxb);
         __m128i t1 = _mm_unpackhi_epi8(brb, gxb);
         __m128i o0 = _mm_unpacklo_epi16(t0, t1);
         __m128i o1 = _mm_unpackhi_epi16(t0, t1);

         // store
         _mm_storeu_si128((__m128i *) (out + 0), o0);
         _mm_storeu_si128((__m128i *) (out + 16), o1);
         // 8 pixels of 4 bytes each were written
         out += 32;
      }
   }
#endif

#ifdef STBI_NEON
   // in this version, step=3 support would be easy to add. but is there demand?
   if (step == 4) {
      // this is a fairly straightforward implementation and not super-optimized.
      uint8x8_t signflip = vdup_n_u8(0x80);
      int16x8_t cr_const0 = vdupq_n_s16(   (short) ( 1.40200f*4096.0f+0.5f));
      int16x8_t cr_const1 = vdupq_n_s16( - (short) ( 0.71414f*4096.0f+0.5f));
      int16x8_t cb_const0 = vdupq_n_s16( - (short) ( 0.34414f*4096.0f+0.5f));
      int16x8_t cb_const1 = vdupq_n_s16(   (short) ( 1.77200f*4096.0f+0.5f));

      for (; i+7 < count; i += 8) {
         // load
         uint8x8_t y_bytes  = vld1_u8(y + i);
         uint8x8_t cr_bytes = vld1_u8(pcr + i);
         uint8x8_t cb_bytes = vld1_u8(pcb + i);
         int8x8_t cr_biased = vreinterpret_s8_u8(vsub_u8(cr_bytes, signflip));
         int8x8_t cb_biased = vreinterpret_s8_u8(vsub_u8(cb_bytes, signflip));

         // expand to s16
         int16x8_t yws = vreinterpretq_s16_u16(vshll_n_u8(y_bytes, 4));
         int16x8_t crw = vshll_n_s8(cr_biased, 7);
         int16x8_t cbw = vshll_n_s8(cb_biased, 7);

         // color transform
         int16x8_t cr0 = vqdmulhq_s16(crw, cr_const0);
         int16x8_t cb0 = vqdmulhq_s16(cbw, cb_const0);
         int16x8_t cr1 = vqdmulhq_s16(crw, cr_const1);
         int16x8_t cb1 = vqdmulhq_s16(cbw, cb_const1);
         int16x8_t rws = vaddq_s16(yws, cr0);
         int16x8_t gws = vaddq_s16(vaddq_s16(yws, cb0), cr1);
         int16x8_t bws = vaddq_s16(yws, cb1);

         // undo scaling, round, convert to byte
         uint8x8x4_t o;
         o.val[0] = vqrshrun_n_s16(rws, 4);
         o.val[1] = vqrshrun_n_s16(gws, 4);
         o.val[2] = vqrshrun_n_s16(bws, 4);
         o.val[3] = vdup_n_u8(255);

         // store, interleaving r/g/b/a
         vst4_u8(out, o);
         out += 8*4;
      }
   }
#endif

   // scalar tail: i and out continue from where the vector loop stopped
   for (; i < count; ++i) {
      int y_fixed = (y[i] << 20) + (1<<19); // rounding
      int r,g,b;
      int cr = pcr[i] - 128;
      int cb = pcb[i] - 128;
      r = y_fixed + cr* stbi__float2fixed(1.40200f);
      g = y_fixed + cr*-stbi__float2fixed(0.71414f) + ((cb*-stbi__float2fixed(0.34414f)) & 0xffff0000);
      b = y_fixed                                   +   cb* stbi__float2fixed(1.77200f);
      r >>= 20;
      g >>= 20;
      b >>= 20;
      // clamp to 0..255; the unsigned compare catches both directions at once
      if ((unsigned) r > 255) { if (r < 0) r = 0; else r = 255; }
      if ((unsigned) g > 255) { if (g < 0) g = 0; else g = 255; }
      if ((unsigned) b > 255) { if (b < 0) b = 0; else b = 255; }
      out[0] = (stbi_uc)r;
      out[1] = (stbi_uc)g;
      out[2] = (stbi_uc)b;
      out[3] = 255;
      out += step;
   }
}
#endif
3478 
3479 // set up the kernels
stbi__setup_jpeg(stbi__jpeg * j)3480 static void stbi__setup_jpeg(stbi__jpeg *j)
3481 {
3482    j->idct_block_kernel = stbi__idct_block;
3483    j->YCbCr_to_RGB_kernel = stbi__YCbCr_to_RGB_row;
3484    j->resample_row_hv_2_kernel = stbi__resample_row_hv_2;
3485 
3486 #ifdef STBI_SSE2
3487    if (stbi__sse2_available()) {
3488       j->idct_block_kernel = stbi__idct_simd;
3489       j->YCbCr_to_RGB_kernel = stbi__YCbCr_to_RGB_simd;
3490       j->resample_row_hv_2_kernel = stbi__resample_row_hv_2_simd;
3491    }
3492 #endif
3493 
3494 #ifdef STBI_NEON
3495    j->idct_block_kernel = stbi__idct_simd;
3496    j->YCbCr_to_RGB_kernel = stbi__YCbCr_to_RGB_simd;
3497    j->resample_row_hv_2_kernel = stbi__resample_row_hv_2_simd;
3498 #endif
3499 }
3500 
3501 // clean up the temporary component buffers
stbi__cleanup_jpeg(stbi__jpeg * j)3502 static void stbi__cleanup_jpeg(stbi__jpeg *j)
3503 {
3504    stbi__free_jpeg_components(j, j->s->img_n, 0);
3505 }
3506 
3507 typedef struct
3508 {
3509    resample_row_func resample;
3510    stbi_uc *line0,*line1;
3511    int hs,vs;   // expansion factor in each axis
3512    int w_lores; // horizontal pixels pre-expansion
3513    int ystep;   // how far through vertical expansion we are
3514    int ypos;    // which pre-expansion row we're on
3515 } stbi__resample;
3516 
3517 // fast 0..255 * 0..255 => 0..255 rounded multiplication
stbi__blinn_8x8(stbi_uc x,stbi_uc y)3518 static stbi_uc stbi__blinn_8x8(stbi_uc x, stbi_uc y)
3519 {
3520    unsigned int t = x*y + 128;
3521    return (stbi_uc) ((t + (t >>8)) >> 8);
3522 }
3523 
// Decode the JPEG attached to z and convert it to interleaved 8-bit pixels.
//
//   out_x, out_y  receive the image width and height (written unconditionally)
//   comp          if non-NULL, receives 3 when the file has three or more
//                 components and 1 otherwise, independent of req_comp
//   req_comp      channels per output pixel (1..4), or 0 to use the same
//                 3-or-1 count that is reported through comp
//
// Returns the pixel buffer obtained from stbi__malloc_mad3(), or NULL on
// failure. Once decoding has started, the temporary component buffers are
// released through stbi__cleanup_jpeg() on every exit path.
static stbi_uc *load_jpeg_image(stbi__jpeg *z, int *out_x, int *out_y, int *comp, int req_comp)
{
   int n, decode_n, is_rgb;
   z->s->img_n = 0; // make stbi__cleanup_jpeg safe

   // validate req_comp
   if (req_comp < 0 || req_comp > 4) return stbi__errpuc("bad req_comp", "Internal error");

   // load a jpeg image from whichever source, but leave in YCbCr format
   if (!stbi__decode_jpeg_image(z)) { stbi__cleanup_jpeg(z); return NULL; }

   // determine actual number of components to generate
   n = req_comp ? req_comp : z->s->img_n >= 3 ? 3 : 1;

   // three-component data is treated as already-RGB when the frame header
   // flagged it (z->rgb == 3) or an APP14 transform of 0 is set without JFIF
   is_rgb = z->s->img_n == 3 && (z->rgb == 3 || (z->app14_color_transform == 0 && !z->jfif));

   // grey output from a YCbCr file only needs the first (luma) component
   if (z->s->img_n == 3 && n < 3 && !is_rgb)
      decode_n = 1;
   else
      decode_n = z->s->img_n;

   // resample and color-convert
   {
      int k;
      unsigned int i,j;
      stbi_uc *output;
      stbi_uc *coutput[4];

      stbi__resample res_comp[4];

      // set up one resampler per component that will be read
      for (k=0; k < decode_n; ++k) {
         stbi__resample *r = &res_comp[k];

         // allocate line buffer big enough for upsampling off the edges
         // with upsample factor of 4
         z->img_comp[k].linebuf = (stbi_uc *) stbi__malloc(z->s->img_x + 3);
         if (!z->img_comp[k].linebuf) { stbi__cleanup_jpeg(z); return stbi__errpuc("outofmem", "Out of memory"); }

         r->hs      = z->img_h_max / z->img_comp[k].h;
         r->vs      = z->img_v_max / z->img_comp[k].v;
         r->ystep   = r->vs >> 1;
         r->w_lores = (z->s->img_x + r->hs-1) / r->hs;
         r->ypos    = 0;
         r->line0   = r->line1 = z->img_comp[k].data;

         // pick the kernel matching the expansion factors; anything without
         // a dedicated kernel falls back to nearest-neighbour
         if      (r->hs == 1 && r->vs == 1) r->resample = resample_row_1;
         else if (r->hs == 1 && r->vs == 2) r->resample = stbi__resample_row_v_2;
         else if (r->hs == 2 && r->vs == 1) r->resample = stbi__resample_row_h_2;
         else if (r->hs == 2 && r->vs == 2) r->resample = z->resample_row_hv_2_kernel;
         else                               r->resample = stbi__resample_row_generic;
      }

      // can't error after this so, this is safe
      // NOTE(review): the trailing 1 looks like one byte of padding beyond
      // n*img_x*img_y; the loops below store out[3] (or out[1]) even when n
      // is 3 (or 1), which runs one byte past the last pixel - confirm
      // against stbi__malloc_mad3
      output = (stbi_uc *) stbi__malloc_mad3(n, z->s->img_x, z->s->img_y, 1);
      if (!output) { stbi__cleanup_jpeg(z); return stbi__errpuc("outofmem", "Out of memory"); }

      // now go ahead and resample
      for (j=0; j < z->s->img_y; ++j) {
         stbi_uc *out = output + n * z->s->img_x * j;
         // produce one full-resolution row per component in coutput[]
         for (k=0; k < decode_n; ++k) {
            stbi__resample *r = &res_comp[k];
            // in the second half of a vertical step line1 is the nearer row
            int y_bot = r->ystep >= (r->vs >> 1);
            coutput[k] = r->resample(z->img_comp[k].linebuf,
                                     y_bot ? r->line1 : r->line0,
                                     y_bot ? r->line0 : r->line1,
                                     r->w_lores, r->hs);
            // after vs output rows, move on to the next source row; line1
            // stops advancing once the last source row has been reached
            if (++r->ystep >= r->vs) {
               r->ystep = 0;
               r->line0 = r->line1;
               if (++r->ypos < z->img_comp[k].y)
                  r->line1 += z->img_comp[k].w2;
            }
         }
         if (n >= 3) {
            // color output (3 or 4 channels per pixel)
            stbi_uc *y = coutput[0];
            if (z->s->img_n == 3) {
               if (is_rgb) {
                  // components are already r, g, b: copy through
                  for (i=0; i < z->s->img_x; ++i) {
                     out[0] = y[i];
                     out[1] = coutput[1][i];
                     out[2] = coutput[2][i];
                     out[3] = 255;
                     out += n;
                  }
               } else {
                  z->YCbCr_to_RGB_kernel(out, y, coutput[1], coutput[2], z->s->img_x, n);
               }
            } else if (z->s->img_n == 4) {
               if (z->app14_color_transform == 0) { // CMYK
                  // scale each of the first three channels by the fourth
                  for (i=0; i < z->s->img_x; ++i) {
                     stbi_uc m = coutput[3][i];
                     out[0] = stbi__blinn_8x8(coutput[0][i], m);
                     out[1] = stbi__blinn_8x8(coutput[1][i], m);
                     out[2] = stbi__blinn_8x8(coutput[2][i], m);
                     out[3] = 255;
                     out += n;
                  }
               } else if (z->app14_color_transform == 2) { // YCCK
                  // convert to RGB first, then invert and scale by the
                  // fourth channel in place
                  z->YCbCr_to_RGB_kernel(out, y, coutput[1], coutput[2], z->s->img_x, n);
                  for (i=0; i < z->s->img_x; ++i) {
                     stbi_uc m = coutput[3][i];
                     out[0] = stbi__blinn_8x8(255 - out[0], m);
                     out[1] = stbi__blinn_8x8(255 - out[1], m);
                     out[2] = stbi__blinn_8x8(255 - out[2], m);
                     out += n;
                  }
               } else { // YCbCr + alpha?  Ignore the fourth channel for now
                  z->YCbCr_to_RGB_kernel(out, y, coutput[1], coutput[2], z->s->img_x, n);
               }
            } else
               // any other component count: replicate the first component
               for (i=0; i < z->s->img_x; ++i) {
                  out[0] = out[1] = out[2] = y[i];
                  out[3] = 255; // not used if n==3
                  out += n;
               }
         } else {
            // grey output (1 or 2 channels per pixel)
            if (is_rgb) {
               if (n == 1)
                  for (i=0; i < z->s->img_x; ++i)
                     *out++ = stbi__compute_y(coutput[0][i], coutput[1][i], coutput[2][i]);
               else {
                  for (i=0; i < z->s->img_x; ++i, out += 2) {
                     out[0] = stbi__compute_y(coutput[0][i], coutput[1][i], coutput[2][i]);
                     out[1] = 255;
                  }
               }
            } else if (z->s->img_n == 4 && z->app14_color_transform == 0) {
               // CMYK: build r, g, b as in the color path, then reduce to luma
               for (i=0; i < z->s->img_x; ++i) {
                  stbi_uc m = coutput[3][i];
                  stbi_uc r = stbi__blinn_8x8(coutput[0][i], m);
                  stbi_uc g = stbi__blinn_8x8(coutput[1][i], m);
                  stbi_uc b = stbi__blinn_8x8(coutput[2][i], m);
                  out[0] = stbi__compute_y(r, g, b);
                  out[1] = 255;
                  out += n;
               }
            } else if (z->s->img_n == 4 && z->app14_color_transform == 2) {
               // YCCK: inverted first component scaled by the fourth
               for (i=0; i < z->s->img_x; ++i) {
                  out[0] = stbi__blinn_8x8(255 - coutput[0][i], coutput[3][i]);
                  out[1] = 255;
                  out += n;
               }
            } else {
               // first component is used directly as grey
               stbi_uc *y = coutput[0];
               if (n == 1)
                  for (i=0; i < z->s->img_x; ++i) out[i] = y[i];
               else
                  for (i=0; i < z->s->img_x; ++i) *out++ = y[i], *out++ = 255;
            }
         }
      }
      stbi__cleanup_jpeg(z);
      *out_x = z->s->img_x;
      *out_y = z->s->img_y;
      if (comp) *comp = z->s->img_n >= 3 ? 3 : 1; // report original components, not output
      return output;
   }
}
3682 
// Load a JPEG from context s.
//
//   x, y      receive the image dimensions
//   comp      if non-NULL, receives the component count reported by
//             load_jpeg_image()
//   req_comp  requested channels per pixel (0..4), forwarded unchanged
//   ri        unused by the JPEG loader
//
// Returns the decoded pixel buffer, or NULL on failure, including failure to
// allocate the temporary decoder state.
static void *stbi__jpeg_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri)
{
   unsigned char* result;
   stbi__jpeg* j = (stbi__jpeg*) stbi__malloc(sizeof(stbi__jpeg));
   STBI_NOTUSED(ri);
   // the decoder state is heap allocated; fail cleanly rather than
   // dereferencing NULL when the allocation does not succeed
   if (!j) return stbi__errpuc("outofmem", "Out of memory");
   j->s = s;
   stbi__setup_jpeg(j);
   result = load_jpeg_image(j, x,y,comp,req_comp);
   STBI_FREE(j);
   return result;
}
3694 
// Report whether the data in s starts like a JPEG: only the SOI marker is
// checked (STBI__SCAN_type). The context is rewound afterwards so another
// format test or the real loader can start from the beginning.
//
// Returns nonzero if the stream looks like a JPEG, 0 otherwise, including
// when the temporary decoder state cannot be allocated.
static int stbi__jpeg_test(stbi__context *s)
{
   int r;
   stbi__jpeg* j = (stbi__jpeg*)stbi__malloc(sizeof(stbi__jpeg));
   // nothing has been read yet, so no rewind is needed on this path
   if (!j) return stbi__err("outofmem", "Out of memory");
   j->s = s;
   stbi__setup_jpeg(j);
   r = stbi__decode_jpeg_header(j, STBI__SCAN_type);
   stbi__rewind(s);
   STBI_FREE(j);
   return r;
}
3706 
// Parse just the JPEG header (STBI__SCAN_header) and report the image
// properties. Each of x, y and comp may be NULL; comp receives 3 for files
// with three or more components and 1 otherwise.
//
// Returns 1 on success. On failure the context is rewound and 0 is returned.
static int stbi__jpeg_info_raw(stbi__jpeg *j, int *x, int *y, int *comp)
{
   int header_ok = stbi__decode_jpeg_header(j, STBI__SCAN_header);
   if (header_ok) {
      if (x) *x = j->s->img_x;
      if (y) *y = j->s->img_y;
      if (comp) *comp = j->s->img_n >= 3 ? 3 : 1;
      return 1;
   }
   stbi__rewind( j->s );
   return 0;
}
3718 
// Query the dimensions and component count of the JPEG in s without
// decoding pixel data. x, y and comp are forwarded to stbi__jpeg_info_raw()
// and may each be NULL.
//
// Returns 1 on success, 0 on failure, including failure to allocate the
// temporary decoder state.
static int stbi__jpeg_info(stbi__context *s, int *x, int *y, int *comp)
{
   int result;
   stbi__jpeg* j = (stbi__jpeg*) (stbi__malloc(sizeof(stbi__jpeg)));
   // fail cleanly instead of dereferencing NULL when allocation fails
   if (!j) return stbi__err("outofmem", "Out of memory");
   j->s = s;
   result = stbi__jpeg_info_raw(j, x, y, comp);
   STBI_FREE(j);
   return result;
}
3728 #endif
3729 
3730 // public domain zlib decode    v0.2  Sean Barrett 2006-11-18
3731 //    simple implementation
3732 //      - all input must be provided in an upfront buffer
3733 //      - all output is written to a single output buffer (can malloc/realloc)
3734 //    performance
3735 //      - fast huffman
3736 
3737 #ifndef STBI_NO_ZLIB
3738 
// fast-way is faster to check than jpeg huffman, but slow way is slower
// STBI__ZFAST_BITS is the index width of the direct lookup table: codes of
// at most this many bits are resolved with a single table read.
#define STBI__ZFAST_BITS  9 // accelerate all cases in default tables
// mask selecting the low STBI__ZFAST_BITS bits of the bit buffer
#define STBI__ZFAST_MASK  ((1 << STBI__ZFAST_BITS) - 1)
3742 
// zlib-style huffman encoding
// (jpegs packs from left, zlib from right, so can't share code)
//
// One decoding table, filled in by stbi__zbuild_huffman().
typedef struct
{
   // direct lookup indexed by the next STBI__ZFAST_BITS input bits; an entry
   // is (code length << 9) | symbol, or 0 when the slow path must be used
   stbi__uint16 fast[1 << STBI__ZFAST_BITS];
   // first code of each length (index = length in bits)
   stbi__uint16 firstcode[16];
   // one past the last code of each length, shifted up to 16 bits;
   // entry 16 is a sentinel (0x10000) that ends the slow-path search
   int maxcode[17];
   // index into size[]/value[] of the first symbol of each code length
   stbi__uint16 firstsymbol[16];
   // code length and symbol number, stored in code order
   stbi_uc  size[288];
   stbi__uint16 value[288];
} stbi__zhuffman;
3754 
stbi__bitreverse16(int n)3755 stbi_inline static int stbi__bitreverse16(int n)
3756 {
3757   n = ((n & 0xAAAA) >>  1) | ((n & 0x5555) << 1);
3758   n = ((n & 0xCCCC) >>  2) | ((n & 0x3333) << 2);
3759   n = ((n & 0xF0F0) >>  4) | ((n & 0x0F0F) << 4);
3760   n = ((n & 0xFF00) >>  8) | ((n & 0x00FF) << 8);
3761   return n;
3762 }
3763 
stbi__bit_reverse(int v,int bits)3764 stbi_inline static int stbi__bit_reverse(int v, int bits)
3765 {
3766    STBI_ASSERT(bits <= 16);
3767    // to bit reverse n bits, reverse 16 and shift
3768    // e.g. 11 bits, bit reverse and shift away 5
3769    return stbi__bitreverse16(v) >> (16-bits);
3770 }
3771 
// Build the decoding tables in z from a list of code lengths.
//
//   sizelist  code length of each symbol, 0 meaning "symbol not used"
//   num       number of entries in sizelist
//
// Codes are assigned the way the DEFLATE spec describes. Returns 1 on
// success, or 0 (via stbi__err) when the lengths do not form a valid code.
static int stbi__zbuild_huffman(stbi__zhuffman *z, const stbi_uc *sizelist, int num)
{
   int sym, len;
   int assigned = 0;   // symbols placed so far, in code order
   int code = 0;
   int next_code[16], count[17];

   memset(count, 0, sizeof(count));
   memset(z->fast, 0, sizeof(z->fast));

   // histogram of code lengths; unused symbols do not count
   for (sym = 0; sym < num; ++sym)
      count[sizelist[sym]] += 1;
   count[0] = 0;

   // no length may have more codes than that many bits can express
   for (len = 1; len < 16; ++len) {
      if (count[len] > (1 << len))
         return stbi__err("bad sizes", "Corrupt PNG");
   }

   // first code, first symbol slot and upper bound for every length
   for (len = 1; len < 16; ++len) {
      next_code[len]      = code;
      z->firstcode[len]   = (stbi__uint16) code;
      z->firstsymbol[len] = (stbi__uint16) assigned;
      code += count[len];
      if (count[len] && code-1 >= (1 << len))
         return stbi__err("bad codelengths","Corrupt PNG");
      z->maxcode[len] = code << (16-len); // preshift for inner loop
      code <<= 1;
      assigned += count[len];
   }
   z->maxcode[16] = 0x10000; // sentinel

   // place every used symbol and populate the fast table
   for (sym = 0; sym < num; ++sym) {
      int s = sizelist[sym];
      int slot, idx;
      stbi__uint16 packed;
      if (s == 0) continue;

      slot   = next_code[s] - z->firstcode[s] + z->firstsymbol[s];
      packed = (stbi__uint16) ((s << 9) | sym);
      z->size [slot] = (stbi_uc     ) s;
      z->value[slot] = (stbi__uint16) sym;
      if (s <= STBI__ZFAST_BITS) {
         // every table index whose low s bits equal the (bit-reversed)
         // code resolves to this symbol
         for (idx = stbi__bit_reverse(next_code[s],s); idx < (1 << STBI__ZFAST_BITS); idx += (1 << s))
            z->fast[idx] = packed;
      }
      ++next_code[s];
   }
   return 1;
}
3818 
3819 // zlib-from-memory implementation for PNG reading
3820 //    because PNG allows splitting the zlib stream arbitrarily,
3821 //    and it's annoying structurally to have PNG call ZLIB call PNG,
3822 //    we require PNG read all the IDATs and combine them into a single
3823 //    memory buffer
3824 
3825 typedef struct
3826 {
3827    stbi_uc *zbuffer, *zbuffer_end;
3828    int num_bits;
3829    stbi__uint32 code_buffer;
3830 
3831    char *zout;
3832    char *zout_start;
3833    char *zout_end;
3834    int   z_expandable;
3835 
3836    stbi__zhuffman z_length, z_distance;
3837 } stbi__zbuf;
3838 
stbi__zget8(stbi__zbuf * z)3839 stbi_inline static stbi_uc stbi__zget8(stbi__zbuf *z)
3840 {
3841    if (z->zbuffer >= z->zbuffer_end) return 0;
3842    return *z->zbuffer++;
3843 }
3844 
// Top up the bit reservoir one input byte at a time until it holds
// more than 24 bits. At least one byte is always pulled in.
static void stbi__fill_bits(stbi__zbuf *z)
{
   for (;;) {
      // no stray bits may sit above the valid region of the reservoir
      STBI_ASSERT(z->code_buffer < (1U << z->num_bits));
      z->code_buffer |= ((unsigned int) stbi__zget8(z)) << z->num_bits;
      z->num_bits += 8;
      if (z->num_bits > 24)
         break;
   }
}
3853 
stbi__zreceive(stbi__zbuf * z,int n)3854 stbi_inline static unsigned int stbi__zreceive(stbi__zbuf *z, int n)
3855 {
3856    unsigned int k;
3857    if (z->num_bits < n) stbi__fill_bits(z);
3858    k = z->code_buffer & ((1 << n) - 1);
3859    z->code_buffer >>= n;
3860    z->num_bits -= n;
3861    return k;
3862 }
3863 
// Decode one Huffman symbol whose code is longer than STBI__ZFAST_BITS
// (i.e. the fast lookup table had no entry for the pending bits).
//
//   a - bit stream state; on success the code's bits are consumed from it
//   z - Huffman table to decode against
//
// Returns the decoded symbol value, or -1 if the pending bits do not form a
// valid code in this table. Corrupt input can describe an incomplete code set,
// so the computed symbol index is validated instead of merely asserted:
// otherwise a bad stream could index size[]/value[] out of bounds.
static int stbi__zhuffman_decode_slowpath(stbi__zbuf *a, stbi__zhuffman *z)
{
   int b,s,k;
   // not resolved by fast table, so compute it the slow way
   // use jpeg approach, which requires MSbits at top
   k = stbi__bit_reverse(a->code_buffer, 16);
   for (s=STBI__ZFAST_BITS+1; ; ++s)
      if (k < z->maxcode[s])
         break;
   if (s >= 16) return -1; // invalid code!
   // code size is s, so:
   b = (k >> (16-s)) - z->firstcode[s] + z->firstsymbol[s];
   // some data was corrupt somewhere: index falls outside the symbol tables
   if (b < 0 || b >= (int) (sizeof(z->size) / sizeof(z->size[0]))) return -1;
   // the slot must actually hold a code of this length; report failure rather than assert
   if (z->size[b] != s) return -1;
   a->code_buffer >>= s;
   a->num_bits -= s;
   return z->value[b];
}
3881 
stbi__zhuffman_decode(stbi__zbuf * a,stbi__zhuffman * z)3882 stbi_inline static int stbi__zhuffman_decode(stbi__zbuf *a, stbi__zhuffman *z)
3883 {
3884    int b,s;
3885    if (a->num_bits < 16) stbi__fill_bits(a);
3886    b = z->fast[a->code_buffer & STBI__ZFAST_MASK];
3887    if (b) {
3888       s = b >> 9;
3889       a->code_buffer >>= s;
3890       a->num_bits -= s;
3891       return b & 511;
3892    }
3893    return stbi__zhuffman_decode_slowpath(a, z);
3894 }
3895 
// Grow the output buffer so that at least n more bytes fit after zout.
//
//   z    - decoder state; zout_start/zout/zout_end are updated on success
//   zout - the caller's current write position (stored into z->zout first)
//   n    - number of additional bytes that must fit
//
// The capacity is doubled until it is large enough. Returns 1 on success.
// Returns 0 (with the failure reason set) if the buffer is not expandable,
// if the required size cannot be represented in an int, or if realloc fails.
// On realloc failure the old buffer is left untouched in z->zout_start.
static int stbi__zexpand(stbi__zbuf *z, char *zout, int n)  // need to make room for n bytes
{
   char *q;
   int cur, limit, old_limit;
   z->zout = zout;
   if (!z->z_expandable) return stbi__err("output buffer limit","Corrupt PNG");
   cur   = (int) (z->zout     - z->zout_start);
   limit = old_limit = (int) (z->zout_end - z->zout_start);
   // cur + n must not overflow a signed int
   if (n < 0 || cur > INT_MAX - n) return stbi__err("outofmem", "Out of memory");
   // a zero-sized buffer would never grow by doubling (infinite loop)
   if (limit < 1) limit = 1;
   while (cur + n > limit) {
      // doubling again would overflow a signed int
      if (limit > INT_MAX / 2) return stbi__err("outofmem", "Out of memory");
      limit *= 2;
   }
   q = (char *) STBI_REALLOC_SIZED(z->zout_start, old_limit, limit);
   STBI_NOTUSED(old_limit);
   if (q == NULL) return stbi__err("outofmem", "Out of memory");
   z->zout_start = q;
   z->zout       = q + cur;
   z->zout_end   = q + limit;
   return 1;
}
3914 
// Lookup tables used by stbi__parse_huffman_block to turn a decoded
// length/distance symbol into an actual match length / back-distance.

// Base match length, indexed by (length symbol - 257).
// The last two entries are zero padding.
static int stbi__zlength_base[31] = {
   3,4,5,6,7,8,9,10,11,13,
   15,17,19,23,27,31,35,43,51,59,
   67,83,99,115,131,163,195,227,258,0,0 };

// Number of extra bits to read and add to the base length, same indexing.
static int stbi__zlength_extra[31]=
{ 0,0,0,0,0,0,0,0,1,1,1,1,2,2,2,2,3,3,3,3,4,4,4,4,5,5,5,5,0,0,0 };

// Base back-distance, indexed by distance symbol.
// The last two entries are zero padding.
static int stbi__zdist_base[32] = { 1,2,3,4,5,7,9,13,17,25,33,49,65,97,129,193,
257,385,513,769,1025,1537,2049,3073,4097,6145,8193,12289,16385,24577,0,0};

// Number of extra bits to read and add to the base distance, same indexing.
// Only 30 initializers are given; the remaining two entries are implicitly 0.
static int stbi__zdist_extra[32] =
{ 0,0,0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,8,8,9,9,10,10,11,11,12,12,13,13};
3928 
// Decode one Huffman-compressed DEFLATE block using the tables already
// stored in a->z_length and a->z_distance, appending the output at a->zout.
//
// Symbols below 256 are literals; 256 ends the block; larger symbols start a
// (length, distance) back-reference into the output produced so far.
//
// Returns 1 when the end-of-block symbol is reached (a->zout is updated),
// or 0 with the failure reason set on corrupt data or when the output
// buffer cannot be grown.
//
// Length symbols 286/287 and distance symbols 30/31 can be produced by the
// Huffman tables but are not valid in compressed data; they are rejected
// here. Without that check their zero table entries yield a distance of 0,
// which would copy not-yet-written output bytes.
static int stbi__parse_huffman_block(stbi__zbuf *a)
{
   char *zout = a->zout;
   for(;;) {
      int z = stbi__zhuffman_decode(a, &a->z_length);
      if (z < 256) {
         if (z < 0) return stbi__err("bad huffman code","Corrupt PNG"); // error in huffman codes
         if (zout >= a->zout_end) {
            if (!stbi__zexpand(a, zout, 1)) return 0;
            zout = a->zout;
         }
         *zout++ = (char) z;
      } else {
         stbi_uc *p;
         int len,dist;
         if (z == 256) {
            a->zout = zout;
            return 1;
         }
         // length codes 286 and 287 must not appear in compressed data
         if (z >= 286) return stbi__err("bad huffman code","Corrupt PNG");
         z -= 257;
         len = stbi__zlength_base[z];
         if (stbi__zlength_extra[z]) len += stbi__zreceive(a, stbi__zlength_extra[z]);
         z = stbi__zhuffman_decode(a, &a->z_distance);
         // distance codes 30 and 31 must not appear in compressed data
         if (z < 0 || z >= 30) return stbi__err("bad huffman code","Corrupt PNG");
         dist = stbi__zdist_base[z];
         if (stbi__zdist_extra[z]) dist += stbi__zreceive(a, stbi__zdist_extra[z]);
         if (zout - a->zout_start < dist) return stbi__err("bad dist","Corrupt PNG");
         // compare sizes rather than forming a pointer past the end of the buffer
         if (len > a->zout_end - zout) {
            if (!stbi__zexpand(a, zout, len)) return 0;
            zout = a->zout;
         }
         p = (stbi_uc *) (zout - dist);
         if (dist == 1) { // run of one byte; common in images.
            stbi_uc v = *p;
            if (len) { do *zout++ = v; while (--len); }
         } else {
            // byte-by-byte on purpose: source and destination may overlap
            if (len) { do *zout++ = *p++; while (--len); }
         }
      }
   }
}
3970 
// Read the header of a dynamic-Huffman block and build a->z_length and
// a->z_distance from it.
//
// The header gives the symbol counts (hlit, hdist), then a small Huffman code
// (the "code length code") that is itself used to decode the run-length
// encoded list of code lengths for both tables.
//
// Returns 1 on success, 0 with the failure reason set on corrupt data.
static int stbi__compute_huffman_codes(stbi__zbuf *a)
{
   static stbi_uc length_dezigzag[19] = { 16,17,18,0,8,7,9,6,10,5,11,4,12,3,13,2,14,1,15 };
   stbi__zhuffman z_codelength;
   stbi_uc lencodes[286+32+137];//padding for maximum single op
   stbi_uc codelength_sizes[19];
   int i,n;

   int hlit  = stbi__zreceive(a,5) + 257;
   int hdist = stbi__zreceive(a,5) + 1;
   int hclen = stbi__zreceive(a,4) + 4;
   int ntot  = hlit + hdist;

   // lengths of the code-length code arrive in a fixed permuted order
   memset(codelength_sizes, 0, sizeof(codelength_sizes));
   for (i=0; i < hclen; ++i)
      codelength_sizes[length_dezigzag[i]] = (stbi_uc) stbi__zreceive(a,3);
   if (!stbi__zbuild_huffman(&z_codelength, codelength_sizes, 19)) return 0;

   n = 0;
   while (n < ntot) {
      int run;
      stbi_uc fill;
      int sym = stbi__zhuffman_decode(a, &z_codelength);
      if (sym < 0 || sym >= 19) return stbi__err("bad codelengths", "Corrupt PNG");

      // 0..15: a literal code length
      if (sym < 16) {
         lencodes[n++] = (stbi_uc) sym;
         continue;
      }

      // 16..18: a run of repeated lengths
      switch (sym) {
         case 16: // repeat the previous length 3..6 times
            run = stbi__zreceive(a,2)+3;
            if (n == 0) return stbi__err("bad codelengths", "Corrupt PNG");
            fill = lencodes[n-1];
            break;
         case 17: // 3..10 zeros
            run = stbi__zreceive(a,3)+3;
            fill = 0;
            break;
         default: // 11..138 zeros
            STBI_ASSERT(sym == 18);
            run = stbi__zreceive(a,7)+11;
            fill = 0;
            break;
      }
      if (ntot - n < run) return stbi__err("bad codelengths", "Corrupt PNG");
      memset(lencodes+n, fill, run);
      n += run;
   }
   if (n != ntot) return stbi__err("bad codelengths","Corrupt PNG");
   if (!stbi__zbuild_huffman(&a->z_length, lencodes, hlit)) return 0;
   if (!stbi__zbuild_huffman(&a->z_distance, lencodes+hlit, hdist)) return 0;
   return 1;
}
4019 
// Copy one stored (uncompressed) DEFLATE block to the output.
//
// The block starts at the next byte boundary with a 4-byte header:
// LEN (16-bit little-endian) followed by NLEN, its one's complement.
// Some of those header bytes may already be sitting in the bit reservoir,
// so they are drained from there first and the rest read from the input.
//
// Returns 1 on success, 0 with the failure reason set if the header is
// inconsistent, the input is too short, or the output cannot be grown.
static int stbi__parse_uncompressed_block(stbi__zbuf *a)
{
   stbi_uc header[4];
   int len,nlen,k;
   if (a->num_bits & 7)
      stbi__zreceive(a, a->num_bits & 7); // discard
   // drain the bit-packed data into header
   k = 0;
   while (a->num_bits > 0) {
      header[k++] = (stbi_uc) (a->code_buffer & 255); // suppress MSVC run-time check
      a->code_buffer >>= 8;
      a->num_bits -= 8;
   }
   STBI_ASSERT(a->num_bits == 0);
   // now fill header the normal way
   while (k < 4)
      header[k++] = stbi__zget8(a);
   len  = header[1] * 256 + header[0];
   nlen = header[3] * 256 + header[2];
   if (nlen != (len ^ 0xffff)) return stbi__err("zlib corrupt","Corrupt PNG");
   // compare against the remaining sizes instead of forming ptr+len, which
   // would be undefined behavior when it lands past the end of the buffer
   if (len > a->zbuffer_end - a->zbuffer) return stbi__err("read past buffer","Corrupt PNG");
   if (len > a->zout_end - a->zout)
      if (!stbi__zexpand(a, a->zout, len)) return 0;
   memcpy(a->zout, a->zbuffer, len);
   a->zbuffer += len;
   a->zout += len;
   return 1;
}
4048 
// Read and validate the two-byte zlib stream header (CMF, FLG).
// Returns 1 if it is acceptable, 0 with the failure reason set otherwise.
static int stbi__parse_zlib_header(stbi__zbuf *a)
{
   int cmf    = stbi__zget8(a);
   int flg    = stbi__zget8(a);
   int method = cmf & 15;
   /* int cinfo = cmf >> 4; */

   // zlib spec: the 16-bit header value must be a multiple of 31
   if ((cmf*256+flg) % 31 != 0)
      return stbi__err("bad zlib header","Corrupt PNG");
   // preset dictionary not allowed in png
   if (flg & 32)
      return stbi__err("no preset dict","Corrupt PNG");
   // DEFLATE required for png
   if (method != 8)
      return stbi__err("bad compression","Corrupt PNG");
   // window = 1 << (8 + cinfo)... but who cares, we fully buffer output
   return 1;
}
4061 
// Code lengths for the fixed Huffman tables (DEFLATE block type 1).
// stbi__parse_zlib passes these to stbi__zbuild_huffman.

// Literal/length code lengths for symbols 0..287:
// 0..143 -> 8 bits, 144..255 -> 9 bits, 256..279 -> 7 bits, 280..287 -> 8 bits.
static const stbi_uc stbi__zdefault_length[288] =
{
   8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
   8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
   8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
   8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
   8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,
   9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,
   9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,
   9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,
   7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, 7,7,7,7,7,7,7,7,8,8,8,8,8,8,8,8
};
// Distance code lengths: all 32 symbols use 5 bits.
static const stbi_uc stbi__zdefault_distance[32] =
{
   5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5
};
4078 /*
4079 Init algorithm:
4080 {
4081    int i;   // use <= to match clearly with spec
4082    for (i=0; i <= 143; ++i)     stbi__zdefault_length[i]   = 8;
4083    for (   ; i <= 255; ++i)     stbi__zdefault_length[i]   = 9;
4084    for (   ; i <= 279; ++i)     stbi__zdefault_length[i]   = 7;
4085    for (   ; i <= 287; ++i)     stbi__zdefault_length[i]   = 8;
4086 
4087    for (i=0; i <=  31; ++i)     stbi__zdefault_distance[i] = 5;
4088 }
4089 */
4090 
// Inflate an entire stream into the output buffer described by a.
//
//   a            - decoder state with input and output pointers already set
//   parse_header - nonzero to expect and validate a zlib header first,
//                  zero for a raw DEFLATE stream
//
// Blocks are processed until one with the "final" bit set has been decoded.
// Returns 1 on success, 0 on failure. Every failure path sets a failure
// reason, including the reserved block type 3, which previously returned 0
// silently and left a stale reason for the caller to report.
static int stbi__parse_zlib(stbi__zbuf *a, int parse_header)
{
   int final, type;
   if (parse_header)
      if (!stbi__parse_zlib_header(a)) return 0;
   a->num_bits = 0;
   a->code_buffer = 0;
   do {
      final = stbi__zreceive(a,1);
      type = stbi__zreceive(a,2);
      if (type == 0) {
         // stored block
         if (!stbi__parse_uncompressed_block(a)) return 0;
      } else if (type == 3) {
         // reserved block type: not valid DEFLATE
         return stbi__err("bad block type","Corrupt PNG");
      } else {
         if (type == 1) {
            // use fixed code lengths
            if (!stbi__zbuild_huffman(&a->z_length  , stbi__zdefault_length  , 288)) return 0;
            if (!stbi__zbuild_huffman(&a->z_distance, stbi__zdefault_distance,  32)) return 0;
         } else {
            // type 2: code lengths are transmitted in the block header
            if (!stbi__compute_huffman_codes(a)) return 0;
         }
         if (!stbi__parse_huffman_block(a)) return 0;
      }
   } while (!final);
   return 1;
}
4118 
// Point the decoder at an output buffer of olen bytes starting at obuf,
// record whether that buffer may be grown (exp), then inflate the stream.
// Returns the result of stbi__parse_zlib.
static int stbi__do_zlib(stbi__zbuf *a, char *obuf, int olen, int exp, int parse_header)
{
   a->z_expandable = exp;
   a->zout_start   = obuf;
   a->zout_end     = obuf + olen;
   a->zout         = a->zout_start;

   return stbi__parse_zlib(a, parse_header);
}
4128 
stbi_zlib_decode_malloc_guesssize(const char * buffer,int len,int initial_size,int * outlen)4129 STBIDEF char *stbi_zlib_decode_malloc_guesssize(const char *buffer, int len, int initial_size, int *outlen)
4130 {
4131    stbi__zbuf a;
4132    char *p = (char *) stbi__malloc(initial_size);
4133    if (p == NULL) return NULL;
4134    a.zbuffer = (stbi_uc *) buffer;
4135    a.zbuffer_end = (stbi_uc *) buffer + len;
4136    if (stbi__do_zlib(&a, p, initial_size, 1, 1)) {
4137       if (outlen) *outlen = (int) (a.zout - a.zout_start);
4138       return a.zout_start;
4139    } else {
4140       STBI_FREE(a.zout_start);
4141       return NULL;
4142    }
4143 }
4144 
stbi_zlib_decode_malloc(char const * buffer,int len,int * outlen)4145 STBIDEF char *stbi_zlib_decode_malloc(char const *buffer, int len, int *outlen)
4146 {
4147    return stbi_zlib_decode_malloc_guesssize(buffer, len, 16384, outlen);
4148 }
4149 
stbi_zlib_decode_malloc_guesssize_headerflag(const char * buffer,int len,int initial_size,int * outlen,int parse_header)4150 STBIDEF char *stbi_zlib_decode_malloc_guesssize_headerflag(const char *buffer, int len, int initial_size, int *outlen, int parse_header)
4151 {
4152    stbi__zbuf a;
4153    char *p = (char *) stbi__malloc(initial_size);
4154    if (p == NULL) return NULL;
4155    a.zbuffer = (stbi_uc *) buffer;
4156    a.zbuffer_end = (stbi_uc *) buffer + len;
4157    if (stbi__do_zlib(&a, p, initial_size, 1, parse_header)) {
4158       if (outlen) *outlen = (int) (a.zout - a.zout_start);
4159       return a.zout_start;
4160    } else {
4161       STBI_FREE(a.zout_start);
4162       return NULL;
4163    }
4164 }
4165 
stbi_zlib_decode_buffer(char * obuffer,int olen,char const * ibuffer,int ilen)4166 STBIDEF int stbi_zlib_decode_buffer(char *obuffer, int olen, char const *ibuffer, int ilen)
4167 {
4168    stbi__zbuf a;
4169    a.zbuffer = (stbi_uc *) ibuffer;
4170    a.zbuffer_end = (stbi_uc *) ibuffer + ilen;
4171    if (stbi__do_zlib(&a, obuffer, olen, 0, 1))
4172       return (int) (a.zout - a.zout_start);
4173    else
4174       return -1;
4175 }
4176 
stbi_zlib_decode_noheader_malloc(char const * buffer,int len,int * outlen)4177 STBIDEF char *stbi_zlib_decode_noheader_malloc(char const *buffer, int len, int *outlen)
4178 {
4179    stbi__zbuf a;
4180    char *p = (char *) stbi__malloc(16384);
4181    if (p == NULL) return NULL;
4182    a.zbuffer = (stbi_uc *) buffer;
4183    a.zbuffer_end = (stbi_uc *) buffer+len;
4184    if (stbi__do_zlib(&a, p, 16384, 1, 0)) {
4185       if (outlen) *outlen = (int) (a.zout - a.zout_start);
4186       return a.zout_start;
4187    } else {
4188       STBI_FREE(a.zout_start);
4189       return NULL;
4190    }
4191 }
4192 
stbi_zlib_decode_noheader_buffer(char * obuffer,int olen,const char * ibuffer,int ilen)4193 STBIDEF int stbi_zlib_decode_noheader_buffer(char *obuffer, int olen, const char *ibuffer, int ilen)
4194 {
4195    stbi__zbuf a;
4196    a.zbuffer = (stbi_uc *) ibuffer;
4197    a.zbuffer_end = (stbi_uc *) ibuffer + ilen;
4198    if (stbi__do_zlib(&a, obuffer, olen, 0, 0))
4199       return (int) (a.zout - a.zout_start);
4200    else
4201       return -1;
4202 }
4203 #endif
4204 
4205 // public domain "baseline" PNG decoder   v0.10  Sean Barrett 2006-11-18
4206 //    simple implementation
//      - only 8-bit samples (as of v0.10; the code below also handles 1/2/4/16-bit depths)
4208 //      - no CRC checking
4209 //      - allocates lots of intermediate memory
4210 //        - avoids problem of streaming data between subsystems
4211 //        - avoids explicit window management
4212 //    performance
4213 //      - uses stb_zlib, a PD zlib implementation with fast huffman decoding
4214 
4215 #ifndef STBI_NO_PNG
// Header of one PNG chunk as read by stbi__get_chunk_header.
typedef struct
{
   stbi__uint32 length; // first 32-bit big-endian field of the chunk header
   stbi__uint32 type;   // second 32-bit big-endian field of the chunk header
} stbi__pngchunk;
4221 
stbi__get_chunk_header(stbi__context * s)4222 static stbi__pngchunk stbi__get_chunk_header(stbi__context *s)
4223 {
4224    stbi__pngchunk c;
4225    c.length = stbi__get32be(s);
4226    c.type   = stbi__get32be(s);
4227    return c;
4228 }
4229 
// Consume the 8-byte PNG signature from the stream.
// Returns 1 if it matches; on the first mismatching byte returns 0 with
// the failure reason set (reading stops at that byte).
static int stbi__check_png_header(stbi__context *s)
{
   static stbi_uc png_sig[8] = { 137,80,78,71,13,10,26,10 };
   int i = 0;
   while (i < 8) {
      if (stbi__get8(s) != png_sig[i])
         return stbi__err("bad png sig","Not a PNG");
      ++i;
   }
   return 1;
}
4238 
// Working state for decoding one PNG image.
typedef struct
{
   stbi__context *s;                // input stream and image dimensions/channel counts
   stbi_uc *idata, *expanded, *out; // out: decoded pixel buffer produced by the image-creation functions
                                    // NOTE(review): idata/expanded are not used in this part of the file;
                                    // presumably the compressed and inflated image data - confirm
   int depth;                       // NOTE(review): presumably the PNG bit depth - not referenced in this part of the file
} stbi__png;
4245 
4246 
// Scanline filter types. Values 0..4 are the filter bytes that can appear
// at the start of a scanline (anything larger is rejected as invalid);
// the last two exist only inside the decoder.
enum {
   STBI__F_none=0,
   STBI__F_sub=1,
   STBI__F_up=2,
   STBI__F_avg=3,
   STBI__F_paeth=4,
   // synthetic filters used for first scanline to avoid needing a dummy row of 0s
   STBI__F_avg_first,
   STBI__F_paeth_first
};
4257 
// Maps the filter byte of the first scanline to the filter actually applied.
// There is no previous row to sample, so "up" becomes "none" and avg/paeth
// become their *_first variants.
static stbi_uc first_row_filter[5] =
{
   STBI__F_none,
   STBI__F_sub,
   STBI__F_none,
   STBI__F_avg_first,
   STBI__F_paeth_first
};
4266 
// Paeth predictor: of the three neighbours a, b and c, return the one
// closest to the estimate a + b - c. Ties are resolved in the order a, b, c.
static int stbi__paeth(int a, int b, int c)
{
   int estimate = a + b - c;
   int dist_a = abs(estimate - a);
   int dist_b = abs(estimate - b);
   int dist_c = abs(estimate - c);
   if (dist_a > dist_b || dist_a > dist_c)
      return (dist_b <= dist_c) ? b : c;
   return a;
}
4277 
// Multiplier, indexed by bit depth, that stretches a 1/2/4-bit grayscale
// sample to the full 0..255 range (e.g. depth 1: 1*0xff, depth 4: 15*0x11).
static stbi_uc stbi__depth_scale_table[9] = { 0, 0xff, 0x55, 0, 0x11, 0,0,0, 0x01 };
4279 
// create the png data from post-deflated data
//
// Undoes the per-scanline filtering of an x-by-y image and stores the result
// in a newly allocated a->out with out_n channels per pixel.
//
//   a       - decoder state; a->s->img_n is the channel count in the file,
//             a->out receives the output buffer
//   raw     - inflated image data: for each row, one filter-type byte
//             followed by img_width_bytes of filtered samples
//   raw_len - number of bytes available at raw
//   out_n   - channels per output pixel; must be img_n or img_n+1
//             (in the latter case an opaque alpha channel is added)
//   x, y    - width and height of this image (or interlace pass) in pixels
//   depth   - bits per sample
//   color   - only tested against 0 here: when 0, samples with depth < 8 are
//             scaled to 0..255; otherwise they are left as-is
//
// After unfiltering, samples with depth < 8 are unpacked to one byte each,
// and 16-bit samples are converted from big-endian to native byte order.
//
// Returns 1 on success, 0 with the failure reason set on error. a->out is
// not freed on the error paths that follow its allocation.
static int stbi__create_png_image_raw(stbi__png *a, stbi_uc *raw, stbi__uint32 raw_len, int out_n, stbi__uint32 x, stbi__uint32 y, int depth, int color)
{
   int bytes = (depth == 16? 2 : 1);
   stbi__context *s = a->s;
   stbi__uint32 i,j,stride = x*out_n*bytes; // stride: bytes per output row
   stbi__uint32 img_len, img_width_bytes;
   int k;
   int img_n = s->img_n; // copy it into a local for later

   int output_bytes = out_n*bytes; // bytes per output pixel
   int filter_bytes = img_n*bytes; // bytes per input pixel; also the distance filters look back
   int width = x;

   STBI_ASSERT(out_n == s->img_n || out_n == s->img_n+1);
   a->out = (stbi_uc *) stbi__malloc_mad3(x, y, output_bytes, 0); // extra bytes to write off the end into
   if (!a->out) return stbi__err("outofmem", "Out of memory");

   img_width_bytes = (((img_n * x * depth) + 7) >> 3); // packed bytes per input row, rounded up
   img_len = (img_width_bytes + 1) * y;                 // +1 per row for the filter-type byte
   // we used to check for exact match between raw_len and img_len on non-interlaced PNGs,
   // but issue #276 reported a PNG in the wild that had extra data at the end (all zeros),
   // so just check for raw_len < img_len always.
   if (raw_len < img_len) return stbi__err("not enough pixels","Corrupt PNG");

   for (j=0; j < y; ++j) {
      stbi_uc *cur = a->out + stride*j;
      stbi_uc *prior;
      int filter = *raw++;

      if (filter > 4)
         return stbi__err("invalid filter","Corrupt PNG");

      if (depth < 8) {
         STBI_ASSERT(img_width_bytes <= x);
         cur += x*out_n - img_width_bytes; // store output to the rightmost img_len bytes, so we can decode in place
         filter_bytes = 1; // packed samples are filtered byte-wise
         width = img_width_bytes;
      }
      prior = cur - stride; // bugfix: need to compute this after 'cur +=' computation above

      // if first row, use special filter that doesn't sample previous row
      if (j == 0) filter = first_row_filter[filter];

      // handle first byte explicitly
      // (the first pixel has no pixel to its left, so those terms are dropped)
      for (k=0; k < filter_bytes; ++k) {
         switch (filter) {
            case STBI__F_none       : cur[k] = raw[k]; break;
            case STBI__F_sub        : cur[k] = raw[k]; break;
            case STBI__F_up         : cur[k] = STBI__BYTECAST(raw[k] + prior[k]); break;
            case STBI__F_avg        : cur[k] = STBI__BYTECAST(raw[k] + (prior[k]>>1)); break;
            case STBI__F_paeth      : cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(0,prior[k],0)); break;
            case STBI__F_avg_first  : cur[k] = raw[k]; break;
            case STBI__F_paeth_first: cur[k] = raw[k]; break;
         }
      }

      // step past the first pixel (or first packed byte), filling in its alpha if one is being added
      if (depth == 8) {
         if (img_n != out_n)
            cur[img_n] = 255; // first pixel
         raw += img_n;
         cur += out_n;
         prior += out_n;
      } else if (depth == 16) {
         if (img_n != out_n) {
            cur[filter_bytes]   = 255; // first pixel top byte
            cur[filter_bytes+1] = 255; // first pixel bottom byte
         }
         raw += filter_bytes;
         cur += output_bytes;
         prior += output_bytes;
      } else {
         raw += 1;
         cur += 1;
         prior += 1;
      }

      // this is a little gross, so that we don't switch per-pixel or per-component
      if (depth < 8 || img_n == out_n) {
         // input and output layouts match, so the rest of the row is one flat run of nk bytes
         int nk = (width - 1)*filter_bytes;
         #define STBI__CASE(f) \
             case f:     \
                for (k=0; k < nk; ++k)
         switch (filter) {
            // "none" filter turns into a memcpy here; make that explicit.
            case STBI__F_none:         memcpy(cur, raw, nk); break;
            STBI__CASE(STBI__F_sub)          { cur[k] = STBI__BYTECAST(raw[k] + cur[k-filter_bytes]); } break;
            STBI__CASE(STBI__F_up)           { cur[k] = STBI__BYTECAST(raw[k] + prior[k]); } break;
            STBI__CASE(STBI__F_avg)          { cur[k] = STBI__BYTECAST(raw[k] + ((prior[k] + cur[k-filter_bytes])>>1)); } break;
            STBI__CASE(STBI__F_paeth)        { cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(cur[k-filter_bytes],prior[k],prior[k-filter_bytes])); } break;
            STBI__CASE(STBI__F_avg_first)    { cur[k] = STBI__BYTECAST(raw[k] + (cur[k-filter_bytes] >> 1)); } break;
            STBI__CASE(STBI__F_paeth_first)  { cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(cur[k-filter_bytes],0,0)); } break;
         }
         #undef STBI__CASE
         raw += nk;
      } else {
         // an alpha channel is being inserted: walk pixel by pixel, since input
         // pixels are filter_bytes wide but output pixels are output_bytes wide
         STBI_ASSERT(img_n+1 == out_n);
         #define STBI__CASE(f) \
             case f:     \
                for (i=x-1; i >= 1; --i, cur[filter_bytes]=255,raw+=filter_bytes,cur+=output_bytes,prior+=output_bytes) \
                   for (k=0; k < filter_bytes; ++k)
         switch (filter) {
            STBI__CASE(STBI__F_none)         { cur[k] = raw[k]; } break;
            STBI__CASE(STBI__F_sub)          { cur[k] = STBI__BYTECAST(raw[k] + cur[k- output_bytes]); } break;
            STBI__CASE(STBI__F_up)           { cur[k] = STBI__BYTECAST(raw[k] + prior[k]); } break;
            STBI__CASE(STBI__F_avg)          { cur[k] = STBI__BYTECAST(raw[k] + ((prior[k] + cur[k- output_bytes])>>1)); } break;
            STBI__CASE(STBI__F_paeth)        { cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(cur[k- output_bytes],prior[k],prior[k- output_bytes])); } break;
            STBI__CASE(STBI__F_avg_first)    { cur[k] = STBI__BYTECAST(raw[k] + (cur[k- output_bytes] >> 1)); } break;
            STBI__CASE(STBI__F_paeth_first)  { cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(cur[k- output_bytes],0,0)); } break;
         }
         #undef STBI__CASE

         // the loop above sets the high byte of the pixels' alpha, but for
         // 16 bit png files we also need the low byte set. we'll do that here.
         if (depth == 16) {
            cur = a->out + stride*j; // start at the beginning of the row again
            for (i=0; i < x; ++i,cur+=output_bytes) {
               cur[filter_bytes+1] = 255;
            }
         }
      }
   }

   // we make a separate pass to expand bits to pixels; for performance,
   // this could run two scanlines behind the above code, so it won't
   // interfere with filtering but will still be in the cache.
   if (depth < 8) {
      for (j=0; j < y; ++j) {
         stbi_uc *cur = a->out + stride*j;
         stbi_uc *in  = a->out + stride*j + x*out_n - img_width_bytes; // packed data sits at the right end of the row
         // unpack 1/2/4-bit into a 8-bit buffer. allows us to keep the common 8-bit path optimal at minimal cost for 1/2/4-bit
         // png guarantees byte alignment, if width is not multiple of 8/4/2 we'll decode dummy trailing data that will be skipped in the later loop
         stbi_uc scale = (color == 0) ? stbi__depth_scale_table[depth] : 1; // scale grayscale values to 0..255 range

         // note that the final byte might overshoot and write more data than desired.
         // we can allocate enough data that this never writes out of memory, but it
         // could also overwrite the next scanline. can it overwrite non-empty data
         // on the next scanline? yes, consider 1-pixel-wide scanlines with 1-bit-per-pixel.
         // so we need to explicitly clamp the final ones

         if (depth == 4) {
            for (k=x*img_n; k >= 2; k-=2, ++in) {
               *cur++ = scale * ((*in >> 4)       );
               *cur++ = scale * ((*in     ) & 0x0f);
            }
            if (k > 0) *cur++ = scale * ((*in >> 4)       );
         } else if (depth == 2) {
            for (k=x*img_n; k >= 4; k-=4, ++in) {
               *cur++ = scale * ((*in >> 6)       );
               *cur++ = scale * ((*in >> 4) & 0x03);
               *cur++ = scale * ((*in >> 2) & 0x03);
               *cur++ = scale * ((*in     ) & 0x03);
            }
            if (k > 0) *cur++ = scale * ((*in >> 6)       );
            if (k > 1) *cur++ = scale * ((*in >> 4) & 0x03);
            if (k > 2) *cur++ = scale * ((*in >> 2) & 0x03);
         } else if (depth == 1) {
            for (k=x*img_n; k >= 8; k-=8, ++in) {
               *cur++ = scale * ((*in >> 7)       );
               *cur++ = scale * ((*in >> 6) & 0x01);
               *cur++ = scale * ((*in >> 5) & 0x01);
               *cur++ = scale * ((*in >> 4) & 0x01);
               *cur++ = scale * ((*in >> 3) & 0x01);
               *cur++ = scale * ((*in >> 2) & 0x01);
               *cur++ = scale * ((*in >> 1) & 0x01);
               *cur++ = scale * ((*in     ) & 0x01);
            }
            if (k > 0) *cur++ = scale * ((*in >> 7)       );
            if (k > 1) *cur++ = scale * ((*in >> 6) & 0x01);
            if (k > 2) *cur++ = scale * ((*in >> 5) & 0x01);
            if (k > 3) *cur++ = scale * ((*in >> 4) & 0x01);
            if (k > 4) *cur++ = scale * ((*in >> 3) & 0x01);
            if (k > 5) *cur++ = scale * ((*in >> 2) & 0x01);
            if (k > 6) *cur++ = scale * ((*in >> 1) & 0x01);
         }
         if (img_n != out_n) {
            int q;
            // insert alpha = 255
            // (walk right-to-left so unread source samples are never overwritten)
            cur = a->out + stride*j;
            if (img_n == 1) {
               for (q=x-1; q >= 0; --q) {
                  cur[q*2+1] = 255;
                  cur[q*2+0] = cur[q];
               }
            } else {
               STBI_ASSERT(img_n == 3);
               for (q=x-1; q >= 0; --q) {
                  cur[q*4+3] = 255;
                  cur[q*4+2] = cur[q*3+2];
                  cur[q*4+1] = cur[q*3+1];
                  cur[q*4+0] = cur[q*3+0];
               }
            }
         }
      }
   } else if (depth == 16) {
      // force the image data from big-endian to platform-native.
      // this is done in a separate pass due to the decoding relying
      // on the data being untouched, but could probably be done
      // per-line during decode if care is taken.
      stbi_uc *cur = a->out;
      stbi__uint16 *cur16 = (stbi__uint16*)cur;

      for(i=0; i < x*y*out_n; ++i,cur16++,cur+=2) {
         *cur16 = (cur[0] << 8) | cur[1];
      }
   }

   return 1;
}
4490 
// Build the final pixel buffer a->out from inflated PNG image data.
//
//   a              - decoder state; a->s supplies img_x, img_y and img_n
//   image_data     - inflated image data
//   image_data_len - number of bytes at image_data
//   out_n          - channels per output pixel
//   depth          - bits per sample
//   color          - passed through to stbi__create_png_image_raw
//   interlaced     - zero: the data is a single image; nonzero: the data
//                    holds seven interlace passes that are decoded one after
//                    another and scattered into the full-size image
//
// Returns 1 on success with a->out holding the image, 0 on failure.
// The full-size buffer for the interlaced case is checked for allocation
// failure before any pass is copied into it.
static int stbi__create_png_image(stbi__png *a, stbi_uc *image_data, stbi__uint32 image_data_len, int out_n, int depth, int color, int interlaced)
{
   int bytes = (depth == 16 ? 2 : 1);
   int out_bytes = out_n * bytes; // bytes per output pixel
   stbi_uc *final;
   int p;
   if (!interlaced)
      return stbi__create_png_image_raw(a, image_data, image_data_len, out_n, a->s->img_x, a->s->img_y, depth, color);

   // de-interlacing
   final = (stbi_uc *) stbi__malloc_mad3(a->s->img_x, a->s->img_y, out_bytes, 0);
   if (!final) return stbi__err("outofmem", "Out of memory");
   for (p=0; p < 7; ++p) {
      // per pass: offset of the first pixel and spacing between pixels, in each direction
      int xorig[] = { 0,4,0,2,0,1,0 };
      int yorig[] = { 0,0,4,0,2,0,1 };
      int xspc[]  = { 8,8,4,4,2,2,1 };
      int yspc[]  = { 8,8,8,4,4,2,2 };
      int i,j,x,y;
      // pass1_x[4] = 0, pass1_x[5] = 1, pass1_x[12] = 1
      // dimensions of this pass's sub-image (either may be 0 for small images)
      x = (a->s->img_x - xorig[p] + xspc[p]-1) / xspc[p];
      y = (a->s->img_y - yorig[p] + yspc[p]-1) / yspc[p];
      if (x && y) {
         // bytes this pass occupies in image_data, including one filter byte per row
         stbi__uint32 img_len = ((((a->s->img_n * x * depth) + 7) >> 3) + 1) * y;
         if (!stbi__create_png_image_raw(a, image_data, image_data_len, out_n, x, y, depth, color)) {
            STBI_FREE(final);
            return 0;
         }
         // scatter the pass's pixels to their positions in the full image
         for (j=0; j < y; ++j) {
            for (i=0; i < x; ++i) {
               int out_y = j*yspc[p]+yorig[p];
               int out_x = i*xspc[p]+xorig[p];
               memcpy(final + out_y*a->s->img_x*out_bytes + out_x*out_bytes,
                      a->out + (j*x+i)*out_bytes, out_bytes);
            }
         }
         STBI_FREE(a->out);
         image_data += img_len;
         image_data_len -= img_len;
      }
   }
   a->out = final;

   return 1;
}
4534 
// Apply color-key transparency to an 8-bit-per-channel image in z->out.
//
//   tc    - the transparent color (only tc[0] is used for 2-channel output)
//   out_n - channels per pixel, 2 or 4
//
// 2-channel: alpha is set to 0 where the gray value equals tc[0], else 255.
// 4-channel: alpha is set to 0 where r,g,b equal tc; other pixels keep the
// alpha they already have. Always returns 1.
static int stbi__compute_transparency(stbi__png *z, stbi_uc tc[3], int out_n)
{
   stbi_uc *px = z->out;
   stbi__uint32 remaining = z->s->img_x * z->s->img_y;

   // compute color-based transparency, assuming we've
   // already got 255 as the alpha value in the output
   STBI_ASSERT(out_n == 2 || out_n == 4);

   if (out_n != 2) {
      for (; remaining > 0; --remaining, px += 4) {
         if (px[0] == tc[0] && px[1] == tc[1] && px[2] == tc[2])
            px[3] = 0;
      }
      return 1;
   }

   for (; remaining > 0; --remaining, px += 2)
      px[1] = (px[0] != tc[0] ? 255 : 0);
   return 1;
}
4559 
// Apply color-key transparency to a 16-bit-per-channel image in z->out.
//
//   tc    - the transparent color (only tc[0] is used for 2-channel output)
//   out_n - channels per pixel, 2 or 4
//
// 2-channel: alpha is set to 0 where the gray value equals tc[0], else 65535.
// 4-channel: alpha is set to 0 where r,g,b equal tc; other pixels keep the
// alpha they already have. Always returns 1.
static int stbi__compute_transparency16(stbi__png *z, stbi__uint16 tc[3], int out_n)
{
   stbi__uint16 *px = (stbi__uint16*) z->out;
   stbi__uint32 remaining = z->s->img_x * z->s->img_y;

   // compute color-based transparency, assuming we've
   // already got 65535 as the alpha value in the output
   STBI_ASSERT(out_n == 2 || out_n == 4);

   if (out_n != 2) {
      for (; remaining > 0; --remaining, px += 4) {
         if (px[0] == tc[0] && px[1] == tc[1] && px[2] == tc[2])
            px[3] = 0;
      }
      return 1;
   }

   for (; remaining > 0; --remaining, px += 2)
      px[1] = (px[0] != tc[0] ? 65535 : 0);
   return 1;
}
4584 
// Replace the palette-index image in a->out with an expanded color image.
//
//   palette   - color table with 4 bytes per entry
//   len       - unused
//   pal_img_n - output channels: 3 copies the first three bytes of each
//               entry, any other value copies all four
//
// On success the old a->out is freed, a->out points at the new buffer and
// 1 is returned. Returns 0 with the failure reason set if allocation fails.
static int stbi__expand_png_palette(stbi__png *a, stbi_uc *palette, int len, int pal_img_n)
{
   stbi__uint32 i, pixel_count = a->s->img_x * a->s->img_y;
   stbi_uc *indices = a->out;
   stbi_uc *expanded, *dst;

   expanded = (stbi_uc *) stbi__malloc_mad2(pixel_count, pal_img_n, 0);
   if (expanded == NULL) return stbi__err("outofmem", "Out of memory");

   // between here and free(out) below, exiting would leak
   dst = expanded;
   for (i=0; i < pixel_count; ++i) {
      const stbi_uc *entry = palette + indices[i]*4;
      dst[0] = entry[0];
      dst[1] = entry[1];
      dst[2] = entry[2];
      if (pal_img_n == 3) {
         dst += 3;
      } else {
         dst[3] = entry[3];
         dst += 4;
      }
   }

   STBI_FREE(a->out);
   a->out = expanded;

   STBI_NOTUSED(len);

   return 1;
}
4621 
// Global PNG decoding options, both off by default.
// Set through stbi_set_unpremultiply_on_load(): when nonzero, stbi__de_iphone
// also divides color by alpha on 4-channel images.
static int stbi__unpremultiply_on_load = 0;
// Set through stbi_convert_iphone_png_to_rgb(): when nonzero, PNGs carrying a
// CgBI chunk get their channel order swapped after decoding.
static int stbi__de_iphone_flag = 0;
4624 
stbi_set_unpremultiply_on_load(int flag_true_if_should_unpremultiply)4625 STBIDEF void stbi_set_unpremultiply_on_load(int flag_true_if_should_unpremultiply)
4626 {
4627    stbi__unpremultiply_on_load = flag_true_if_should_unpremultiply;
4628 }
4629 
stbi_convert_iphone_png_to_rgb(int flag_true_if_should_convert)4630 STBIDEF void stbi_convert_iphone_png_to_rgb(int flag_true_if_should_convert)
4631 {
4632    stbi__de_iphone_flag = flag_true_if_should_convert;
4633 }
4634 
// Rewrites z->out in place from b,g,r[,a] channel order to r,g,b[,a].
// For 4-channel images, when stbi__unpremultiply_on_load is set, each color
// channel is additionally divided by alpha (with rounding); pixels whose
// alpha is 0 are only swapped.
static void stbi__de_iphone(stbi__png *z)
{
   stbi__context *ctx = z->s;
   stbi__uint32 n, pixel_count = ctx->img_x * ctx->img_y;
   stbi_uc *px = z->out;

   if (ctx->img_out_n == 3) {
      // 3 channels: plain swap of the first and third byte
      for (n = 0; n < pixel_count; ++n, px += 3) {
         stbi_uc first = px[0];
         px[0] = px[2];
         px[2] = first;
      }
      return;
   }

   STBI_ASSERT(ctx->img_out_n == 4);

   if (!stbi__unpremultiply_on_load) {
      // 4 channels, swap only; alpha stays where it is
      for (n = 0; n < pixel_count; ++n, px += 4) {
         stbi_uc first = px[0];
         px[0] = px[2];
         px[2] = first;
      }
      return;
   }

   // 4 channels: swap and unpremultiply
   for (n = 0; n < pixel_count; ++n, px += 4) {
      stbi_uc alpha = px[3];
      stbi_uc first = px[0];
      if (alpha) {
         stbi_uc half = alpha / 2; // added before dividing so the quotient rounds to nearest
         px[0] = (px[2] * 255 + half) / alpha;
         px[1] = (px[1] * 255 + half) / alpha;
         px[2] = (first * 255 + half) / alpha;
      } else {
         px[0] = px[2];
         px[2] = first;
      }
   }
}
4677 
// Packs four chunk-type bytes into one 32-bit tag, first byte in the top bits.
// The arithmetic is done in unsigned so a byte value >= 128 in the first
// position cannot overflow a signed int during the shift.
#define STBI__PNG_TYPE(a,b,c,d)  (((unsigned) (a) << 24) + ((unsigned) (b) << 16) + ((unsigned) (c) << 8) + (unsigned) (d))
4679 
// Walks a PNG stream chunk by chunk.
//   z         decoder state; z->s is the input. z->expanded, z->idata and
//             z->out are reset to NULL on entry.
//   scan      STBI__SCAN_type:   return right after the signature check
//             STBI__SCAN_header: return as soon as s->img_x, s->img_y and
//                                s->img_n are known (no pixel data is read)
//             STBI__SCAN_load:   collect all IDAT data and decode at IEND
//   req_comp  requested channel count, used to pick the decoded layout
// Returns 1 on success. Failures return 0 or the result of stbi__err(); no
// buffer stored in z is released on those paths, so the caller must free them.
static int stbi__parse_png_file(stbi__png *z, int scan, int req_comp)
{
   // Zero-filled so that pixel indices beyond the PLTE length expand to a
   // deterministic value instead of uninitialized stack contents.
   stbi_uc palette[1024] = {0};
   stbi_uc pal_img_n=0;
   stbi_uc has_trans=0, tc[3];
   stbi__uint16 tc16[3];
   stbi__uint32 ioff=0, idata_limit=0, i, pal_len=0;
   int first=1,k,interlace=0, color=0, is_iphone=0;
   stbi__context *s = z->s;

   z->expanded = NULL;
   z->idata = NULL;
   z->out = NULL;

   if (!stbi__check_png_header(s)) return 0;

   if (scan == STBI__SCAN_type) return 1;

   for (;;) {
      stbi__pngchunk c = stbi__get_chunk_header(s);
      switch (c.type) {
         case STBI__PNG_TYPE('C','g','B','I'):
            // remembered so IEND can inflate without a zlib header and
            // optionally run stbi__de_iphone
            is_iphone = 1;
            stbi__skip(s, c.length);
            break;
         case STBI__PNG_TYPE('I','H','D','R'): {
            int comp,filter;
            if (!first) return stbi__err("multiple IHDR","Corrupt PNG");
            first = 0;
            if (c.length != 13) return stbi__err("bad IHDR len","Corrupt PNG");
            s->img_x = stbi__get32be(s); if (s->img_x > (1 << 24)) return stbi__err("too large","Very large image (corrupt?)");
            s->img_y = stbi__get32be(s); if (s->img_y > (1 << 24)) return stbi__err("too large","Very large image (corrupt?)");
            z->depth = stbi__get8(s);  if (z->depth != 1 && z->depth != 2 && z->depth != 4 && z->depth != 8 && z->depth != 16)  return stbi__err("1/2/4/8/16-bit only","PNG not supported: 1/2/4/8/16-bit only");
            color = stbi__get8(s);  if (color > 6)         return stbi__err("bad ctype","Corrupt PNG");
            if (color == 3 && z->depth == 16)                  return stbi__err("bad ctype","Corrupt PNG");
            if (color == 3) pal_img_n = 3; else if (color & 1) return stbi__err("bad ctype","Corrupt PNG");
            comp  = stbi__get8(s);  if (comp) return stbi__err("bad comp method","Corrupt PNG");
            filter= stbi__get8(s);  if (filter) return stbi__err("bad filter method","Corrupt PNG");
            interlace = stbi__get8(s); if (interlace>1) return stbi__err("bad interlace method","Corrupt PNG");
            if (!s->img_x || !s->img_y) return stbi__err("0-pixel image","Corrupt PNG");
            if (!pal_img_n) {
               // color bit 2 selects rgb vs grey, color bit 4 adds alpha
               s->img_n = (color & 2 ? 3 : 1) + (color & 4 ? 1 : 0);
               if ((1 << 30) / s->img_x / s->img_n < s->img_y) return stbi__err("too large", "Image too large to decode");
               if (scan == STBI__SCAN_header) return 1;
            } else {
               // if paletted, then pal_n is our final components, and
               // img_n is # components to decompress/filter.
               s->img_n = 1;
               if ((1 << 30) / s->img_x / 4 < s->img_y) return stbi__err("too large","Corrupt PNG");
               // if SCAN_header, have to scan to see if we have a tRNS
            }
            break;
         }

         case STBI__PNG_TYPE('P','L','T','E'):  {
            if (first) return stbi__err("first not IHDR", "Corrupt PNG");
            if (c.length > 256*3) return stbi__err("invalid PLTE","Corrupt PNG");
            pal_len = c.length / 3;
            if (pal_len * 3 != c.length) return stbi__err("invalid PLTE","Corrupt PNG");
            // stored as 4 bytes per entry; alpha defaults to opaque until a tRNS overrides it
            for (i=0; i < pal_len; ++i) {
               palette[i*4+0] = stbi__get8(s);
               palette[i*4+1] = stbi__get8(s);
               palette[i*4+2] = stbi__get8(s);
               palette[i*4+3] = 255;
            }
            break;
         }

         case STBI__PNG_TYPE('t','R','N','S'): {
            if (first) return stbi__err("first not IHDR", "Corrupt PNG");
            if (z->idata) return stbi__err("tRNS after IDAT","Corrupt PNG");
            if (pal_img_n) {
               // paletted: tRNS supplies per-entry alpha, so the image has 4 components
               if (scan == STBI__SCAN_header) { s->img_n = 4; return 1; }
               if (pal_len == 0) return stbi__err("tRNS before PLTE","Corrupt PNG");
               if (c.length > pal_len) return stbi__err("bad tRNS len","Corrupt PNG");
               pal_img_n = 4;
               for (i=0; i < c.length; ++i)
                  palette[i*4+3] = stbi__get8(s);
            } else {
               // non-paletted: tRNS is a single key color, one 16-bit value per component
               if (!(s->img_n & 1)) return stbi__err("tRNS with alpha","Corrupt PNG");
               if (c.length != (stbi__uint32) s->img_n*2) return stbi__err("bad tRNS len","Corrupt PNG");
               has_trans = 1;
               if (z->depth == 16) {
                  for (k = 0; k < s->img_n; ++k) tc16[k] = (stbi__uint16)stbi__get16be(s); // copy the values as-is
               } else {
                  for (k = 0; k < s->img_n; ++k) tc[k] = (stbi_uc)(stbi__get16be(s) & 255) * stbi__depth_scale_table[z->depth]; // non 8-bit images will be larger
               }
            }
            break;
         }

         case STBI__PNG_TYPE('I','D','A','T'): {
            if (first) return stbi__err("first not IHDR", "Corrupt PNG");
            if (pal_img_n && !pal_len) return stbi__err("no PLTE","Corrupt PNG");
            if (scan == STBI__SCAN_header) { s->img_n = pal_img_n; return 1; }
            // reject totals that would wrap when viewed as a signed int
            if ((int)(ioff + c.length) < (int)ioff) return 0;
            if (ioff + c.length > idata_limit) {
               // grow the accumulation buffer by doubling until the chunk fits
               stbi__uint32 idata_limit_old = idata_limit;
               stbi_uc *p;
               if (idata_limit == 0) idata_limit = c.length > 4096 ? c.length : 4096;
               while (ioff + c.length > idata_limit)
                  idata_limit *= 2;
               STBI_NOTUSED(idata_limit_old);
               p = (stbi_uc *) STBI_REALLOC_SIZED(z->idata, idata_limit_old, idata_limit); if (p == NULL) return stbi__err("outofmem", "Out of memory");
               z->idata = p;
            }
            if (!stbi__getn(s, z->idata+ioff,c.length)) return stbi__err("outofdata","Corrupt PNG");
            ioff += c.length;
            break;
         }

         case STBI__PNG_TYPE('I','E','N','D'): {
            stbi__uint32 raw_len, bpl;
            if (first) return stbi__err("first not IHDR", "Corrupt PNG");
            if (scan != STBI__SCAN_load) return 1;
            if (z->idata == NULL) return stbi__err("no IDAT","Corrupt PNG");
            // initial guess for decoded data size to avoid unnecessary reallocs
            bpl = (s->img_x * z->depth + 7) / 8; // bytes per line, per component
            raw_len = bpl * s->img_y * s->img_n /* pixels */ + s->img_y /* filter mode per row */;
            z->expanded = (stbi_uc *) stbi_zlib_decode_malloc_guesssize_headerflag((char *) z->idata, ioff, raw_len, (int *) &raw_len, !is_iphone);
            if (z->expanded == NULL) return 0; // zlib should set error
            STBI_FREE(z->idata); z->idata = NULL;
            // decode with an extra alpha slot when the caller asked for exactly
            // one more channel (non-paletted) or when a tRNS key must be applied
            if ((req_comp == s->img_n+1 && req_comp != 3 && !pal_img_n) || has_trans)
               s->img_out_n = s->img_n+1;
            else
               s->img_out_n = s->img_n;
            if (!stbi__create_png_image(z, z->expanded, raw_len, s->img_out_n, z->depth, color, interlace)) return 0;
            if (has_trans) {
               if (z->depth == 16) {
                  if (!stbi__compute_transparency16(z, tc16, s->img_out_n)) return 0;
               } else {
                  if (!stbi__compute_transparency(z, tc, s->img_out_n)) return 0;
               }
            }
            if (is_iphone && stbi__de_iphone_flag && s->img_out_n > 2)
               stbi__de_iphone(z);
            if (pal_img_n) {
               // pal_img_n == 3 or 4
               s->img_n = pal_img_n; // record the actual colors we had
               s->img_out_n = pal_img_n;
               if (req_comp >= 3) s->img_out_n = req_comp;
               if (!stbi__expand_png_palette(z, palette, pal_len, s->img_out_n))
                  return 0;
            } else if (has_trans) {
               // non-paletted image with tRNS -> source image has (constant) alpha
               ++s->img_n;
            }
            STBI_FREE(z->expanded); z->expanded = NULL;
            return 1;
         }

         default:
            // if critical, fail
            if (first) return stbi__err("first not IHDR", "Corrupt PNG");
            // bit 29 clear means the first type byte is uppercase
            if ((c.type & (1 << 29)) == 0) {
               #ifndef STBI_NO_FAILURE_STRINGS
               // not threadsafe
               static char invalid_chunk[] = "XXXX PNG chunk not known";
               invalid_chunk[0] = STBI__BYTECAST(c.type >> 24);
               invalid_chunk[1] = STBI__BYTECAST(c.type >> 16);
               invalid_chunk[2] = STBI__BYTECAST(c.type >>  8);
               invalid_chunk[3] = STBI__BYTECAST(c.type >>  0);
               #endif
               return stbi__err(invalid_chunk, "PNG not supported: unknown PNG chunk type");
            }
            stbi__skip(s, c.length);
            break;
      }
      // end of PNG chunk, read and skip CRC
      stbi__get32be(s);
   }
}
4851 
// Runs a full PNG decode on p and hands the pixel buffer to the caller.
//   x, y      receive the image dimensions (written unconditionally on success)
//   n         if non-NULL, receives s->img_n as left by the parser
//   req_comp  0 to keep the decoded layout, otherwise 1..4 channels
//   ri        receives bits_per_channel: 8 for depths below 8, else the depth
// Returns the pixel buffer, or NULL on failure. Any buffers still owned by p
// are released before returning, except on the early return taken when the
// format conversion fails.
static void *stbi__do_png(stbi__png *p, int *x, int *y, int *n, int req_comp, stbi__result_info *ri)
{
   void *pixels = NULL;
   stbi__context *ctx;

   if (req_comp < 0 || req_comp > 4) return stbi__errpuc("bad req_comp", "Internal error");

   ctx = p->s;
   if (stbi__parse_png_file(p, STBI__SCAN_load, req_comp)) {
      ri->bits_per_channel = (p->depth < 8) ? 8 : p->depth;

      // take ownership of the decoded image so the cleanup below leaves it alone
      pixels = p->out;
      p->out = NULL;

      if (req_comp != 0 && req_comp != ctx->img_out_n) {
         if (ri->bits_per_channel == 8)
            pixels = stbi__convert_format((unsigned char *) pixels, ctx->img_out_n, req_comp, ctx->img_x, ctx->img_y);
         else
            pixels = stbi__convert_format16((stbi__uint16 *) pixels, ctx->img_out_n, req_comp, ctx->img_x, ctx->img_y);
         ctx->img_out_n = req_comp;
         if (pixels == NULL) return pixels;
      }

      *x = ctx->img_x;
      *y = ctx->img_y;
      if (n) *n = ctx->img_n;
   }

   STBI_FREE(p->out);      p->out      = NULL;
   STBI_FREE(p->expanded); p->expanded = NULL;
   STBI_FREE(p->idata);    p->idata    = NULL;

   return pixels;
}
4881 
// Loader entry point for PNG: wraps the context in a stack-allocated
// stbi__png and forwards every argument to stbi__do_png.
static void *stbi__png_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri)
{
   stbi__png png;
   png.s = s;
   return stbi__do_png(&png, x, y, comp, req_comp, ri);
}
4888 
// Returns the result of stbi__check_png_header(s); the stream is rewound
// afterwards regardless of the outcome.
static int stbi__png_test(stbi__context *s)
{
   int is_png = stbi__check_png_header(s);
   stbi__rewind(s);
   return is_png;
}
4896 
// Scans only as far as needed to learn the image dimensions and component
// count. Each of x, y, comp may be NULL, in which case it is not written.
// Returns 1 on success. On failure the stream is rewound and 0 is returned.
static int stbi__png_info_raw(stbi__png *p, int *x, int *y, int *comp)
{
   if (stbi__parse_png_file(p, STBI__SCAN_header, 0)) {
      if (x) *x = p->s->img_x;
      if (y) *y = p->s->img_y;
      if (comp) *comp = p->s->img_n;
      return 1;
   }
   stbi__rewind( p->s );
   return 0;
}
4908 
// Info entry point for PNG: wraps the context in a stack-allocated stbi__png
// and forwards to stbi__png_info_raw.
static int stbi__png_info(stbi__context *s, int *x, int *y, int *comp)
{
   stbi__png png;
   png.s = s;
   return stbi__png_info_raw(&png, x, y, comp);
}
4915 #endif
4916 
4917 // Microsoft/Windows BMP image
4918 
4919 #ifndef STBI_NO_BMP
// Checks for the 'B','M' signature followed by a supported info-header size
// (12, 40, 56, 108 or 124). Does not rewind the stream.
// Returns 1 if both checks pass, 0 otherwise.
static int stbi__bmp_test_raw(stbi__context *s)
{
   int header_size;
   if (stbi__get8(s) != 'B' || stbi__get8(s) != 'M') return 0;
   stbi__get32le(s); // discard filesize
   stbi__get16le(s); // discard reserved
   stbi__get16le(s); // discard reserved
   stbi__get32le(s); // discard data offset
   header_size = stbi__get32le(s);
   switch (header_size) {
      case 12:
      case 40:
      case 56:
      case 108:
      case 124:
         return 1;
      default:
         return 0;
   }
}
4934 
// Returns the result of stbi__bmp_test_raw(s); the stream is rewound
// afterwards regardless of the outcome.
static int stbi__bmp_test(stbi__context *s)
{
   int is_bmp = stbi__bmp_test_raw(s);
   stbi__rewind(s);
   return is_bmp;
}
4941 
4942 
// Returns the index (0..31) of the highest set bit of z, or -1 when z is 0.
static int stbi__high_bit(unsigned int z)
{
   // binary search: test the upper half of the remaining range at each step
   static const int steps[5] = { 16, 8, 4, 2, 1 };
   int k, index = 0;

   if (z == 0) return -1;
   for (k = 0; k < 5; ++k) {
      if ((z >> steps[k]) != 0) {
         index += steps[k];
         z >>= steps[k];
      }
   }
   return index;
}
4955 
// Returns the number of set bits in the low 32 bits of a, computed by summing
// adjacent bit fields of doubling width.
static int stbi__bitcount(unsigned int a)
{
   unsigned int pairs, quads, bytes;

   pairs = ((a >> 1) & 0x55555555) + (a & 0x55555555);         // each 2-bit field holds 0..2
   quads = ((pairs >> 2) & 0x33333333) + (pairs & 0x33333333); // each 4-bit field holds 0..4
   bytes = (quads + (quads >> 4)) & 0x0f0f0f0f;                // each byte holds 0..8
   bytes += bytes >> 8;                                        // low byte: sum of bytes 0 and 1
   bytes += bytes >> 16;                                       // low byte: sum of all four bytes
   return bytes & 0xff;
}
4965 
// Moves a masked channel value into the low 8 bits and widens it to a full
// 8-bit range by replicating its bit pattern downwards.
//   v      masked channel bits (may have bit 31 set, i.e. be negative as int)
//   shift  right-shift amount; a negative value means shift left by -shift
//   bits   number of significant bits in the channel
// Returns the widened value; 0 when bits <= 0.
//
// All arithmetic is done on an unsigned copy of v: right-shifting a negative
// int sign-extends on common compilers, which corrupted the replicated sum
// for channels whose mask reaches bit 31.
static int stbi__shiftsigned(int v, int shift, int bits)
{
   unsigned int u = (unsigned int) v;
   unsigned int result;
   int z;

   // a zero-width channel carries no data; without this guard the
   // replication loop below would never advance
   if (bits <= 0) return 0;

   if (shift < 0) u <<= -shift;
   else u >>= shift;
   result = u;

   // append copies of the pattern, each 'bits' further down, until 8 bits are filled
   for (z = bits; z < 8; z += bits)
      result += u >> z;

   return (int) result;
}
4982 
// Header fields that stbi__bmp_parse_header extracts for stbi__bmp_load.
typedef struct
{
   int bpp;            // bits per pixel
   int offset;         // data-offset field read from the file header
   int hsz;            // info-header size field (12, 40, 56, 108 or 124)
   unsigned int mr;    // red channel bit mask
   unsigned int mg;    // green channel bit mask
   unsigned int mb;    // blue channel bit mask
   unsigned int ma;    // alpha channel bit mask
   unsigned int all_a; // OR of every alpha value seen; 0 means alpha was all zero
} stbi__bmp_data;
4988 
// Reads the BMP file header and info header from s.
// Fills s->img_x / s->img_y and info->bpp, offset, hsz, mr, mg, mb, ma.
// info->all_a is written only for uncompressed 32-bit images with a 40- or
// 56-byte header (set to 0); otherwise the caller's value is kept.
// Returns a non-NULL dummy pointer on success, or the result of
// stbi__errpuc() on failure.
static void *stbi__bmp_parse_header(stbi__context *s, stbi__bmp_data *info)
{
   int header_size, compress, k;

   if (stbi__get8(s) != 'B' || stbi__get8(s) != 'M') return stbi__errpuc("not BMP", "Corrupt BMP");
   stbi__get32le(s); // discard filesize
   stbi__get16le(s); // discard reserved
   stbi__get16le(s); // discard reserved
   info->offset = stbi__get32le(s);
   header_size = stbi__get32le(s);
   info->hsz = header_size;
   info->mr = info->mg = info->mb = info->ma = 0;

   switch (header_size) {
      case 12: case 40: case 56: case 108: case 124:
         break;
      default:
         return stbi__errpuc("unknown BMP", "BMP type not supported: unknown");
   }

   // the 12-byte header stores 16-bit dimensions, all others 32-bit
   if (header_size == 12) {
      s->img_x = stbi__get16le(s);
      s->img_y = stbi__get16le(s);
   } else {
      s->img_x = stbi__get32le(s);
      s->img_y = stbi__get32le(s);
   }
   if (stbi__get16le(s) != 1) return stbi__errpuc("bad BMP", "bad BMP");
   info->bpp = stbi__get16le(s);
   if (info->bpp == 1) return stbi__errpuc("monochrome", "BMP type not supported: 1-bit");

   // the 12-byte header ends here
   if (header_size == 12) return (void *) 1;

   compress = stbi__get32le(s);
   if (compress == 1 || compress == 2) return stbi__errpuc("BMP RLE", "BMP type not supported: RLE");
   for (k = 0; k < 5; ++k)
      stbi__get32le(s); // discard sizeof, hres, vres, colorsused, max important

   if (header_size == 108 || header_size == 124) {
      // these headers always carry explicit channel masks
      info->mr = stbi__get32le(s);
      info->mg = stbi__get32le(s);
      info->mb = stbi__get32le(s);
      info->ma = stbi__get32le(s);
      stbi__get32le(s); // discard color space
      for (k = 0; k < 12; ++k)
         stbi__get32le(s); // discard color space parameters
      if (header_size == 124) {
         stbi__get32le(s); // discard rendering intent
         stbi__get32le(s); // discard offset of profile data
         stbi__get32le(s); // discard size of profile data
         stbi__get32le(s); // discard reserved
      }
      return (void *) 1;
   }

   // header_size is 40 or 56 from here on
   if (header_size == 56) {
      for (k = 0; k < 4; ++k)
         stbi__get32le(s);
   }
   if (info->bpp != 16 && info->bpp != 32)
      return (void *) 1;

   if (compress == 0) {
      // no masks in the file: fall back to fixed layouts
      if (info->bpp == 32) {
         info->mr = 0xffu << 16;
         info->mg = 0xffu <<  8;
         info->mb = 0xffu <<  0;
         info->ma = 0xffu << 24;
         info->all_a = 0; // if all_a is 0 at end, then we loaded alpha channel but it was all 0
      } else {
         info->mr = 31u << 10;
         info->mg = 31u <<  5;
         info->mb = 31u <<  0;
      }
   } else if (compress == 3) {
      info->mr = stbi__get32le(s);
      info->mg = stbi__get32le(s);
      info->mb = stbi__get32le(s);
      // not documented, but generated by photoshop and handled by mspaint
      if (info->mr == info->mg && info->mg == info->mb) {
         // ?!?!?
         return stbi__errpuc("bad BMP", "bad BMP");
      }
   } else {
      return stbi__errpuc("bad BMP", "bad BMP");
   }
   return (void *) 1;
}
5072 
5073 
// Decodes a BMP image from s.
//   x, y      receive the image dimensions
//   comp      if non-NULL, receives the source component count (3 or 4)
//   req_comp  0 to keep the source layout, otherwise the channel count wanted
//   ri        unused
// Returns a newly allocated pixel buffer, or NULL on error (via
// stbi__errpuc, or with the error already set by the header parser or
// format converter).
//
// Differences from the previous implementation:
//   - the palette size for 12-byte-header files is derived from the actual
//     header size (the old constant 24 dropped the last four entries)
//   - the palette is zero-filled, so indices beyond the stored entries no
//     longer read uninitialized stack memory
//   - a negative palette size (data offset inside the headers) is rejected
static void *stbi__bmp_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri)
{
   stbi_uc *out;
   unsigned int mr=0,mg=0,mb=0,ma=0, all_a;
   stbi_uc pal[256][4] = {{0}};
   int psize=0,i,j,width;
   int flip_vertically, pad, target;
   stbi__bmp_data info;
   STBI_NOTUSED(ri);

   info.all_a = 255;
   if (stbi__bmp_parse_header(s, &info) == NULL)
      return NULL; // error code already set

   // a positive height means rows are flipped after decoding
   flip_vertically = ((int) s->img_y) > 0;
   s->img_y = abs((int) s->img_y);

   mr = info.mr;
   mg = info.mg;
   mb = info.mb;
   ma = info.ma;
   all_a = info.all_a;

   // palette entries sit between the headers (14-byte file header plus
   // info header) and the pixel data: 3 bytes each for the 12-byte header,
   // 4 bytes each otherwise
   if (info.hsz == 12) {
      if (info.bpp < 24)
         psize = (info.offset - 14 - info.hsz) / 3;
   } else {
      if (info.bpp < 16)
         psize = (info.offset - 14 - info.hsz) >> 2;
   }

   s->img_n = ma ? 4 : 3;
   if (req_comp && req_comp >= 3) // we can directly decode 3 or 4
      target = req_comp;
   else
      target = s->img_n; // if they want monochrome, we'll post-convert

   // sanity-check size
   if (!stbi__mad3sizes_valid(target, s->img_x, s->img_y, 0))
      return stbi__errpuc("too large", "Corrupt BMP");

   out = (stbi_uc *) stbi__malloc_mad3(target, s->img_x, s->img_y, 0);
   if (!out) return stbi__errpuc("outofmem", "Out of memory");
   if (info.bpp < 16) {
      // paletted image
      int z=0;
      if (psize <= 0 || psize > 256) { STBI_FREE(out); return stbi__errpuc("invalid", "Corrupt BMP"); }
      for (i=0; i < psize; ++i) {
         // file order is b,g,r; stored here as r,g,b
         pal[i][2] = stbi__get8(s);
         pal[i][1] = stbi__get8(s);
         pal[i][0] = stbi__get8(s);
         if (info.hsz != 12) stbi__get8(s);
         pal[i][3] = 255;
      }
      stbi__skip(s, info.offset - 14 - info.hsz - psize * (info.hsz == 12 ? 3 : 4));
      if (info.bpp == 4) width = (s->img_x + 1) >> 1;
      else if (info.bpp == 8) width = s->img_x;
      else { STBI_FREE(out); return stbi__errpuc("bad bpp", "Corrupt BMP"); }
      pad = (-width)&3; // rows are padded to a multiple of 4 bytes
      for (j=0; j < (int) s->img_y; ++j) {
         // two pixels per iteration: at 4 bpp both come from one byte
         for (i=0; i < (int) s->img_x; i += 2) {
            int v=stbi__get8(s),v2=0;
            if (info.bpp == 4) {
               v2 = v & 15;
               v >>= 4;
            }
            out[z++] = pal[v][0];
            out[z++] = pal[v][1];
            out[z++] = pal[v][2];
            if (target == 4) out[z++] = 255;
            if (i+1 == (int) s->img_x) break;
            v = (info.bpp == 8) ? stbi__get8(s) : v2;
            out[z++] = pal[v][0];
            out[z++] = pal[v][1];
            out[z++] = pal[v][2];
            if (target == 4) out[z++] = 255;
         }
         stbi__skip(s, pad);
      }
   } else {
      // direct-color image
      int rshift=0,gshift=0,bshift=0,ashift=0,rcount=0,gcount=0,bcount=0,acount=0;
      int z = 0;
      int easy=0;
      stbi__skip(s, info.offset - 14 - info.hsz);
      if (info.bpp == 24) width = 3 * s->img_x;
      else if (info.bpp == 16) width = 2*s->img_x;
      else /* bpp = 32 and pad = 0 */ width=0;
      pad = (-width) & 3;
      // easy == 1: 24-bit b,g,r bytes; easy == 2: 32-bit b,g,r,a bytes
      if (info.bpp == 24) {
         easy = 1;
      } else if (info.bpp == 32) {
         if (mb == 0xff && mg == 0xff00 && mr == 0x00ff0000 && ma == 0xff000000)
            easy = 2;
      }
      if (!easy) {
         if (!mr || !mg || !mb) { STBI_FREE(out); return stbi__errpuc("bad masks", "Corrupt BMP"); }
         // right shift amt to put high bit in position #7
         rshift = stbi__high_bit(mr)-7; rcount = stbi__bitcount(mr);
         gshift = stbi__high_bit(mg)-7; gcount = stbi__bitcount(mg);
         bshift = stbi__high_bit(mb)-7; bcount = stbi__bitcount(mb);
         ashift = stbi__high_bit(ma)-7; acount = stbi__bitcount(ma);
      }
      for (j=0; j < (int) s->img_y; ++j) {
         if (easy) {
            for (i=0; i < (int) s->img_x; ++i) {
               unsigned char a;
               out[z+2] = stbi__get8(s);
               out[z+1] = stbi__get8(s);
               out[z+0] = stbi__get8(s);
               z += 3;
               a = (easy == 2 ? stbi__get8(s) : 255);
               all_a |= a;
               if (target == 4) out[z++] = a;
            }
         } else {
            int bpp = info.bpp;
            for (i=0; i < (int) s->img_x; ++i) {
               stbi__uint32 v = (bpp == 16 ? (stbi__uint32) stbi__get16le(s) : stbi__get32le(s));
               int a;
               out[z++] = STBI__BYTECAST(stbi__shiftsigned(v & mr, rshift, rcount));
               out[z++] = STBI__BYTECAST(stbi__shiftsigned(v & mg, gshift, gcount));
               out[z++] = STBI__BYTECAST(stbi__shiftsigned(v & mb, bshift, bcount));
               a = (ma ? stbi__shiftsigned(v & ma, ashift, acount) : 255);
               all_a |= a;
               if (target == 4) out[z++] = STBI__BYTECAST(a);
            }
         }
         stbi__skip(s, pad);
      }
   }

   // if alpha channel is all 0s, replace with all 255s
   if (target == 4 && all_a == 0)
      for (i=4*s->img_x*s->img_y-1; i >= 0; i -= 4)
         out[i] = 255;

   if (flip_vertically) {
      // swap row j with its mirror row
      stbi_uc t;
      for (j=0; j < (int) s->img_y>>1; ++j) {
         stbi_uc *p1 = out +      j     *s->img_x*target;
         stbi_uc *p2 = out + (s->img_y-1-j)*s->img_x*target;
         for (i=0; i < (int) s->img_x*target; ++i) {
            t = p1[i], p1[i] = p2[i], p2[i] = t;
         }
      }
   }

   if (req_comp && req_comp != target) {
      out = stbi__convert_format(out, target, req_comp, s->img_x, s->img_y);
      if (out == NULL) return out; // stbi__convert_format frees input on failure
   }

   *x = s->img_x;
   *y = s->img_y;
   if (comp) *comp = s->img_n;
   return out;
}
5230 #endif
5231 
5232 // Targa Truevision - TGA
5233 // by Jonathan Dummer
5234 #ifndef STBI_NO_TGA
5235 // returns STBI_rgb or whatever, 0 on error
stbi__tga_get_comp(int bits_per_pixel,int is_grey,int * is_rgb16)5236 static int stbi__tga_get_comp(int bits_per_pixel, int is_grey, int* is_rgb16)
5237 {
5238    // only RGB or RGBA (incl. 16bit) or grey allowed
5239    if(is_rgb16) *is_rgb16 = 0;
5240    switch(bits_per_pixel) {
5241       case 8:  return STBI_grey;
5242       case 16: if(is_grey) return STBI_grey_alpha;
5243             // else: fall-through
5244       case 15: if(is_rgb16) *is_rgb16 = 1;
5245             return STBI_rgb;
5246       case 24: // fall-through
5247       case 32: return bits_per_pixel/8;
5248       default: return 0;
5249    }
5250 }
5251 
// Reads a TGA header and reports the image dimensions and component count.
// Each of x, y, comp may be NULL, in which case it is not written.
// Returns 1 on success (stream is NOT rewound). Returns 0 after rewinding
// the stream if any header field is unsupported.
static int stbi__tga_info(stbi__context *s, int *x, int *y, int *comp)
{
    int width, height, components, image_type, pixel_bits, colormap_bits;
    int entry_bits, colormap_type;

    stbi__get8(s);                   // discard Offset
    colormap_type = stbi__get8(s);   // colormap type
    if (colormap_type > 1) goto not_tga;   // only RGB or indexed allowed

    image_type = stbi__get8(s);      // image type
    if (colormap_type == 1) {        // colormapped (paletted) image
        if (image_type != 1 && image_type != 9) goto not_tga;
        stbi__skip(s,4);             // skip index of first colormap entry and number of entries
        entry_bits = stbi__get8(s);  // check bits per palette color entry
        if (entry_bits != 8 && entry_bits != 15 && entry_bits != 16 && entry_bits != 24 && entry_bits != 32)
            goto not_tga;
        stbi__skip(s,4);             // skip image x and y origin
        colormap_bits = entry_bits;
    } else {                         // "normal" image w/o colormap - only RGB or grey allowed, +/- RLE
        if (image_type != 2 && image_type != 3 && image_type != 10 && image_type != 11)
            goto not_tga;
        stbi__skip(s,9);             // skip colormap specification and image x/y origin
        colormap_bits = 0;
    }

    width = stbi__get16le(s);
    if (width < 1) goto not_tga;     // test width
    height = stbi__get16le(s);
    if (height < 1) goto not_tga;    // test height

    pixel_bits = stbi__get8(s);      // bits per pixel
    stbi__get8(s);                   // ignore alpha bits

    if (colormap_bits != 0) {
        // when using a colormap, pixel_bits is the size of the indexes
        // I don't think anything but 8 or 16bit indexes makes sense
        if (pixel_bits != 8 && pixel_bits != 16) goto not_tga;
        components = stbi__tga_get_comp(colormap_bits, 0, NULL);
    } else {
        components = stbi__tga_get_comp(pixel_bits, (image_type == 3) || (image_type == 11), NULL);
    }
    if (!components) goto not_tga;

    if (x) *x = width;
    if (y) *y = height;
    if (comp) *comp = components;
    return 1;                        // seems to have passed everything

not_tga:
    stbi__rewind(s);
    return 0;
}
5316 
// Checks whether the stream starts with a TGA header this loader accepts.
// The stream is always rewound before returning.
// Returns 1 if every header check passes, 0 otherwise.
static int stbi__tga_test(stbi__context *s)
{
   int looks_like_tga = 0;
   int colormap_type, image_type, bits;

   // single-pass block: 'break' abandons the checks at the first mismatch
   do {
      stbi__get8(s);                    // discard Offset
      colormap_type = stbi__get8(s);    // color type
      if (colormap_type > 1) break;     // only RGB or indexed allowed
      image_type = stbi__get8(s);       // image type
      if (colormap_type == 1) {         // colormapped (paletted) image
         if (image_type != 1 && image_type != 9) break; // colortype 1 demands image type 1 or 9
         stbi__skip(s,4);               // skip index of first colormap entry and number of entries
         bits = stbi__get8(s);          // check bits per palette color entry
         if (bits != 8 && bits != 15 && bits != 16 && bits != 24 && bits != 32) break;
         stbi__skip(s,4);               // skip image x and y origin
      } else {                          // "normal" image w/o colormap
         if (image_type != 2 && image_type != 3 && image_type != 10 && image_type != 11) break; // only RGB or grey allowed, +/- RLE
         stbi__skip(s,9);               // skip colormap specification and image x/y origin
      }
      if (stbi__get16le(s) < 1) break;  // test width
      if (stbi__get16le(s) < 1) break;  // test height
      bits = stbi__get8(s);             // bits per pixel
      if (colormap_type == 1 && bits != 8 && bits != 16) break; // for colormapped images, bpp is size of an index
      if (bits != 8 && bits != 15 && bits != 16 && bits != 24 && bits != 32) break;

      looks_like_tga = 1; // every check passed
   } while (0);

   stbi__rewind(s);
   return looks_like_tga;
}
5347 
5348 // read 16bit value and convert to 24bit RGB
stbi__tga_read_rgb16(stbi__context * s,stbi_uc * out)5349 static void stbi__tga_read_rgb16(stbi__context *s, stbi_uc* out)
5350 {
5351    stbi__uint16 px = (stbi__uint16)stbi__get16le(s);
5352    stbi__uint16 fiveBitMask = 31;
5353    // we have 3 channels with 5bits each
5354    int r = (px >> 10) & fiveBitMask;
5355    int g = (px >> 5) & fiveBitMask;
5356    int b = px & fiveBitMask;
5357    // Note that this saves the data in RGB(A) order, so it doesn't need to be swapped later
5358    out[0] = (stbi_uc)((r * 255)/31);
5359    out[1] = (stbi_uc)((g * 255)/31);
5360    out[2] = (stbi_uc)((b * 255)/31);
5361 
5362    // some people claim that the most significant bit might be used for alpha
5363    // (possibly if an alpha-bit is set in the "image descriptor byte")
5364    // but that only made 16bit test images completely translucent..
5365    // so let's treat all 15 and 16bit TGAs as RGB with no alpha.
5366 }
5367 
// Decode a TGA image from `s`.
//   x, y     : receive the image width and height
//   comp     : if non-NULL, receives the channel count found in the file
//   req_comp : requested channel count for the returned buffer (0 = keep the file's)
//   ri       : not used by this loader
// Returns the decoded pixel buffer, or a null pointer (via stbi__errpuc) when the
// pixel format cannot be determined, the image is too large, the palette is
// missing/corrupt, or an allocation fails.
static void *stbi__tga_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri)
{
   //   read in the TGA header stuff (the order of these initializers is the file order)
   int tga_offset = stbi__get8(s);
   int tga_indexed = stbi__get8(s);
   int tga_image_type = stbi__get8(s);
   int tga_is_RLE = 0;
   int tga_palette_start = stbi__get16le(s);
   int tga_palette_len = stbi__get16le(s);
   int tga_palette_bits = stbi__get8(s);
   int tga_x_origin = stbi__get16le(s);
   int tga_y_origin = stbi__get16le(s);
   int tga_width = stbi__get16le(s);
   int tga_height = stbi__get16le(s);
   int tga_bits_per_pixel = stbi__get8(s);
   int tga_comp, tga_rgb16=0;
   int tga_inverted = stbi__get8(s);
   // int tga_alpha_bits = tga_inverted & 15; // the 4 lowest bits - unused (useless?)
   //   image data
   unsigned char *tga_data;
   unsigned char *tga_palette = NULL;
   int i, j;
   unsigned char raw_data[4] = {0};
   int RLE_count = 0;
   int RLE_repeating = 0;
   int read_next_pixel = 1;
   STBI_NOTUSED(ri);
   // the origin fields are read only to advance the stream
   STBI_NOTUSED(tga_x_origin);
   STBI_NOTUSED(tga_y_origin);

   //   do a tiny bit of preprocessing
   if ( tga_image_type >= 8 )
   {
      // types 9..11 are the RLE variants of types 1..3
      tga_image_type -= 8;
      tga_is_RLE = 1;
   }
   // bit 5 of the descriptor set means rows are stored top-to-bottom already
   tga_inverted = 1 - ((tga_inverted >> 5) & 1);

   //   If I'm paletted, then I'll use the number of bits from the palette
   if ( tga_indexed ) tga_comp = stbi__tga_get_comp(tga_palette_bits, 0, &tga_rgb16);
   else tga_comp = stbi__tga_get_comp(tga_bits_per_pixel, (tga_image_type == 3), &tga_rgb16);

   if(!tga_comp) // shouldn't really happen, stbi__tga_test() should have ensured basic consistency
      return stbi__errpuc("bad format", "Can't find out TGA pixelformat");

   //   tga info
   *x = tga_width;
   *y = tga_height;
   if (comp) *comp = tga_comp;

   if (!stbi__mad3sizes_valid(tga_width, tga_height, tga_comp, 0))
      return stbi__errpuc("too large", "Corrupt TGA");

   tga_data = (unsigned char*)stbi__malloc_mad3(tga_width, tga_height, tga_comp, 0);
   if (!tga_data) return stbi__errpuc("outofmem", "Out of memory");

   // skip to the data's starting position (offset usually = 0)
   stbi__skip(s, tga_offset );

   if ( !tga_indexed && !tga_is_RLE && !tga_rgb16 ) {
      // fast path: raw rows can be read straight into their final position
      for (i=0; i < tga_height; ++i) {
         int row = tga_inverted ? tga_height -i - 1 : i;
         stbi_uc *tga_row = tga_data + row*tga_width*tga_comp;
         stbi__getn(s, tga_row, tga_width * tga_comp);
      }
   } else  {
      //   do I need to load a palette?
      if ( tga_indexed)
      {
         // Every pixel is resolved through the palette, and out-of-range indexes
         // fall back to entry 0, so there has to be at least one entry; otherwise
         // the lookup below would read past the end of the palette allocation.
         if (tga_palette_len == 0) {
            STBI_FREE(tga_data);
            return stbi__errpuc("bad palette", "Corrupt TGA");
         }

         //   any data to skip? (offset usually = 0)
         stbi__skip(s, tga_palette_start );
         //   load the palette
         tga_palette = (unsigned char*)stbi__malloc_mad2(tga_palette_len, tga_comp, 0);
         if (!tga_palette) {
            STBI_FREE(tga_data);
            return stbi__errpuc("outofmem", "Out of memory");
         }
         if (tga_rgb16) {
            // 15/16-bit palette entries are expanded to 8-bit RGB one by one
            stbi_uc *pal_entry = tga_palette;
            STBI_ASSERT(tga_comp == STBI_rgb);
            for (i=0; i < tga_palette_len; ++i) {
               stbi__tga_read_rgb16(s, pal_entry);
               pal_entry += tga_comp;
            }
         } else if (!stbi__getn(s, tga_palette, tga_palette_len * tga_comp)) {
               STBI_FREE(tga_data);
               STBI_FREE(tga_palette);
               return stbi__errpuc("bad palette", "Corrupt TGA");
         }
      }
      //   load the data
      for (i=0; i < tga_width * tga_height; ++i)
      {
         //   if I'm in RLE mode, do I need to get a RLE packet header?
         if ( tga_is_RLE )
         {
            if ( RLE_count == 0 )
            {
               //   yep, get the next byte as a RLE command
               int RLE_cmd = stbi__get8(s);
               RLE_count = 1 + (RLE_cmd & 127);
               RLE_repeating = RLE_cmd >> 7;
               read_next_pixel = 1;
            } else if ( !RLE_repeating )
            {
               // raw packet: each pixel of the run is stored explicitly
               read_next_pixel = 1;
            }
         } else
         {
            read_next_pixel = 1;
         }
         //   OK, if I need to read a pixel, do it now
         if ( read_next_pixel )
         {
            //   load however much data we did have
            if ( tga_indexed )
            {
               // read in index, then perform the lookup
               int pal_idx = (tga_bits_per_pixel == 8) ? stbi__get8(s) : stbi__get16le(s);
               if ( pal_idx >= tga_palette_len ) {
                  // invalid index
                  pal_idx = 0;
               }
               pal_idx *= tga_comp;
               for (j = 0; j < tga_comp; ++j) {
                  raw_data[j] = tga_palette[pal_idx+j];
               }
            } else if(tga_rgb16) {
               STBI_ASSERT(tga_comp == STBI_rgb);
               stbi__tga_read_rgb16(s, raw_data);
            } else {
               //   read in the data raw
               for (j = 0; j < tga_comp; ++j) {
                  raw_data[j] = stbi__get8(s);
               }
            }
            //   clear the reading flag for the next pixel
            read_next_pixel = 0;
         } // end of reading a pixel

         // copy data (for a repeating RLE packet this re-emits the last pixel read)
         for (j = 0; j < tga_comp; ++j)
           tga_data[i*tga_comp+j] = raw_data[j];

         //   in case we're in RLE mode, keep counting down
         --RLE_count;
      }
      //   do I need to invert the image?
      if ( tga_inverted )
      {
         // swap row j with its mirror row, byte by byte
         for (j = 0; j*2 < tga_height; ++j)
         {
            int index1 = j * tga_width * tga_comp;
            int index2 = (tga_height - 1 - j) * tga_width * tga_comp;
            for (i = tga_width * tga_comp; i > 0; --i)
            {
               unsigned char temp = tga_data[index1];
               tga_data[index1] = tga_data[index2];
               tga_data[index2] = temp;
               ++index1;
               ++index2;
            }
         }
      }
      //   clear my palette, if I had one
      if ( tga_palette != NULL )
      {
         STBI_FREE( tga_palette );
      }
   }

   // swap RGB - if the source data was RGB16, it already is in the right order
   if (tga_comp >= 3 && !tga_rgb16)
   {
      unsigned char* tga_pixel = tga_data;
      for (i=0; i < tga_width * tga_height; ++i)
      {
         unsigned char temp = tga_pixel[0];
         tga_pixel[0] = tga_pixel[2];
         tga_pixel[2] = temp;
         tga_pixel += tga_comp;
      }
   }

   // convert to target component count
   if (req_comp && req_comp != tga_comp)
      tga_data = stbi__convert_format(tga_data, tga_comp, req_comp, tga_width, tga_height);

   //   OK, done
   return tga_data;
}
5561 #endif
5562 
5563 // *************************************************************************************************
5564 // Photoshop PSD loader -- PD by Thatcher Ulrich, integration by Nicolas Schulz, tweaked by STB
5565 
5566 #ifndef STBI_NO_PSD
// Returns nonzero when the stream starts with the big-endian value 0x38425053 ("8BPS").
// The stream is rewound afterwards.
static int stbi__psd_test(stbi__context *s)
{
   stbi__uint32 signature = stbi__get32be(s);
   stbi__rewind(s);
   return signature == 0x38425053;
}
5573 
// Decode one RLE-compressed channel of `pixelCount` samples from `s`.
// Samples are written to p[0], p[4], p[8], ... (one channel of a 4-channel
// interleaved buffer). Returns 1 on success, 0 if a run would write more than
// `pixelCount` samples (corrupt data).
static int stbi__psd_decode_rle(stbi__context *s, stbi_uc *p, int pixelCount)
{
   int written = 0;

   while (written < pixelCount) {
      int room = pixelCount - written;
      int header = stbi__get8(s);
      int run, k;

      if (header == 128)
         continue;                        // 128 is a no-op

      if (header < 128) {
         // literal run: copy the next header+1 bytes
         run = header + 1;
         if (run > room) return 0;        // corrupt data
         written += run;
         for (k = 0; k < run; ++k, p += 4)
            *p = stbi__get8(s);
      } else {
         // replicate run: header is a negative 8-bit count, the next byte is
         // repeated (257 - header) times
         stbi_uc fill;
         run = 257 - header;
         if (run > room) return 0;        // corrupt data
         fill = stbi__get8(s);
         written += run;
         for (k = 0; k < run; ++k, p += 4)
            *p = fill;
      }
   }

   return 1;
}
5611 
// Decode the composited image of a PSD file into a 4-channel interleaved buffer.
//   x, y     : receive width and height
//   comp     : if non-NULL, always receives 4
//   req_comp : requested channel count of the result (0 or 4 = no conversion)
//   ri       : bits_per_channel is set to 16 when 16-bit output is produced
//   bpc      : 16 asks for 16-bit output (honoured only for uncompressed 16-bit files)
// Returns the pixel buffer, or a null pointer on a bad/unsupported header,
// oversized image, allocation failure or corrupt RLE data.
static void *stbi__psd_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri, int bpc)
{
   int total;                 // number of pixels in the image
   int nchan, packing;
   int ch, k;
   int depth;
   int width, height;
   stbi_uc *image;
   STBI_NOTUSED(ri);

   // signature must be "8BPS"
   if (stbi__get32be(s) != 0x38425053)
      return stbi__errpuc("not PSD", "Corrupt PSD image");

   // only file version 1 is handled
   if (stbi__get16be(s) != 1)
      return stbi__errpuc("wrong version", "Unsupported version of PSD image");

   // 6 reserved bytes
   stbi__skip(s, 6 );

   // number of channels stored in the file (R, G, B, A, etc)
   nchan = stbi__get16be(s);
   if (nchan < 0 || nchan > 16)
      return stbi__errpuc("wrong channel count", "Unsupported number of channels in PSD image");

   // rows first, then columns
   height = stbi__get32be(s);
   width  = stbi__get32be(s);

   // only 8 and 16 bits per channel are handled
   depth = stbi__get16be(s);
   if (depth != 8 && depth != 16)
      return stbi__errpuc("unsupported bit depth", "PSD bit depth is not 8 or 16 bit");

   // color mode must be 3 (RGB); other modes such as bitmap, grayscale,
   // indexed, CMYK, multichannel, duotone and Lab are rejected
   if (stbi__get16be(s) != 3)
      return stbi__errpuc("wrong color format", "PSD is not in RGB color format");

   // three length-prefixed sections are skipped: mode data, image resources,
   // and the reserved data
   stbi__skip(s,stbi__get32be(s) );
   stbi__skip(s, stbi__get32be(s) );
   stbi__skip(s, stbi__get32be(s) );

   // compression: 0 = raw, 1 = RLE; anything else is unknown
   packing = stbi__get16be(s);
   if (packing > 1)
      return stbi__errpuc("bad compression", "PSD has an unknown compression format");

   // make sure 4*width*height is representable
   if (!stbi__mad3sizes_valid(4, width, height, 0))
      return stbi__errpuc("too large", "Corrupt PSD");

   // 16-bit output is only produced for raw 16-bit data when the caller asked for it
   if (packing == 0 && depth == 16 && bpc == 16) {
      image = (stbi_uc *) stbi__malloc_mad3(8, width, height, 0);
      ri->bits_per_channel = 16;
   } else {
      image = (stbi_uc *) stbi__malloc(4 * width*height);
   }

   if (image == NULL) return stbi__errpuc("outofmem", "Out of memory");
   total = width*height;

   if (packing != 0) {
      // RLE data is preceded by a 2-byte count for every row of every channel;
      // those counts are not needed, so skip them.
      stbi__skip(s, height * nchan * 2 );

      // channels are stored one after another; only the first four are used
      for (ch = 0; ch < 4; ch++) {
         stbi_uc *dst = image + ch;

         if (ch < nchan) {
            if (!stbi__psd_decode_rle(s, dst, total)) {
               STBI_FREE(image);
               return stbi__errpuc("corrupt", "bad RLE data");
            }
         } else {
            // channel missing from the file: alpha defaults to opaque, colour to 0
            stbi_uc fill = (stbi_uc) (ch == 3 ? 255 : 0);
            for (k = 0; k < total; k++)
               dst[4*k] = fill;
         }
      }
   } else {
      // raw data: each channel in order (Red, Green, Blue, Alpha, ...), one
      // 8-bit or 16-bit big-endian value per pixel
      for (ch = 0; ch < 4; ch++) {
         if (ch >= nchan) {
            // channel missing from the file: fill with the default value
            if (depth == 16 && bpc == 16) {
               stbi__uint16 *dst16 = ((stbi__uint16 *) image) + ch;
               stbi__uint16 fill16 = ch == 3 ? 65535 : 0;
               for (k = 0; k < total; k++)
                  dst16[4*k] = fill16;
            } else {
               stbi_uc *dst = image + ch;
               stbi_uc fill = ch == 3 ? 255 : 0;
               for (k = 0; k < total; k++)
                  dst[4*k] = fill;
            }
         } else if (ri->bits_per_channel == 16) {
            // 16-bit output: store samples unchanged
            stbi__uint16 *dst16 = ((stbi__uint16 *) image) + ch;
            for (k = 0; k < total; k++)
               dst16[4*k] = (stbi__uint16) stbi__get16be(s);
         } else {
            stbi_uc *dst = image + ch;
            if (depth == 16) {
               // 16-bit input, 8-bit output: keep the high byte
               for (k = 0; k < total; k++)
                  dst[4*k] = (stbi_uc) (stbi__get16be(s) >> 8);
            } else {
               for (k = 0; k < total; k++)
                  dst[4*k] = stbi__get8(s);
            }
         }
      }
   }

   // remove weird white matte from PSD (only for partially transparent pixels)
   if (nchan >= 4) {
      if (ri->bits_per_channel == 16) {
         for (k = 0; k < total; ++k) {
            stbi__uint16 *px16 = (stbi__uint16 *) image + 4*k;
            if (px16[3] != 0 && px16[3] != 65535) {
               float a = px16[3] / 65535.0f;
               float ra = 1.0f / a;
               float inv_a = 65535.0f * (1 - ra);
               px16[0] = (stbi__uint16) (px16[0]*ra + inv_a);
               px16[1] = (stbi__uint16) (px16[1]*ra + inv_a);
               px16[2] = (stbi__uint16) (px16[2]*ra + inv_a);
            }
         }
      } else {
         for (k = 0; k < total; ++k) {
            unsigned char *px8 = image + 4*k;
            if (px8[3] != 0 && px8[3] != 255) {
               float a = px8[3] / 255.0f;
               float ra = 1.0f / a;
               float inv_a = 255.0f * (1 - ra);
               px8[0] = (unsigned char) (px8[0]*ra + inv_a);
               px8[1] = (unsigned char) (px8[1]*ra + inv_a);
               px8[2] = (unsigned char) (px8[2]*ra + inv_a);
            }
         }
      }
   }

   // convert to the requested channel count
   if (req_comp && req_comp != 4) {
      if (ri->bits_per_channel == 16)
         image = (stbi_uc *) stbi__convert_format16((stbi__uint16 *) image, 4, req_comp, width, height);
      else
         image = stbi__convert_format(image, 4, req_comp, width, height);
      if (image == NULL) return image; // stbi__convert_format frees input on failure
   }

   if (comp) *comp = 4;
   *y = height;
   *x = width;

   return image;
}
5809 #endif
5810 
5811 // *************************************************************************************************
5812 // Softimage PIC loader
5813 // by Tom Seddon
5814 //
5815 // See http://softimage.wiki.softimage.com/index.php/INFO:_PIC_file_format
5816 // See http://ozviz.wasp.uwa.edu.au/~pbourke/dataformats/softimagepic/
5817 
5818 #ifndef STBI_NO_PIC
// Read up to four bytes from `s` and compare them with str[0..3].
// Returns 1 if all four match; returns 0 at the first mismatch (bytes after the
// mismatch are not read).
static int stbi__pic_is4(stbi__context *s,const char *str)
{
   int k = 0;
   while (k < 4) {
      if (stbi__get8(s) != (stbi_uc)str[k])
         return 0;
      ++k;
   }
   return 1;
}
5828 
// Check for a PIC header: the 4-byte magic 53 80 F6 34, then 84 bytes that are
// ignored, then the tag "PICT". Returns 1 on match, 0 otherwise.
// Does not rewind the stream.
static int stbi__pic_test_core(stbi__context *s)
{
   int remaining;

   if (!stbi__pic_is4(s,"\x53\x80\xF6\x34"))
      return 0;

   // consume the 84 bytes between the magic number and the "PICT" tag
   for (remaining = 84; remaining > 0; --remaining)
      stbi__get8(s);

   return stbi__pic_is4(s,"PICT") ? 1 : 0;
}
5844 
5845 typedef struct
5846 {
5847    stbi_uc size,type,channel;
5848 } stbi__pic_packet;
5849 
// Read one byte from `s` for every channel selected in `channel`
// (bit 0x80 -> dest[0], 0x40 -> dest[1], 0x20 -> dest[2], 0x10 -> dest[3]).
// Unselected bytes of dest are left untouched.
// Returns dest, or a null pointer if the stream is at EOF before a needed byte.
static stbi_uc *stbi__readval(stbi__context *s, int channel, stbi_uc *dest)
{
   int k;

   for (k = 0; k < 4; ++k) {
      if (!(channel & (0x80 >> k)))
         continue;
      if (stbi__at_eof(s)) return stbi__errpuc("bad file","PIC file too short");
      dest[k] = stbi__get8(s);
   }

   return dest;
}
5863 
// Copy from src to dest only the bytes whose channel bit is set in `channel`
// (bit 0x80 -> byte 0, 0x40 -> byte 1, 0x20 -> byte 2, 0x10 -> byte 3).
static void stbi__copyval(int channel,stbi_uc *dest,const stbi_uc *src)
{
   int k;

   for (k = 0; k < 4; ++k) {
      if (channel & (0x80 >> k))
         dest[k] = src[k];
   }
}
5872 
// Read the PIC packet list and then decode `height` scanlines of `width` pixels
// into `result`, which is addressed as 4 bytes per pixel.
// *comp is set to 4 if any packet carries channel bit 0x10 (alpha), else 3.
// Returns `result` on success, or a null pointer on malformed/truncated data.
static stbi_uc *stbi__pic_load_core(stbi__context *s,int width,int height,int *comp, stbi_uc *result)
{
   stbi__pic_packet packets[10];
   int channels_seen = 0, packet_count = 0, row, more;

   // Collect the chained packet descriptors. Several packets may describe the
   // same channel; they are simply applied in order for every scanline.
   do {
      stbi__pic_packet *pk;

      if (packet_count==sizeof(packets)/sizeof(packets[0]))
         return stbi__errpuc("bad format","too many packets");

      pk = &packets[packet_count++];

      more        = stbi__get8(s);     // nonzero: another packet follows
      pk->size    = stbi__get8(s);
      pk->type    = stbi__get8(s);
      pk->channel = stbi__get8(s);

      channels_seen |= pk->channel;

      if (stbi__at_eof(s))          return stbi__errpuc("bad file","file too short (reading packets)");
      if (pk->size != 8)  return stbi__errpuc("bad format","packet isn't 8bpp");
   } while (more);

   *comp = (channels_seen & 0x10 ? 4 : 3); // has alpha channel?

   for (row = 0; row < height; ++row) {
      int n;

      for (n = 0; n < packet_count; ++n) {
         stbi__pic_packet *pk = &packets[n];
         stbi_uc *dest = result+row*width*4;   // every packet restarts at the row's first pixel

         if (pk->type == 0) {
            // uncompressed: one value per pixel
            int col;

            for (col = 0; col < width; ++col, dest += 4)
               if (!stbi__readval(s,pk->channel,dest))
                  return 0;
         } else if (pk->type == 1) {
            // pure RLE: (count, value) pairs; a count past the row end is clamped
            int left = width, i;

            while (left > 0) {
               stbi_uc count,value[4];

               count = stbi__get8(s);
               if (stbi__at_eof(s))   return stbi__errpuc("bad file","file too short (pure read count)");

               if (count > left)
                  count = (stbi_uc) left;

               if (!stbi__readval(s,pk->channel,value))  return 0;

               for (i = 0; i < count; ++i, dest += 4)
                  stbi__copyval(pk->channel,dest,value);
               left -= count;
            }
         } else if (pk->type == 2) {
            // mixed RLE: counts >= 128 introduce a repeated value, smaller
            // counts introduce count+1 raw values
            int left = width;

            while (left > 0) {
               int count = stbi__get8(s), i;
               if (stbi__at_eof(s))  return stbi__errpuc("bad file","file too short (mixed read count)");

               if (count >= 128) {
                  stbi_uc value[4];

                  // 128 means the real count follows as a 16-bit big-endian value
                  count = (count == 128) ? stbi__get16be(s) : count - 127;
                  if (count > left)
                     return stbi__errpuc("bad file","scanline overrun");

                  if (!stbi__readval(s,pk->channel,value))
                     return 0;

                  for (i = 0; i < count; ++i, dest += 4)
                     stbi__copyval(pk->channel,dest,value);
               } else {
                  ++count;
                  if (count>left) return stbi__errpuc("bad file","scanline overrun");

                  for (i = 0; i < count; ++i, dest += 4)
                     if (!stbi__readval(s,pk->channel,dest))
                        return 0;
               }
               left -= count;
            }
         } else {
            return stbi__errpuc("bad format","packet has bad compression type");
         }
      }
   }

   return result;
}
5982 
// Load a Softimage PIC image.
//   px, py   : receive width and height
//   comp     : if non-NULL, receives the channel count found in the file (3 or 4)
//   req_comp : requested channel count of the result (0 = use the file's)
//   ri       : not used by this loader
// Returns the pixel buffer, or a null pointer if the header is truncated, the
// image is too large, memory cannot be allocated, or the pixel data is bad.
static void *stbi__pic_load(stbi__context *s,int *px,int *py,int *comp,int req_comp, stbi__result_info *ri)
{
   stbi_uc *result;
   int i, x,y, internal_comp;
   STBI_NOTUSED(ri);

   // stbi__pic_load_core always writes through comp, so give it somewhere to write
   if (!comp) comp = &internal_comp;

   // skip the first 92 header bytes
   for (i=0; i<92; ++i)
      stbi__get8(s);

   x = stbi__get16be(s);
   y = stbi__get16be(s);
   if (stbi__at_eof(s))  return stbi__errpuc("bad file","file too short (pic header)");
   if (!stbi__mad3sizes_valid(x, y, 4, 0)) return stbi__errpuc("too large", "PIC image too large to decode");

   stbi__get32be(s); //skip `ratio'
   stbi__get16be(s); //skip `fields'
   stbi__get16be(s); //skip `pad'

   // intermediate buffer is RGBA
   result = (stbi_uc *) stbi__malloc_mad3(x, y, 4, 0);
   // the allocation can fail; memset on a null pointer would crash
   if (!result) return stbi__errpuc("outofmem", "Out of memory");
   // 0xff everywhere, so channels the file does not provide (e.g. alpha) stay at 255
   memset(result, 0xff, x*y*4);

   *px = x;
   *py = y;

   if (!stbi__pic_load_core(s,x,y,comp, result)) {
      // Stop here: *comp may not have been written, and converting a null
      // buffer would dereference it.
      STBI_FREE(result);
      return 0;
   }

   if (req_comp == 0) req_comp = *comp;
   result=stbi__convert_format(result,4,req_comp,x,y);

   return result;
}
6018 
// Returns nonzero if the stream looks like a PIC file; the stream is rewound
// afterwards.
static int stbi__pic_test(stbi__context *s)
{
   int looks_like_pic = stbi__pic_test_core(s);
   stbi__rewind(s);
   return looks_like_pic;
}
6025 #endif
6026 
6027 // *************************************************************************************************
6028 // GIF loader -- public domain by Jean-Marc Lienher -- simplified/shrunk by stb
6029 
6030 #ifndef STBI_NO_GIF
6031 typedef struct
6032 {
6033    stbi__int16 prefix;
6034    stbi_uc first;
6035    stbi_uc suffix;
6036 } stbi__gif_lzw;
6037 
6038 typedef struct
6039 {
6040    int w,h;
6041    stbi_uc *out, *old_out;             // output buffer (always 4 components)
6042    int flags, bgindex, ratio, transparent, eflags, delay;
6043    stbi_uc  pal[256][4];
6044    stbi_uc lpal[256][4];
6045    stbi__gif_lzw codes[4096];
6046    stbi_uc *color_table;
6047    int parse, step;
6048    int lflags;
6049    int start_x, start_y;
6050    int max_x, max_y;
6051    int cur_x, cur_y;
6052    int line_size;
6053 } stbi__gif;
6054 
// Returns 1 if the next six bytes are "GIF87a" or "GIF89a", 0 otherwise.
// Reading stops at the first byte that does not match; the stream is not rewound.
static int stbi__gif_test_raw(stbi__context *s)
{
   static const char magic[4] = { 'G', 'I', 'F', '8' };
   int k, version;

   for (k = 0; k < 4; ++k)
      if (stbi__get8(s) != magic[k]) return 0;

   version = stbi__get8(s);
   if (version != '9' && version != '7') return 0;

   return stbi__get8(s) == 'a' ? 1 : 0;
}
6064 
// Returns nonzero if the stream starts with a GIF signature; the stream is
// rewound afterwards.
static int stbi__gif_test(stbi__context *s)
{
   int is_gif = stbi__gif_test_raw(s);
   stbi__rewind(s);
   return is_gif;
}
6071 
// Read `num_entries` 3-byte colours from `s` into pal[0..num_entries-1].
// The three bytes are stored in reverse order (first byte -> [2], third -> [0]);
// byte [3] is 255, except for entry `transp` where it is 0 (pass -1 for none).
static void stbi__gif_parse_colortable(stbi__context *s, stbi_uc pal[256][4], int num_entries, int transp)
{
   int idx;
   for (idx = 0; idx < num_entries; ++idx) {
      stbi_uc *entry = pal[idx];
      entry[2] = stbi__get8(s);
      entry[1] = stbi__get8(s);
      entry[0] = stbi__get8(s);
      entry[3] = (idx == transp) ? 0 : 255;
   }
}
6082 
// Parse the GIF signature and the header fields that follow it into `g`.
//   comp    : if non-NULL, receives 4
//   is_info : nonzero to stop after the fixed header (the palette is not read)
// Returns 1 on success, 0 (via stbi__err) if the signature is not GIF87a/GIF89a.
static int stbi__gif_header(stbi__context *s, stbi__gif *g, int *comp, int is_info)
{
   static const char magic[4] = { 'G', 'I', 'F', '8' };
   stbi_uc version;
   int k;

   for (k = 0; k < 4; ++k)
      if (stbi__get8(s) != magic[k])
         return stbi__err("not GIF", "Corrupt GIF");

   version = stbi__get8(s);
   if (version != '7' && version != '9')    return stbi__err("not GIF", "Corrupt GIF");
   if (stbi__get8(s) != 'a')                return stbi__err("not GIF", "Corrupt GIF");

   stbi__g_failure_reason = "";

   // fixed-size fields following the signature
   g->w = stbi__get16le(s);
   g->h = stbi__get16le(s);
   g->flags = stbi__get8(s);
   g->bgindex = stbi__get8(s);
   g->ratio = stbi__get8(s);
   g->transparent = -1;

   if (comp != 0) *comp = 4;  // can't actually tell whether it's 3 or 4 until we parse the comments

   // when bit 0x80 of flags is set a palette follows; its entry count is
   // 2 << (flags & 7). It is skipped for info-only queries.
   if (!is_info && (g->flags & 0x80))
      stbi__gif_parse_colortable(s,g->pal, 2 << (g->flags & 7), -1);

   return 1;
}
6110 
// Read just enough of a GIF header to report its dimensions.
//   x, y : if non-NULL, receive width and height
//   comp : if non-NULL, receives the channel count reported by stbi__gif_header
// Returns 1 on success. Returns 0 if the temporary decoder state cannot be
// allocated, or if the header is not a GIF header (the stream is rewound in
// that case).
static int stbi__gif_info_raw(stbi__context *s, int *x, int *y, int *comp)
{
   // stbi__gif is large, so it lives on the heap
   stbi__gif* g = (stbi__gif*) stbi__malloc(sizeof(stbi__gif));
   // the header parser writes through g, so a failed allocation must stop here
   if (!g) return stbi__err("outofmem", "Out of memory");
   if (!stbi__gif_header(s, g, comp, 1)) {
      STBI_FREE(g);
      stbi__rewind( s );
      return 0;
   }
   if (x) *x = g->w;
   if (y) *y = g->h;
   STBI_FREE(g);
   return 1;
}
6124 
// Emit the pixel sequence for LZW code `code` into g->out, advancing the
// output cursor (cur_x/cur_y) and moving on to the next pass when cur_y runs
// past max_y while passes remain.
static void stbi__out_gif_code(stbi__gif *g, stbi__uint16 code)
{
   stbi__gif_lzw *entry = &g->codes[code];
   stbi_uc *dst, *color;

   // The chain is linked backwards, so recurse to emit the prefix first;
   // working backwards through an interleaved image would be nasty.
   if (entry->prefix >= 0)
      stbi__out_gif_code(g, entry->prefix);

   // past the last row: drop the pixel
   if (g->cur_y >= g->max_y) return;

   dst   = g->out + (g->cur_x + g->cur_y);
   color = g->color_table + entry->suffix * 4;

   // only write when the palette entry's byte [3] is at least 128; bytes 0
   // and 2 are swapped on the way out
   if (color[3] >= 128) {
      dst[0] = color[2];
      dst[1] = color[1];
      dst[2] = color[0];
      dst[3] = color[3];
   }
   g->cur_x += 4;

   if (g->cur_x < g->max_x)
      return;

   // end of row: go back to the left edge and step down
   g->cur_x = g->start_x;
   g->cur_y += g->step;

   // ran off the bottom with passes left: start the next pass
   while (g->cur_y >= g->max_y && g->parse > 0) {
      g->step = (1 << g->parse) * g->line_size;
      g->cur_y = g->start_y + (g->step >> 1);
      --g->parse;
   }
}
6158 
// Decodes the LZW-compressed raster data of one GIF image.
//
// Reads the LZW minimum-code-size byte and then the length-prefixed data
// sub-blocks from 's', passing every decoded code to stbi__out_gif_code(),
// which writes the pixels through 'g'.
//
// Returns g->out when the end-of-stream code or a zero-length sub-block is
// reached. Returns NULL when the code-size byte is greater than 12, and the
// value of stbi__errpuc() for a malformed code stream (presumably NULL as
// well -- confirm against stbi__errpuc).
static stbi_uc *stbi__process_gif_raster(stbi__context *s, stbi__gif *g)
{
   stbi_uc lzw_cs;
   stbi__int32 len, init_code;
   stbi__uint32 first;
   stbi__int32 codesize, codemask, avail, oldcode, bits, valid_bits, clear;
   stbi__gif_lzw *p;

   lzw_cs = stbi__get8(s);
   if (lzw_cs > 12) return NULL;
   clear = 1 << lzw_cs;             // the clear code; clear+1 is the end-of-stream code
   first = 1;                       // stays set until the first clear code has been seen
   codesize = lzw_cs + 1;
   codemask = (1 << codesize) - 1;
   bits = 0;                        // bit accumulator, filled from the low end
   valid_bits = 0;                  // number of usable bits currently in 'bits'
   // seed the table: every code below 'clear' is a single-symbol string with no prefix
   for (init_code = 0; init_code < clear; init_code++) {
      g->codes[init_code].prefix = -1;
      g->codes[init_code].first = (stbi_uc) init_code;
      g->codes[init_code].suffix = (stbi_uc) init_code;
   }

   // support no starting clear code
   // (the table state is set up as if a clear code had been read; note that the
   // 'first' check below still rejects data codes that arrive before a clear code)
   avail = clear+2;                 // next free table slot
   oldcode = -1;                    // no previous code yet

   len = 0;                         // bytes remaining in the current data sub-block
   for(;;) {
      if (valid_bits < codesize) {
         // not enough bits for a whole code: pull in one more byte
         if (len == 0) {
            len = stbi__get8(s); // start new block
            if (len == 0)
               return g->out;       // zero-length sub-block ends the data
         }
         --len;
         bits |= (stbi__int32) stbi__get8(s) << valid_bits;
         valid_bits += 8;
      } else {
         // extract the next code from the low bits of the accumulator
         stbi__int32 code = bits & codemask;
         bits >>= codesize;
         valid_bits -= codesize;
         // @OPTIMIZE: is there some way we can accelerate the non-clear path?
         if (code == clear) {  // clear code
            // reset code width and table fill level to their initial state
            codesize = lzw_cs + 1;
            codemask = (1 << codesize) - 1;
            avail = clear + 2;
            oldcode = -1;
            first = 0;
         } else if (code == clear + 1) { // end of stream code
            // discard the rest of this sub-block and any following ones up to the
            // zero-length terminator
            stbi__skip(s, len);
            while ((len = stbi__get8(s)) > 0)
               stbi__skip(s,len);
            return g->out;
         } else if (code <= avail) {
            if (first) return stbi__errpuc("no clear code", "Corrupt GIF");

            if (oldcode >= 0) {
               // add a new table entry: the previous string extended by one symbol
               p = &g->codes[avail++];
               // p is only written below when this check passes
               if (avail > 4096)        return stbi__errpuc("too many codes", "Corrupt GIF");
               p->prefix = (stbi__int16) oldcode;
               p->first = g->codes[oldcode].first;
               p->suffix = (code == avail) ? p->first : g->codes[code].first;
            } else if (code == avail)
               // a code referring to a not-yet-defined entry needs a previous code
               return stbi__errpuc("illegal code in raster", "Corrupt GIF");

            stbi__out_gif_code(g, (stbi__uint16) code);

            // widen the code size once the table fills the current width;
            // the 0x0FFF bound stops the width from growing once avail reaches 4096
            if ((avail & codemask) == 0 && avail <= 0x0FFF) {
               codesize++;
               codemask = (1 << codesize) - 1;
            }

            oldcode = code;
         } else {
            // code is beyond anything the table could contain at this point
            return stbi__errpuc("illegal code in raster", "Corrupt GIF");
         }
      }
   }
}
6238 
// Paints a rectangle of g->out with the background palette entry, alpha 0.
// All coordinates are byte offsets: x0/x1 are offsets within a row (advanced
// 4 bytes per pixel) and y0/y1 are row start offsets (advanced 4 * g->w bytes
// per row). The palette entry's channels 2,1,0 go to output bytes 0,1,2.
static void stbi__fill_gif_background(stbi__gif *g, int x0, int y0, int x1, int y1)
{
   stbi_uc *bg = g->pal[g->bgindex];
   int row_off = y0;

   while (row_off < y1) {
      int col_off = x0;
      while (col_off < x1) {
         stbi_uc *px = g->out + (row_off + col_off);
         px[0] = bg[2];
         px[1] = bg[1];
         px[2] = bg[0];
         px[3] = 0;
         col_off += 4;
      }
      row_off += 4 * g->w;
   }
}
6253 
// this function is designed to support animated gifs, although stb_image doesn't support it
//
// Decodes the next image ("frame") of a GIF stream into a freshly allocated
// g->w * g->h * 4 byte buffer stored in g->out.
//
// On the first call (g->out == 0) the GIF header is parsed first. The new
// buffer is then initialised according to the disposal field of g->eflags, and
// blocks are read from 's' until an image descriptor has been decoded.
//
// Returns:
//    - the decoded frame (the result of stbi__process_gif_raster) on success;
//    - (stbi_uc *) s as an "end of stream" marker when the 0x3B terminator is read;
//    - 0 / the value of stbi__errpuc() on failure.
// The previous g->out is not freed here; it is only retained in g->old_out for
// the "do not dispose" case. 'req_comp' is unused.
static stbi_uc *stbi__gif_load_next(stbi__context *s, stbi__gif *g, int *comp, int req_comp)
{
   int i;
   stbi_uc *prev_out = 0;

   if (g->out == 0 && !stbi__gif_header(s, g, comp,0))
      return 0; // stbi__g_failure_reason set by stbi__gif_header

   if (!stbi__mad3sizes_valid(g->w, g->h, 4, 0))
      return stbi__errpuc("too large", "GIF too large");

   prev_out = g->out;
   g->out = (stbi_uc *) stbi__malloc_mad3(4, g->w, g->h, 0);
   if (g->out == 0) return stbi__errpuc("outofmem", "Out of memory");

   // bits 2..4 of eflags select how the new buffer is initialised
   switch ((g->eflags & 0x1C) >> 2) {
      case 0: // unspecified (also always used on 1st frame)
         stbi__fill_gif_background(g, 0, 0, 4 * g->w, 4 * g->w * g->h);
         break;
      case 1: // do not dispose
         if (prev_out) memcpy(g->out, prev_out, 4 * g->w * g->h);
         g->old_out = prev_out;
         break;
      case 2: // dispose to background
         // start from the previous frame, then clear the rectangle described by
         // the start/max offsets currently stored in g
         if (prev_out) memcpy(g->out, prev_out, 4 * g->w * g->h);
         stbi__fill_gif_background(g, g->start_x, g->start_y, g->max_x, g->max_y);
         break;
      case 3: // dispose to previous
         // copy only the stored rectangle, row by row, from g->old_out;
         // nothing is written when g->old_out is not set
         if (g->old_out) {
            for (i = g->start_y; i < g->max_y; i += 4 * g->w)
               memcpy(&g->out[i + g->start_x], &g->old_out[i + g->start_x], g->max_x - g->start_x);
         }
         break;
   }

   // read blocks until an image has been decoded or the stream ends
   for (;;) {
      switch (stbi__get8(s)) {
         case 0x2C: /* Image Descriptor */
         {
            int prev_trans = -1;
            stbi__int32 x, y, w, h;
            stbi_uc *o;

            x = stbi__get16le(s);
            y = stbi__get16le(s);
            w = stbi__get16le(s);
            h = stbi__get16le(s);
            // the frame rectangle must lie inside the g->w x g->h canvas
            if (((x + w) > (g->w)) || ((y + h) > (g->h)))
               return stbi__errpuc("bad Image Descriptor", "Corrupt GIF");

            // all positions below are byte offsets into g->out
            // (4 bytes per pixel, line_size bytes per row)
            g->line_size = g->w * 4;
            g->start_x = x * 4;
            g->start_y = y * g->line_size;
            g->max_x   = g->start_x + w * 4;
            g->max_y   = g->start_y + h * g->line_size;
            g->cur_x   = g->start_x;
            g->cur_y   = g->start_y;

            g->lflags = stbi__get8(s);

            // lflags bit 0x40: interlaced image
            if (g->lflags & 0x40) {
               g->step = 8 * g->line_size; // first interlaced spacing
               g->parse = 3;
            } else {
               g->step = g->line_size;
               g->parse = 0;
            }

            // lflags bit 0x80: a local color table with 2 << (lflags & 7) entries
            // follows; otherwise use the global table if the header flags have one
            if (g->lflags & 0x80) {
               stbi__gif_parse_colortable(s,g->lpal, 2 << (g->lflags & 7), g->eflags & 0x01 ? g->transparent : -1);
               g->color_table = (stbi_uc *) g->lpal;
            } else if (g->flags & 0x80) {
               // temporarily zero the alpha of the transparent entry in the global
               // palette; the old value is restored after a successful decode
               if (g->transparent >= 0 && (g->eflags & 0x01)) {
                  prev_trans = g->pal[g->transparent][3];
                  g->pal[g->transparent][3] = 0;
               }
               g->color_table = (stbi_uc *) g->pal;
            } else
               return stbi__errpuc("missing color table", "Corrupt GIF");

            o = stbi__process_gif_raster(s, g);
            if (o == NULL) return NULL;

            if (prev_trans != -1)
               g->pal[g->transparent][3] = (stbi_uc) prev_trans;

            return o;
         }

         case 0x21: // Extension block (only the Graphic Control Extension, 0xF9, is interpreted)
         {
            int len;
            if (stbi__get8(s) == 0xF9) { // Graphic Control Extension.
               len = stbi__get8(s);
               if (len == 4) {
                  g->eflags = stbi__get8(s);
                  g->delay = stbi__get16le(s);
                  g->transparent = stbi__get8(s);
               } else {
                  // unexpected size: skip the payload and resume block parsing
                  // without draining further sub-blocks
                  stbi__skip(s, len);
                  break;
               }
            }
            // skip the remaining sub-blocks up to the zero-length terminator
            while ((len = stbi__get8(s)) != 0)
               stbi__skip(s, len);
            break;
         }

         case 0x3B: // gif stream termination code
            return (stbi_uc *) s; // using '1' causes warning on some compilers

         default:
            return stbi__errpuc("unknown code", "Corrupt GIF");
      }
   }

   // not reached (the loop above only exits via return)
   STBI_NOTUSED(req_comp);
}
6373 
// Loads the first image of a GIF stream.
//
// On success stores the canvas size in *x / *y and returns the pixel buffer,
// converted from 4 components to 'req_comp' when req_comp is non-zero and not 4.
// Returns 0 on failure, including the case where the stream terminator is
// reached before any image. 'ri' is unused.
static void *stbi__gif_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri)
{
   stbi_uc *u = 0;
   stbi__gif* g = (stbi__gif*) stbi__malloc(sizeof(stbi__gif));
   STBI_NOTUSED(ri);

   // the decoder state is heap allocated; bail out cleanly instead of
   // handing a NULL pointer to memset when the allocation fails
   if (g == NULL) return stbi__errpuc("outofmem", "Out of memory");
   memset(g, 0, sizeof(*g));

   u = stbi__gif_load_next(s, g, comp, req_comp);
   if (u == (stbi_uc *) s) u = 0;  // end of animated gif marker
   if (u) {
      *x = g->w;
      *y = g->h;
      if (req_comp && req_comp != 4)
         u = stbi__convert_format(u, 4, req_comp, g->w, g->h);
   }
   else if (g->out)
      STBI_FREE(g->out);   // decode failed after the frame buffer was allocated
   STBI_FREE(g);
   return u;
}
6394 
// Reports GIF dimensions/components by delegating to stbi__gif_info_raw().
static int stbi__gif_info(stbi__context *s, int *x, int *y, int *comp)
{
   int found = stbi__gif_info_raw(s, x, y, comp);
   return found;
}
6399 #endif
6400 
6401 // *************************************************************************************************
6402 // Radiance RGBE HDR loader
6403 // originally by Nicolas Schulz
6404 #ifndef STBI_NO_HDR
// Compares the next bytes of 's' against 'signature'.
// Returns 1 (after rewinding the stream) when every byte matches, and 0 at
// the first mismatch -- in that case the stream is left where it stopped.
static int stbi__hdr_test_core(stbi__context *s, const char *signature)
{
   const char *expect = signature;
   while (*expect) {
      if (stbi__get8(s) != *expect)
         return 0;
      ++expect;
   }
   stbi__rewind(s);
   return 1;
}
6414 
// Returns non-zero when the stream starts with either accepted HDR signature
// ("#?RADIANCE\n" or "#?RGBE\n"). The stream is rewound before returning.
static int stbi__hdr_test(stbi__context* s)
{
   int matched = stbi__hdr_test_core(s, "#?RADIANCE\n");
   stbi__rewind(s);
   if (matched)
      return matched;

   matched = stbi__hdr_test_core(s, "#?RGBE\n");
   stbi__rewind(s);
   return matched;
}
6425 
6426 #define STBI__HDR_BUFLEN  1024
stbi__hdr_gettoken(stbi__context * z,char * buffer)6427 static char *stbi__hdr_gettoken(stbi__context *z, char *buffer)
6428 {
6429    int len=0;
6430    char c = '\0';
6431 
6432    c = (char) stbi__get8(z);
6433 
6434    while (!stbi__at_eof(z) && c != '\n') {
6435       buffer[len++] = c;
6436       if (len == STBI__HDR_BUFLEN-1) {
6437          // flush to end of line
6438          while (!stbi__at_eof(z) && stbi__get8(z) != '\n')
6439             ;
6440          break;
6441       }
6442       c = (char) stbi__get8(z);
6443    }
6444 
6445    buffer[len] = 0;
6446    return buffer;
6447 }
6448 
// Converts one 4-byte RGBE value in 'input' to 'req_comp' floats in 'output'.
// input[3] is the shared exponent; a zero exponent produces zero color.
// For req_comp <= 2 the three mantissas are averaged into output[0]; for
// req_comp 2 and 4 the last component is set to 1.
static void stbi__hdr_convert(float *output, stbi_uc *input, int req_comp)
{
   float scale;

   if (input[3] == 0) {
      // zero exponent: zero color, alpha (if requested) still 1
      if (req_comp == 3 || req_comp == 4) {
         if (req_comp == 4) output[3] = 1;
         output[0] = output[1] = output[2] = 0;
      } else if (req_comp == 1 || req_comp == 2) {
         if (req_comp == 2) output[1] = 1;
         output[0] = 0;
      }
      return;
   }

   // Exponent
   scale = (float) ldexp(1.0f, input[3] - (int)(128 + 8));

   if (req_comp > 2) {
      output[0] = input[0] * scale;
      output[1] = input[1] * scale;
      output[2] = input[2] * scale;
   } else {
      output[0] = (input[0] + input[1] + input[2]) * scale / 3;
   }

   if (req_comp == 2)
      output[1] = 1;
   else if (req_comp == 4)
      output[3] = 1;
}
6475 
// Decodes a Radiance RGBE (.hdr) image from 's' into a newly allocated float
// buffer.
//
// The header must start with "#?RADIANCE" or "#?RGBE", contain the line
// "FORMAT=32-bit_rle_rgbe", and be followed by a "-Y <height> +X <width>"
// resolution line. Pixel data may be flat RGBE or per-scanline RLE.
//
// On success stores the size in *x / *y (written unconditionally), reports 3
// components through 'comp' when it is non-NULL, and returns
// width * height * req_comp floats (req_comp == 0 is treated as 3). The
// returned buffer is not freed here. On failure returns the value of
// stbi__errpf(); buffers allocated here are released first. 'ri' is unused.
static float *stbi__hdr_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri)
{
   char buffer[STBI__HDR_BUFLEN];
   char *token;
   int valid = 0;
   int width, height;
   stbi_uc *scanline;
   float *hdr_data;
   int len;
   unsigned char count, value;
   int i, j, k, c1,c2, z;
   const char *headerToken;
   STBI_NOTUSED(ri);

   // Check identifier
   headerToken = stbi__hdr_gettoken(s,buffer);
   if (strcmp(headerToken, "#?RADIANCE") != 0 && strcmp(headerToken, "#?RGBE") != 0)
      return stbi__errpf("not HDR", "Corrupt HDR image");

   // Parse header (ends at the first empty line)
   for(;;) {
      token = stbi__hdr_gettoken(s,buffer);
      if (token[0] == 0) break;
      if (strcmp(token, "FORMAT=32-bit_rle_rgbe") == 0) valid = 1;
   }

   if (!valid)    return stbi__errpf("unsupported format", "Unsupported HDR format");

   // Parse width and height
   // can't use sscanf() if we're not using stdio!
   token = stbi__hdr_gettoken(s,buffer);
   if (strncmp(token, "-Y ", 3))  return stbi__errpf("unsupported data layout", "Unsupported HDR format");
   token += 3;
   height = (int) strtol(token, &token, 10);
   while (*token == ' ') ++token;
   if (strncmp(token, "+X ", 3))  return stbi__errpf("unsupported data layout", "Unsupported HDR format");
   token += 3;
   width = (int) strtol(token, NULL, 10);

   *x = width;
   *y = height;

   if (comp) *comp = 3;
   if (req_comp == 0) req_comp = 3;

   if (!stbi__mad4sizes_valid(width, height, req_comp, sizeof(float), 0))
      return stbi__errpf("too large", "HDR image is too large");

   // Read data
   hdr_data = (float *) stbi__malloc_mad4(width, height, req_comp, sizeof(float), 0);
   if (!hdr_data)
      return stbi__errpf("outofmem", "Out of memory");

   // Load image data
   // image data is stored as some number of scanlines
   if ( width < 8 || width >= 32768) {
      // Read flat data
      for (j=0; j < height; ++j) {
         for (i=0; i < width; ++i) {
            stbi_uc rgbe[4];
           main_decode_loop:
            stbi__getn(s, rgbe, 4);
            stbi__hdr_convert(hdr_data + j * width * req_comp + i * req_comp, rgbe, req_comp);
         }
      }
   } else {
      // Read RLE-encoded data
      scanline = NULL;

      for (j = 0; j < height; ++j) {
         c1 = stbi__get8(s);
         c2 = stbi__get8(s);
         len = stbi__get8(s);
         if (c1 != 2 || c2 != 2 || (len & 0x80)) {
            // not run-length encoded, so we have to actually use THIS data as a decoded
            // pixel (note this can't be a valid pixel--one of RGB must be >= 128)
            stbi_uc rgbe[4];
            rgbe[0] = (stbi_uc) c1;
            rgbe[1] = (stbi_uc) c2;
            rgbe[2] = (stbi_uc) len;
            rgbe[3] = (stbi_uc) stbi__get8(s);
            stbi__hdr_convert(hdr_data, rgbe, req_comp);
            // pixel 0 is done; continue in the flat reader at row 0, column 1
            i = 1;
            j = 0;
            STBI_FREE(scanline);
            goto main_decode_loop; // yes, this makes no sense
         }
         // the 16-bit scanline length must equal the image width
         len <<= 8;
         len |= stbi__get8(s);
         if (len != width) { STBI_FREE(hdr_data); STBI_FREE(scanline); return stbi__errpf("invalid decoded scanline length", "corrupt HDR"); }
         if (scanline == NULL) {
            scanline = (stbi_uc *) stbi__malloc_mad2(width, 4, 0);
            if (!scanline) {
               STBI_FREE(hdr_data);
               return stbi__errpf("outofmem", "Out of memory");
            }
         }

         // each of the 4 channels (R, G, B, E) is run-length coded separately
         // and stored interleaved in 'scanline'
         for (k = 0; k < 4; ++k) {
            int nleft;
            i = 0;
            while ((nleft = width - i) > 0) {
               count = stbi__get8(s);
               if (count > 128) {
                  // Run
                  value = stbi__get8(s);
                  count -= 128;
                  if (count > nleft) { STBI_FREE(hdr_data); STBI_FREE(scanline); return stbi__errpf("corrupt", "bad RLE data in HDR"); }
                  for (z = 0; z < count; ++z)
                     scanline[i++ * 4 + k] = value;
               } else {
                  // Dump
                  // a zero count makes no progress; since stbi__get8 keeps
                  // returning 0 at end of data, accepting it would spin forever
                  // on a truncated file, so treat it as corruption
                  if (count == 0 || count > nleft) { STBI_FREE(hdr_data); STBI_FREE(scanline); return stbi__errpf("corrupt", "bad RLE data in HDR"); }
                  for (z = 0; z < count; ++z)
                     scanline[i++ * 4 + k] = stbi__get8(s);
               }
            }
         }
         for (i=0; i < width; ++i)
            stbi__hdr_convert(hdr_data+(j*width + i)*req_comp, scanline + i*4, req_comp);
      }
      if (scanline)
         STBI_FREE(scanline);
   }

   return hdr_data;
}
6603 
// Reports the dimensions of an HDR image without decoding pixel data.
// Any of x / y / comp may be NULL. Returns 1 with *comp = 3 on success;
// returns 0 after rewinding the stream when the signature, FORMAT line or
// "-Y <h> +X <w>" resolution line is missing.
static int stbi__hdr_info(stbi__context *s, int *x, int *y, int *comp)
{
   char line[STBI__HDR_BUFLEN];
   char *cursor;
   int have_format = 0;
   int unused;

   if (x == NULL) x = &unused;
   if (y == NULL) y = &unused;
   if (comp == NULL) comp = &unused;

   if (stbi__hdr_test(s) == 0)
      goto not_hdr;

   // scan header lines up to the first empty one
   for (cursor = stbi__hdr_gettoken(s,line); cursor[0] != 0; cursor = stbi__hdr_gettoken(s,line)) {
      if (strcmp(cursor, "FORMAT=32-bit_rle_rgbe") == 0)
         have_format = 1;
   }
   if (!have_format)
      goto not_hdr;

   // resolution line: "-Y <height> +X <width>"
   cursor = stbi__hdr_gettoken(s,line);
   if (strncmp(cursor, "-Y ", 3) != 0)
      goto not_hdr;
   cursor += 3;
   *y = (int) strtol(cursor, &cursor, 10);

   while (*cursor == ' ')
      ++cursor;
   if (strncmp(cursor, "+X ", 3) != 0)
      goto not_hdr;
   cursor += 3;
   *x = (int) strtol(cursor, NULL, 10);

   *comp = 3;
   return 1;

not_hdr:
   stbi__rewind( s );
   return 0;
}
6647 #endif // STBI_NO_HDR
6648 
6649 #ifndef STBI_NO_BMP
// Reports BMP dimensions and component count using the shared header parser.
// The stream is always rewound. Returns 0 when the header does not parse;
// otherwise fills the non-NULL outputs (4 components when the header has an
// alpha mask, else 3) and returns 1.
static int stbi__bmp_info(stbi__context *s, int *x, int *y, int *comp)
{
   stbi__bmp_data hdr;
   void *parsed;

   hdr.all_a = 255;
   parsed = stbi__bmp_parse_header(s, &hdr);
   stbi__rewind( s );

   if (parsed != NULL) {
      if (x != NULL) *x = s->img_x;
      if (y != NULL) *y = s->img_y;
      if (comp != NULL) {
         if (hdr.ma)
            *comp = 4;
         else
            *comp = 3;
      }
      return 1;
   }
   return 0;
}
6665 #endif
6666 
6667 #ifndef STBI_NO_PSD
// Reports the dimensions of a PSD image from its file header.
// Any of x / y / comp may be NULL. Returns 1 with *comp = 4 on success;
// returns 0 after rewinding the stream when a header field is rejected.
// Both 8- and 16-bit-per-channel files are accepted, matching the depths this
// library documents as loadable for PSD.
static int stbi__psd_info(stbi__context *s, int *x, int *y, int *comp)
{
   int channelCount, depth, dummy;
   if (!x) x = &dummy;
   if (!y) y = &dummy;
   if (!comp) comp = &dummy;
   if (stbi__get32be(s) != 0x38425053) {   // ASCII "8BPS"
       stbi__rewind( s );
       return 0;
   }
   if (stbi__get16be(s) != 1) {            // only value 1 is accepted here (PSD version field)
       stbi__rewind( s );
       return 0;
   }
   stbi__skip(s, 6);
   channelCount = stbi__get16be(s);
   if (channelCount < 0 || channelCount > 16) {
       stbi__rewind( s );
       return 0;
   }
   *y = stbi__get32be(s);
   *x = stbi__get32be(s);
   depth = stbi__get16be(s);               // bits per channel
   if (depth != 8 && depth != 16) {
       stbi__rewind( s );
       return 0;
   }
   if (stbi__get16be(s) != 3) {            // only value 3 is accepted here (PSD color mode field)
       stbi__rewind( s );
       return 0;
   }
   *comp = 4;
   return 1;
}
6701 #endif
6702 
6703 #ifndef STBI_NO_PIC
// Reports the dimensions and component count of a Softimage PIC image.
// Any of x / y / comp may be NULL. Returns 1 on success (*comp is 4 when any
// packet's channel mask has bit 0x10 set, else 3). Every failure path rewinds
// the stream and returns 0 so that the next format probe starts at the
// beginning.
static int stbi__pic_info(stbi__context *s, int *x, int *y, int *comp)
{
   int act_comp=0,num_packets=0,chained,dummy;
   stbi__pic_packet packets[10];

   if (!x) x = &dummy;
   if (!y) y = &dummy;
   if (!comp) comp = &dummy;

   if (!stbi__pic_is4(s,"\x53\x80\xF6\x34")) {
      stbi__rewind(s);
      return 0;
   }

   stbi__skip(s, 88);

   *x = stbi__get16be(s);
   *y = stbi__get16be(s);
   if (stbi__at_eof(s)) {
      stbi__rewind( s);
      return 0;
   }
   // reject images whose pixel count would exceed 1 << 28
   if ( (*x) != 0 && (1 << 28) / (*x) < (*y)) {
      stbi__rewind( s );
      return 0;
   }

   stbi__skip(s, 8);

   // walk the chained packet descriptors, OR-ing together their channel masks
   do {
      stbi__pic_packet *packet;

      if (num_packets==sizeof(packets)/sizeof(packets[0])) {
         // more packets than the local table can hold: fail like the other
         // paths do, with the stream rewound
         stbi__rewind( s );
         return 0;
      }

      packet = &packets[num_packets++];
      chained = stbi__get8(s);
      packet->size    = stbi__get8(s);
      packet->type    = stbi__get8(s);
      packet->channel = stbi__get8(s);
      act_comp |= packet->channel;

      if (stbi__at_eof(s)) {
          stbi__rewind( s );
          return 0;
      }
      if (packet->size != 8) {
          stbi__rewind( s );
          return 0;
      }
   } while (chained);

   *comp = (act_comp & 0x10 ? 4 : 3);

   return 1;
}
6760 #endif
6761 
6762 // *************************************************************************************************
6763 // Portable Gray Map and Portable Pixel Map loader
6764 // by Ken Miller
6765 //
6766 // PGM: http://netpbm.sourceforge.net/doc/pgm.html
6767 // PPM: http://netpbm.sourceforge.net/doc/ppm.html
6768 //
// Header comments ('#' to end of line) are accepted and skipped (since 2.09).
//
// Known limitations:
//    Does not support ASCII image data (formats P2 and P3)
//    Does not support 16-bit-per-channel
6773 
6774 #ifndef STBI_NO_PNM
6775 
// Returns 1 when the stream starts with "P5" or "P6" (stream left just past
// the two bytes); otherwise rewinds the stream and returns 0.
static int      stbi__pnm_test(stbi__context *s)
{
   char magic = (char) stbi__get8(s);
   char kind  = (char) stbi__get8(s);

   if (magic == 'P' && (kind == '5' || kind == '6'))
      return 1;

   stbi__rewind( s );
   return 0;
}
6787 
// Loads a binary PGM/PPM image.
// Parses the header via stbi__pnm_info() into s->img_x / img_y / img_n, reads
// the raw samples, and converts to 'req_comp' components when that is non-zero
// and differs from the file's component count. Returns the pixel buffer, or
// 0 / the value of stbi__errpuc() on failure. 'ri' is unused.
static void *stbi__pnm_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri)
{
   stbi_uc *pixels;
   STBI_NOTUSED(ri);

   if (!stbi__pnm_info(s, (int *)&s->img_x, (int *)&s->img_y, (int *)&s->img_n))
      return 0;

   *x = s->img_x;
   *y = s->img_y;
   if (comp != NULL)
      *comp = s->img_n;

   if (!stbi__mad3sizes_valid(s->img_n, s->img_x, s->img_y, 0))
      return stbi__errpuc("too large", "PNM too large");

   pixels = (stbi_uc *) stbi__malloc_mad3(s->img_n, s->img_x, s->img_y, 0);
   if (pixels == NULL)
      return stbi__errpuc("outofmem", "Out of memory");
   stbi__getn(s, pixels, s->img_n * s->img_x * s->img_y);

   if (req_comp == 0 || req_comp == s->img_n)
      return pixels;

   // stbi__convert_format frees input on failure, so its result can be
   // returned directly whether or not it is NULL
   return stbi__convert_format(pixels, s->img_n, req_comp, s->img_x, s->img_y);
}
6813 
// Returns 1 for space, tab, newline, vertical tab, form feed and carriage
// return; 0 for every other character.
static int      stbi__pnm_isspace(char c)
{
   switch (c) {
      case ' ':
      case '\t':
      case '\n':
      case '\v':
      case '\f':
      case '\r':
         return 1;
      default:
         return 0;
   }
}
6818 
// Advances past whitespace and '#' comments (which run to the next '\n' or
// '\r'). '*c' holds the current look-ahead character on entry and the first
// character that was not skipped on exit; scanning also stops at end of data.
static void     stbi__pnm_skip_whitespace(stbi__context *s, char *c)
{
   char ch = *c;

   for (;;) {
      while (!stbi__at_eof(s) && stbi__pnm_isspace(ch))
         ch = (char) stbi__get8(s);

      if (stbi__at_eof(s) || ch != '#')
         break;

      // inside a comment: consume up to the end of the line
      while (!stbi__at_eof(s) && ch != '\n' && ch != '\r' )
         ch = (char) stbi__get8(s);
   }

   *c = ch;
}
6832 
// Returns 1 when 'c' is an ASCII decimal digit, 0 otherwise.
static int      stbi__pnm_isdigit(char c)
{
   if (c < '0' || c > '9')
      return 0;
   return 1;
}
6837 
// Parses an unsigned decimal integer starting at the look-ahead character
// '*c', reading further characters from 's'. On return '*c' holds the first
// character that is not part of the number. Returns 0 when no digit is
// present.
//
// The digits come from untrusted file data, so the value saturates at
// 2147483647 instead of overflowing a signed int (undefined behavior); all
// remaining digits are still consumed. This relies on int being at least
// 32 bits wide.
static int      stbi__pnm_getinteger(stbi__context *s, char *c)
{
   int value = 0;

   while (!stbi__at_eof(s) && stbi__pnm_isdigit(*c)) {
      int digit = *c - '0';
      // would value*10 + digit exceed 2147483647?
      if (value > 214748364 || (value == 214748364 && digit > 7))
         value = 2147483647;
      else
         value = value*10 + digit;
      *c = (char) stbi__get8(s);
   }

   return value;
}
6849 
// Parses a binary PGM ("P5") / PPM ("P6") header from the start of the stream.
// Any of x / y / comp may be NULL. On success returns 1 with the stream
// positioned after the max-value field; *comp is 1 for P5 and 3 for P6.
// Returns 0 (stream rewound) when the magic does not match, and the value of
// stbi__err() when the max value exceeds 255.
static int      stbi__pnm_info(stbi__context *s, int *x, int *y, int *comp)
{
   int max_value, sink;
   char ch, magic, variant;

   if (x == NULL) x = &sink;
   if (y == NULL) y = &sink;
   if (comp == NULL) comp = &sink;

   stbi__rewind(s);

   // Get identifier
   magic   = (char) stbi__get8(s);
   variant = (char) stbi__get8(s);
   if (!(magic == 'P' && (variant == '5' || variant == '6'))) {
       stbi__rewind(s);
       return 0;
   }

   // '5' is 1-component .pgm; '6' is 3-component .ppm
   if (variant == '6')
      *comp = 3;
   else
      *comp = 1;

   ch = (char) stbi__get8(s);
   stbi__pnm_skip_whitespace(s, &ch);

   // width
   *x = stbi__pnm_getinteger(s, &ch);
   stbi__pnm_skip_whitespace(s, &ch);

   // height
   *y = stbi__pnm_getinteger(s, &ch);
   stbi__pnm_skip_whitespace(s, &ch);

   // max sample value
   max_value = stbi__pnm_getinteger(s, &ch);

   if (max_value <= 255)
      return 1;
   return stbi__err("max value > 255", "PPM image not 8-bit");
}
6887 #endif
6888 
// Tries each compiled-in format's info routine in a fixed order and returns 1
// as soon as one recognises the stream. When none does, returns the value of
// stbi__err() for an unknown image type.
static int stbi__info_main(stbi__context *s, int *x, int *y, int *comp)
{
#if !defined(STBI_NO_JPEG)
   if (stbi__jpeg_info(s, x, y, comp))
      return 1;
#endif

#if !defined(STBI_NO_PNG)
   if (stbi__png_info(s, x, y, comp))
      return 1;
#endif

#if !defined(STBI_NO_GIF)
   if (stbi__gif_info(s, x, y, comp))
      return 1;
#endif

#if !defined(STBI_NO_BMP)
   if (stbi__bmp_info(s, x, y, comp))
      return 1;
#endif

#if !defined(STBI_NO_PSD)
   if (stbi__psd_info(s, x, y, comp))
      return 1;
#endif

#if !defined(STBI_NO_PIC)
   if (stbi__pic_info(s, x, y, comp))
      return 1;
#endif

#if !defined(STBI_NO_PNM)
   if (stbi__pnm_info(s, x, y, comp))
      return 1;
#endif

#if !defined(STBI_NO_HDR)
   if (stbi__hdr_info(s, x, y, comp))
      return 1;
#endif

   // test tga last because it's a crappy test!
#if !defined(STBI_NO_TGA)
   if (stbi__tga_info(s, x, y, comp))
      return 1;
#endif

   return stbi__err("unknown image type", "Image not of any known type, or corrupt");
}
6930 
6931 #ifndef STBI_NO_STDIO
stbi_info(char const * filename,int * x,int * y,int * comp)6932 STBIDEF int stbi_info(char const *filename, int *x, int *y, int *comp)
6933 {
6934     FILE *f = stbi__fopen(filename, "rb");
6935     int result;
6936     if (!f) return stbi__err("can't fopen", "Unable to open file");
6937     result = stbi_info_from_file(f, x, y, comp);
6938     fclose(f);
6939     return result;
6940 }
6941 
stbi_info_from_file(FILE * f,int * x,int * y,int * comp)6942 STBIDEF int stbi_info_from_file(FILE *f, int *x, int *y, int *comp)
6943 {
6944    int r;
6945    stbi__context s;
6946    long pos = ftell(f);
6947    stbi__start_file(&s, f);
6948    r = stbi__info_main(&s,x,y,comp);
6949    fseek(f,pos,SEEK_SET);
6950    return r;
6951 }
6952 #endif // !STBI_NO_STDIO
6953 
stbi_info_from_memory(stbi_uc const * buffer,int len,int * x,int * y,int * comp)6954 STBIDEF int stbi_info_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp)
6955 {
6956    stbi__context s;
6957    stbi__start_mem(&s,buffer,len);
6958    return stbi__info_main(&s,x,y,comp);
6959 }
6960 
stbi_info_from_callbacks(stbi_io_callbacks const * c,void * user,int * x,int * y,int * comp)6961 STBIDEF int stbi_info_from_callbacks(stbi_io_callbacks const *c, void *user, int *x, int *y, int *comp)
6962 {
6963    stbi__context s;
6964    stbi__start_callbacks(&s, (stbi_io_callbacks *) c, user);
6965    return stbi__info_main(&s,x,y,comp);
6966 }
6967 
6968 #endif // STB_IMAGE_IMPLEMENTATION
6969 
6970 /*
6971    revision history:
6972       2.16  (2017-07-23) all functions have 16-bit variants;
6973                          STBI_NO_STDIO works again;
6974                          compilation fixes;
6975                          fix rounding in unpremultiply;
6976                          optimize vertical flip;
6977                          disable raw_len validation;
6978                          documentation fixes
6979       2.15  (2017-03-18) fix png-1,2,4 bug; now all Imagenet JPGs decode;
6980                          warning fixes; disable run-time SSE detection on gcc;
6981                          uniform handling of optional "return" values;
6982                          thread-safe initialization of zlib tables
6983       2.14  (2017-03-03) remove deprecated STBI_JPEG_OLD; fixes for Imagenet JPGs
6984       2.13  (2016-11-29) add 16-bit API, only supported for PNG right now
6985       2.12  (2016-04-02) fix typo in 2.11 PSD fix that caused crashes
6986       2.11  (2016-04-02) allocate large structures on the stack
6987                          remove white matting for transparent PSD
6988                          fix reported channel count for PNG & BMP
6989                          re-enable SSE2 in non-gcc 64-bit
6990                          support RGB-formatted JPEG
6991                          read 16-bit PNGs (only as 8-bit)
6992       2.10  (2016-01-22) avoid warning introduced in 2.09 by STBI_REALLOC_SIZED
6993       2.09  (2016-01-16) allow comments in PNM files
6994                          16-bit-per-pixel TGA (not bit-per-component)
6995                          info() for TGA could break due to .hdr handling
                         info() for BMP now shares code instead of sloppy parse
6997                          can use STBI_REALLOC_SIZED if allocator doesn't support realloc
6998                          code cleanup
6999       2.08  (2015-09-13) fix to 2.07 cleanup, reading RGB PSD as RGBA
7000       2.07  (2015-09-13) fix compiler warnings
7001                          partial animated GIF support
7002                          limited 16-bpc PSD support
7003                          #ifdef unused functions
7004                          bug with < 92 byte PIC,PNM,HDR,TGA
7005       2.06  (2015-04-19) fix bug where PSD returns wrong '*comp' value
7006       2.05  (2015-04-19) fix bug in progressive JPEG handling, fix warning
7007       2.04  (2015-04-15) try to re-enable SIMD on MinGW 64-bit
7008       2.03  (2015-04-12) extra corruption checking (mmozeiko)
7009                          stbi_set_flip_vertically_on_load (nguillemot)
7010                          fix NEON support; fix mingw support
7011       2.02  (2015-01-19) fix incorrect assert, fix warning
7012       2.01  (2015-01-17) fix various warnings; suppress SIMD on gcc 32-bit without -msse2
7013       2.00b (2014-12-25) fix STBI_MALLOC in progressive JPEG
7014       2.00  (2014-12-25) optimize JPG, including x86 SSE2 & NEON SIMD (ryg)
7015                          progressive JPEG (stb)
7016                          PGM/PPM support (Ken Miller)
7017                          STBI_MALLOC,STBI_REALLOC,STBI_FREE
7018                          GIF bugfix -- seemingly never worked
7019                          STBI_NO_*, STBI_ONLY_*
7020       1.48  (2014-12-14) fix incorrectly-named assert()
7021       1.47  (2014-12-14) 1/2/4-bit PNG support, both direct and paletted (Omar Cornut & stb)
7022                          optimize PNG (ryg)
7023                          fix bug in interlaced PNG with user-specified channel count (stb)
7024       1.46  (2014-08-26)
7025               fix broken tRNS chunk (colorkey-style transparency) in non-paletted PNG
7026       1.45  (2014-08-16)
7027               fix MSVC-ARM internal compiler error by wrapping malloc
7028       1.44  (2014-08-07)
7029               various warning fixes from Ronny Chevalier
7030       1.43  (2014-07-15)
7031               fix MSVC-only compiler problem in code changed in 1.42
7032       1.42  (2014-07-09)
7033               don't define _CRT_SECURE_NO_WARNINGS (affects user code)
7034               fixes to stbi__cleanup_jpeg path
7035               added STBI_ASSERT to avoid requiring assert.h
7036       1.41  (2014-06-25)
7037               fix search&replace from 1.36 that messed up comments/error messages
7038       1.40  (2014-06-22)
7039               fix gcc struct-initialization warning
7040       1.39  (2014-06-15)
7041               fix to TGA optimization when req_comp != number of components in TGA;
7042               fix to GIF loading because BMP wasn't rewinding (whoops, no GIFs in my test suite)
7043               add support for BMP version 5 (more ignored fields)
7044       1.38  (2014-06-06)
7045               suppress MSVC warnings on integer casts truncating values
7046               fix accidental rename of 'skip' field of I/O
7047       1.37  (2014-06-04)
7048               remove duplicate typedef
7049       1.36  (2014-06-03)
7050               convert to header file single-file library
7051               if de-iphone isn't set, load iphone images color-swapped instead of returning NULL
7052       1.35  (2014-05-27)
7053               various warnings
7054               fix broken STBI_SIMD path
7055               fix bug where stbi_load_from_file no longer left file pointer in correct place
7056               fix broken non-easy path for 32-bit BMP (possibly never used)
7057               TGA optimization by Arseny Kapoulkine
7058       1.34  (unknown)
7059               use STBI_NOTUSED in stbi__resample_row_generic(), fix one more leak in tga failure case
7060       1.33  (2011-07-14)
7061               make stbi_is_hdr work in STBI_NO_HDR (as specified), minor compiler-friendly improvements
7062       1.32  (2011-07-13)
7063               support for "info" function for all supported filetypes (SpartanJ)
7064       1.31  (2011-06-20)
7065               a few more leak fixes, bug in PNG handling (SpartanJ)
7066       1.30  (2011-06-11)
7067               added ability to load files via callbacks to accommodate custom input streams (Ben Wenger)
7068               removed deprecated format-specific test/load functions
7069               removed support for installable file formats (stbi_loader) -- would have been broken for IO callbacks anyway
7070               error cases in bmp and tga give messages and don't leak (Raymond Barbiero, grisha)
7071               fix inefficiency in decoding 32-bit BMP (David Woo)
7072       1.29  (2010-08-16)
7073               various warning fixes from Aurelien Pocheville
7074       1.28  (2010-08-01)
7075               fix bug in GIF palette transparency (SpartanJ)
7076       1.27  (2010-08-01)
7077               cast-to-stbi_uc to fix warnings
7078       1.26  (2010-07-24)
7079               fix bug in file buffering for PNG reported by SpartanJ
7080       1.25  (2010-07-17)
7081               refix trans_data warning (Won Chun)
7082       1.24  (2010-07-12)
7083               perf improvements reading from files on platforms with lock-heavy fgetc()
7084               minor perf improvements for jpeg
7085               deprecated type-specific functions so we'll get feedback if they're needed
7086               attempt to fix trans_data warning (Won Chun)
7087       1.23    fixed bug in iPhone support
7088       1.22  (2010-07-10)
7089               removed image *writing* support
7090               stbi_info support from Jetro Lauha
7091               GIF support from Jean-Marc Lienher
7092               iPhone PNG-extensions from James Brown
7093               warning-fixes from Nicolas Schulz and Janez Zemva (i.e. Janez (U+017D)emva)
7094       1.21    fix use of 'stbi_uc' in header (reported by jon blow)
7095       1.20    added support for Softimage PIC, by Tom Seddon
7096       1.19    bug in interlaced PNG corruption check (found by ryg)
7097       1.18  (2008-08-02)
7098               fix a threading bug (local mutable static)
7099       1.17    support interlaced PNG
7100       1.16    major bugfix - stbi__convert_format converted one too many pixels
7101       1.15    initialize some fields for thread safety
7102       1.14    fix threadsafe conversion bug
7103               header-file-only version (#define STBI_HEADER_FILE_ONLY before including)
7104       1.13    threadsafe
7105       1.12    const qualifiers in the API
7106       1.11    Support installable IDCT, colorspace conversion routines
7107       1.10    Fixes for 64-bit (don't use "unsigned long")
7108               optimized upsampling by Fabian "ryg" Giesen
7109       1.09    Fix format-conversion for PSD code (bad global variables!)
7110       1.08    Thatcher Ulrich's PSD code integrated by Nicolas Schulz
7111       1.07    attempt to fix C++ warning/errors again
7112       1.06    attempt to fix C++ warning/errors again
7113       1.05    fix TGA loading to return correct *comp and use good luminance calc
7114       1.04    default float alpha is 1, not 255; use 'void *' for stbi_image_free
7115       1.03    bugfixes to STBI_NO_STDIO, STBI_NO_HDR
7116       1.02    support for (subset of) HDR files, float interface for preferred access to them
7117       1.01    fix bug: possible bug in handling right-side up bmps... not sure
7118               fix bug: the stbi__bmp_load() and stbi__tga_load() functions didn't work at all
7119       1.00    interface to zlib that skips zlib header
7120       0.99    correct handling of alpha in palette
7121       0.98    TGA loader by lonesock; dynamically add loaders (untested)
7122       0.97    jpeg errors on too large a file; also catch another malloc failure
7123       0.96    fix detection of invalid v value - particleman@mollyrocket forum
7124       0.95    during header scan, seek to markers in case of padding
7125       0.94    STBI_NO_STDIO to disable stdio usage; rename all #defines the same
7126       0.93    handle jpegtran output; verbose errors
7127       0.92    read 4,8,16,24,32-bit BMP files of several formats
7128       0.91    output 24-bit Windows 3.0 BMP files
7129       0.90    fix a few more warnings; bump version number to approach 1.0
7130       0.61    bugfixes due to Marc LeBlanc, Christopher Lloyd
7131       0.60    fix compiling as c++
7132       0.59    fix warnings: merge Dave Moore's -Wall fixes
7133       0.58    fix bug: zlib uncompressed mode len/nlen was wrong endian
7134       0.57    fix bug: jpg last huffman symbol before marker was >9 bits but less than 16 available
7135       0.56    fix bug: zlib uncompressed mode len vs. nlen
7136       0.55    fix bug: restart_interval not initialized to 0
7137       0.54    allow NULL for 'int *comp'
7138       0.53    fix bug in png 3->4; speedup png decoding
7139       0.52    png handles req_comp=3,4 directly; minor cleanup; jpeg comments
7140       0.51    obey req_comp requests, 1-component jpegs return as 1-component,
7141               on 'test' only check type, not whether we support this variant
7142       0.50  (2006-11-19)
7143               first released version
7144 */
7145 
7146 
7147 /*
7148 ------------------------------------------------------------------------------
7149 This software is available under 2 licenses -- choose whichever you prefer.
7150 ------------------------------------------------------------------------------
7151 ALTERNATIVE A - MIT License
7152 Copyright (c) 2017 Sean Barrett
7153 Permission is hereby granted, free of charge, to any person obtaining a copy of
7154 this software and associated documentation files (the "Software"), to deal in
7155 the Software without restriction, including without limitation the rights to
7156 use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
7157 of the Software, and to permit persons to whom the Software is furnished to do
7158 so, subject to the following conditions:
7159 The above copyright notice and this permission notice shall be included in all
7160 copies or substantial portions of the Software.
7161 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
7162 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
7163 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
7164 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
7165 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
7166 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
7167 SOFTWARE.
7168 ------------------------------------------------------------------------------
7169 ALTERNATIVE B - Public Domain (www.unlicense.org)
7170 This is free and unencumbered software released into the public domain.
7171 Anyone is free to copy, modify, publish, use, compile, sell, or distribute this
7172 software, either in source code form or as a compiled binary, for any purpose,
7173 commercial or non-commercial, and by any means.
7174 In jurisdictions that recognize copyright laws, the author or authors of this
7175 software dedicate any and all copyright interest in the software to the public
7176 domain. We make this dedication for the benefit of the public at large and to
7177 the detriment of our heirs and successors. We intend this dedication to be an
7178 overt act of relinquishment in perpetuity of all present and future rights to
7179 this software under copyright law.
7180 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
7181 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
7182 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
7183 AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
7184 ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
7185 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
7186 ------------------------------------------------------------------------------
7187 */
7188