1 /*====================================================================*
2  -  Copyright (C) 2001 Leptonica.  All rights reserved.
3  -  This software is distributed in the hope that it will be
4  -  useful, but with NO WARRANTY OF ANY KIND.
5  -  No author or distributor accepts responsibility to anyone for the
6  -  consequences of using this software, or for whether it serves any
7  -  particular purpose or works at all, unless he or she says so in
8  -  writing.  Everyone is granted permission to copy, modify and
9  -  redistribute this source code, for commercial or non-commercial
10  -  purposes, with the following restrictions: (1) the origin of this
11  -  source code must not be misrepresented; (2) modified versions must
12  -  be plainly marked as such; and (3) this notice may not be removed
13  -  or altered from any source or modified source distribution.
14  *====================================================================*/
15 
16 
17 /*
18  *  readfile.c:  reads image on file into memory
19  *
20  *      Top-level functions for reading images from file
21  *           PIXA      *pixaReadFiles()
22  *           PIXA      *pixaReadFilesSA()
23  *           PIX       *pixRead()
24  *           PIX       *pixReadWithHint()
25  *           PIX       *pixReadIndexed()
26  *           PIX       *pixReadStream()
27  *
28  *      Read header information from file
29  *           l_int32    pixReadHeader()
30  *
31  *      Format finders
32  *           l_int32    findFileFormat()
33  *           l_int32    findFileFormatBuffer()
34  *           l_int32    fileFormatIsTiff()
35  *
36  *      Read from memory
37  *           PIX       *pixReadMem()
38  *           l_int32    pixReadHeaderMem()
39  *
40  *      Test function for I/O with different formats
41  *           l_int32    ioFormatTest()
42  */
43 
44 #include <stdio.h>
45 #include <stdlib.h>
46 #include <string.h>
47 #include "allheaders.h"
48 
49 
50     /*  choose type of PIX to be generated  */
enum {
    READ_24_BIT_COLOR = 0,  /* full color: 24 bit data held in a 32 bit pix */
    CONVERT_TO_PALETTE,     /* (1) 8 bit pix with a colormap */
    READ_GRAY               /* (2) grayscale only */
};
56 
57     /* Output files for ioFormatTest().
58      * Note that the test for jpeg is not yet implemented */
59 static const char *FILE_BMP  =  "/tmp/junkout.bmp";
60 static const char *FILE_PNG  =  "/tmp/junkout.png";
61 static const char *FILE_PNM  =  "/tmp/junkout.pnm";
62 static const char *FILE_G3   =  "/tmp/junkout_g3.tif";
63 static const char *FILE_G4   =  "/tmp/junkout_g4.tif";
64 static const char *FILE_RLE  =  "/tmp/junkout_rle.tif";
65 static const char *FILE_PB   =  "/tmp/junkout_packbits.tif";
66 static const char *FILE_LZW  =  "/tmp/junkout_lzw.tif";
67 static const char *FILE_ZIP  =  "/tmp/junkout_zip.tif";
68 static const char *FILE_TIFF =  "/tmp/junkout.tif";
69 static const char *FILE_JPG  =  "/tmp/junkout.jpg";
70 
71 
72 /*---------------------------------------------------------------------*
73  *          Top-level functions for reading images from file           *
74  *---------------------------------------------------------------------*/
75 /*!
76  *  pixaReadFiles()
77  *
78  *      Input:  dirname
79  *              substr (<optional> substring filter on filenames; can be null)
80  *      Return: pixa, or null on error
81  *
82  *  Notes:
83  *      (1) @dirname is the full path for the directory.
84  *      (2) @substr is the part of the file name (excluding
85  *          the directory) that is to be matched.  All matching
86  *          filenames are read into the Pixa.  If substr is NULL,
87  *          all filenames are read into the Pixa.
88  */
89 PIXA *
pixaReadFiles(const char * dirname,const char * substr)90 pixaReadFiles(const char  *dirname,
91               const char  *substr)
92 {
93 PIXA    *pixa;
94 SARRAY  *sa;
95 
96     PROCNAME("pixaReadFiles");
97 
98     if (!dirname)
99         return (PIXA *)ERROR_PTR("dirname not defined", procName, NULL);
100 
101     if ((sa = getSortedPathnamesInDirectory(dirname, substr, 0, 0)) == NULL)
102         return (PIXA *)ERROR_PTR("sa not made", procName, NULL);
103 
104     pixa = pixaReadFilesSA(sa);
105     sarrayDestroy(&sa);
106     return pixa;
107 }
108 
109 
110 /*!
111  *  pixaReadFilesSA()
112  *
113  *      Input:  sarray (full pathnames for all files)
114  *      Return: pixa, or null on error
115  */
116 PIXA *
pixaReadFilesSA(SARRAY * sa)117 pixaReadFilesSA(SARRAY  *sa)
118 {
119 char    *str;
120 l_int32  i, n;
121 PIX     *pix;
122 PIXA    *pixa;
123 
124     PROCNAME("pixaReadFilesSA");
125 
126     if (!sa)
127         return (PIXA *)ERROR_PTR("sa not defined", procName, NULL);
128 
129     n = sarrayGetCount(sa);
130     pixa = pixaCreate(n);
131     for (i = 0; i < n; i++) {
132         str = sarrayGetString(sa, i, L_NOCOPY);
133         if ((pix = pixRead(str)) == NULL) {
134             L_WARNING_STRING("pix not read from file %s", procName, str);
135             continue;
136         }
137 	pixaAddPix(pixa, pix, L_INSERT);
138     }
139 
140     return pixa;
141 }
142 
143 
144 /*!
145  *  pixRead()
146  *
147  *      Input:  filename (with full pathname or in local directory)
148  *      Return: pix if OK; null on error
149  */
150 PIX *
pixRead(const char * filename)151 pixRead(const char  *filename)
152 {
153 FILE  *fp;
154 PIX   *pix;
155 
156     PROCNAME("pixRead");
157 
158     if (!filename)
159         return (PIX *)ERROR_PTR("filename not defined", procName, NULL);
160 
161     if ((fp = fopenReadStream(filename)) == NULL)
162         return (PIX *)ERROR_PTR("image file not found", procName, NULL);
163     pix = pixReadStream(fp, 0);
164     if (pixGetInputFormat(pix) != IFF_GIF)  /* DGifCloseFile() closes stream! */
165         fclose(fp);
166 
167     if (!pix)
168         return (PIX *)ERROR_PTR("image not returned", procName, NULL);
169     return pix;
170 }
171 
172 
173 /*!
174  *  pixReadWithHint()
175  *
176  *      Input:  filename (with full pathname or in local directory)
177  *              hint (bitwise OR of L_HINT_* values for jpeg; use 0 for no hint)
178  *      Return: pix if OK; null on error
179  *
180  *  Notes:
181  *      (1) The hint is not binding, but may be used to optimize jpeg decoding.
182  *          Use 0 for no hinting.
183  */
184 PIX *
pixReadWithHint(const char * filename,l_int32 hint)185 pixReadWithHint(const char  *filename,
186                 l_int32      hint)
187 {
188 FILE  *fp;
189 PIX   *pix;
190 
191     PROCNAME("pixReadWithHint");
192 
193     if (!filename)
194         return (PIX *)ERROR_PTR("filename not defined", procName, NULL);
195 
196     if ((fp = fopenReadStream(filename)) == NULL)
197         return (PIX *)ERROR_PTR("image file not found", procName, NULL);
198     pix = pixReadStream(fp, hint);
199     fclose(fp);
200 
201     if (!pix)
202         return (PIX *)ERROR_PTR("image not returned", procName, NULL);
203     return pix;
204 }
205 
206 
207 /*!
208  *  pixReadIndexed()
209  *
210  *      Input:  sarray (of full pathnames)
211  *              index (into pathname array)
212  *      Return: pix if OK; null if not found
213  *
214  *  Notes:
215  *      (1) This function is useful for selecting image files from a
216  *          directory, where the integer @index is embedded into
217  *          the file name.
218  *      (2) This is typically done by generating the sarray using
219  *          getNumberedPathnamesInDirectory(), so that the @index
220  *          pathname would have the number @index in it.  The size
221  *          of the sarray should be the largest number (plus 1) appearing
222  *          in the file names, respecting the constraints in the
223  *          call to getNumberedPathnamesInDirectory().
224  *      (3) Consequently, for some indices into the sarray, there may
225  *          be no pathnames in the directory containing that number.
226  *          By convention, we place empty C strings ("") in those
227  *          locations in the sarray, and it is not an error if such
228  *          a string is encountered and no pix is returned.
229  *          Therefore, the caller must verify that a pix is returned.
230  *      (4) See convertSegmentedPagesToPS() in src/psio1.c for an
231  *          example of usage.
232  */
233 PIX *
pixReadIndexed(SARRAY * sa,l_int32 index)234 pixReadIndexed(SARRAY  *sa,
235                l_int32  index)
236 {
237 char    *fname;
238 l_int32  n;
239 PIX     *pix;
240 
241     PROCNAME("pixReadIndexed");
242 
243     if (!sa)
244         return (PIX *)ERROR_PTR("sa not defined", procName, NULL);
245     n = sarrayGetCount(sa);
246     if (index < 0 || index >= n)
247         return (PIX *)ERROR_PTR("index out of bounds", procName, NULL);
248 
249     fname = sarrayGetString(sa, index, L_NOCOPY);
250     if (fname[0] == '\0')
251         return NULL;
252 
253     if ((pix = pixRead(fname)) == NULL) {
254         L_ERROR_STRING("pix not read from file %s", procName, fname);
255         return NULL;
256     }
257 
258     return pix;
259 }
260 
261 
262 /*!
263  *  pixReadStream()
264  *
265  *      Input:  fp (file stream)
266  *              hint (bitwise OR of L_HINT_* values for jpeg; use 0 for no hint)
267  *      Return: pix if OK; null on error
268  *
269  *  Notes:
270  *      (1) The hint only applies to jpeg.
271  */
272 PIX *
pixReadStream(FILE * fp,l_int32 hint)273 pixReadStream(FILE    *fp,
274               l_int32  hint)
275 {
276 l_int32  format;
277 PIX     *pix;
278 
279     PROCNAME("pixReadStream");
280 
281     if (!fp)
282         return (PIX *)ERROR_PTR("stream not defined", procName, NULL);
283     pix = NULL;
284 
285     findFileFormat(fp, &format);
286     switch (format)
287     {
288     case IFF_BMP:
289         if ((pix = pixReadStreamBmp(fp)) == NULL )
290             return (PIX *)ERROR_PTR( "bmp: no pix returned", procName, NULL);
291         break;
292 
293     case IFF_JFIF_JPEG:
294         if ((pix = pixReadStreamJpeg(fp, READ_24_BIT_COLOR, 1, NULL, hint))
295                 == NULL)
296             return (PIX *)ERROR_PTR( "jpeg: no pix returned", procName, NULL);
297         break;
298 
299     case IFF_PNG:
300         if ((pix = pixReadStreamPng(fp)) == NULL)
301             return (PIX *)ERROR_PTR("png: no pix returned", procName, NULL);
302         break;
303 
304     case IFF_TIFF:
305     case IFF_TIFF_PACKBITS:
306     case IFF_TIFF_RLE:
307     case IFF_TIFF_G3:
308     case IFF_TIFF_G4:
309     case IFF_TIFF_LZW:
310     case IFF_TIFF_ZIP:
311         if ((pix = pixReadStreamTiff(fp, 0)) == NULL)  /* page 0 by default */
312             return (PIX *)ERROR_PTR("tiff: no pix returned", procName, NULL);
313         break;
314 
315     case IFF_PNM:
316         if ((pix = pixReadStreamPnm(fp)) == NULL)
317             return (PIX *)ERROR_PTR("pnm: no pix returned", procName, NULL);
318         break;
319 
320     case IFF_GIF:
321         if ((pix = pixReadStreamGif(fp)) == NULL)
322             return (PIX *)ERROR_PTR("gif: no pix returned", procName, NULL);
323         break;
324 
325     case IFF_JP2:
326         return (PIX *)ERROR_PTR("jp2: format not supported", procName, NULL);
327         break;
328 
329     case IFF_UNKNOWN:
330         return (PIX *)ERROR_PTR( "Unknown format: no pix returned",
331                 procName, NULL);
332         break;
333     }
334 
335     if (pix)
336         pixSetInputFormat(pix, format);
337     return pix;
338 }
339 
340 
341 
342 /*---------------------------------------------------------------------*
343  *                     Read header information from file               *
344  *---------------------------------------------------------------------*/
345 /*!
346  *  pixReadHeader()
347  *
348  *      Input:  filename (with full pathname or in local directory)
349  *              &format (<optional return> file format)
350  *              &w, &h (<optional returns> width and height)
351  *              &bps <optional return> bits/sample
352  *              &spp <optional return> samples/pixel (1, 3 or 4)
353  *              &iscmap (<optional return> 1 if cmap exists; 0 otherwise)
354  *      Return: 0 if OK, 1 on error
355  *
356  *  Notes:
357  *      (1) This reads the actual headers for jpeg, png, tiff and pnm.
358  *          For bmp and gif, we cheat and read the entire file into a pix,
359  *          from which we extract the "header" information.
360  */
361 l_int32
pixReadHeader(const char * filename,l_int32 * pformat,l_int32 * pw,l_int32 * ph,l_int32 * pbps,l_int32 * pspp,l_int32 * piscmap)362 pixReadHeader(const char  *filename,
363               l_int32     *pformat,
364               l_int32     *pw,
365               l_int32     *ph,
366               l_int32     *pbps,
367               l_int32     *pspp,
368               l_int32     *piscmap)
369 {
370 l_int32   size, format, ret, w, h, d, bps, spp, iscmap;
371 l_int32   type;  /* ignored */
372 l_uint8  *data;
373 FILE     *fp;
374 PIX      *pix;
375 
376     PROCNAME("pixReadHeader");
377 
378     if (pw) *pw = 0;
379     if (ph) *ph = 0;
380     if (pbps) *pbps = 0;
381     if (pspp) *pspp = 0;
382     if (piscmap) *piscmap = 0;
383     if (pformat) *pformat = 0;
384     iscmap = 0;  /* init to false */
385     if (!filename)
386         return ERROR_INT("filename not defined", procName, 1);
387 
388     if ((fp = fopenReadStream(filename)) == NULL)
389         return ERROR_INT("image file not found", procName, 1);
390     findFileFormat(fp, &format);
391     fclose(fp);
392 
393     switch (format)
394     {
395     case IFF_BMP:  /* cheating: reading the entire file */
396         if ((pix = pixRead(filename)) == NULL)
397             return ERROR_INT( "bmp: pix not read", procName, 1);
398         pixGetDimensions(pix, &w, &h, &d);
399         pixDestroy(&pix);
400         bps = (d == 32) ? 8 : d;
401         spp = (d == 32) ? 3 : 1;
402         break;
403 
404     case IFF_JFIF_JPEG:
405         ret = extractJpegDataFromFile(filename, &data, &size, &w, &h,
406                                       &bps, &spp);
407         if (ret)
408             return ERROR_INT( "jpeg: no header info returned", procName, 1);
409         FREE(data);
410         break;
411 
412     case IFF_PNG:
413         ret = readHeaderPng(filename, &w, &h, &bps, &spp, &iscmap);
414         if (ret)
415             return ERROR_INT( "png: no header info returned", procName, 1);
416         break;
417 
418     case IFF_TIFF:
419     case IFF_TIFF_PACKBITS:
420     case IFF_TIFF_RLE:
421     case IFF_TIFF_G3:
422     case IFF_TIFF_G4:
423     case IFF_TIFF_LZW:
424     case IFF_TIFF_ZIP:
425             /* Reading page 0 by default; possibly redefine format */
426         ret = readHeaderTiff(filename, 0, &w, &h, &bps, &spp, NULL, &iscmap,
427                              &format);
428         if (ret)
429             return ERROR_INT( "tiff: no header info returned", procName, 1);
430         break;
431 
432     case IFF_PNM:
433         if ((fp = fopenReadStream(filename)) == NULL)
434             return ERROR_INT("file stream not opened", procName, 1);
435         ret = freadHeaderPnm(fp, NULL, &w, &h, &d, &type, &bps, &spp);
436         fclose(fp);
437         if (ret)
438             return ERROR_INT( "pnm: no header info returned", procName, 1);
439         break;
440 
441     case IFF_GIF:  /* cheating: reading the entire file */
442         if ((pix = pixRead(filename)) == NULL)
443             return ERROR_INT( "gif: pix not read", procName, 1);
444         pixGetDimensions(pix, &w, &h, &d);
445         pixDestroy(&pix);
446         iscmap = 1;  /* always colormapped; max 256 colors */
447         spp = 1;
448         bps = d;
449         break;
450 
451     case IFF_JP2:
452         return ERROR_INT("jp2: format not supported", procName, 1);
453         break;
454 
455     case IFF_UNKNOWN:
456         L_ERROR_STRING("unknown format in file %s", procName, filename);
457         return 1;
458         break;
459     }
460 
461     if (pw) *pw = w;
462     if (ph) *ph = h;
463     if (pbps) *pbps = bps;
464     if (pspp) *pspp = spp;
465     if (piscmap) *piscmap = iscmap;
466     if (pformat) *pformat = format;
467     return 0;
468 }
469 
470 
471 /*---------------------------------------------------------------------*
472  *                            Format finders                           *
473  *---------------------------------------------------------------------*/
474 /*!
475  *  findFileFormat()
476  *
477  *      Input:  fp (file stream)
478  *              &format (<return>)
479  *      Return: 0 if OK, 1 on error or if format is not recognized
480  *
481  *  Notes:
482  *      (1) Important: Side effect -- this resets fp to BOF.
483  */
484 l_int32
findFileFormat(FILE * fp,l_int32 * pformat)485 findFileFormat(FILE     *fp,
486                l_int32  *pformat)
487 {
488 l_uint8  firstbytes[8];
489 l_int32  format;
490 
491     PROCNAME("findFileFormat");
492 
493     if (!pformat)
494         return ERROR_INT("&format not defined", procName, 1);
495     *pformat = 0;
496     if (!fp)
497         return ERROR_INT("stream not defined", procName, 1);
498 
499     rewind(fp);
500     if (fnbytesInFile(fp) < 8)
501         return ERROR_INT("truncated file", procName, 1);
502 
503     if (fread((char *)&firstbytes, 1, 8, fp) != 8)
504         return ERROR_INT("failed to read first 8 bytes of file", procName, 1);
505     rewind(fp);
506 
507     findFileFormatBuffer(firstbytes, &format);
508     if (format == IFF_TIFF) {
509         findTiffCompression(fp, &format);
510         rewind(fp);
511     }
512     *pformat = format;
513     if (format == IFF_UNKNOWN)
514         return 1;
515     else
516         return 0;
517 }
518 
519 
520 /*!
521  *  findFileFormatBuffer()
522  *
523  *      Input:  byte buffer (at least 8 bytes in size; we can't check)
524  *              &format (<return>)
525  *      Return: 0 if OK, 1 on error or if format is not recognized
526  *
527  *  Notes:
528  *      (1) This determines the file format from the first 8 bytes in
529  *          the compressed data stream, which are stored in memory.
530  *      (2) For tiff files, this returns IFF_TIFF.  The specific tiff
531  *          compression is then determined using findTiffCompression().
532  */
533 l_int32
findFileFormatBuffer(const l_uint8 * buf,l_int32 * pformat)534 findFileFormatBuffer(const l_uint8  *buf,
535                      l_int32        *pformat)
536 {
537 l_uint16  twobytepw;
538 
539     PROCNAME("findFileFormatBuffer");
540 
541     if (!pformat)
542         return ERROR_INT("&format not defined", procName, 1);
543     *pformat = IFF_UNKNOWN;
544     if (!buf)
545         return ERROR_INT("byte buffer not defined", procName, 0);
546 
547         /* Check the bmp and tiff 2-byte header ids */
548     ((char *)(&twobytepw))[0] = buf[0];
549     ((char *)(&twobytepw))[1] = buf[1];
550 
551     if (convertOnBigEnd16(twobytepw) == BMP_ID) {
552         *pformat = IFF_BMP;
553         return 0;
554     }
555 
556     if (twobytepw == TIFF_BIGEND_ID || twobytepw == TIFF_LITTLEEND_ID) {
557         *pformat = IFF_TIFF;
558         return 0;
559     }
560 
561         /* Check for the p*m 2-byte header ids */
562     if ((buf[0] == 'P' && buf[1] == '4') || /* newer packed */
563         (buf[0] == 'P' && buf[1] == '1')) {  /* old format */
564         *pformat = IFF_PNM;
565         return 0;
566     }
567 
568     if ((buf[0] == 'P' && buf[1] == '5') || /* newer */
569         (buf[0] == 'P' && buf[1] == '2')) {  /* old */
570         *pformat = IFF_PNM;
571         return 0;
572     }
573 
574     if ((buf[0] == 'P' && buf[1] == '6') || /* newer */
575         (buf[0] == 'P' && buf[1] == '3')) {  /* old */
576         *pformat = IFF_PNM;
577         return 0;
578     }
579 
580         /*  Consider the first 11 bytes of the standard JFIF JPEG header:
581          *    - The first two bytes are the most important:  0xffd8.
582          *    - The next two bytes are the jfif marker: 0xffe0.
583          *      Not all jpeg files have this marker.
584          *    - The next two bytes are the header length.
585          *    - The next 5 bytes are a null-terminated string.
586          *      For JFIF, the string is "JFIF", naturally.  For others it
587          *      can be "Exif" or just about anything else.
588          *    - Because of all this variability, we only check the first
589          *      two byte marker.  All jpeg files are identified as
590          *      IFF_JFIF_JPEG.  */
591     if (buf[0] == 0xff && buf[1] == 0xd8) {
592         *pformat = IFF_JFIF_JPEG;
593         return 0;
594     }
595 
596         /* Check for the 8 byte PNG signature (png_signature in png.c):
597          *       {137, 80, 78, 71, 13, 10, 26, 10}      */
598     if (buf[0] == 137 && buf[1] == 80  && buf[2] == 78  && buf[3] == 71  &&
599         buf[4] == 13  && buf[5] == 10  && buf[6] == 26  && buf[7] == 10) {
600         *pformat = IFF_PNG;
601         return 0;
602     }
603 
604         /* Look for "GIF87a" or "GIF89a" */
605     if (buf[0] == 'G' && buf[1] == 'I' && buf[2] == 'F' && buf[3] == '8' &&
606         (buf[4] == '7' || buf[4] == '9') && buf[5] == 'a') {
607         *pformat = IFF_GIF;
608         return 0;
609     }
610 
611     if (buf[0] == 0xff && buf[1] == 0x4f && buf[2] == 0xff && buf[3] == 0x51) {
612         *pformat = IFF_JP2;
613         return 0;
614     }
615 
616         /* File format identifier not found; unknown */
617     return 1;
618 }
619 
620 
621 /*!
622  *  fileFormatIsTiff()
623  *
624  *      Input:  fp (file stream)
625  *      Return: 1 if file is tiff; 0 otherwise or on error
626  */
627 l_int32
fileFormatIsTiff(FILE * fp)628 fileFormatIsTiff(FILE  *fp)
629 {
630 l_int32  format;
631 
632     PROCNAME("fileFormatIsTiff");
633 
634     if (!fp)
635         return ERROR_INT("stream not defined", procName, 0);
636 
637     findFileFormat(fp, &format);
638     if (format == IFF_TIFF || format == IFF_TIFF_PACKBITS ||
639         format == IFF_TIFF_RLE || format == IFF_TIFF_G3 ||
640         format == IFF_TIFF_G4 || format == IFF_TIFF_LZW ||
641         format == IFF_TIFF_ZIP)
642         return 1;
643     else
644         return 0;
645 }
646 
647 
648 /*---------------------------------------------------------------------*
649  *                            Read from memory                         *
650  *---------------------------------------------------------------------*/
651 /*!
652  *  pixReadMem()
653  *
654  *      Input:  data (const; encoded)
655  *              datasize (size of data)
656  *      Return: pix, or null on error
657  *
658  *  Notes:
659  *      (1) This is a variation of pixReadStream(), where the data is read
660  *          from a memory buffer rather than a file.
661  *      (2) On windows, this will only read tiff formatted files from
662  *          memory.  For other formats, it requires fmemopen(3).
663  *          Attempts to read those formats will fail at runtime.
664  *      (3) findFileFormatBuffer() requires up to 8 bytes to decide on
665  *          the format.  That determines the constraint here.
666  */
667 PIX *
pixReadMem(const l_uint8 * data,size_t size)668 pixReadMem(const l_uint8  *data,
669            size_t          size)
670 {
671 l_int32  format;
672 PIX     *pix;
673 
674     PROCNAME("pixReadMem");
675 
676     if (!data)
677         return (PIX *)ERROR_PTR("data not defined", procName, NULL);
678     if (size < 8)
679         return (PIX *)ERROR_PTR("size < 8", procName, NULL);
680     pix = NULL;
681 
682     findFileFormatBuffer(data, &format);
683     switch (format)
684     {
685     case IFF_BMP:
686         if ((pix = pixReadMemBmp(data, size)) == NULL )
687             return (PIX *)ERROR_PTR( "bmp: no pix returned", procName, NULL);
688         break;
689 
690     case IFF_JFIF_JPEG:
691         if ((pix = pixReadMemJpeg(data, size, READ_24_BIT_COLOR, 1, NULL, 0))
692                 == NULL)
693             return (PIX *)ERROR_PTR( "jpeg: no pix returned", procName, NULL);
694         break;
695 
696     case IFF_PNG:
697         if ((pix = pixReadMemPng(data, size)) == NULL)
698             return (PIX *)ERROR_PTR("png: no pix returned", procName, NULL);
699         break;
700 
701     case IFF_TIFF:
702     case IFF_TIFF_PACKBITS:
703     case IFF_TIFF_RLE:
704     case IFF_TIFF_G3:
705     case IFF_TIFF_G4:
706     case IFF_TIFF_LZW:
707     case IFF_TIFF_ZIP:
708             /* Reading page 0 by default */
709         if ((pix = pixReadMemTiff(data, size, 0)) == NULL)
710             return (PIX *)ERROR_PTR("tiff: no pix returned", procName, NULL);
711         break;
712 
713     case IFF_PNM:
714         if ((pix = pixReadMemPnm(data, size)) == NULL)
715             return (PIX *)ERROR_PTR("pnm: no pix returned", procName, NULL);
716         break;
717 
718     case IFF_GIF:
719         if ((pix = pixReadMemGif(data, size)) == NULL)
720             return (PIX *)ERROR_PTR("gif: no pix returned", procName, NULL);
721         break;
722 
723     case IFF_JP2:
724         return (PIX *)ERROR_PTR("jp2: format not supported", procName, NULL);
725         break;
726 
727     case IFF_UNKNOWN:
728         return (PIX *)ERROR_PTR("Unknown format: no pix returned",
729                 procName, NULL);
730         break;
731     }
732 
733         /* Set the input format.  For tiff reading from memory we lose
734          * the actual input format; for 1 bpp, default to G4.  */
735     if (pix) {
736         if (format == IFF_TIFF && pixGetDepth(pix) == 1)
737             format = IFF_TIFF_G4;
738         pixSetInputFormat(pix, format);
739     }
740 
741     return pix;
742 }
743 
744 
745 /*!
746  *  pixReadHeaderMem()
747  *
748  *      Input:  data (const; encoded)
749  *              datasize (size of data)
750  *              &format (<optional returns> image format)
751  *              &w, &h (<optional returns> width and height)
752  *              &bps <optional return> bits/sample
753  *              &spp <optional return> samples/pixel (1, 3 or 4)
754  *              &iscmap (<optional return> 1 if cmap exists; 0 otherwise)
755  *      Return: 0 if OK, 1 on error
756  *
757  *  Notes:
758  *      (1) This reads the actual headers for jpeg, png, tiff and pnm.
759  *          For bmp and gif, we cheat and read all the data into a pix,
760  *          from which we extract the "header" information.
761  *      (2) On windows, this will only read tiff formatted files from
762  *          memory.  For other formats, it requires fmemopen(3).
763  *          Attempts to read those formats will fail at runtime.
764  *      (3) findFileFormatBuffer() requires up to 8 bytes to decide on
765  *          the format.  That determines the constraint here.
766  */
767 l_int32
pixReadHeaderMem(const l_uint8 * data,size_t size,l_int32 * pformat,l_int32 * pw,l_int32 * ph,l_int32 * pbps,l_int32 * pspp,l_int32 * piscmap)768 pixReadHeaderMem(const l_uint8  *data,
769                  size_t          size,
770                  l_int32        *pformat,
771                  l_int32        *pw,
772                  l_int32        *ph,
773                  l_int32        *pbps,
774                  l_int32        *pspp,
775                  l_int32        *piscmap)
776 {
777 l_int32  format, ret, w, h, d, bps, spp, iscmap;
778 l_int32  type;  /* not used */
779 PIX     *pix;
780 
781     PROCNAME("pixReadHeaderMem");
782 
783     if (pw) *pw = 0;
784     if (ph) *ph = 0;
785     if (pbps) *pbps = 0;
786     if (pspp) *pspp = 0;
787     if (piscmap) *piscmap = 0;
788     if (pformat) *pformat = 0;
789     iscmap = 0;  /* init to false */
790     if (!data)
791         return ERROR_INT("data not defined", procName, 1);
792     if (size < 8)
793         return ERROR_INT("size < 8", procName, 1);
794 
795     findFileFormatBuffer(data, &format);
796 
797     switch (format)
798     {
799     case IFF_BMP:  /* cheating: read the pix */
800         if ((pix = pixReadMemBmp(data, size)) == NULL)
801             return ERROR_INT( "bmp: pix not read", procName, 1);
802         pixGetDimensions(pix, &w, &h, &d);
803         pixDestroy(&pix);
804         bps = (d == 32) ? 8 : d;
805         spp = (d == 32) ? 3 : 1;
806         break;
807 
808     case IFF_JFIF_JPEG:
809         ret = extractJpegDataFromArray(data, (l_int32)size, &w, &h, &bps, &spp);
810         if (ret)
811             return ERROR_INT( "jpeg: no header info returned", procName, 1);
812         break;
813 
814     case IFF_PNG:
815         ret = sreadHeaderPng(data, &w, &h, &bps, &spp, &iscmap);
816         if (ret)
817             return ERROR_INT( "png: no header info returned", procName, 1);
818         break;
819 
820     case IFF_TIFF:
821     case IFF_TIFF_PACKBITS:
822     case IFF_TIFF_RLE:
823     case IFF_TIFF_G3:
824     case IFF_TIFF_G4:
825     case IFF_TIFF_LZW:
826     case IFF_TIFF_ZIP:
827             /* Reading page 0 by default; possibly redefine format */
828         ret = readHeaderMemTiff(data, size, 0, &w, &h, &bps, &spp,
829                                 NULL, &iscmap, &format);
830         if (ret)
831             return ERROR_INT( "tiff: no header info returned", procName, 1);
832         break;
833 
834     case IFF_PNM:
835         ret = sreadHeaderPnm(data, size, &w, &h, &d, &type, &bps, &spp);
836         if (ret)
837             return ERROR_INT( "pnm: no header info returned", procName, 1);
838         break;
839 
840     case IFF_GIF:  /* cheating: read the pix */
841         if ((pix = pixReadMemGif(data, size)) == NULL)
842             return ERROR_INT( "gif: pix not read", procName, 1);
843         pixGetDimensions(pix, &w, &h, &d);
844         pixDestroy(&pix);
845         iscmap = 1;  /* always colormapped; max 256 colors */
846         spp = 1;
847         bps = d;
848         break;
849 
850     case IFF_JP2:
851         return ERROR_INT("jp2: format not supported", procName, 1);
852         break;
853 
854     case IFF_UNKNOWN:
855         return ERROR_INT("unknown format; no data returned", procName, 1);
856         break;
857     }
858 
859     if (pw) *pw = w;
860     if (ph) *ph = h;
861     if (pbps) *pbps = bps;
862     if (pspp) *pspp = spp;
863     if (piscmap) *piscmap = iscmap;
864     if (pformat) *pformat = format;
865     return 0;
866 }
867 
868 
869 /*---------------------------------------------------------------------*
870  *             Test function for I/O with different formats            *
871  *---------------------------------------------------------------------*/
872 /*!
873  *  ioFormatTest()
874  *
875  *      Input:  filename (input file)
876  *      Return: 0 if OK; 1 on error or if the test fails
877  *
878  *  Notes:
879  *      (1) This writes and reads a set of output files losslessly
880  *          in different formats to /tmp, and tests that the
881  *          result before and after is unchanged.
882  *      (2) This should work properly on input images of any depth,
883  *          with and without colormaps.
884  *      (3) All supported formats are tested for bmp, png, tiff and
885  *          non-ascii pnm.  Ascii pnm also works (but who'd ever want
886  *          to use it?)   We allow 2 bpp bmp, although it's not
887  *          supported elsewhere.  And we don't support reading
888  *          16 bpp png, although this can be turned on in pngio.c.
889  */
890 l_int32
ioFormatTest(const char * filename)891 ioFormatTest(const char  *filename)
892 {
893 l_int32   d, equal, problems;
894 PIX      *pixs, *pixc, *pixt, *pixt2;
895 PIXCMAP  *cmap;
896 
897     PROCNAME("ioFormatTest");
898 
899     if (!filename)
900         return ERROR_INT("filename not defined", procName, 1);
901 
902     if ((pixs = pixRead(filename)) == NULL)
903         return ERROR_INT("pixs not made", procName, 1);
904 
905         /* Note that the reader automatically removes colormaps
906          * from 1 bpp BMP images, but not from 8 bpp BMP images.
907          * Therefore, if our 8 bpp image initially doesn't have a
908          * colormap, we are going to need to remove it from any
909          * pix read from a BMP file. */
910     pixc = pixClone(pixs);  /* laziness */
911     cmap = pixGetColormap(pixc);  /* colormap; can be NULL */
912     d = pixGetDepth(pixc);
913 
914     problems = FALSE;
915 
916         /* ----------------------- BMP -------------------------- */
917 
918         /* BMP works for 1, 2, 4, 8 and 32 bpp images.
919          * It always writes colormaps for 1 and 8 bpp, so we must
920          * remove it after readback if the input image doesn't have
921          * a colormap.  Although we can write/read 2 bpp BMP, nobody
922          * else can read them! */
923     if (d == 1 || d == 8) {
924         L_INFO("write/read bmp", procName);
925         pixWrite(FILE_BMP, pixc, IFF_BMP);
926         pixt = pixRead(FILE_BMP);
927         if (!cmap)
928             pixt2 = pixRemoveColormap(pixt, REMOVE_CMAP_BASED_ON_SRC);
929         else
930             pixt2 = pixClone(pixt);
931         pixEqual(pixc, pixt2, &equal);
932         if (!equal) {
933             L_INFO("   **** bad bmp image ****", procName);
934             problems = TRUE;
935         }
936         pixDestroy(&pixt);
937         pixDestroy(&pixt2);
938     }
939 
940     if (d == 2 || d == 4 || d == 32) {
941         L_INFO("write/read bmp", procName);
942         pixWrite(FILE_BMP, pixc, IFF_BMP);
943         pixt = pixRead(FILE_BMP);
944         pixEqual(pixc, pixt, &equal);
945         if (!equal) {
946             L_INFO("   **** bad bmp image ****", procName);
947             problems = TRUE;
948         }
949         pixDestroy(&pixt);
950     }
951 
952         /* ----------------------- PNG -------------------------- */
953 
954         /* PNG works for all depths, but here, because we strip
955          * 16 --> 8 bpp on reading, we don't test png for 16 bpp. */
956     if (d != 16) {
957         L_INFO("write/read png", procName);
958         pixWrite(FILE_PNG, pixc, IFF_PNG);
959         pixt = pixRead(FILE_PNG);
960         pixEqual(pixc, pixt, &equal);
961         if (!equal) {
962             L_INFO("   **** bad png image ****", procName);
963             problems = TRUE;
964         }
965         pixDestroy(&pixt);
966     }
967 
968         /* ----------------------- TIFF -------------------------- */
969 
970         /* TIFF works for 1, 2, 4, 8, 16 and 32 bpp images.
971          * Because 8 bpp tiff always writes 256 entry colormaps, the
972          * colormap sizes may be different for 8 bpp images with
973          * colormap; we are testing if the image content is the same.
974          * Likewise, the 2 and 4 bpp tiff images with colormaps
975          * have colormap sizes 4 and 16, rsp.  This test should
976          * work properly on the content, regardless of the number
977          * of color entries in pixc. */
978 
979         /* tiff uncompressed works for all pixel depths */
980     L_INFO("write/read uncompressed tiff", procName);
981     pixWrite(FILE_TIFF, pixc, IFF_TIFF);
982     pixt = pixRead(FILE_TIFF);
983     pixEqual(pixc, pixt, &equal);
984     if (!equal) {
985         L_INFO("   **** bad tiff uncompressed image ****", procName);
986         problems = TRUE;
987     }
988     pixDestroy(&pixt);
989 
990         /* tiff lzw works for all pixel depths */
991     L_INFO("write/read lzw compressed tiff", procName);
992     pixWrite(FILE_LZW, pixc, IFF_TIFF_LZW);
993     pixt = pixRead(FILE_LZW);
994     pixEqual(pixc, pixt, &equal);
995     if (!equal) {
996         L_INFO("   **** bad tiff lzw compressed image ****", procName);
997         problems = TRUE;
998     }
999     pixDestroy(&pixt);
1000 
1001         /* tiff adobe deflate (zip) works for all pixel depths */
1002     L_INFO("write/read zip compressed tiff", procName);
1003     pixWrite(FILE_ZIP, pixc, IFF_TIFF_ZIP);
1004     pixt = pixRead(FILE_ZIP);
1005     pixEqual(pixc, pixt, &equal);
1006     if (!equal) {
1007         L_INFO("   **** bad tiff zip compressed image ****", procName);
1008         problems = TRUE;
1009     }
1010     pixDestroy(&pixt);
1011 
1012         /* tiff g4, g3, rle and packbits work for 1 bpp */
1013     if (d == 1) {
1014         L_INFO("write/read g4 compressed tiff", procName);
1015         pixWrite(FILE_G4, pixc, IFF_TIFF_G4);
1016         pixt = pixRead(FILE_G4);
1017         pixEqual(pixc, pixt, &equal);
1018         if (!equal) {
1019             L_INFO("   **** bad tiff g4 image ****", procName);
1020             problems = TRUE;
1021         }
1022         pixDestroy(&pixt);
1023 
1024         L_INFO("write/read g3 compressed tiff", procName);
1025         pixWrite(FILE_G3, pixc, IFF_TIFF_G3);
1026         pixt = pixRead(FILE_G3);
1027         pixEqual(pixc, pixt, &equal);
1028         if (!equal) {
1029             L_INFO("   **** bad tiff g3 image ****", procName);
1030             problems = TRUE;
1031         }
1032         pixDestroy(&pixt);
1033 
1034         L_INFO("write/read rle compressed tiff", procName);
1035         pixWrite(FILE_RLE, pixc, IFF_TIFF_RLE);
1036         pixt = pixRead(FILE_RLE);
1037         pixEqual(pixc, pixt, &equal);
1038         if (!equal) {
1039             L_INFO("   **** bad tiff rle image ****", procName);
1040             problems = TRUE;
1041         }
1042         pixDestroy(&pixt);
1043 
1044         L_INFO("write/read packbits compressed tiff", procName);
1045         pixWrite(FILE_PB, pixc, IFF_TIFF_PACKBITS);
1046         pixt = pixRead(FILE_PB);
1047         pixEqual(pixc, pixt, &equal);
1048         if (!equal) {
1049             L_INFO("   **** bad tiff packbits image ****", procName);
1050             problems = TRUE;
1051         }
1052         pixDestroy(&pixt);
1053     }
1054 
1055         /* ----------------------- PNM -------------------------- */
1056 
1057         /* pnm works for 1, 2, 4, 8, 16 and 32 bpp.
1058          * pnm doesn't have colormaps, so when we write colormapped
1059          * pix out as pnm, the colormap is removed.  Thus for the test,
1060          * we must remove the colormap from pixc before testing.  */
1061     L_INFO("write/read pnm", procName);
1062     pixWrite(FILE_PNM, pixc, IFF_PNM);
1063     pixt = pixRead(FILE_PNM);
1064     if (cmap)
1065         pixt2 = pixRemoveColormap(pixc, REMOVE_CMAP_BASED_ON_SRC);
1066     else
1067         pixt2 = pixClone(pixc);
1068     pixEqual(pixt, pixt2, &equal);
1069     if (!equal) {
1070         L_INFO("   **** bad pnm image ****", procName);
1071         problems = TRUE;
1072     }
1073     pixDestroy(&pixt);
1074     pixDestroy(&pixt2);
1075 
1076     if (problems == FALSE)
1077         L_INFO("All formats read and written OK!", procName);
1078 
1079     pixDestroy(&pixc);
1080     pixDestroy(&pixs);
1081     return problems;
1082 }
1083 
1084