1 /*====================================================================*
2 - Copyright (C) 2001 Leptonica. All rights reserved.
3 - This software is distributed in the hope that it will be
4 - useful, but with NO WARRANTY OF ANY KIND.
5 - No author or distributor accepts responsibility to anyone for the
6 - consequences of using this software, or for whether it serves any
7 - particular purpose or works at all, unless he or she says so in
8 - writing. Everyone is granted permission to copy, modify and
9 - redistribute this source code, for commercial or non-commercial
10 - purposes, with the following restrictions: (1) the origin of this
11 - source code must not be misrepresented; (2) modified versions must
12 - be plainly marked as such; and (3) this notice may not be removed
13 - or altered from any source or modified source distribution.
14 *====================================================================*/
15
16
17 /*
18 * readfile.c: reads image on file into memory
19 *
20 * Top-level functions for reading images from file
21 * PIXA *pixaReadFiles()
22 * PIXA *pixaReadFilesSA()
23 * PIX *pixRead()
24 * PIX *pixReadWithHint()
25 * PIX *pixReadIndexed()
26 * PIX *pixReadStream()
27 *
28 * Read header information from file
29 * l_int32 pixReadHeader()
30 *
31 * Format finders
32 * l_int32 findFileFormat()
33 * l_int32 findFileFormatBuffer()
34 * l_int32 fileFormatIsTiff()
35 *
36 * Read from memory
37 * PIX *pixReadMem()
38 * l_int32 pixReadHeaderMem()
39 *
40 * Test function for I/O with different formats
41 * l_int32 ioFormatTest()
42 */
43
44 #include <stdio.h>
45 #include <stdlib.h>
46 #include <string.h>
47 #include "allheaders.h"
48
49
/* Selects the kind of PIX a decoder should generate.  In this file the
 * value is passed as the second argument of the jpeg readers. */
enum {
    READ_24_BIT_COLOR  = 0,   /* read in as 24 (really 32) bit pix */
    CONVERT_TO_PALETTE = 1,   /* convert to 8 bit colormapped pix */
    READ_GRAY          = 2    /* read gray only */
};
56
/* Scratch files written and read back by ioFormatTest().
 * FILE_JPG is reserved: the jpeg test is not yet implemented. */
static const char  FILE_BMP[]  = "/tmp/junkout.bmp";
static const char  FILE_PNG[]  = "/tmp/junkout.png";
static const char  FILE_PNM[]  = "/tmp/junkout.pnm";
static const char  FILE_G3[]   = "/tmp/junkout_g3.tif";
static const char  FILE_G4[]   = "/tmp/junkout_g4.tif";
static const char  FILE_RLE[]  = "/tmp/junkout_rle.tif";
static const char  FILE_PB[]   = "/tmp/junkout_packbits.tif";
static const char  FILE_LZW[]  = "/tmp/junkout_lzw.tif";
static const char  FILE_ZIP[]  = "/tmp/junkout_zip.tif";
static const char  FILE_TIFF[] = "/tmp/junkout.tif";
static const char  FILE_JPG[]  = "/tmp/junkout.jpg";
70
71
72 /*---------------------------------------------------------------------*
73 * Top-level functions for reading images from file *
74 *---------------------------------------------------------------------*/
75 /*!
76 * pixaReadFiles()
77 *
78 * Input: dirname
79 * substr (<optional> substring filter on filenames; can be null)
80 * Return: pixa, or null on error
81 *
82 * Notes:
83 * (1) @dirname is the full path for the directory.
84 * (2) @substr is the part of the file name (excluding
85 * the directory) that is to be matched. All matching
86 * filenames are read into the Pixa. If substr is NULL,
87 * all filenames are read into the Pixa.
88 */
89 PIXA *
pixaReadFiles(const char * dirname,const char * substr)90 pixaReadFiles(const char *dirname,
91 const char *substr)
92 {
93 PIXA *pixa;
94 SARRAY *sa;
95
96 PROCNAME("pixaReadFiles");
97
98 if (!dirname)
99 return (PIXA *)ERROR_PTR("dirname not defined", procName, NULL);
100
101 if ((sa = getSortedPathnamesInDirectory(dirname, substr, 0, 0)) == NULL)
102 return (PIXA *)ERROR_PTR("sa not made", procName, NULL);
103
104 pixa = pixaReadFilesSA(sa);
105 sarrayDestroy(&sa);
106 return pixa;
107 }
108
109
110 /*!
111 * pixaReadFilesSA()
112 *
113 * Input: sarray (full pathnames for all files)
114 * Return: pixa, or null on error
115 */
116 PIXA *
pixaReadFilesSA(SARRAY * sa)117 pixaReadFilesSA(SARRAY *sa)
118 {
119 char *str;
120 l_int32 i, n;
121 PIX *pix;
122 PIXA *pixa;
123
124 PROCNAME("pixaReadFilesSA");
125
126 if (!sa)
127 return (PIXA *)ERROR_PTR("sa not defined", procName, NULL);
128
129 n = sarrayGetCount(sa);
130 pixa = pixaCreate(n);
131 for (i = 0; i < n; i++) {
132 str = sarrayGetString(sa, i, L_NOCOPY);
133 if ((pix = pixRead(str)) == NULL) {
134 L_WARNING_STRING("pix not read from file %s", procName, str);
135 continue;
136 }
137 pixaAddPix(pixa, pix, L_INSERT);
138 }
139
140 return pixa;
141 }
142
143
144 /*!
145 * pixRead()
146 *
147 * Input: filename (with full pathname or in local directory)
148 * Return: pix if OK; null on error
149 */
150 PIX *
pixRead(const char * filename)151 pixRead(const char *filename)
152 {
153 FILE *fp;
154 PIX *pix;
155
156 PROCNAME("pixRead");
157
158 if (!filename)
159 return (PIX *)ERROR_PTR("filename not defined", procName, NULL);
160
161 if ((fp = fopenReadStream(filename)) == NULL)
162 return (PIX *)ERROR_PTR("image file not found", procName, NULL);
163 pix = pixReadStream(fp, 0);
164 if (pixGetInputFormat(pix) != IFF_GIF) /* DGifCloseFile() closes stream! */
165 fclose(fp);
166
167 if (!pix)
168 return (PIX *)ERROR_PTR("image not returned", procName, NULL);
169 return pix;
170 }
171
172
173 /*!
174 * pixReadWithHint()
175 *
176 * Input: filename (with full pathname or in local directory)
177 * hint (bitwise OR of L_HINT_* values for jpeg; use 0 for no hint)
178 * Return: pix if OK; null on error
179 *
180 * Notes:
181 * (1) The hint is not binding, but may be used to optimize jpeg decoding.
182 * Use 0 for no hinting.
183 */
184 PIX *
pixReadWithHint(const char * filename,l_int32 hint)185 pixReadWithHint(const char *filename,
186 l_int32 hint)
187 {
188 FILE *fp;
189 PIX *pix;
190
191 PROCNAME("pixReadWithHint");
192
193 if (!filename)
194 return (PIX *)ERROR_PTR("filename not defined", procName, NULL);
195
196 if ((fp = fopenReadStream(filename)) == NULL)
197 return (PIX *)ERROR_PTR("image file not found", procName, NULL);
198 pix = pixReadStream(fp, hint);
199 fclose(fp);
200
201 if (!pix)
202 return (PIX *)ERROR_PTR("image not returned", procName, NULL);
203 return pix;
204 }
205
206
207 /*!
208 * pixReadIndexed()
209 *
210 * Input: sarray (of full pathnames)
211 * index (into pathname array)
212 * Return: pix if OK; null if not found
213 *
214 * Notes:
215 * (1) This function is useful for selecting image files from a
216 * directory, where the integer @index is embedded into
217 * the file name.
218 * (2) This is typically done by generating the sarray using
219 * getNumberedPathnamesInDirectory(), so that the @index
220 * pathname would have the number @index in it. The size
221 * of the sarray should be the largest number (plus 1) appearing
222 * in the file names, respecting the constraints in the
223 * call to getNumberedPathnamesInDirectory().
224 * (3) Consequently, for some indices into the sarray, there may
225 * be no pathnames in the directory containing that number.
226 * By convention, we place empty C strings ("") in those
227 * locations in the sarray, and it is not an error if such
228 * a string is encountered and no pix is returned.
229 * Therefore, the caller must verify that a pix is returned.
230 * (4) See convertSegmentedPagesToPS() in src/psio1.c for an
231 * example of usage.
232 */
233 PIX *
pixReadIndexed(SARRAY * sa,l_int32 index)234 pixReadIndexed(SARRAY *sa,
235 l_int32 index)
236 {
237 char *fname;
238 l_int32 n;
239 PIX *pix;
240
241 PROCNAME("pixReadIndexed");
242
243 if (!sa)
244 return (PIX *)ERROR_PTR("sa not defined", procName, NULL);
245 n = sarrayGetCount(sa);
246 if (index < 0 || index >= n)
247 return (PIX *)ERROR_PTR("index out of bounds", procName, NULL);
248
249 fname = sarrayGetString(sa, index, L_NOCOPY);
250 if (fname[0] == '\0')
251 return NULL;
252
253 if ((pix = pixRead(fname)) == NULL) {
254 L_ERROR_STRING("pix not read from file %s", procName, fname);
255 return NULL;
256 }
257
258 return pix;
259 }
260
261
262 /*!
263 * pixReadStream()
264 *
265 * Input: fp (file stream)
266 * hint (bitwise OR of L_HINT_* values for jpeg; use 0 for no hint)
267 * Return: pix if OK; null on error
268 *
269 * Notes:
270 * (1) The hint only applies to jpeg.
271 */
272 PIX *
pixReadStream(FILE * fp,l_int32 hint)273 pixReadStream(FILE *fp,
274 l_int32 hint)
275 {
276 l_int32 format;
277 PIX *pix;
278
279 PROCNAME("pixReadStream");
280
281 if (!fp)
282 return (PIX *)ERROR_PTR("stream not defined", procName, NULL);
283 pix = NULL;
284
285 findFileFormat(fp, &format);
286 switch (format)
287 {
288 case IFF_BMP:
289 if ((pix = pixReadStreamBmp(fp)) == NULL )
290 return (PIX *)ERROR_PTR( "bmp: no pix returned", procName, NULL);
291 break;
292
293 case IFF_JFIF_JPEG:
294 if ((pix = pixReadStreamJpeg(fp, READ_24_BIT_COLOR, 1, NULL, hint))
295 == NULL)
296 return (PIX *)ERROR_PTR( "jpeg: no pix returned", procName, NULL);
297 break;
298
299 case IFF_PNG:
300 if ((pix = pixReadStreamPng(fp)) == NULL)
301 return (PIX *)ERROR_PTR("png: no pix returned", procName, NULL);
302 break;
303
304 case IFF_TIFF:
305 case IFF_TIFF_PACKBITS:
306 case IFF_TIFF_RLE:
307 case IFF_TIFF_G3:
308 case IFF_TIFF_G4:
309 case IFF_TIFF_LZW:
310 case IFF_TIFF_ZIP:
311 if ((pix = pixReadStreamTiff(fp, 0)) == NULL) /* page 0 by default */
312 return (PIX *)ERROR_PTR("tiff: no pix returned", procName, NULL);
313 break;
314
315 case IFF_PNM:
316 if ((pix = pixReadStreamPnm(fp)) == NULL)
317 return (PIX *)ERROR_PTR("pnm: no pix returned", procName, NULL);
318 break;
319
320 case IFF_GIF:
321 if ((pix = pixReadStreamGif(fp)) == NULL)
322 return (PIX *)ERROR_PTR("gif: no pix returned", procName, NULL);
323 break;
324
325 case IFF_JP2:
326 return (PIX *)ERROR_PTR("jp2: format not supported", procName, NULL);
327 break;
328
329 case IFF_UNKNOWN:
330 return (PIX *)ERROR_PTR( "Unknown format: no pix returned",
331 procName, NULL);
332 break;
333 }
334
335 if (pix)
336 pixSetInputFormat(pix, format);
337 return pix;
338 }
339
340
341
342 /*---------------------------------------------------------------------*
343 * Read header information from file *
344 *---------------------------------------------------------------------*/
345 /*!
346 * pixReadHeader()
347 *
348 * Input: filename (with full pathname or in local directory)
349 * &format (<optional return> file format)
350 * &w, &h (<optional returns> width and height)
351 * &bps <optional return> bits/sample
352 * &spp <optional return> samples/pixel (1, 3 or 4)
353 * &iscmap (<optional return> 1 if cmap exists; 0 otherwise)
354 * Return: 0 if OK, 1 on error
355 *
356 * Notes:
357 * (1) This reads the actual headers for jpeg, png, tiff and pnm.
358 * For bmp and gif, we cheat and read the entire file into a pix,
359 * from which we extract the "header" information.
360 */
361 l_int32
pixReadHeader(const char * filename,l_int32 * pformat,l_int32 * pw,l_int32 * ph,l_int32 * pbps,l_int32 * pspp,l_int32 * piscmap)362 pixReadHeader(const char *filename,
363 l_int32 *pformat,
364 l_int32 *pw,
365 l_int32 *ph,
366 l_int32 *pbps,
367 l_int32 *pspp,
368 l_int32 *piscmap)
369 {
370 l_int32 size, format, ret, w, h, d, bps, spp, iscmap;
371 l_int32 type; /* ignored */
372 l_uint8 *data;
373 FILE *fp;
374 PIX *pix;
375
376 PROCNAME("pixReadHeader");
377
378 if (pw) *pw = 0;
379 if (ph) *ph = 0;
380 if (pbps) *pbps = 0;
381 if (pspp) *pspp = 0;
382 if (piscmap) *piscmap = 0;
383 if (pformat) *pformat = 0;
384 iscmap = 0; /* init to false */
385 if (!filename)
386 return ERROR_INT("filename not defined", procName, 1);
387
388 if ((fp = fopenReadStream(filename)) == NULL)
389 return ERROR_INT("image file not found", procName, 1);
390 findFileFormat(fp, &format);
391 fclose(fp);
392
393 switch (format)
394 {
395 case IFF_BMP: /* cheating: reading the entire file */
396 if ((pix = pixRead(filename)) == NULL)
397 return ERROR_INT( "bmp: pix not read", procName, 1);
398 pixGetDimensions(pix, &w, &h, &d);
399 pixDestroy(&pix);
400 bps = (d == 32) ? 8 : d;
401 spp = (d == 32) ? 3 : 1;
402 break;
403
404 case IFF_JFIF_JPEG:
405 ret = extractJpegDataFromFile(filename, &data, &size, &w, &h,
406 &bps, &spp);
407 if (ret)
408 return ERROR_INT( "jpeg: no header info returned", procName, 1);
409 FREE(data);
410 break;
411
412 case IFF_PNG:
413 ret = readHeaderPng(filename, &w, &h, &bps, &spp, &iscmap);
414 if (ret)
415 return ERROR_INT( "png: no header info returned", procName, 1);
416 break;
417
418 case IFF_TIFF:
419 case IFF_TIFF_PACKBITS:
420 case IFF_TIFF_RLE:
421 case IFF_TIFF_G3:
422 case IFF_TIFF_G4:
423 case IFF_TIFF_LZW:
424 case IFF_TIFF_ZIP:
425 /* Reading page 0 by default; possibly redefine format */
426 ret = readHeaderTiff(filename, 0, &w, &h, &bps, &spp, NULL, &iscmap,
427 &format);
428 if (ret)
429 return ERROR_INT( "tiff: no header info returned", procName, 1);
430 break;
431
432 case IFF_PNM:
433 if ((fp = fopenReadStream(filename)) == NULL)
434 return ERROR_INT("file stream not opened", procName, 1);
435 ret = freadHeaderPnm(fp, NULL, &w, &h, &d, &type, &bps, &spp);
436 fclose(fp);
437 if (ret)
438 return ERROR_INT( "pnm: no header info returned", procName, 1);
439 break;
440
441 case IFF_GIF: /* cheating: reading the entire file */
442 if ((pix = pixRead(filename)) == NULL)
443 return ERROR_INT( "gif: pix not read", procName, 1);
444 pixGetDimensions(pix, &w, &h, &d);
445 pixDestroy(&pix);
446 iscmap = 1; /* always colormapped; max 256 colors */
447 spp = 1;
448 bps = d;
449 break;
450
451 case IFF_JP2:
452 return ERROR_INT("jp2: format not supported", procName, 1);
453 break;
454
455 case IFF_UNKNOWN:
456 L_ERROR_STRING("unknown format in file %s", procName, filename);
457 return 1;
458 break;
459 }
460
461 if (pw) *pw = w;
462 if (ph) *ph = h;
463 if (pbps) *pbps = bps;
464 if (pspp) *pspp = spp;
465 if (piscmap) *piscmap = iscmap;
466 if (pformat) *pformat = format;
467 return 0;
468 }
469
470
471 /*---------------------------------------------------------------------*
472 * Format finders *
473 *---------------------------------------------------------------------*/
474 /*!
475 * findFileFormat()
476 *
477 * Input: fp (file stream)
478 * &format (<return>)
479 * Return: 0 if OK, 1 on error or if format is not recognized
480 *
481 * Notes:
482 * (1) Important: Side effect -- this resets fp to BOF.
483 */
484 l_int32
findFileFormat(FILE * fp,l_int32 * pformat)485 findFileFormat(FILE *fp,
486 l_int32 *pformat)
487 {
488 l_uint8 firstbytes[8];
489 l_int32 format;
490
491 PROCNAME("findFileFormat");
492
493 if (!pformat)
494 return ERROR_INT("&format not defined", procName, 1);
495 *pformat = 0;
496 if (!fp)
497 return ERROR_INT("stream not defined", procName, 1);
498
499 rewind(fp);
500 if (fnbytesInFile(fp) < 8)
501 return ERROR_INT("truncated file", procName, 1);
502
503 if (fread((char *)&firstbytes, 1, 8, fp) != 8)
504 return ERROR_INT("failed to read first 8 bytes of file", procName, 1);
505 rewind(fp);
506
507 findFileFormatBuffer(firstbytes, &format);
508 if (format == IFF_TIFF) {
509 findTiffCompression(fp, &format);
510 rewind(fp);
511 }
512 *pformat = format;
513 if (format == IFF_UNKNOWN)
514 return 1;
515 else
516 return 0;
517 }
518
519
520 /*!
521 * findFileFormatBuffer()
522 *
523 * Input: byte buffer (at least 8 bytes in size; we can't check)
524 * &format (<return>)
525 * Return: 0 if OK, 1 on error or if format is not recognized
526 *
527 * Notes:
528 * (1) This determines the file format from the first 8 bytes in
529 * the compressed data stream, which are stored in memory.
530 * (2) For tiff files, this returns IFF_TIFF. The specific tiff
531 * compression is then determined using findTiffCompression().
532 */
533 l_int32
findFileFormatBuffer(const l_uint8 * buf,l_int32 * pformat)534 findFileFormatBuffer(const l_uint8 *buf,
535 l_int32 *pformat)
536 {
537 l_uint16 twobytepw;
538
539 PROCNAME("findFileFormatBuffer");
540
541 if (!pformat)
542 return ERROR_INT("&format not defined", procName, 1);
543 *pformat = IFF_UNKNOWN;
544 if (!buf)
545 return ERROR_INT("byte buffer not defined", procName, 0);
546
547 /* Check the bmp and tiff 2-byte header ids */
548 ((char *)(&twobytepw))[0] = buf[0];
549 ((char *)(&twobytepw))[1] = buf[1];
550
551 if (convertOnBigEnd16(twobytepw) == BMP_ID) {
552 *pformat = IFF_BMP;
553 return 0;
554 }
555
556 if (twobytepw == TIFF_BIGEND_ID || twobytepw == TIFF_LITTLEEND_ID) {
557 *pformat = IFF_TIFF;
558 return 0;
559 }
560
561 /* Check for the p*m 2-byte header ids */
562 if ((buf[0] == 'P' && buf[1] == '4') || /* newer packed */
563 (buf[0] == 'P' && buf[1] == '1')) { /* old format */
564 *pformat = IFF_PNM;
565 return 0;
566 }
567
568 if ((buf[0] == 'P' && buf[1] == '5') || /* newer */
569 (buf[0] == 'P' && buf[1] == '2')) { /* old */
570 *pformat = IFF_PNM;
571 return 0;
572 }
573
574 if ((buf[0] == 'P' && buf[1] == '6') || /* newer */
575 (buf[0] == 'P' && buf[1] == '3')) { /* old */
576 *pformat = IFF_PNM;
577 return 0;
578 }
579
580 /* Consider the first 11 bytes of the standard JFIF JPEG header:
581 * - The first two bytes are the most important: 0xffd8.
582 * - The next two bytes are the jfif marker: 0xffe0.
583 * Not all jpeg files have this marker.
584 * - The next two bytes are the header length.
585 * - The next 5 bytes are a null-terminated string.
586 * For JFIF, the string is "JFIF", naturally. For others it
587 * can be "Exif" or just about anything else.
588 * - Because of all this variability, we only check the first
589 * two byte marker. All jpeg files are identified as
590 * IFF_JFIF_JPEG. */
591 if (buf[0] == 0xff && buf[1] == 0xd8) {
592 *pformat = IFF_JFIF_JPEG;
593 return 0;
594 }
595
596 /* Check for the 8 byte PNG signature (png_signature in png.c):
597 * {137, 80, 78, 71, 13, 10, 26, 10} */
598 if (buf[0] == 137 && buf[1] == 80 && buf[2] == 78 && buf[3] == 71 &&
599 buf[4] == 13 && buf[5] == 10 && buf[6] == 26 && buf[7] == 10) {
600 *pformat = IFF_PNG;
601 return 0;
602 }
603
604 /* Look for "GIF87a" or "GIF89a" */
605 if (buf[0] == 'G' && buf[1] == 'I' && buf[2] == 'F' && buf[3] == '8' &&
606 (buf[4] == '7' || buf[4] == '9') && buf[5] == 'a') {
607 *pformat = IFF_GIF;
608 return 0;
609 }
610
611 if (buf[0] == 0xff && buf[1] == 0x4f && buf[2] == 0xff && buf[3] == 0x51) {
612 *pformat = IFF_JP2;
613 return 0;
614 }
615
616 /* File format identifier not found; unknown */
617 return 1;
618 }
619
620
621 /*!
622 * fileFormatIsTiff()
623 *
624 * Input: fp (file stream)
625 * Return: 1 if file is tiff; 0 otherwise or on error
626 */
627 l_int32
fileFormatIsTiff(FILE * fp)628 fileFormatIsTiff(FILE *fp)
629 {
630 l_int32 format;
631
632 PROCNAME("fileFormatIsTiff");
633
634 if (!fp)
635 return ERROR_INT("stream not defined", procName, 0);
636
637 findFileFormat(fp, &format);
638 if (format == IFF_TIFF || format == IFF_TIFF_PACKBITS ||
639 format == IFF_TIFF_RLE || format == IFF_TIFF_G3 ||
640 format == IFF_TIFF_G4 || format == IFF_TIFF_LZW ||
641 format == IFF_TIFF_ZIP)
642 return 1;
643 else
644 return 0;
645 }
646
647
648 /*---------------------------------------------------------------------*
649 * Read from memory *
650 *---------------------------------------------------------------------*/
651 /*!
652 * pixReadMem()
653 *
654 * Input: data (const; encoded)
655 * datasize (size of data)
656 * Return: pix, or null on error
657 *
658 * Notes:
659 * (1) This is a variation of pixReadStream(), where the data is read
660 * from a memory buffer rather than a file.
661 * (2) On windows, this will only read tiff formatted files from
662 * memory. For other formats, it requires fmemopen(3).
663 * Attempts to read those formats will fail at runtime.
664 * (3) findFileFormatBuffer() requires up to 8 bytes to decide on
665 * the format. That determines the constraint here.
666 */
667 PIX *
pixReadMem(const l_uint8 * data,size_t size)668 pixReadMem(const l_uint8 *data,
669 size_t size)
670 {
671 l_int32 format;
672 PIX *pix;
673
674 PROCNAME("pixReadMem");
675
676 if (!data)
677 return (PIX *)ERROR_PTR("data not defined", procName, NULL);
678 if (size < 8)
679 return (PIX *)ERROR_PTR("size < 8", procName, NULL);
680 pix = NULL;
681
682 findFileFormatBuffer(data, &format);
683 switch (format)
684 {
685 case IFF_BMP:
686 if ((pix = pixReadMemBmp(data, size)) == NULL )
687 return (PIX *)ERROR_PTR( "bmp: no pix returned", procName, NULL);
688 break;
689
690 case IFF_JFIF_JPEG:
691 if ((pix = pixReadMemJpeg(data, size, READ_24_BIT_COLOR, 1, NULL, 0))
692 == NULL)
693 return (PIX *)ERROR_PTR( "jpeg: no pix returned", procName, NULL);
694 break;
695
696 case IFF_PNG:
697 if ((pix = pixReadMemPng(data, size)) == NULL)
698 return (PIX *)ERROR_PTR("png: no pix returned", procName, NULL);
699 break;
700
701 case IFF_TIFF:
702 case IFF_TIFF_PACKBITS:
703 case IFF_TIFF_RLE:
704 case IFF_TIFF_G3:
705 case IFF_TIFF_G4:
706 case IFF_TIFF_LZW:
707 case IFF_TIFF_ZIP:
708 /* Reading page 0 by default */
709 if ((pix = pixReadMemTiff(data, size, 0)) == NULL)
710 return (PIX *)ERROR_PTR("tiff: no pix returned", procName, NULL);
711 break;
712
713 case IFF_PNM:
714 if ((pix = pixReadMemPnm(data, size)) == NULL)
715 return (PIX *)ERROR_PTR("pnm: no pix returned", procName, NULL);
716 break;
717
718 case IFF_GIF:
719 if ((pix = pixReadMemGif(data, size)) == NULL)
720 return (PIX *)ERROR_PTR("gif: no pix returned", procName, NULL);
721 break;
722
723 case IFF_JP2:
724 return (PIX *)ERROR_PTR("jp2: format not supported", procName, NULL);
725 break;
726
727 case IFF_UNKNOWN:
728 return (PIX *)ERROR_PTR("Unknown format: no pix returned",
729 procName, NULL);
730 break;
731 }
732
733 /* Set the input format. For tiff reading from memory we lose
734 * the actual input format; for 1 bpp, default to G4. */
735 if (pix) {
736 if (format == IFF_TIFF && pixGetDepth(pix) == 1)
737 format = IFF_TIFF_G4;
738 pixSetInputFormat(pix, format);
739 }
740
741 return pix;
742 }
743
744
745 /*!
746 * pixReadHeaderMem()
747 *
748 * Input: data (const; encoded)
749 * datasize (size of data)
750 * &format (<optional returns> image format)
751 * &w, &h (<optional returns> width and height)
752 * &bps <optional return> bits/sample
753 * &spp <optional return> samples/pixel (1, 3 or 4)
754 * &iscmap (<optional return> 1 if cmap exists; 0 otherwise)
755 * Return: 0 if OK, 1 on error
756 *
757 * Notes:
758 * (1) This reads the actual headers for jpeg, png, tiff and pnm.
759 * For bmp and gif, we cheat and read all the data into a pix,
760 * from which we extract the "header" information.
761 * (2) On windows, this will only read tiff formatted files from
762 * memory. For other formats, it requires fmemopen(3).
763 * Attempts to read those formats will fail at runtime.
764 * (3) findFileFormatBuffer() requires up to 8 bytes to decide on
765 * the format. That determines the constraint here.
766 */
767 l_int32
pixReadHeaderMem(const l_uint8 * data,size_t size,l_int32 * pformat,l_int32 * pw,l_int32 * ph,l_int32 * pbps,l_int32 * pspp,l_int32 * piscmap)768 pixReadHeaderMem(const l_uint8 *data,
769 size_t size,
770 l_int32 *pformat,
771 l_int32 *pw,
772 l_int32 *ph,
773 l_int32 *pbps,
774 l_int32 *pspp,
775 l_int32 *piscmap)
776 {
777 l_int32 format, ret, w, h, d, bps, spp, iscmap;
778 l_int32 type; /* not used */
779 PIX *pix;
780
781 PROCNAME("pixReadHeaderMem");
782
783 if (pw) *pw = 0;
784 if (ph) *ph = 0;
785 if (pbps) *pbps = 0;
786 if (pspp) *pspp = 0;
787 if (piscmap) *piscmap = 0;
788 if (pformat) *pformat = 0;
789 iscmap = 0; /* init to false */
790 if (!data)
791 return ERROR_INT("data not defined", procName, 1);
792 if (size < 8)
793 return ERROR_INT("size < 8", procName, 1);
794
795 findFileFormatBuffer(data, &format);
796
797 switch (format)
798 {
799 case IFF_BMP: /* cheating: read the pix */
800 if ((pix = pixReadMemBmp(data, size)) == NULL)
801 return ERROR_INT( "bmp: pix not read", procName, 1);
802 pixGetDimensions(pix, &w, &h, &d);
803 pixDestroy(&pix);
804 bps = (d == 32) ? 8 : d;
805 spp = (d == 32) ? 3 : 1;
806 break;
807
808 case IFF_JFIF_JPEG:
809 ret = extractJpegDataFromArray(data, (l_int32)size, &w, &h, &bps, &spp);
810 if (ret)
811 return ERROR_INT( "jpeg: no header info returned", procName, 1);
812 break;
813
814 case IFF_PNG:
815 ret = sreadHeaderPng(data, &w, &h, &bps, &spp, &iscmap);
816 if (ret)
817 return ERROR_INT( "png: no header info returned", procName, 1);
818 break;
819
820 case IFF_TIFF:
821 case IFF_TIFF_PACKBITS:
822 case IFF_TIFF_RLE:
823 case IFF_TIFF_G3:
824 case IFF_TIFF_G4:
825 case IFF_TIFF_LZW:
826 case IFF_TIFF_ZIP:
827 /* Reading page 0 by default; possibly redefine format */
828 ret = readHeaderMemTiff(data, size, 0, &w, &h, &bps, &spp,
829 NULL, &iscmap, &format);
830 if (ret)
831 return ERROR_INT( "tiff: no header info returned", procName, 1);
832 break;
833
834 case IFF_PNM:
835 ret = sreadHeaderPnm(data, size, &w, &h, &d, &type, &bps, &spp);
836 if (ret)
837 return ERROR_INT( "pnm: no header info returned", procName, 1);
838 break;
839
840 case IFF_GIF: /* cheating: read the pix */
841 if ((pix = pixReadMemGif(data, size)) == NULL)
842 return ERROR_INT( "gif: pix not read", procName, 1);
843 pixGetDimensions(pix, &w, &h, &d);
844 pixDestroy(&pix);
845 iscmap = 1; /* always colormapped; max 256 colors */
846 spp = 1;
847 bps = d;
848 break;
849
850 case IFF_JP2:
851 return ERROR_INT("jp2: format not supported", procName, 1);
852 break;
853
854 case IFF_UNKNOWN:
855 return ERROR_INT("unknown format; no data returned", procName, 1);
856 break;
857 }
858
859 if (pw) *pw = w;
860 if (ph) *ph = h;
861 if (pbps) *pbps = bps;
862 if (pspp) *pspp = spp;
863 if (piscmap) *piscmap = iscmap;
864 if (pformat) *pformat = format;
865 return 0;
866 }
867
868
869 /*---------------------------------------------------------------------*
870 * Test function for I/O with different formats *
871 *---------------------------------------------------------------------*/
872 /*!
873 * ioFormatTest()
874 *
875 * Input: filename (input file)
876 * Return: 0 if OK; 1 on error or if the test fails
877 *
878 * Notes:
879 * (1) This writes and reads a set of output files losslessly
880 * in different formats to /tmp, and tests that the
881 * result before and after is unchanged.
882 * (2) This should work properly on input images of any depth,
883 * with and without colormaps.
884 * (3) All supported formats are tested for bmp, png, tiff and
885 * non-ascii pnm. Ascii pnm also works (but who'd ever want
886 * to use it?) We allow 2 bpp bmp, although it's not
887 * supported elsewhere. And we don't support reading
888 * 16 bpp png, although this can be turned on in pngio.c.
889 */
890 l_int32
ioFormatTest(const char * filename)891 ioFormatTest(const char *filename)
892 {
893 l_int32 d, equal, problems;
894 PIX *pixs, *pixc, *pixt, *pixt2;
895 PIXCMAP *cmap;
896
897 PROCNAME("ioFormatTest");
898
899 if (!filename)
900 return ERROR_INT("filename not defined", procName, 1);
901
902 if ((pixs = pixRead(filename)) == NULL)
903 return ERROR_INT("pixs not made", procName, 1);
904
905 /* Note that the reader automatically removes colormaps
906 * from 1 bpp BMP images, but not from 8 bpp BMP images.
907 * Therefore, if our 8 bpp image initially doesn't have a
908 * colormap, we are going to need to remove it from any
909 * pix read from a BMP file. */
910 pixc = pixClone(pixs); /* laziness */
911 cmap = pixGetColormap(pixc); /* colormap; can be NULL */
912 d = pixGetDepth(pixc);
913
914 problems = FALSE;
915
916 /* ----------------------- BMP -------------------------- */
917
918 /* BMP works for 1, 2, 4, 8 and 32 bpp images.
919 * It always writes colormaps for 1 and 8 bpp, so we must
920 * remove it after readback if the input image doesn't have
921 * a colormap. Although we can write/read 2 bpp BMP, nobody
922 * else can read them! */
923 if (d == 1 || d == 8) {
924 L_INFO("write/read bmp", procName);
925 pixWrite(FILE_BMP, pixc, IFF_BMP);
926 pixt = pixRead(FILE_BMP);
927 if (!cmap)
928 pixt2 = pixRemoveColormap(pixt, REMOVE_CMAP_BASED_ON_SRC);
929 else
930 pixt2 = pixClone(pixt);
931 pixEqual(pixc, pixt2, &equal);
932 if (!equal) {
933 L_INFO(" **** bad bmp image ****", procName);
934 problems = TRUE;
935 }
936 pixDestroy(&pixt);
937 pixDestroy(&pixt2);
938 }
939
940 if (d == 2 || d == 4 || d == 32) {
941 L_INFO("write/read bmp", procName);
942 pixWrite(FILE_BMP, pixc, IFF_BMP);
943 pixt = pixRead(FILE_BMP);
944 pixEqual(pixc, pixt, &equal);
945 if (!equal) {
946 L_INFO(" **** bad bmp image ****", procName);
947 problems = TRUE;
948 }
949 pixDestroy(&pixt);
950 }
951
952 /* ----------------------- PNG -------------------------- */
953
954 /* PNG works for all depths, but here, because we strip
955 * 16 --> 8 bpp on reading, we don't test png for 16 bpp. */
956 if (d != 16) {
957 L_INFO("write/read png", procName);
958 pixWrite(FILE_PNG, pixc, IFF_PNG);
959 pixt = pixRead(FILE_PNG);
960 pixEqual(pixc, pixt, &equal);
961 if (!equal) {
962 L_INFO(" **** bad png image ****", procName);
963 problems = TRUE;
964 }
965 pixDestroy(&pixt);
966 }
967
968 /* ----------------------- TIFF -------------------------- */
969
970 /* TIFF works for 1, 2, 4, 8, 16 and 32 bpp images.
971 * Because 8 bpp tiff always writes 256 entry colormaps, the
972 * colormap sizes may be different for 8 bpp images with
973 * colormap; we are testing if the image content is the same.
974 * Likewise, the 2 and 4 bpp tiff images with colormaps
975 * have colormap sizes 4 and 16, rsp. This test should
976 * work properly on the content, regardless of the number
977 * of color entries in pixc. */
978
979 /* tiff uncompressed works for all pixel depths */
980 L_INFO("write/read uncompressed tiff", procName);
981 pixWrite(FILE_TIFF, pixc, IFF_TIFF);
982 pixt = pixRead(FILE_TIFF);
983 pixEqual(pixc, pixt, &equal);
984 if (!equal) {
985 L_INFO(" **** bad tiff uncompressed image ****", procName);
986 problems = TRUE;
987 }
988 pixDestroy(&pixt);
989
990 /* tiff lzw works for all pixel depths */
991 L_INFO("write/read lzw compressed tiff", procName);
992 pixWrite(FILE_LZW, pixc, IFF_TIFF_LZW);
993 pixt = pixRead(FILE_LZW);
994 pixEqual(pixc, pixt, &equal);
995 if (!equal) {
996 L_INFO(" **** bad tiff lzw compressed image ****", procName);
997 problems = TRUE;
998 }
999 pixDestroy(&pixt);
1000
1001 /* tiff adobe deflate (zip) works for all pixel depths */
1002 L_INFO("write/read zip compressed tiff", procName);
1003 pixWrite(FILE_ZIP, pixc, IFF_TIFF_ZIP);
1004 pixt = pixRead(FILE_ZIP);
1005 pixEqual(pixc, pixt, &equal);
1006 if (!equal) {
1007 L_INFO(" **** bad tiff zip compressed image ****", procName);
1008 problems = TRUE;
1009 }
1010 pixDestroy(&pixt);
1011
1012 /* tiff g4, g3, rle and packbits work for 1 bpp */
1013 if (d == 1) {
1014 L_INFO("write/read g4 compressed tiff", procName);
1015 pixWrite(FILE_G4, pixc, IFF_TIFF_G4);
1016 pixt = pixRead(FILE_G4);
1017 pixEqual(pixc, pixt, &equal);
1018 if (!equal) {
1019 L_INFO(" **** bad tiff g4 image ****", procName);
1020 problems = TRUE;
1021 }
1022 pixDestroy(&pixt);
1023
1024 L_INFO("write/read g3 compressed tiff", procName);
1025 pixWrite(FILE_G3, pixc, IFF_TIFF_G3);
1026 pixt = pixRead(FILE_G3);
1027 pixEqual(pixc, pixt, &equal);
1028 if (!equal) {
1029 L_INFO(" **** bad tiff g3 image ****", procName);
1030 problems = TRUE;
1031 }
1032 pixDestroy(&pixt);
1033
1034 L_INFO("write/read rle compressed tiff", procName);
1035 pixWrite(FILE_RLE, pixc, IFF_TIFF_RLE);
1036 pixt = pixRead(FILE_RLE);
1037 pixEqual(pixc, pixt, &equal);
1038 if (!equal) {
1039 L_INFO(" **** bad tiff rle image ****", procName);
1040 problems = TRUE;
1041 }
1042 pixDestroy(&pixt);
1043
1044 L_INFO("write/read packbits compressed tiff", procName);
1045 pixWrite(FILE_PB, pixc, IFF_TIFF_PACKBITS);
1046 pixt = pixRead(FILE_PB);
1047 pixEqual(pixc, pixt, &equal);
1048 if (!equal) {
1049 L_INFO(" **** bad tiff packbits image ****", procName);
1050 problems = TRUE;
1051 }
1052 pixDestroy(&pixt);
1053 }
1054
1055 /* ----------------------- PNM -------------------------- */
1056
1057 /* pnm works for 1, 2, 4, 8, 16 and 32 bpp.
1058 * pnm doesn't have colormaps, so when we write colormapped
1059 * pix out as pnm, the colormap is removed. Thus for the test,
1060 * we must remove the colormap from pixc before testing. */
1061 L_INFO("write/read pnm", procName);
1062 pixWrite(FILE_PNM, pixc, IFF_PNM);
1063 pixt = pixRead(FILE_PNM);
1064 if (cmap)
1065 pixt2 = pixRemoveColormap(pixc, REMOVE_CMAP_BASED_ON_SRC);
1066 else
1067 pixt2 = pixClone(pixc);
1068 pixEqual(pixt, pixt2, &equal);
1069 if (!equal) {
1070 L_INFO(" **** bad pnm image ****", procName);
1071 problems = TRUE;
1072 }
1073 pixDestroy(&pixt);
1074 pixDestroy(&pixt2);
1075
1076 if (problems == FALSE)
1077 L_INFO("All formats read and written OK!", procName);
1078
1079 pixDestroy(&pixc);
1080 pixDestroy(&pixs);
1081 return problems;
1082 }
1083
1084