1 /*====================================================================*
2 - Copyright (C) 2001 Leptonica. All rights reserved.
3 -
4 - Redistribution and use in source and binary forms, with or without
5 - modification, are permitted provided that the following conditions
6 - are met:
7 - 1. Redistributions of source code must retain the above copyright
8 - notice, this list of conditions and the following disclaimer.
9 - 2. Redistributions in binary form must reproduce the above
10 - copyright notice, this list of conditions and the following
11 - disclaimer in the documentation and/or other materials
12 - provided with the distribution.
13 -
14 - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
15 - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
16 - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
17 - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY
18 - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
19 - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20 - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
21 - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
22 - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
23 - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
24 - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 *====================================================================*/
26
27 /*!
28 * \file psio1.c
29 * <pre>
30 *
31 * |=============================================================|
32 * | Important note |
33 * |=============================================================|
34 * | Some of these functions require libtiff, libjpeg and libz. |
35 * | If you do not have these libraries, you must set |
36 * | #define USE_PSIO 0 |
37 * | in environ.h. This will link psio1stub.c |
38 * |=============================================================|
39 *
40 * This is a PostScript "device driver" for wrapping images
41 * in PostScript. The images can be rendered by a PostScript
42 * interpreter for viewing, using evince or gv. They can also be
43 * rasterized for printing, using gs or an embedded interpreter
44 * in a PostScript printer. And they can be converted to a pdf
45 * using gs (ps2pdf).
46 *
47 * Convert specified files to PS
48 * l_int32 convertFilesToPS()
49 * l_int32 sarrayConvertFilesToPS()
50 * l_int32 convertFilesFittedToPS()
51 * l_int32 sarrayConvertFilesFittedToPS()
52 * l_int32 writeImageCompressedToPSFile()
53 *
54 * Convert mixed text/image files to PS
55 * l_int32 convertSegmentedPagesToPS()
56 * l_int32 pixWriteSegmentedPageToPS()
57 * l_int32 pixWriteMixedToPS()
58 *
59 * Convert any image file to PS for embedding
60 * l_int32 convertToPSEmbed()
61 *
62 * Write all images in a pixa out to PS
63 * l_int32 pixaWriteCompressedToPS()
64 *
65 * These PostScript converters are used in three different ways.
66 *
67 * (1) For embedding a PS file in a program like TeX.
68 * convertToPSEmbed() handles this for levels 1, 2 and 3 output,
69 * and prog/converttops wraps this in an executable.
70 * converttops is a generalization of Thomas Merz's jpeg2ps wrapper,
71 * in that it works for all types (formats, depth, colormap)
72 * of input images and gives PS output in one of these formats
73 * * level 1 (uncompressed)
74 * * level 2 (compressed ccittg4 or dct)
75 * * level 3 (compressed flate)
76 *
77 * (2) For composing a set of pages with any number of images
78 * painted on them, in either level 2 or level 3 formats.
79 *
80 * (3) For printing a page image or a set of page images, at a
81 * resolution that optimally fills the page, using
82 * convertFilesFittedToPS().
83 *
84 * The top-level calls of utilities in category 2, which can compose
85 * multiple images on a page, and which generate a PostScript file for
86 * printing or display (e.g., conversion to pdf), are:
87 * convertFilesToPS()
88 * convertFilesFittedToPS()
89 * convertSegmentedPagesToPS()
90 *
91 * All images are output with page numbers. Bounding box hints are
 * more subtle. They must be included for embedding images in
93 * TeX, for example, and the low-level writers include bounding
94 * box hints by default. However, these hints should not be included for
95 * multi-page PostScript that is composed of a sequence of images;
96 * consequently, they are not written when calling higher level
97 * functions such as convertFilesToPS(), convertFilesFittedToPS()
98 * and convertSegmentedPagesToPS(). The function l_psWriteBoundingBox()
99 * sets a flag to give low-level control over this.
100 * </pre>
101 */
102
103 #include <string.h>
104 #include "allheaders.h"
105
106 /* --------------------------------------------*/
107 #if USE_PSIO /* defined in environ.h */
108 /* --------------------------------------------*/
109
110 /*-------------------------------------------------------------*
111 * Convert files in a directory to PS *
112 *-------------------------------------------------------------*/
113 /*
114 * convertFilesToPS()
115 *
116 * Input: dirin (input directory)
117 * substr (<optional> substring filter on filenames; can be NULL)
118 * res (typ. 300 or 600 ppi)
119 * fileout (output ps file)
120 * Return: 0 if OK, 1 on error
121 *
122 * Notes:
123 * (1) This generates a PS file for all image files in a specified
124 * directory that contain the substr pattern to be matched.
125 * (2) Each image is written to a separate page in the output PS file.
126 * (3) All images are written compressed:
127 * * if tiffg4 --> use ccittg4
128 * * if jpeg --> use dct
129 * * all others --> use flate
130 * If the image is jpeg or tiffg4, we use the existing compressed
131 * strings for the encoding; otherwise, we read the image into
132 * a pix and flate-encode the pieces.
133 * (4) The resolution is often confusing. It is interpreted
134 * as the resolution of the output display device: "If the
135 * input image were digitized at 300 ppi, what would it
136 * look like when displayed at res ppi." So, for example,
137 * if res = 100 ppi, then the display pixels are 3x larger
138 * than the 300 ppi pixels, and the image will be rendered
139 * 3x larger.
140 * (5) The size of the PostScript file is independent of the resolution,
141 * because the entire file is encoded. The res parameter just
142 * tells the PS decomposer how to render the page. Therefore,
143 * for minimum file size without loss of visual information,
144 * if the output res is less than 300, you should downscale
145 * the image to the output resolution before wrapping in PS.
146 * (6) The "canvas" on which the image is rendered, at the given
147 * output resolution, is a standard page size (8.5 x 11 in).
148 */
149 l_int32
convertFilesToPS(const char * dirin,const char * substr,l_int32 res,const char * fileout)150 convertFilesToPS(const char *dirin,
151 const char *substr,
152 l_int32 res,
153 const char *fileout)
154 {
155 SARRAY *sa;
156
157 PROCNAME("convertFilesToPS");
158
159 if (!dirin)
160 return ERROR_INT("dirin not defined", procName, 1);
161 if (!fileout)
162 return ERROR_INT("fileout not defined", procName, 1);
163 if (res <= 0) {
164 L_INFO("setting res to 300 ppi\n", procName);
165 res = 300;
166 }
167 if (res < 10 || res > 4000)
168 L_WARNING("res is typically in the range 300-600 ppi\n", procName);
169
170 /* Get all filtered and sorted full pathnames. */
171 sa = getSortedPathnamesInDirectory(dirin, substr, 0, 0);
172
173 /* Generate the PS file. Don't use bounding boxes. */
174 l_psWriteBoundingBox(FALSE);
175 sarrayConvertFilesToPS(sa, res, fileout);
176 l_psWriteBoundingBox(TRUE);
177 sarrayDestroy(&sa);
178 return 0;
179 }
180
181
182 /*
183 * sarrayConvertFilesToPS()
184 *
185 * Input: sarray (of full path names)
186 * res (typ. 300 or 600 ppi)
187 * fileout (output ps file)
188 * Return: 0 if OK, 1 on error
189 *
190 * Notes:
191 * (1) See convertFilesToPS()
192 */
193 l_int32
sarrayConvertFilesToPS(SARRAY * sa,l_int32 res,const char * fileout)194 sarrayConvertFilesToPS(SARRAY *sa,
195 l_int32 res,
196 const char *fileout)
197 {
198 char *fname;
199 l_int32 i, nfiles, index, firstfile, ret, format;
200
201 PROCNAME("sarrayConvertFilesToPS");
202
203 if (!sa)
204 return ERROR_INT("sa not defined", procName, 1);
205 if (!fileout)
206 return ERROR_INT("fileout not defined", procName, 1);
207 if (res <= 0) {
208 L_INFO("setting res to 300 ppi\n", procName);
209 res = 300;
210 }
211 if (res < 10 || res > 4000)
212 L_WARNING("res is typically in the range 300-600 ppi\n", procName);
213
214 nfiles = sarrayGetCount(sa);
215 firstfile = TRUE;
216 for (i = 0, index = 0; i < nfiles; i++) {
217 fname = sarrayGetString(sa, i, L_NOCOPY);
218 ret = pixReadHeader(fname, &format, NULL, NULL, NULL, NULL, NULL);
219 if (ret) continue;
220 if (format == IFF_UNKNOWN)
221 continue;
222
223 writeImageCompressedToPSFile(fname, fileout, res, &firstfile, &index);
224 }
225
226 return 0;
227 }
228
229
230 /*
231 * convertFilesFittedToPS()
232 *
233 * Input: dirin (input directory)
234 * substr (<optional> substring filter on filenames; can be NULL)
235 * xpts, ypts (desired size in printer points; use 0 for default)
236 * fileout (output ps file)
237 * Return: 0 if OK, 1 on error
238 *
239 * Notes:
240 * (1) This generates a PS file for all files in a specified directory
241 * that contain the substr pattern to be matched.
242 * (2) Each image is written to a separate page in the output PS file.
243 * (3) All images are written compressed:
244 * * if tiffg4 --> use ccittg4
245 * * if jpeg --> use dct
246 * * all others --> use flate
247 * If the image is jpeg or tiffg4, we use the existing compressed
248 * strings for the encoding; otherwise, we read the image into
249 * a pix and flate-encode the pieces.
250 * (4) The resolution is internally determined such that the images
251 * are rendered, in at least one direction, at 100% of the given
252 * size in printer points. Use 0.0 for xpts or ypts to get
253 * the default value, which is 612.0 or 792.0, rsp.
254 * (5) The size of the PostScript file is independent of the resolution,
255 * because the entire file is encoded. The %xpts and %ypts
256 * parameter tells the PS decomposer how to render the page.
257 */
258 l_int32
convertFilesFittedToPS(const char * dirin,const char * substr,l_float32 xpts,l_float32 ypts,const char * fileout)259 convertFilesFittedToPS(const char *dirin,
260 const char *substr,
261 l_float32 xpts,
262 l_float32 ypts,
263 const char *fileout)
264 {
265 SARRAY *sa;
266
267 PROCNAME("convertFilesFittedToPS");
268
269 if (!dirin)
270 return ERROR_INT("dirin not defined", procName, 1);
271 if (!fileout)
272 return ERROR_INT("fileout not defined", procName, 1);
273 if (xpts <= 0.0) {
274 L_INFO("setting xpts to 612.0 ppi\n", procName);
275 xpts = 612.0;
276 }
277 if (ypts <= 0.0) {
278 L_INFO("setting ypts to 792.0 ppi\n", procName);
279 ypts = 792.0;
280 }
281 if (xpts < 100.0 || xpts > 2000.0 || ypts < 100.0 || ypts > 2000.0)
282 L_WARNING("xpts,ypts are typically in the range 500-800\n", procName);
283
284 /* Get all filtered and sorted full pathnames. */
285 sa = getSortedPathnamesInDirectory(dirin, substr, 0, 0);
286
287 /* Generate the PS file. Don't use bounding boxes. */
288 l_psWriteBoundingBox(FALSE);
289 sarrayConvertFilesFittedToPS(sa, xpts, ypts, fileout);
290 l_psWriteBoundingBox(TRUE);
291 sarrayDestroy(&sa);
292 return 0;
293 }
294
295
296 /*
297 * sarrayConvertFilesFittedToPS()
298 *
299 * Input: sarray (of full path names)
300 * xpts, ypts (desired size in printer points; use 0 for default)
301 * fileout (output ps file)
302 * Return: 0 if OK, 1 on error
303 *
304 * Notes:
305 * (1) See convertFilesFittedToPS()
306 */
307 l_int32
sarrayConvertFilesFittedToPS(SARRAY * sa,l_float32 xpts,l_float32 ypts,const char * fileout)308 sarrayConvertFilesFittedToPS(SARRAY *sa,
309 l_float32 xpts,
310 l_float32 ypts,
311 const char *fileout)
312 {
313 char *fname;
314 l_int32 ret, i, w, h, nfiles, index, firstfile, format, res;
315
316 PROCNAME("sarrayConvertFilesFittedToPS");
317
318 if (!sa)
319 return ERROR_INT("sa not defined", procName, 1);
320 if (!fileout)
321 return ERROR_INT("fileout not defined", procName, 1);
322 if (xpts <= 0.0) {
323 L_INFO("setting xpts to 612.0\n", procName);
324 xpts = 612.0;
325 }
326 if (ypts <= 0.0) {
327 L_INFO("setting ypts to 792.0\n", procName);
328 ypts = 792.0;
329 }
330 if (xpts < 100.0 || xpts > 2000.0 || ypts < 100.0 || ypts > 2000.0)
331 L_WARNING("xpts,ypts are typically in the range 500-800\n", procName);
332
333 nfiles = sarrayGetCount(sa);
334 firstfile = TRUE;
335 for (i = 0, index = 0; i < nfiles; i++) {
336 fname = sarrayGetString(sa, i, L_NOCOPY);
337 ret = pixReadHeader(fname, &format, &w, &h, NULL, NULL, NULL);
338 if (ret) continue;
339 if (format == IFF_UNKNOWN)
340 continue;
341
342 /* Be sure the entire image is wrapped */
343 if (xpts * h < ypts * w)
344 res = (l_int32)((l_float32)w * 72.0 / xpts);
345 else
346 res = (l_int32)((l_float32)h * 72.0 / ypts);
347
348 writeImageCompressedToPSFile(fname, fileout, res, &firstfile, &index);
349 }
350
351 return 0;
352 }
353
354
355 /*
356 * writeImageCompressedToPSFile()
357 *
358 * Input: filein (input image file)
359 * fileout (output ps file)
360 * res (output printer resolution)
361 * &firstfile (<input and return> 1 if the first image;
362 * 0 otherwise)
363 * &index (<input and return> index of image in output ps file)
364 * Return: 0 if OK, 1 on error
365 *
366 * Notes:
367 * (1) This wraps a single page image in PS.
368 * (2) The input file can be in any format. It is compressed as follows:
369 * * if in tiffg4 --> use ccittg4
370 * * if in jpeg --> use dct
371 * * all others --> use flate
 * (3) Before the first call, set %firstfile = 1. After writing
 * the first page, it will be set to 0.
374 * (4) %index is incremented if the page is successfully written.
375 */
376 l_int32
writeImageCompressedToPSFile(const char * filein,const char * fileout,l_int32 res,l_int32 * pfirstfile,l_int32 * pindex)377 writeImageCompressedToPSFile(const char *filein,
378 const char *fileout,
379 l_int32 res,
380 l_int32 *pfirstfile,
381 l_int32 *pindex)
382 {
383 const char *op;
384 l_int32 format, retval;
385
386 PROCNAME("writeImageCompressedToPSFile");
387
388 if (!pfirstfile || !pindex)
389 return ERROR_INT("&firstfile and &index not defined", procName, 1);
390
391 findFileFormat(filein, &format);
392 if (format == IFF_UNKNOWN) {
393 L_ERROR("format of %s not known\n", procName, filein);
394 return 1;
395 }
396
397 op = (*pfirstfile == TRUE) ? "w" : "a";
398 if (format == IFF_JFIF_JPEG) {
399 retval = convertJpegToPS(filein, fileout, op, 0, 0,
400 res, 1.0, *pindex + 1, TRUE);
401 if (retval == 0) {
402 *pfirstfile = FALSE;
403 (*pindex)++;
404 }
405 } else if (format == IFF_TIFF_G4) {
406 retval = convertG4ToPS(filein, fileout, op, 0, 0,
407 res, 1.0, *pindex + 1, FALSE, TRUE);
408 if (retval == 0) {
409 *pfirstfile = FALSE;
410 (*pindex)++;
411 }
412 } else { /* all other image formats */
413 retval = convertFlateToPS(filein, fileout, op, 0, 0,
414 res, 1.0, *pindex + 1, TRUE);
415 if (retval == 0) {
416 *pfirstfile = FALSE;
417 (*pindex)++;
418 }
419 }
420
421 return retval;
422 }
423
424
425 /*-------------------------------------------------------------*
426 * Convert mixed text/image files to PS *
427 *-------------------------------------------------------------*/
428 /*
429 * convertSegmentedPagesToPS()
430 *
431 * Input: pagedir (input page image directory)
432 * pagestr (<optional> substring filter on page filenames;
433 * can be NULL)
434 * page_numpre (number of characters in page name before number)
435 * maskdir (input mask image directory)
436 * maskstr (<optional> substring filter on mask filenames;
437 * can be NULL)
438 * mask_numpre (number of characters in mask name before number)
439 * numpost (number of characters in names after number)
440 * maxnum (only consider page numbers up to this value)
441 * textscale (scale of text output relative to pixs)
442 * imagescale (scale of image output relative to pixs)
443 * threshold (for binarization; typ. about 190; 0 for default)
444 * fileout (output ps file)
445 * Return: 0 if OK, 1 on error
446 *
447 * Notes:
448 * (1) This generates a PS file for all page image and mask files in two
449 * specified directories and that contain the page numbers as
450 * specified below. The two directories can be the same, in which
451 * case the page and mask files are differentiated by the two
452 * substrings for string matches.
453 * (2) The page images are taken in lexicographic order.
454 * Mask images whose numbers match the page images are used to
455 * segment the page images. Page images without a matching
456 * mask image are scaled, thresholded and rendered entirely as text.
457 * (3) Each PS page is generated as a compressed representation of
458 * the page image, where the part of the image under the mask
459 * is suitably scaled and compressed as DCT (i.e., jpeg), and
460 * the remaining part of the page is suitably scaled, thresholded,
461 * compressed as G4 (i.e., tiff g4), and rendered by painting
462 * black through the resulting text mask.
463 * (4) The scaling is typically 2x down for the DCT component
464 * (%imagescale = 0.5) and 2x up for the G4 component
465 * (%textscale = 2.0).
466 * (5) The resolution is automatically set to fit to a
467 * letter-size (8.5 x 11 inch) page.
468 * (6) Both the DCT and the G4 encoding are PostScript level 2.
469 * (7) It is assumed that the page number is contained within
470 * the basename (the filename without directory or extension).
471 * %page_numpre is the number of characters in the page basename
472 * preceding the actual page number; %mask_numpre is likewise for
473 * the mask basename; %numpost is the number of characters
474 * following the page number. For example, for mask name
 * mask_006.tif, mask_numpre = 5 ("mask_").
476 * (8) To render a page as is -- that is, with no thresholding
477 * of any pixels -- use a mask in the mask directory that is
478 * full size with all pixels set to 1. If the page is 1 bpp,
479 * it is not necessary to have a mask.
480 */
481 l_int32
convertSegmentedPagesToPS(const char * pagedir,const char * pagestr,l_int32 page_numpre,const char * maskdir,const char * maskstr,l_int32 mask_numpre,l_int32 numpost,l_int32 maxnum,l_float32 textscale,l_float32 imagescale,l_int32 threshold,const char * fileout)482 convertSegmentedPagesToPS(const char *pagedir,
483 const char *pagestr,
484 l_int32 page_numpre,
485 const char *maskdir,
486 const char *maskstr,
487 l_int32 mask_numpre,
488 l_int32 numpost,
489 l_int32 maxnum,
490 l_float32 textscale,
491 l_float32 imagescale,
492 l_int32 threshold,
493 const char *fileout)
494 {
495 l_int32 pageno, i, npages;
496 PIX *pixs, *pixm;
497 SARRAY *sapage, *samask;
498
499 PROCNAME("convertSegmentedPagesToPS");
500
501 if (!pagedir)
502 return ERROR_INT("pagedir not defined", procName, 1);
503 if (!maskdir)
504 return ERROR_INT("maskdir not defined", procName, 1);
505 if (!fileout)
506 return ERROR_INT("fileout not defined", procName, 1);
507 if (threshold <= 0) {
508 L_INFO("setting threshold to 190\n", procName);
509 threshold = 190;
510 }
511
512 /* Get numbered full pathnames; max size of sarray is maxnum */
513 sapage = getNumberedPathnamesInDirectory(pagedir, pagestr,
514 page_numpre, numpost, maxnum);
515 samask = getNumberedPathnamesInDirectory(maskdir, maskstr,
516 mask_numpre, numpost, maxnum);
517 sarrayPadToSameSize(sapage, samask, (char *)"");
518 if ((npages = sarrayGetCount(sapage)) == 0) {
519 sarrayDestroy(&sapage);
520 sarrayDestroy(&samask);
521 return ERROR_INT("no matching pages found", procName, 1);
522 }
523
524 /* Generate the PS file */
525 pageno = 1;
526 for (i = 0; i < npages; i++) {
527 if ((pixs = pixReadIndexed(sapage, i)) == NULL)
528 continue;
529 pixm = pixReadIndexed(samask, i);
530 pixWriteSegmentedPageToPS(pixs, pixm, textscale, imagescale,
531 threshold, pageno, fileout);
532 pixDestroy(&pixs);
533 pixDestroy(&pixm);
534 pageno++;
535 }
536
537 sarrayDestroy(&sapage);
538 sarrayDestroy(&samask);
539 return 0;
540 }
541
542
543 /*
544 * pixWriteSegmentedPageToPS()
545 *
546 * Input: pixs (all depths; colormap ok)
547 * pixm (<optional> 1 bpp segmentation mask over image region)
548 * textscale (scale of text output relative to pixs)
549 * imagescale (scale of image output relative to pixs)
550 * threshold (threshold for binarization; typ. 190)
551 * pageno (page number in set; use 1 for new output file)
552 * fileout (output ps file)
553 * Return: 0 if OK, 1 on error
554 *
555 * Notes:
556 * (1) This generates the PS string for a mixed text/image page,
557 * and adds it to an existing file if %pageno > 1.
558 * The PS output is determined by fitting the result to
559 * a letter-size (8.5 x 11 inch) page.
560 * (2) The two images (pixs and pixm) are at the same resolution
561 * (typically 300 ppi). They are used to generate two compressed
562 * images, pixb and pixc, that are put directly into the output
563 * PS file.
564 * (3) pixb is the text component. In the PostScript world, we think of
565 * it as a mask through which we paint black. It is produced by
566 * scaling pixs by %textscale, and thresholding to 1 bpp.
567 * (4) pixc is the image component, which is that part of pixs under
568 * the mask pixm. It is scaled from pixs by %imagescale.
569 * (5) Typical values are textscale = 2.0 and imagescale = 0.5.
570 * (6) If pixm == NULL, the page has only text. If it is all black,
571 * the page is all image and has no text.
572 * (7) This can be used to write a multi-page PS file, by using
573 * sequential page numbers with the same output file. It can
574 * also be used to write separate PS files for each page,
575 * by using different output files with %pageno = 0 or 1.
576 */
577 l_int32
pixWriteSegmentedPageToPS(PIX * pixs,PIX * pixm,l_float32 textscale,l_float32 imagescale,l_int32 threshold,l_int32 pageno,const char * fileout)578 pixWriteSegmentedPageToPS(PIX *pixs,
579 PIX *pixm,
580 l_float32 textscale,
581 l_float32 imagescale,
582 l_int32 threshold,
583 l_int32 pageno,
584 const char *fileout)
585 {
586 l_int32 alltext, notext, d, ret;
587 l_uint32 val;
588 l_float32 scaleratio;
589 PIX *pixmi, *pixmis, *pixt, *pixg, *pixsc, *pixb, *pixc;
590
591 PROCNAME("pixWriteSegmentedPageToPS");
592
593 if (!pixs)
594 return ERROR_INT("pixs not defined", procName, 1);
595 if (!fileout)
596 return ERROR_INT("fileout not defined", procName, 1);
597 if (imagescale <= 0.0 || textscale <= 0.0)
598 return ERROR_INT("relative scales must be > 0.0", procName, 1);
599
600 /* Analyze the page. Determine the ratio by which the
601 * binary text mask is scaled relative to the image part.
602 * If there is no image region (alltext == TRUE), the
603 * text mask will be rendered directly to fit the page,
604 * and scaleratio = 1.0. */
605 alltext = TRUE;
606 notext = FALSE;
607 scaleratio = 1.0;
608 if (pixm) {
609 pixZero(pixm, &alltext); /* pixm empty: all text */
610 if (alltext) {
611 pixm = NULL; /* treat it as not existing here */
612 } else {
613 pixmi = pixInvert(NULL, pixm);
614 pixZero(pixmi, ¬ext); /* pixm full; no text */
615 pixDestroy(&pixmi);
616 scaleratio = textscale / imagescale;
617 }
618 }
619
620 if (pixGetDepth(pixs) == 1) { /* render tiff g4 */
621 pixb = pixClone(pixs);
622 pixc = NULL;
623 } else {
624 pixt = pixConvertTo8Or32(pixs, L_CLONE, 0); /* clone if possible */
625
626 /* Get the binary text mask. Note that pixg cannot be a
627 * clone of pixs, because it may be altered by pixSetMasked(). */
628 pixb = NULL;
629 if (notext == FALSE) {
630 d = pixGetDepth(pixt);
631 if (d == 8)
632 pixg = pixCopy(NULL, pixt);
633 else /* d == 32 */
634 pixg = pixConvertRGBToLuminance(pixt);
635 if (pixm) /* clear out the image parts */
636 pixSetMasked(pixg, pixm, 255);
637 if (textscale == 1.0)
638 pixsc = pixClone(pixg);
639 else if (textscale >= 0.7)
640 pixsc = pixScaleGrayLI(pixg, textscale, textscale);
641 else
642 pixsc = pixScaleAreaMap(pixg, textscale, textscale);
643 pixb = pixThresholdToBinary(pixsc, threshold);
644 pixDestroy(&pixg);
645 pixDestroy(&pixsc);
646 }
647
648 /* Get the scaled image region */
649 pixc = NULL;
650 if (pixm) {
651 if (imagescale == 1.0)
652 pixsc = pixClone(pixt); /* can possibly be a clone of pixs */
653 else
654 pixsc = pixScale(pixt, imagescale, imagescale);
655
656 /* If pixm is not full, clear the pixels in pixsc
657 * corresponding to bg in pixm, where there can be text
658 * that is written through the mask pixb. Note that
659 * we could skip this and use pixsc directly in
660 * pixWriteMixedToPS(); however, clearing these
661 * non-image regions to a white background will reduce
662 * the size of pixc (relative to pixsc), and hence
663 * reduce the size of the PS file that is generated.
664 * Use a copy so that we don't accidentally alter pixs. */
665 if (notext == FALSE) {
666 pixmis = pixScale(pixm, imagescale, imagescale);
667 pixmi = pixInvert(NULL, pixmis);
668 val = (d == 8) ? 0xff : 0xffffff00;
669 pixc = pixCopy(NULL, pixsc);
670 pixSetMasked(pixc, pixmi, val); /* clear non-image part */
671 pixDestroy(&pixmis);
672 pixDestroy(&pixmi);
673 } else {
674 pixc = pixClone(pixsc);
675 }
676 pixDestroy(&pixsc);
677 }
678 pixDestroy(&pixt);
679 }
680
681 /* Generate the PS file. Don't use bounding boxes. */
682 l_psWriteBoundingBox(FALSE);
683 ret = pixWriteMixedToPS(pixb, pixc, scaleratio, pageno, fileout);
684 l_psWriteBoundingBox(TRUE);
685 pixDestroy(&pixb);
686 pixDestroy(&pixc);
687 return ret;
688 }
689
690
691 /*
692 * pixWriteMixedToPS()
693 *
 * Input: pixb (<optional> 1 bpp "mask"; typically for text)
695 * pixc (<optional> 8 or 32 bpp image regions)
696 * scale (relative scale factor for rendering pixb
697 * relative to pixc; typ. 4.0)
698 * pageno (page number in set; use 1 for new output file)
699 * fileout (output ps file)
700 * Return: 0 if OK, 1 on error
701 *
702 * Notes:
703 * (1) This low level function generates the PS string for a mixed
704 * text/image page, and adds it to an existing file if
705 * %pageno > 1.
706 * (2) The two images (pixb and pixc) are typically generated at the
707 * resolution that they will be rendered in the PS file.
708 * (3) pixb is the text component. In the PostScript world, we think of
709 * it as a mask through which we paint black.
710 * (4) pixc is the (typically halftone) image component. It is
711 * white in the rest of the page. To minimize the size of the
712 * PS file, it should be rendered at a resolution that is at
713 * least equal to its actual resolution.
714 * (5) %scale gives the ratio of resolution of pixb to pixc.
715 * Typical resolutions are: 600 ppi for pixb, 150 ppi for pixc;
716 * so %scale = 4.0. If one of the images is not defined,
717 * the value of %scale is ignored.
718 * (6) We write pixc with DCT compression (jpeg). This is followed
719 * by painting the text as black through the mask pixb. If
720 * pixc doesn't exist (alltext), we write the text with the
721 * PS "image" operator instead of the "imagemask" operator,
722 * because ghostscript's ps2pdf is flaky when the latter is used.
723 * (7) The actual output resolution is determined by fitting the
724 * result to a letter-size (8.5 x 11 inch) page.
725 */
726 l_int32
pixWriteMixedToPS(PIX * pixb,PIX * pixc,l_float32 scale,l_int32 pageno,const char * fileout)727 pixWriteMixedToPS(PIX *pixb,
728 PIX *pixc,
729 l_float32 scale,
730 l_int32 pageno,
731 const char *fileout)
732 {
733 char *tname;
734 const char *op;
735 l_int32 resb, resc, endpage, maskop, ret;
736
737 PROCNAME("pixWriteMixedToPS");
738
739 if (!pixb && !pixc)
740 return ERROR_INT("pixb and pixc both undefined", procName, 1);
741 if (!fileout)
742 return ERROR_INT("fileout not defined", procName, 1);
743
744 /* Compute the resolution that fills a letter-size page. */
745 if (!pixc) {
746 resb = getResLetterPage(pixGetWidth(pixb), pixGetHeight(pixb), 0);
747 } else {
748 resc = getResLetterPage(pixGetWidth(pixc), pixGetHeight(pixc), 0);
749 if (pixb)
750 resb = (l_int32)(scale * resc);
751 }
752
753 /* Write the jpeg image first */
754 if (pixc) {
755 tname = l_makeTempFilename();
756 pixWrite(tname, pixc, IFF_JFIF_JPEG);
757 endpage = (pixb) ? FALSE : TRUE;
758 op = (pageno <= 1) ? "w" : "a";
759 ret = convertJpegToPS(tname, fileout, op, 0, 0, resc, 1.0,
760 pageno, endpage);
761 lept_rmfile(tname);
762 LEPT_FREE(tname);
763 if (ret)
764 return ERROR_INT("jpeg data not written", procName, 1);
765 }
766
767 /* Write the binary data, either directly or, if there is
768 * a jpeg image on the page, through the mask. */
769 if (pixb) {
770 tname = l_makeTempFilename();
771 pixWrite(tname, pixb, IFF_TIFF_G4);
772 op = (pageno <= 1 && !pixc) ? "w" : "a";
773 maskop = (pixc) ? 1 : 0;
774 ret = convertG4ToPS(tname, fileout, op, 0, 0, resb, 1.0,
775 pageno, maskop, 1);
776 lept_rmfile(tname);
777 LEPT_FREE(tname);
778 if (ret)
779 return ERROR_INT("tiff data not written", procName, 1);
780 }
781
782 return 0;
783 }
784
785
786 /*-------------------------------------------------------------*
787 * Convert any image file to PS for embedding *
788 *-------------------------------------------------------------*/
789 /*
790 * convertToPSEmbed()
791 *
792 * Input: filein (input image file -- any format)
793 * fileout (output ps file)
794 * level (compression: 1 (uncompressed), 2 or 3)
795 * Return: 0 if OK, 1 on error
796 *
797 * Notes:
798 * (1) This is a wrapper function that generates a PS file with
799 * a bounding box, from any input image file.
800 * (2) Do the best job of compression given the specified level.
801 * %level=3 does flate compression on anything that is not
802 * tiffg4 (1 bpp) or jpeg (8 bpp or rgb).
803 * (3) If %level=2 and the file is not tiffg4 or jpeg, it will
804 * first be written to file as jpeg with quality = 75.
805 * This will remove the colormap and cause some degradation
806 * in the image.
807 * (4) The bounding box is required when a program such as TeX
808 * (through epsf) places and rescales the image. It is
809 * sized for fitting the image to an 8.5 x 11.0 inch page.
810 */
811 l_int32
convertToPSEmbed(const char * filein,const char * fileout,l_int32 level)812 convertToPSEmbed(const char *filein,
813 const char *fileout,
814 l_int32 level)
815 {
816 char *tname;
817 l_int32 d, format;
818 PIX *pix, *pixs;
819
820 PROCNAME("convertToPSEmbed");
821
822 if (!filein)
823 return ERROR_INT("filein not defined", procName, 1);
824 if (!fileout)
825 return ERROR_INT("fileout not defined", procName, 1);
826 if (level != 1 && level != 2 && level != 3) {
827 L_ERROR("invalid level specified; using level 2\n", procName);
828 level = 2;
829 }
830
831 if (level == 1) { /* no compression */
832 pixWritePSEmbed(filein, fileout);
833 return 0;
834 }
835
836 /* Find the format and write out directly if in jpeg or tiff g4 */
837 findFileFormat(filein, &format);
838 if (format == IFF_JFIF_JPEG) {
839 convertJpegToPSEmbed(filein, fileout);
840 return 0;
841 } else if (format == IFF_TIFF_G4) {
842 convertG4ToPSEmbed(filein, fileout);
843 return 0;
844 } else if (format == IFF_UNKNOWN) {
845 L_ERROR("format of %s not known\n", procName, filein);
846 return 1;
847 }
848
849 /* If level 3, flate encode. */
850 if (level == 3) {
851 convertFlateToPSEmbed(filein, fileout);
852 return 0;
853 }
854
855 /* OK, it's level 2, so we must convert to jpeg or tiff g4 */
856 if ((pixs = pixRead(filein)) == NULL)
857 return ERROR_INT("image not read from file", procName, 1);
858 d = pixGetDepth(pixs);
859 if ((d == 2 || d == 4) && !pixGetColormap(pixs))
860 pix = pixConvertTo8(pixs, 0);
861 else if (d == 16)
862 pix = pixConvert16To8(pixs, 1);
863 else
864 pix = pixRemoveColormap(pixs, REMOVE_CMAP_BASED_ON_SRC);
865
866 d = pixGetDepth(pix);
867 tname = l_makeTempFilename();
868 if (d == 1) {
869 pixWrite(tname, pix, IFF_TIFF_G4);
870 convertG4ToPSEmbed(tname, fileout);
871 } else {
872 pixWrite(tname, pix, IFF_JFIF_JPEG);
873 convertJpegToPSEmbed(tname, fileout);
874 }
875
876 lept_rmfile(tname);
877 LEPT_FREE(tname);
878 pixDestroy(&pix);
879 pixDestroy(&pixs);
880 return 0;
881 }
882
883
884 /*-------------------------------------------------------------*
885 * Write all images in a pixa out to PS *
886 *-------------------------------------------------------------*/
887 /*
888 * pixaWriteCompressedToPS()
889 *
890 * Input: pixa (any set of images)
891 * fileout (output ps file)
892 * res (of input image)
893 * level (compression: 2 or 3)
894 * Return: 0 if OK, 1 on error
895 *
896 * Notes:
897 * (1) This generates a PS file of multiple page images, all
898 * with bounding boxes.
899 * (2) It compresses to:
900 * cmap + level2: jpeg
901 * cmap + level3: flate
902 * 1 bpp: tiffg4
903 * 2 or 4 bpp + level2: jpeg
904 * 2 or 4 bpp + level3: flate
905 * 8 bpp: jpeg
906 * 16 bpp: flate
907 * 32 bpp: jpeg
908 * (3) To generate a pdf, use: ps2pdf <infile.ps> <outfile.pdf>
909 */
910 l_int32
pixaWriteCompressedToPS(PIXA * pixa,const char * fileout,l_int32 res,l_int32 level)911 pixaWriteCompressedToPS(PIXA *pixa,
912 const char *fileout,
913 l_int32 res,
914 l_int32 level)
915 {
916 char *tname;
917 l_int32 i, n, firstfile, index, writeout, d;
918 PIX *pix, *pixt;
919 PIXCMAP *cmap;
920
921 PROCNAME("pixaWriteCompressedToPS");
922
923 if (!pixa)
924 return ERROR_INT("pixa not defined", procName, 1);
925 if (!fileout)
926 return ERROR_INT("fileout not defined", procName, 1);
927 if (level != 2 && level != 3) {
928 L_ERROR("only levels 2 and 3 permitted; using level 2\n", procName);
929 level = 2;
930 }
931
932 n = pixaGetCount(pixa);
933 firstfile = TRUE;
934 index = 0;
935 tname = l_makeTempFilename();
936 for (i = 0; i < n; i++) {
937 writeout = TRUE;
938 pix = pixaGetPix(pixa, i, L_CLONE);
939 d = pixGetDepth(pix);
940 cmap = pixGetColormap(pix);
941 if (d == 1) {
942 pixWrite(tname, pix, IFF_TIFF_G4);
943 } else if (cmap) {
944 if (level == 2) {
945 pixt = pixConvertForPSWrap(pix);
946 pixWrite(tname, pixt, IFF_JFIF_JPEG);
947 pixDestroy(&pixt);
948 } else { /* level == 3 */
949 pixWrite(tname, pix, IFF_PNG);
950 }
951 } else if (d == 16) {
952 if (level == 2)
953 L_WARNING("d = 16; must write out flate\n", procName);
954 pixWrite(tname, pix, IFF_PNG);
955 } else if (d == 2 || d == 4) {
956 if (level == 2) {
957 pixt = pixConvertTo8(pix, 0);
958 pixWrite(tname, pixt, IFF_JFIF_JPEG);
959 pixDestroy(&pixt);
960 } else { /* level == 3 */
961 pixWrite(tname, pix, IFF_PNG);
962 }
963 } else if (d == 8 || d == 32) {
964 pixWrite(tname, pix, IFF_JFIF_JPEG);
965 } else { /* shouldn't happen */
966 L_ERROR("invalid depth: %d\n", procName, d);
967 writeout = FALSE;
968 }
969 pixDestroy(&pix);
970
971 if (writeout)
972 writeImageCompressedToPSFile(tname, fileout, res,
973 &firstfile, &index);
974 }
975
976 lept_rmfile(tname);
977 LEPT_FREE(tname);
978 return 0;
979 }
980
981
982 /* --------------------------------------------*/
983 #endif /* USE_PSIO */
984 /* --------------------------------------------*/
985