1 /* Copyright (C) 2001-2019 Artifex Software, Inc.
2 All Rights Reserved.
3
4 This software is provided AS-IS with no warranty, either express or
5 implied.
6
7 This software is distributed under license and may not be copied,
8 modified or distributed except as expressly authorized under the terms
9 of the license contained in the file LICENSE in this distribution.
10
11 Refer to licensing information at http://www.artifex.com or contact
12 Artifex Software, Inc., 1305 Grant Avenue - Suite 200, Novato,
13 CA 94945, U.S.A., +1(415)492-9861, for further information.
14 */
15
16
17 /* Composite and CID-based text processing for pdfwrite. */
18 #include "memory_.h"
19 #include "gx.h"
20 #include "gserrors.h"
21 #include "gxfcmap.h"
22 #include "gxfont.h"
23 #include "gxfont0.h"
24 #include "gxfont0c.h"
25 #include "gzpath.h"
26 #include "gxchar.h"
27 #include "gdevpsf.h"
28 #include "gdevpdfx.h"
29 #include "gdevpdtx.h"
30 #include "gdevpdtd.h"
31 #include "gdevpdtf.h"
32 #include "gdevpdts.h"
33 #include "gdevpdtt.h"
34
35 #include "gximage.h"
36 #include "gxcpath.h"
37 /* ---------------- Non-CMap-based composite font ---------------- */
38
39 /*
40 * Process a text string in a composite font with FMapType != 9 (CMap).
41 */
42 int
process_composite_text(gs_text_enum_t * pte,void * vbuf,uint bsize)43 process_composite_text(gs_text_enum_t *pte, void *vbuf, uint bsize)
44 {
45 byte *const buf = vbuf;
46 pdf_text_enum_t *const penum = (pdf_text_enum_t *)pte;
47 int code = 0;
48 gs_string str;
49 pdf_text_process_state_t text_state;
50 pdf_text_enum_t curr, prev, out;
51 gs_point total_width;
52 const gs_matrix *psmat = 0;
53 gs_font *prev_font = 0;
54 gs_char chr, char_code = 0x0badf00d, space_char = GS_NO_CHAR;
55 int buf_index = 0;
56 bool return_width = (penum->text.operation & TEXT_RETURN_WIDTH);
57
58 str.data = buf;
59 if (return_width) {
60 code = gx_path_current_point(penum->path, &penum->origin);
61 if (code < 0)
62 return code;
63 }
64 if (pte->text.operation &
65 (TEXT_FROM_ANY - (TEXT_FROM_STRING | TEXT_FROM_BYTES))
66 )
67 return_error(gs_error_rangecheck);
68 if (pte->text.operation & TEXT_INTERVENE) {
69 /* Not implemented. (PostScript doesn't even allow this case.) */
70 return_error(gs_error_rangecheck);
71 }
72 total_width.x = total_width.y = 0;
73 curr = *penum;
74 prev = curr;
75 out = curr;
76 out.current_font = 0;
77 /* Scan runs of characters in the same leaf font. */
78 for ( ; ; ) {
79 int font_code;
80 gs_font *new_font = 0;
81
82 gs_text_enum_copy_dynamic((gs_text_enum_t *)&out,
83 (gs_text_enum_t *)&curr, false);
84 for (;;) {
85 gs_glyph glyph;
86
87 gs_text_enum_copy_dynamic((gs_text_enum_t *)&prev,
88 (gs_text_enum_t *)&curr, false);
89 font_code = pte->orig_font->procs.next_char_glyph
90 ((gs_text_enum_t *)&curr, &chr, &glyph);
91 /*
92 * We check for a font change by comparing the current
93 * font, rather than testing the return code, because
94 * it makes the control structure a little simpler.
95 */
96 switch (font_code) {
97 case 0: /* no font change */
98 case 1: /* font change */
99 curr.returned.current_char = chr;
100 char_code = gx_current_char((gs_text_enum_t *)&curr);
101 new_font = curr.fstack.items[curr.fstack.depth].font;
102 if (new_font != prev_font)
103 break;
104 if (chr != (byte)chr) /* probably can't happen */
105 return_error(gs_error_rangecheck);
106 if (buf_index >= bsize)
107 return_error(gs_error_unregistered); /* Must not happen. */
108 buf[buf_index] = (byte)chr;
109 buf_index++;
110 prev_font = new_font;
111 psmat = &curr.fstack.items[curr.fstack.depth - 1].font->FontMatrix;
112 if ((pte->text.operation & TEXT_ADD_TO_SPACE_WIDTH) &&
113 pte->text.space.s_char == char_code)
114 space_char = chr;
115 continue;
116 case 2: /* end of string */
117 break;
118 default: /* error */
119 return font_code;
120 }
121 break;
122 }
123 str.size = buf_index;
124 if (buf_index) {
125 /* buf_index == 0 is only possible the very first time. */
126 /*
127 * The FontMatrix of leaf descendant fonts is not updated
128 * by scalefont. Compute the effective FontMatrix now.
129 */
130 gs_matrix fmat;
131
132 /* set up the base font : */
133 out.fstack.depth = 0;
134 out.fstack.items[out.fstack.depth].font = out.current_font = prev_font;
135 pte->current_font = prev_font;
136
137 /* Provide the decoded space character : */
138 out.text.space.s_char = space_char;
139
140 gs_matrix_multiply(&prev_font->FontMatrix, psmat, &fmat);
141 out.index = 0; /* Note : we don't reset out.xy_index here. */
142 code = pdf_process_string_aux(&out, &str, NULL, &fmat, &text_state);
143 if (code < 0) {
144 if (code == gs_error_undefined && new_font && new_font->FontType == ft_encrypted2)
145 /* Caused by trying to make a CFF font resource for ps2write, which doesn't support CFF, abort now! */
146 return_error(gs_error_invalidfont);
147 return code;
148 }
149 curr.xy_index = out.xy_index; /* pdf_encode_process_string advanced it. */
150 if (out.index < str.size) {
151 gs_glyph glyph;
152
153 /* Advance *pte exactly for out.index chars,
154 because above we stored bytes into buf. */
155 while (out.index--)
156 pte->orig_font->procs.next_char_glyph(pte, &chr, &glyph);
157 font_code = 2; /* force exiting the loop */
158 } else {
159 /* advance *pte past the current substring */
160 gs_text_enum_copy_dynamic(pte, (gs_text_enum_t *)&prev, true);
161 }
162 pte->xy_index = out.xy_index;
163 if (return_width) {
164 /* This is silly, but its a consequence of the way pdf_process_string
165 * works. When we have TEXT_DO_NONE (stringwidth) we add the width of the
166 * glyph(s) to the enumerator 'returned.total_width' so we keep track
167 * of the total width as we go. However when we are returning the width
168 * but its NOT for a stringwidth, we set the enumerator 'retuerned'
169 * value to just the width of the glyph(s) processed. So when we are *not*
170 * handling a stringwidth we need to keep track of the total width
171 * ourselves. I'd have preferred to alter pdf_process_string, but that
172 * is used in many other places, and those places rely on this behaviour.
173 */
174 if (pte->text.operation & TEXT_DO_NONE) {
175 pte->returned.total_width.x = total_width.x = out.returned.total_width.x;
176 pte->returned.total_width.y = total_width.y = out.returned.total_width.y;
177 } else {
178 pte->returned.total_width.x = total_width.x +=
179 out.returned.total_width.x;
180 pte->returned.total_width.y = total_width.y +=
181 out.returned.total_width.y;
182 }
183 }
184 pdf_text_release_cgp(penum);
185 }
186 if (font_code == 2)
187 break;
188 buf[0] = (byte)chr;
189 buf_index = 1;
190 space_char = ((pte->text.operation & TEXT_ADD_TO_SPACE_WIDTH) &&
191 pte->text.space.s_char == char_code ? chr : ~0);
192 psmat = &curr.fstack.items[curr.fstack.depth - 1].font->FontMatrix;
193 prev_font = new_font;
194 }
195 if (!return_width)
196 return 0;
197 return pdf_shift_text_currentpoint(penum, &total_width);
198 }
199
200 /* ---------------- CMap-based composite font ---------------- */
201
202 /*
203 * Process a text string in a composite font with FMapType == 9 (CMap).
204 */
205 static const char *const standard_cmap_names[] = {
206 /* The following were added in PDF 1.5. */
207
208 "UniGB-UTF16-H", "UniGB-UTF16-V",
209
210 "GBKp-EUC-H", "GBKp-EUC-V",
211 "HKscs-B5-H", "HKscs-B5-V",
212 "UniCNS-UTF16-H", "UniCNS-UTF16-V",
213 "UniJIS-UTF16-H", "UniJIS-UTF16-V",
214 "UniKS-UTF16-H", "UniKS-UTF16-V",
215 #define END_PDF15_CMAP_NAMES_INDEX 12
216 /* The following were added in PDF 1.4. */
217 "GBKp-EUC-H", "GBKp-EUC-V",
218 "GBK2K-H", "GBK2K-V",
219 "HKscs-B5-H", "HKscs-B5-V",
220 #define END_PDF14_CMAP_NAMES_INDEX 18
221 /* The following were added in PDF 1.3. */
222
223 "GBpc-EUC-V",
224 "GBK-EUC-H", "GBK-EUC-V",
225 "UniGB-UCS2-H", "UniGB-UCS2-V",
226
227 "ETenms-B5-H", "ETenms-B5-V",
228
229 "UniCNS-UCS2-H", "UniCNS-UCS2-V",
230
231 "90msp-RKSJ-H", "90msp-RKSJ-V",
232 "EUC-H", "EUC-V",
233 "UniJIS-UCS2-H", "UniJIS-UCS2-V",
234 "UniJIS-UCS2-HW-H", "UniJIS-UCS2-HW-V",
235
236 "KSCms-UHC-HW-H", "KSCms-UHC-HW-V",
237 "UniKS-UCS2-H", "UniKS-UCS2-V",
238
239 #define END_PDF13_CMAP_NAMES_INDEX 39
240 /* The following were added in PDF 1.2. */
241
242 "GB-EUC-H", "GB-EUC-V",
243 "GBpc-EUC-H",
244
245 "B5pc-H", "B5pc-V",
246 "ETen-B5-H", "ETen-B5-V",
247 "CNS-EUC-H", "CNS-EUC-V",
248
249 "83pv-RKSJ-H",
250 "90ms-RKSJ-H", "90ms-RKSJ-V",
251 "90pv-RKSJ-H",
252 "Add-RKSJ-H", "Add-RKSJ-V",
253 "Ext-RKSJ-H", "Ext-RKSJ-V",
254 "H", "V",
255
256 "KSC-EUC-H", "KSC-EUC-V",
257 "KSCms-UHC-H", "KSCms-UHC-V",
258 "KSCpc-EUC-H",
259
260 "Identity-H", "Identity-V",
261
262 0
263 };
264
265 static int
attach_cmap_resource(gx_device_pdf * pdev,pdf_font_resource_t * pdfont,const gs_cmap_t * pcmap,int font_index_only)266 attach_cmap_resource(gx_device_pdf *pdev, pdf_font_resource_t *pdfont,
267 const gs_cmap_t *pcmap, int font_index_only)
268 {
269 const char *const *pcmn =
270 standard_cmap_names +
271 (pdev->CompatibilityLevel < 1.3 ? END_PDF13_CMAP_NAMES_INDEX :
272 pdev->CompatibilityLevel < 1.4 ? END_PDF14_CMAP_NAMES_INDEX :
273 pdev->CompatibilityLevel < 1.5 ? END_PDF15_CMAP_NAMES_INDEX : 0);
274 bool is_identity = false;
275 pdf_resource_t *pcmres = 0; /* CMap */
276 int code;
277
278 /* Make sure cmap names is properly initialised. Silences Coverity warning */
279 if (!pcmn)
280 return_error(gs_error_unknownerror);
281
282 /*
283 * If the CMap isn't standard, write it out if necessary.
284 */
285 for (; *pcmn != 0; ++pcmn)
286 if (pcmap->CMapName.size == strlen(*pcmn) &&
287 !memcmp(*pcmn, pcmap->CMapName.data, pcmap->CMapName.size))
288 break;
289
290 /* For PDF/A we need to write out all non-identity CMaps
291 * first force the identity check.
292 */
293 if (*pcmn == 0 || pdev->PDFA != 0) {
294 /*
295 * PScript5.dll Version 5.2 creates identity CMaps with
296 * instandard name. Check this specially here
297 * and later replace with a standard name.
298 * This is a temporary fix for SF bug #615994 "CMAP is corrupt".
299 */
300 is_identity = gs_cmap_is_identity(pcmap, font_index_only);
301 }
302 /* If the CMap is non-standard, or we are producing PDF/A, and its not
303 * an Identity CMap, then we need to emit it.
304 */
305 if ((*pcmn == 0 || pdev->PDFA != 0) && !is_identity) { /* not standard */
306 pcmres = pdf_find_resource_by_gs_id(pdev, resourceCMap, pcmap->id + font_index_only);
307 if (pcmres == 0) {
308 /* Create and write the CMap object. */
309 code = pdf_cmap_alloc(pdev, pcmap, &pcmres, font_index_only);
310 if (code < 0)
311 return code;
312 }
313 }
314 if (pcmap->from_Unicode) {
315 gs_cmap_ranges_enum_t renum;
316
317 gs_cmap_ranges_enum_init(pcmap, &renum);
318 if (gs_cmap_enum_next_range(&renum) == 0 && renum.range.size == 2 &&
319 gs_cmap_enum_next_range(&renum) == 1) {
320 /*
321 * Exactly one code space range, of size 2. Add an identity
322 * ToUnicode CMap.
323 */
324 if (!pdev->Identity_ToUnicode_CMaps[pcmap->WMode]) {
325 /* Create and write an identity ToUnicode CMap now. */
326 gs_cmap_t *pidcmap;
327
328 code = gs_cmap_create_char_identity(&pidcmap, 2, pcmap->WMode,
329 pdev->memory);
330 if (code < 0)
331 return code;
332 pidcmap->CMapType = 2; /* per PDF Reference */
333 pidcmap->ToUnicode = true;
334 code = pdf_cmap_alloc(pdev, pidcmap,
335 &pdev->Identity_ToUnicode_CMaps[pcmap->WMode], -1);
336 if (code < 0)
337 return code;
338 }
339 pdfont->res_ToUnicode = pdev->Identity_ToUnicode_CMaps[pcmap->WMode];
340 }
341 }
342 if (pcmres || is_identity) {
343 uint size = pcmap->CMapName.size;
344 byte *chars = gs_alloc_string(pdev->pdf_memory, size,
345 "pdf_font_resource_t(CMapName)");
346
347 if (chars == 0)
348 return_error(gs_error_VMerror);
349 memcpy(chars, pcmap->CMapName.data, size);
350 if (is_identity)
351 strcpy(pdfont->u.type0.Encoding_name,
352 (pcmap->WMode ? "/Identity-V" : "/Identity-H"));
353 else
354 gs_sprintf(pdfont->u.type0.Encoding_name, "%ld 0 R",
355 pdf_resource_id(pcmres));
356 pdfont->u.type0.CMapName.data = chars;
357 pdfont->u.type0.CMapName.size = size;
358 } else {
359 if (!*pcmn)
360 /* Should not be possible, if *pcmn is NULL then either
361 * is_identity is true or we create pcmres.
362 */
363 return_error(gs_error_invalidfont);
364
365 gs_sprintf(pdfont->u.type0.Encoding_name, "/%s", *pcmn);
366 pdfont->u.type0.CMapName.data = (const byte *)*pcmn;
367 pdfont->u.type0.CMapName.size = strlen(*pcmn);
368 pdfont->u.type0.cmap_is_standard = true;
369 }
370 pdfont->u.type0.WMode = pcmap->WMode;
371 return 0;
372 }
373
/*
 * Estimate a bounding box for text from the font's FontBBox.
 *
 *   pte       - text enumerator; supplies the CTM (pte->pgs) and the text
 *               origin (pte->origin), which replaces the CTM translation.
 *   font      - font whose FontBBox is transformed.
 *   pfmat     - font matrix to apply, or null to use font->FontMatrix.
 *   text_bbox - receives the axis-aligned box enclosing the four
 *               transformed FontBBox corners.
 *
 * Returns 0 on success, or gs_error_undefined when the FontBBox has zero
 * width or height (text_bbox is then left untouched).
 */
static int estimate_fontbbox(pdf_text_enum_t *pte, gs_font_base *font,
                          const gs_matrix *pfmat,
                          gs_rect *text_bbox)
{
    gs_matrix m;
    gs_point p0, p1, p2, p3;

    if (font->FontBBox.p.x == font->FontBBox.q.x ||
        font->FontBBox.p.y == font->FontBBox.q.y)
        return_error(gs_error_undefined);
    if (pfmat == 0)
        pfmat = &font->FontMatrix;
    m = ctm_only(pte->pgs);
    m.tx = fixed2float(pte->origin.x);
    m.ty = fixed2float(pte->origin.y);
    gs_matrix_multiply(pfmat, &m, &m);

    gs_point_transform(font->FontBBox.p.x, font->FontBBox.p.y, &m, &p0);
    gs_point_transform(font->FontBBox.p.x, font->FontBBox.q.y, &m, &p1);
    gs_point_transform(font->FontBBox.q.x, font->FontBBox.p.y, &m, &p2);
    gs_point_transform(font->FontBBox.q.x, font->FontBBox.q.y, &m, &p3);

    /*
     * All four corners take part: with a rotated or sheared matrix the
     * extreme coordinate can belong to any of them, p3 included.
     */
    text_bbox->p.x = min(min(p0.x, p1.x), min(p2.x, p3.x));
    text_bbox->p.y = min(min(p0.y, p1.y), min(p2.y, p3.y));
    text_bbox->q.x = max(max(p0.x, p1.x), max(p2.x, p3.x));
    text_bbox->q.y = max(max(p0.y, p1.y), max(p2.y, p3.y));

    return 0;
}
402
403 /* Record widths and CID => GID mappings. */
404 static int
scan_cmap_text(pdf_text_enum_t * pte,void * vbuf)405 scan_cmap_text(pdf_text_enum_t *pte, void *vbuf)
406 {
407 gx_device_pdf *pdev = (gx_device_pdf *)pte->dev;
408 /* gs_font_type0 *const font = (gs_font_type0 *)pte->current_font;*/ /* Type 0, fmap_CMap */
409 gs_font_type0 *const font = (gs_font_type0 *)pte->orig_font; /* Type 0, fmap_CMap */
410 /* Not sure. Changed for CDevProc callout. Was pte->current_font */
411 gs_text_enum_t scan = *(gs_text_enum_t *)pte;
412 int wmode = font->WMode, code, rcode = 0;
413 pdf_font_resource_t *pdsubf0 = NULL;
414 gs_font *subfont0 = NULL, *saved_subfont = NULL;
415 uint index = scan.index, xy_index = scan.xy_index, start_index = index;
416 uint font_index0 = 0x7badf00d;
417 bool done = false;
418 pdf_char_glyph_pairs_t p;
419 gs_glyph *type1_glyphs = (gs_glyph *)vbuf;
420 int num_type1_glyphs = 0;
421
422 p.num_all_chars = 1;
423 p.num_unused_chars = 1;
424 p.unused_offset = 0;
425 pte->returned.total_width.x = pte->returned.total_width.y = 0;;
426 for (;;) {
427 uint break_index, break_xy_index;
428 uint font_index = 0x7badf00d;
429 gs_const_string str;
430 pdf_text_process_state_t text_state;
431 pdf_font_resource_t *pdsubf;
432 gs_font *subfont = NULL;
433 gs_point wxy;
434 bool font_change = 0;
435
436 code = gx_path_current_point(pte->path, &pte->origin);
437 if (code < 0)
438 return code;
439 do {
440 gs_char chr;
441 gs_glyph glyph;
442 pdf_font_descriptor_t *pfd;
443 byte *glyph_usage;
444 double *real_widths, *w, *v, *w0;
445 int char_cache_size, width_cache_size;
446 gs_char cid;
447
448 break_index = scan.index;
449 break_xy_index = scan.xy_index;
450 code = font->procs.next_char_glyph(&scan, &chr, &glyph);
451 if (code == 2) { /* end of string */
452 done = true;
453 break;
454 }
455 if (code < 0)
456 return code;
457 subfont = scan.fstack.items[scan.fstack.depth].font;
458 font_index = scan.fstack.items[scan.fstack.depth - 1].index;
459 scan.xy_index++;
460 if (glyph == GS_NO_GLYPH)
461 glyph = GS_MIN_CID_GLYPH;
462 cid = glyph - GS_MIN_CID_GLYPH;
463 switch (subfont->FontType) {
464 case ft_encrypted:
465 case ft_encrypted2:{
466 if (glyph == GS_MIN_CID_GLYPH) {
467 glyph = subfont->procs.encode_char(subfont, chr, GLYPH_SPACE_NAME);
468 }
469 type1_glyphs[num_type1_glyphs] = glyph;
470 num_type1_glyphs++;
471 break;
472 }
473 case ft_CID_encrypted:
474 case ft_CID_TrueType: {
475 p.s[0].glyph = glyph;
476 p.s[0].chr = cid;
477 code = pdf_obtain_cidfont_resource(pdev, subfont, &pdsubf, &p);
478 if (code < 0)
479 return code;
480 break;
481 }
482 case ft_user_defined:
483 case ft_PDF_user_defined:
484 {
485 gs_string str1;
486
487 str1.data = NULL;
488 str1.size = 0;
489 pte->current_font = subfont;
490 code = pdf_obtain_font_resource(pte, &str1, &pdsubf);
491 if (code < 0)
492 return code;
493 cid = pdf_find_glyph(pdsubf, glyph);
494 if (cid == GS_NO_CHAR) {
495 code = pdf_make_font3_resource(pdev, subfont, &pdsubf);
496 if (code < 0)
497 return code;
498 code = pdf_attach_font_resource(pdev, subfont, pdsubf);
499 if (code < 0)
500 return code;
501 cid = 0;
502 }
503 break;
504 }
505 default:
506 /* An unsupported case, fall back to default implementation. */
507 return_error(gs_error_rangecheck);
508 }
509 code = pdf_attached_font_resource(pdev, (gs_font *)subfont, &pdsubf,
510 &glyph_usage, &real_widths, &char_cache_size, &width_cache_size);
511 if (code < 0)
512 return code;
513 if (break_index > start_index && pdev->charproc_just_accumulated)
514 break;
515 if ((subfont->FontType == ft_user_defined || subfont->FontType == ft_PDF_user_defined )&&
516 (break_index > start_index || !pdev->charproc_just_accumulated) &&
517 !(pdsubf->u.simple.s.type3.cached[cid >> 3] & (0x80 >> (cid & 7)))) {
518 if (subfont0 && subfont0->FontType != ft_user_defined && subfont0->FontType != ft_PDF_user_defined)
519 /* This is hacky. By pretending to be in a type 3 font doing a charpath we force
520 * text handling to fall right back to bitmap glyphs. This is because we can't handle
521 * CIDFonts with mixed type 1/3 descendants. Ugly but it produces correct output for
522 * what is after all a dumb setup.
523 */
524 pdev->type3charpath = 1;
525 pte->current_font = subfont;
526 return_error(gs_error_undefined);
527 }
528 if (subfont->FontType == ft_encrypted || subfont->FontType == ft_encrypted2) {
529 font_change = (subfont != subfont0 && subfont0 != NULL);
530 if (font_change) {
531 saved_subfont = subfont;
532 subfont = subfont0;
533 num_type1_glyphs--;
534 }
535 } else
536 font_change = (pdsubf != pdsubf0 && pdsubf0 != NULL);
537 if (!font_change) {
538 pdsubf0 = pdsubf;
539 font_index0 = font_index;
540 subfont0 = subfont;
541 }
542 if (subfont->FontType != ft_encrypted && subfont->FontType != ft_encrypted2) {
543 pfd = pdsubf->FontDescriptor;
544 code = pdf_resize_resource_arrays(pdev, pdsubf, cid + 1);
545 if (code < 0)
546 return code;
547 if (subfont->FontType == ft_CID_encrypted || subfont->FontType == ft_CID_TrueType) {
548 if (cid >=width_cache_size) {
549 /* fixme: we add the CID=0 glyph as CID=cid glyph to the output font.
550 Really it must not add and leave the CID undefined. */
551 cid = 0; /* notdef. */
552 }
553 }
554 if (cid >= char_cache_size || cid >= width_cache_size)
555 return_error(gs_error_unregistered); /* Must not happen */
556 if (pdsubf->FontType == ft_user_defined || pdsubf->FontType == ft_PDF_user_defined || pdsubf->FontType == ft_encrypted ||
557 pdsubf->FontType == ft_encrypted2) {
558 } else {
559 pdf_font_resource_t *pdfont;
560 bool notdef_subst = false;
561
562 code = pdf_obtain_cidfont_widths_arrays(pdev, pdsubf, wmode, &w, &w0, &v);
563 if (code < 0)
564 return code;
565 code = pdf_obtain_parent_type0_font_resource(pdev, pdsubf, font_index,
566 &font->data.CMap->CMapName, &pdfont);
567 if (code < 0)
568 return code;
569 if (pdf_is_CID_font(subfont)) {
570 /* Some Pscript5 output has non-identity mappings between character code and CID
571 * and the GlyphNames2Unicode dictionary uses character codes, not glyph names. So
572 * if we detect ths condition we cheat and claim not to be a CIDFont, so that the
573 * decode_glyph procedure can use the character code to look up the GlyphNames2Unicode
574 * dictionary. See bugs #696021, #688768 and #687954 for examples of the various ways
575 * this code can be exercised.
576 */
577 if (chr == glyph - GS_MIN_CID_GLYPH)
578 code = subfont->procs.decode_glyph((gs_font *)subfont, glyph, -1, NULL, 0);
579 else
580 code = subfont->procs.decode_glyph((gs_font *)subfont, glyph, chr, NULL, 0);
581 if (code != 0)
582 /* Since PScript5.dll creates GlyphNames2Unicode with character codes
583 instead CIDs, and with the WinCharSetFFFF-H2 CMap
584 character codes appears different than CIDs (Bug 687954),
585 pass the character code intead the CID. */
586 code = pdf_add_ToUnicode(pdev, subfont, pdfont,
587 chr + GS_MIN_CID_GLYPH, chr, NULL);
588 else {
589 /* If we interpret a PDF document, ToUnicode
590 CMap may be attached to the Type 0 font. */
591 code = pdf_add_ToUnicode(pdev, pte->orig_font, pdfont,
592 chr + GS_MIN_CID_GLYPH, chr, NULL);
593 }
594 }
595 else
596 code = pdf_add_ToUnicode(pdev, subfont, pdfont, glyph, cid, NULL);
597 if (code < 0)
598 return code;
599 /* We can't check pdsubf->used[cid >> 3] here,
600 because it mixed data for different values of WMode.
601 Perhaps pdf_font_used_glyph returns fast with reused glyphs.
602 */
603 code = pdf_font_used_glyph(pfd, glyph, (gs_font_base *)subfont);
604 if (code == gs_error_rangecheck) {
605 if (!(pdsubf->used[cid >> 3] & (0x80 >> (cid & 7)))) {
606 char buf[gs_font_name_max + 1];
607 int l = min(sizeof(buf) - 1, subfont->font_name.size);
608
609 memcpy(buf, subfont->font_name.chars, l);
610 buf[l] = 0;
611 emprintf3(pdev->memory,
612 "Missing glyph CID=%d, glyph=%04x in the font %s . The output PDF may fail with some viewers.\n",
613 (int)cid,
614 (unsigned int)(glyph - GS_MIN_CID_GLYPH),
615 buf);
616 pdsubf->used[cid >> 3] |= 0x80 >> (cid & 7);
617 if (pdev->PDFA != 0) {
618 switch (pdev->PDFACompatibilityPolicy) {
619 /* Default behaviour matches Adobe Acrobat, warn and continue,
620 * output file will not be PDF/A compliant
621 */
622 case 0:
623 case 1:
624 case 3:
625 emprintf(pdev->memory,
626 "All used glyphs mst be present in fonts for PDF/A, reverting to normal PDF output.\n");
627 pdev->AbortPDFAX = true;
628 pdev->PDFA = 0;
629 break;
630 case 2:
631 emprintf(pdev->memory,
632 "All used glyphs mst be present in fonts for PDF/A, aborting conversion.\n");
633 return_error(gs_error_invalidfont);
634 break;
635 default:
636 emprintf(pdev->memory,
637 "All used glyphs mst be present in fonts for PDF/A, unrecognised PDFACompatibilityLevel,\nreverting to normal PDF output\n");
638 pdev->AbortPDFAX = true;
639 pdev->PDFA = 0;
640 break;
641 }
642 }
643 }
644 cid = 0, code = 1; /* undefined glyph. */
645 notdef_subst = true;
646 /* If this is the first use of CID=0, get its width */
647 if (pdsubf->Widths[cid] == 0) {
648 pdf_glyph_widths_t widths;
649
650 code = pdf_glyph_widths(pdsubf, wmode, glyph, (gs_font *)subfont, &widths,
651 pte->cdevproc_callout ? pte->cdevproc_result : NULL);
652 }
653 } else if (code < 0)
654 return code;
655 if (glyph == GS_MIN_CID_GLYPH && pdev->PDFA != 0) {
656 switch (pdev->PDFACompatibilityPolicy) {
657 case 0:
658 case 1:
659 case 3:
660 emprintf(pdev->memory,
661 "A CIDFont uses CID 0, which is not legal for PDF/A, reverting to normal PDF output.\n");
662 pdev->AbortPDFAX = true;
663 pdev->PDFA = 0;
664 break;
665 case 2:
666 emprintf(pdev->memory,
667 "A CIDFont uses CID 0, which is not legal for PDF/A, aborting conversion.\n");
668 return_error(gs_error_invalidfont);
669 break;
670 default:
671 emprintf(pdev->memory,
672 "A CIDFont uses CID 0, which is not legal for PDF/A, unrecognised PDFACompatibilityLevel,\nreverting to normal PDF output\n");
673 pdev->AbortPDFAX = true;
674 pdev->PDFA = 0;
675 break;
676 }
677 }
678 if ((code == 0 /* just copied */ || pdsubf->Widths[cid] == 0) && !notdef_subst) {
679 pdf_glyph_widths_t widths;
680
681 code = pdf_glyph_widths(pdsubf, wmode, glyph, (gs_font *)subfont, &widths,
682 pte->cdevproc_callout ? pte->cdevproc_result : NULL);
683 if (code < 0)
684 return code;
685 if (code == TEXT_PROCESS_CDEVPROC) {
686 pte->returned.current_glyph = glyph;
687 pte->current_font = subfont;
688 rcode = TEXT_PROCESS_CDEVPROC;
689 break;
690 }
691 if (code >= 0) {
692 if (cid > pdsubf->count)
693 return_error(gs_error_unregistered); /* Must not happen. */
694 w[cid] = widths.Width.w;
695 if (v != NULL) {
696 v[cid * 2 + 0] = widths.Width.v.x;
697 v[cid * 2 + 1] = widths.Width.v.y;
698 }
699 real_widths[cid] = widths.real_width.w;
700 }
701 if (wmode) {
702 /* Since AR5 use W or DW to compute the x-coordinate of
703 v-vector, comupte and store the glyph width for WMode 0. */
704 /* fixme : skip computing real_width here. */
705 code = pdf_glyph_widths(pdsubf, 0, glyph, (gs_font *)subfont, &widths,
706 pte->cdevproc_callout ? pte->cdevproc_result : NULL);
707 if (code < 0)
708 return code;
709 w0[cid] = widths.Width.w;
710 }
711 if (pdsubf->u.cidfont.CIDToGIDMap != 0) {
712 uint gid = 0;
713 gs_font_cid2 *subfont2 = (gs_font_cid2 *)subfont;
714
715 gid = subfont2->cidata.CIDMap_proc(subfont2, glyph);
716
717 /* If this is a TrueType CIDFont, check the GSUB table to see if there's
718 * a suitable substitute glyph.
719 */
720 if (subfont2->FontType == ft_CID_TrueType)
721 gid = subfont2->data.substitute_glyph_index_vertical((gs_font_type42 *)subfont, gid, subfont2->WMode, glyph);
722 pdsubf->u.cidfont.CIDToGIDMap[cid] = gid;
723 }
724 }
725 if (wmode)
726 pdsubf->u.cidfont.used2[cid >> 3] |= 0x80 >> (cid & 7);
727 }
728 pdsubf->used[cid >> 3] |= 0x80 >> (cid & 7);
729 }
730 if (pte->cdevproc_callout) {
731 /* Only handle a single character because its width is stored
732 into pte->cdevproc_result, and process_text_modify_width neds it.
733 fixme: next time take from w, v, real_widths. */
734 break_index = scan.index;
735 break_xy_index = scan.xy_index;
736 break;
737 }
738 } while (!font_change);
739 if (break_index > index) {
740 pdf_font_resource_t *pdfont;
741 gs_matrix m3;
742 int xy_index_step = (!(pte->text.operation & TEXT_REPLACE_WIDTHS) ? 0 :
743 pte->text.x_widths == pte->text.y_widths ? 2 : 1);
744 gs_text_params_t save_text;
745
746 if (!subfont && num_type1_glyphs != 0)
747 subfont = subfont0;
748 if (subfont && (subfont->FontType == ft_encrypted || subfont->FontType == ft_encrypted2)) {
749 int save_op = pte->text.operation;
750 gs_font *save_font = pte->current_font;
751 const gs_glyph *save_data = pte->text.data.glyphs;
752
753 pte->current_font = subfont;
754 pte->text.operation |= TEXT_FROM_GLYPHS;
755 pte->text.data.glyphs = type1_glyphs;
756 str.data = ((const byte *)vbuf) + ((pte->text.size - pte->index) * sizeof(gs_glyph));
757 str.size = num_type1_glyphs;
758 code = pdf_obtain_font_resource_unencoded(pte, (const gs_string *)&str, &pdsubf0,
759 type1_glyphs);
760 if (code < 0) {
761 /* Replace the modified values, fall back to default implementation
762 * (type 3 bitmap image font)
763 */
764 pte->current_font = save_font;
765 pte->text.operation |= save_op;
766 pte->text.data.glyphs = save_data;
767 return(code);
768 }
769 memcpy((void *)scan.text.data.bytes, (void *)str.data, str.size);
770 str.data = scan.text.data.bytes;
771 pdsubf = pdsubf0;
772 pte->text.operation = save_op;
773 }
774 pte->current_font = subfont0;
775 if (!subfont0 || !pdsubf0)
776 /* This should be impossible */
777 return_error(gs_error_invalidfont);
778
779 code = gs_matrix_multiply(&subfont0->FontMatrix, &font->FontMatrix, &m3);
780 /* We thought that it should be gs_matrix_multiply(&font->FontMatrix, &subfont0->FontMatrix, &m3); */
781 if (code < 0)
782 return code;
783 if (pdsubf0->FontType == ft_user_defined || pdsubf0->FontType == ft_PDF_user_defined || pdsubf->FontType == ft_encrypted ||
784 pdsubf->FontType == ft_encrypted2)
785 pdfont = pdsubf0;
786 else {
787 code = pdf_obtain_parent_type0_font_resource(pdev, pdsubf0, font_index0,
788 &font->data.CMap->CMapName, &pdfont);
789 if (code < 0)
790 return code;
791 if (!pdfont->u.type0.Encoding_name[0]) {
792 /*
793 * If pdfont->u.type0.Encoding_name is set,
794 * a CMap resource is already attached.
795 * See attach_cmap_resource.
796 */
797 code = attach_cmap_resource(pdev, pdfont, font->data.CMap, font_index0);
798 if (code < 0)
799 return code;
800 }
801 }
802 pdf_set_text_wmode(pdev, font->WMode);
803 code = pdf_update_text_state(&text_state, (pdf_text_enum_t *)pte, pdfont, &m3);
804 if (code < 0)
805 return code;
806 /* process_text_modify_width breaks text parameters.
807 We would like to improve it someday.
808 Now save them locally and restore after the call. */
809 save_text = pte->text;
810 if (subfont && (subfont->FontType != ft_encrypted &&
811 subfont->FontType != ft_encrypted2)) {
812 /* If we are a type 1 descendant, we already sorted this out above */
813 str.data = scan.text.data.bytes + index;
814 str.size = break_index - index;
815 }
816 if (pte->text.operation & TEXT_REPLACE_WIDTHS) {
817 if (pte->text.x_widths != NULL)
818 pte->text.x_widths += xy_index * xy_index_step;
819 if (pte->text.y_widths != NULL)
820 pte->text.y_widths += xy_index * xy_index_step;
821 }
822 pte->xy_index = 0;
823 if (subfont && (subfont->FontType == ft_encrypted ||
824 subfont->FontType == ft_encrypted2)) {
825 gs_font *f = pte->orig_font;
826
827 adjust_first_last_char(pdfont, (byte *)str.data, str.size);
828
829 /* Make sure we use the descendant font, not the original type 0 ! */
830 pte->orig_font = subfont;
831 code = process_text_modify_width((pdf_text_enum_t *)pte,
832 (gs_font *)subfont, &text_state, &str, &wxy, type1_glyphs, false, scan.index - index);
833 if (code < 0)
834 return(code);
835 if(font_change) {
836 type1_glyphs[0] = type1_glyphs[num_type1_glyphs];
837 num_type1_glyphs = 1;
838 subfont = saved_subfont;
839 } else {
840 num_type1_glyphs = 0;
841 }
842 pte->orig_font = f;
843 } else {
844 code = process_text_modify_width((pdf_text_enum_t *)pte, (gs_font *)font,
845 &text_state, &str, &wxy, NULL, true, scan.index - index);
846 }
847 if (pte->text.operation & TEXT_REPLACE_WIDTHS) {
848 if (pte->text.x_widths != NULL)
849 pte->text.x_widths -= xy_index * xy_index_step;
850 if (pte->text.y_widths != NULL)
851 pte->text.y_widths -= xy_index * xy_index_step;
852 }
853 pte->text = save_text;
854 pte->cdevproc_callout = false;
855 if (code < 0) {
856 pte->index = index;
857 pte->xy_index = xy_index;
858 return code;
859 }
860 pte->index = break_index;
861 pte->xy_index = break_xy_index;
862 if (pdev->Eps2Write) {
863 gs_rect text_bbox;
864 gx_device_clip cdev;
865 gx_drawing_color devc;
866 fixed x0, y0, bx2, by2;
867
868 text_bbox.q.x = text_bbox.p.y = text_bbox.q.y = 0;
869 estimate_fontbbox(pte, (gs_font_base *)font, NULL, &text_bbox);
870 text_bbox.p.x = fixed2float(pte->origin.x);
871 text_bbox.q.x = text_bbox.p.x + wxy.x;
872
873 x0 = float2fixed(text_bbox.p.x);
874 y0 = float2fixed(text_bbox.p.y);
875 bx2 = float2fixed(text_bbox.q.x) - x0;
876 by2 = float2fixed(text_bbox.q.y) - y0;
877
878 pdev->AccumulatingBBox++;
879 gx_make_clip_device_on_stack(&cdev, pte->pcpath, (gx_device *)pdev);
880 set_nonclient_dev_color(&devc, gx_device_black((gx_device *)pdev)); /* any non-white color will do */
881 gx_default_fill_triangle((gx_device *) pdev, x0, y0,
882 float2fixed(text_bbox.p.x) - x0,
883 float2fixed(text_bbox.q.y) - y0,
884 bx2, by2, &devc, lop_default);
885 gx_default_fill_triangle((gx_device *) & cdev, x0, y0,
886 float2fixed(text_bbox.q.x) - x0,
887 float2fixed(text_bbox.p.y) - y0,
888 bx2, by2, &devc, lop_default);
889 pdev->AccumulatingBBox--;
890 }
891 code = pdf_shift_text_currentpoint(pte, &wxy);
892 if (code < 0)
893 return code;
894 }
895 pdf_text_release_cgp(pte);
896 index = break_index;
897 xy_index = break_xy_index;
898 if (done || rcode != 0)
899 break;
900 pdsubf0 = pdsubf;
901 font_index0 = font_index;
902 subfont0 = subfont;
903 }
904 pte->index = index;
905 pte->xy_index = xy_index;
906 return rcode;
907 }
908
909 int
process_cmap_text(gs_text_enum_t * penum,void * vbuf,uint bsize)910 process_cmap_text(gs_text_enum_t *penum, void *vbuf, uint bsize)
911 {
912 int code;
913 pdf_text_enum_t *pte = (pdf_text_enum_t *)penum;
914 byte *save;
915 uint start = pte->index;
916
917 if (pte->text.operation &
918 (TEXT_FROM_ANY - (TEXT_FROM_STRING | TEXT_FROM_BYTES))
919 )
920 return_error(gs_error_rangecheck);
921 if (pte->text.operation & TEXT_INTERVENE) {
922 /* Not implemented. (PostScript doesn't allow TEXT_INTERVENE.) */
923 return_error(gs_error_rangecheck);
924 }
925 /* scan_cmap_text has the unfortunate side effect of meddling with the
926 * text data in the enumerator. In general that's OK but in the case where
927 * the string is (eg) in a bound procedure, and we run that procedure more
928 * than once, the string is corrupted on the first use and then produces
929 * incorrect output for the subsequent use(s).
930 * The routine is, sadly, extremely convoluted so instead of trying to fix
931 * it so that it doesn't corrupt the string (which looks likely to be impossible
932 * without copying the string at some point) I've chosen to take a copy of the
933 * string here, and restore it after the call to scan_cmap_text.
934 * See bug #695322 and test file Bug691680.ps
935 */
936 save = (byte *)pte->text.data.bytes;
937 pte->text.data.bytes = gs_alloc_string(pte->memory, pte->text.size, "pdf_text_process");
938 memcpy((byte *)pte->text.data.bytes, save, pte->text.size);
939 code = scan_cmap_text(pte, vbuf);
940 gs_free_string(pte->memory, (byte *)pte->text.data.bytes, pte->text.size, "pdf_text_process");
941 pte->text.data.bytes = save;
942 pte->bytes_decoded = pte->index - start;
943
944 if (code == TEXT_PROCESS_CDEVPROC)
945 pte->cdevproc_callout = true;
946 else
947 pte->cdevproc_callout = false;
948 return code;
949 }
950
951 /* ---------------- CIDFont ---------------- */
952
953 /*
954 * Process a text string in a CIDFont. (Only glyphshow is supported.)
955 */
956 int
process_cid_text(gs_text_enum_t * pte,void * vbuf,uint bsize)957 process_cid_text(gs_text_enum_t *pte, void *vbuf, uint bsize)
958 {
959 pdf_text_enum_t *penum = (pdf_text_enum_t *)pte;
960 uint operation = pte->text.operation;
961 gs_text_enum_t save;
962 gs_font *scaled_font = pte->current_font; /* CIDFont */
963 gs_font *font; /* unscaled font (CIDFont) */
964 const gs_glyph *glyphs;
965 gs_matrix scale_matrix;
966 pdf_font_resource_t *pdsubf; /* CIDFont */
967 gs_font_type0 *font0 = NULL;
968 uint size;
969 int code;
970
971 if (operation & TEXT_FROM_GLYPHS) {
972 glyphs = pte->text.data.glyphs;
973 size = pte->text.size - pte->index;
974 } else if (operation & TEXT_FROM_SINGLE_GLYPH) {
975 glyphs = &pte->text.data.d_glyph;
976 size = 1;
977 } else if (operation & TEXT_FROM_STRING) {
978 glyphs = &pte->outer_CID;
979 size = 1;
980 } else
981 return_error(gs_error_rangecheck);
982
983 /*
984 * PDF doesn't support glyphshow directly: we need to create a Type 0
985 * font with an Identity CMap. Make sure all the glyph numbers fit
986 * into 16 bits. (Eventually we should support wider glyphs too,
987 * but this would require a different CMap.)
988 */
989 if (bsize < size * 2)
990 return_error(gs_error_unregistered); /* Must not happen. */
991 {
992 int i;
993 byte *pchars = vbuf;
994
995 for (i = 0; i < size; ++i) {
996 ulong gnum = glyphs[i] - GS_MIN_CID_GLYPH;
997
998 if (gnum & ~0xffffL)
999 return_error(gs_error_rangecheck);
1000 *pchars++ = (byte)(gnum >> 8);
1001 *pchars++ = (byte)gnum;
1002 }
1003 }
1004
1005 /* Find the original (unscaled) version of this font. */
1006
1007 for (font = scaled_font; font->base != font; )
1008 font = font->base;
1009 /* Compute the scaling matrix. */
1010 code = gs_matrix_invert(&font->FontMatrix, &scale_matrix);
1011 if (code < 0)
1012 return code;
1013 gs_matrix_multiply(&scale_matrix, &scaled_font->FontMatrix, &scale_matrix);
1014
1015 /* Find or create the CIDFont resource. */
1016
1017 code = pdf_obtain_font_resource(penum, NULL, &pdsubf);
1018 if (code < 0)
1019 return code;
1020
1021 /* Create the CMap and Type 0 font if they don't exist already. */
1022
1023 if (pdsubf->u.cidfont.glyphshow_font_id != 0)
1024 font0 = (gs_font_type0 *)gs_find_font_by_id(font->dir,
1025 pdsubf->u.cidfont.glyphshow_font_id, &scaled_font->FontMatrix);
1026 if (font0 == NULL) {
1027 code = gs_font_type0_from_cidfont(&font0, font, font->WMode,
1028 &scale_matrix, font->memory);
1029 if (code < 0)
1030 return code;
1031 pdsubf->u.cidfont.glyphshow_font_id = font0->id;
1032 }
1033
1034 /* Now handle the glyphshow as a show in the Type 0 font. */
1035
1036 save = *pte;
1037 pte->current_font = pte->orig_font = (gs_font *)font0;
1038 /* Patch the operation temporarily for init_fstack. */
1039 pte->text.operation = (operation & ~TEXT_FROM_ANY) | TEXT_FROM_BYTES;
1040 /* Patch the data for process_cmap_text. */
1041 pte->text.data.bytes = vbuf;
1042 pte->text.size = size * 2;
1043 pte->index = 0;
1044 gs_type0_init_fstack(pte, pte->current_font);
1045 code = process_cmap_text(pte, vbuf, bsize);
1046 pte->current_font = scaled_font;
1047 pte->orig_font = save.orig_font;
1048 pte->text = save.text;
1049 pte->index = save.index + pte->index / 2;
1050 pte->fstack = save.fstack;
1051 return code;
1052 }
1053