1 /* Copyright (C) 2001-2019 Artifex Software, Inc.
2    All Rights Reserved.
3 
4    This software is provided AS-IS with no warranty, either express or
5    implied.
6 
7    This software is distributed under license and may not be copied,
8    modified or distributed except as expressly authorized under the terms
9    of the license contained in the file LICENSE in this distribution.
10 
11    Refer to licensing information at http://www.artifex.com or contact
12    Artifex Software, Inc.,  1305 Grant Avenue - Suite 200, Novato,
13    CA 94945, U.S.A., +1(415)492-9861, for further information.
14 */
15 
16 
17 /* Composite and CID-based text processing for pdfwrite. */
18 #include "memory_.h"
19 #include "gx.h"
20 #include "gserrors.h"
21 #include "gxfcmap.h"
22 #include "gxfont.h"
23 #include "gxfont0.h"
24 #include "gxfont0c.h"
25 #include "gzpath.h"
26 #include "gxchar.h"
27 #include "gdevpsf.h"
28 #include "gdevpdfx.h"
29 #include "gdevpdtx.h"
30 #include "gdevpdtd.h"
31 #include "gdevpdtf.h"
32 #include "gdevpdts.h"
33 #include "gdevpdtt.h"
34 
35 #include "gximage.h"
36 #include "gxcpath.h"
37 /* ---------------- Non-CMap-based composite font ---------------- */
38 
/*
 * Process a text string in a composite font with FMapType != 9 (CMap).
 *
 * The string is decoded one character at a time with the original font's
 * next_char_glyph procedure.  Consecutive characters that select the same
 * leaf font are collected in vbuf as single bytes, and each such run is
 * passed to pdf_process_string_aux with that leaf font as the current font.
 *
 * pte   - the text enumerator; it is advanced past the characters that
 *         were processed.
 * vbuf  - scratch buffer that receives the decoded one-byte character codes.
 * bsize - size of vbuf in bytes.
 *
 * Returns 0 when TEXT_RETURN_WIDTH is not set, otherwise the result of
 * pdf_shift_text_currentpoint.  Returns a negative error code on failure
 * (gs_error_rangecheck for text operations that are not supported here).
 */
int
process_composite_text(gs_text_enum_t *pte, void *vbuf, uint bsize)
{
    byte *const buf = vbuf;
    pdf_text_enum_t *const penum = (pdf_text_enum_t *)pte;
    int code = 0;
    gs_string str;
    pdf_text_process_state_t text_state;
    /*
     * Working copies of the enumerator:
     *   curr - the scanning position (advanced by next_char_glyph),
     *   prev - the state saved just before the latest next_char_glyph call,
     *   out  - the enumerator handed to pdf_process_string_aux for each run.
     */
    pdf_text_enum_t curr, prev, out;
    gs_point total_width;
    const gs_matrix *psmat = 0;
    gs_font *prev_font = 0;
    gs_char chr, char_code = 0x0badf00d, space_char = GS_NO_CHAR;
    int buf_index = 0;
    bool return_width = (penum->text.operation & TEXT_RETURN_WIDTH);

    /* The string given to pdf_process_string_aux uses buf as its storage. */
    str.data = buf;
    if (return_width) {
        code = gx_path_current_point(penum->path, &penum->origin);
        if (code < 0)
            return code;
    }
    /* Only TEXT_FROM_STRING and TEXT_FROM_BYTES sources are accepted. */
    if (pte->text.operation &
        (TEXT_FROM_ANY - (TEXT_FROM_STRING | TEXT_FROM_BYTES))
        )
        return_error(gs_error_rangecheck);
    if (pte->text.operation & TEXT_INTERVENE) {
        /* Not implemented. (PostScript doesn't even allow this case.) */
        return_error(gs_error_rangecheck);
    }
    total_width.x = total_width.y = 0;
    curr = *penum;
    prev = curr;
    out = curr;
    out.current_font = 0;
    /* Scan runs of characters in the same leaf font. */
    for ( ; ; ) {
        int font_code;
        gs_font *new_font = 0;

        gs_text_enum_copy_dynamic((gs_text_enum_t *)&out,
                                  (gs_text_enum_t *)&curr, false);
        /* Collect characters into buf until the leaf font changes. */
        for (;;) {
            gs_glyph glyph;

            /* Remember the state before consuming the next character. */
            gs_text_enum_copy_dynamic((gs_text_enum_t *)&prev,
                                      (gs_text_enum_t *)&curr, false);
            font_code = pte->orig_font->procs.next_char_glyph
                ((gs_text_enum_t *)&curr, &chr, &glyph);
            /*
             * We check for a font change by comparing the current
             * font, rather than testing the return code, because
             * it makes the control structure a little simpler.
             */
            switch (font_code) {
            case 0:		/* no font change */
            case 1:		/* font change */
                curr.returned.current_char = chr;
                char_code = gx_current_char((gs_text_enum_t *)&curr);
                new_font = curr.fstack.items[curr.fstack.depth].font;
                /*
                 * A different leaf font ends the current run.  The character
                 * just read stays in chr and becomes the first byte of the
                 * next run at the bottom of the outer loop.
                 */
                if (new_font != prev_font)
                    break;
                if (chr != (byte)chr)	/* probably can't happen */
                    return_error(gs_error_rangecheck);
                if (buf_index >= bsize)
                    return_error(gs_error_unregistered); /* Must not happen. */
                buf[buf_index] = (byte)chr;
                buf_index++;
                prev_font = new_font;
                /* FontMatrix of the font one level above the leaf. */
                psmat = &curr.fstack.items[curr.fstack.depth - 1].font->FontMatrix;
                if ((pte->text.operation & TEXT_ADD_TO_SPACE_WIDTH) &&
                        pte->text.space.s_char == char_code)
                    space_char = chr;
                continue;
            case 2:		/* end of string */
                break;
            default:	/* error */
                return font_code;
            }
            break;
        }
        str.size = buf_index;
        if (buf_index) {
            /* buf_index == 0 is only possible the very first time. */
            /*
             * The FontMatrix of leaf descendant fonts is not updated
             * by scalefont.  Compute the effective FontMatrix now.
             */
            gs_matrix fmat;

            /* set up the base font : */
            out.fstack.depth = 0;
            out.fstack.items[out.fstack.depth].font = out.current_font = prev_font;
            pte->current_font = prev_font;

            /* Provide the decoded space character : */
            out.text.space.s_char = space_char;

            gs_matrix_multiply(&prev_font->FontMatrix, psmat, &fmat);
            out.index = 0; /* Note : we don't reset out.xy_index here. */
            code = pdf_process_string_aux(&out, &str, NULL, &fmat, &text_state);
            if (code < 0) {
                if (code == gs_error_undefined && new_font && new_font->FontType == ft_encrypted2)
                /* Caused by trying to make a CFF font resource for ps2write, which doesn't support CFF, abort now! */
		  return_error(gs_error_invalidfont);
                return code;
            }
            curr.xy_index = out.xy_index; /* pdf_encode_process_string advanced it. */
            if (out.index < str.size) {
                gs_glyph glyph;

                /* Advance *pte exactly for out.index chars,
                   because above we stored bytes into buf. */
                while (out.index--)
                    pte->orig_font->procs.next_char_glyph(pte, &chr, &glyph);
                font_code = 2; /* force exiting the loop */
            } else {
                /* advance *pte past the current substring */
                gs_text_enum_copy_dynamic(pte, (gs_text_enum_t *)&prev, true);
            }
            pte->xy_index = out.xy_index;
            if (return_width) {
                /* This is silly, but it's a consequence of the way pdf_process_string
                 * works. When we have TEXT_DO_NONE (stringwidth) we add the width of the
                 * glyph(s) to the enumerator 'returned.total_width' so we keep track
                 * of the total width as we go. However when we are returning the width
                 * but it's NOT for a stringwidth, we set the enumerator 'returned'
                 * value to just the width of the glyph(s) processed. So when we are *not*
                 * handling a stringwidth we need to keep track of the total width
                 * ourselves. I'd have preferred to alter pdf_process_string, but that
                 * is used in many other places, and those places rely on this behaviour.
                 */
                if (pte->text.operation & TEXT_DO_NONE) {
                    pte->returned.total_width.x = total_width.x = out.returned.total_width.x;
                    pte->returned.total_width.y = total_width.y = out.returned.total_width.y;
                } else {
                    pte->returned.total_width.x = total_width.x +=
                        out.returned.total_width.x;
                    pte->returned.total_width.y = total_width.y +=
                        out.returned.total_width.y;
                }
            }
            pdf_text_release_cgp(penum);
        }
        if (font_code == 2)
            break;
        /*
         * Start the next run with the character that caused the font change:
         * it was read above but not yet stored in buf.
         */
        buf[0] = (byte)chr;
        buf_index = 1;
        space_char = ((pte->text.operation & TEXT_ADD_TO_SPACE_WIDTH) &&
                      pte->text.space.s_char == char_code ? chr : ~0);
        psmat = &curr.fstack.items[curr.fstack.depth - 1].font->FontMatrix;
        prev_font = new_font;
    }
    if (!return_width)
        return 0;
    return pdf_shift_text_currentpoint(penum, &total_width);
}
199 
200 /* ---------------- CMap-based composite font ---------------- */
201 
202 /*
203  * Process a text string in a composite font with FMapType == 9 (CMap).
204  */
/*
 * Names of the CMaps that are treated as standard (written by name only).
 *
 * The entries are grouped by the PDF version that introduced them, newest
 * group first, and the list ends with a null pointer.  Each
 * END_PDFnn_CMAP_NAMES_INDEX macro is the index of the first entry after
 * the PDF n.n group; attach_cmap_resource starts its search at that index
 * when the output CompatibilityLevel is below n.n, so names from newer
 * versions are skipped.  The macro values must be kept in step with the
 * number of entries above them.
 *
 * Note that "GBKp-EUC-H/V" and "HKscs-B5-H/V" appear in both the PDF 1.5
 * and the PDF 1.4 groups.
 */
static const char *const standard_cmap_names[] = {
    /* The following were added in PDF 1.5. */

    "UniGB-UTF16-H", "UniGB-UTF16-V",

    "GBKp-EUC-H", "GBKp-EUC-V",
    "HKscs-B5-H", "HKscs-B5-V",
    "UniCNS-UTF16-H", "UniCNS-UTF16-V",
    "UniJIS-UTF16-H", "UniJIS-UTF16-V",
    "UniKS-UTF16-H", "UniKS-UTF16-V",
#define END_PDF15_CMAP_NAMES_INDEX 12
    /* The following were added in PDF 1.4. */
    "GBKp-EUC-H", "GBKp-EUC-V",
    "GBK2K-H", "GBK2K-V",
    "HKscs-B5-H", "HKscs-B5-V",
#define END_PDF14_CMAP_NAMES_INDEX 18
    /* The following were added in PDF 1.3. */

    "GBpc-EUC-V",
    "GBK-EUC-H", "GBK-EUC-V",
    "UniGB-UCS2-H", "UniGB-UCS2-V",

    "ETenms-B5-H", "ETenms-B5-V",

    "UniCNS-UCS2-H", "UniCNS-UCS2-V",

    "90msp-RKSJ-H", "90msp-RKSJ-V",
    "EUC-H", "EUC-V",
    "UniJIS-UCS2-H", "UniJIS-UCS2-V",
    "UniJIS-UCS2-HW-H", "UniJIS-UCS2-HW-V",

    "KSCms-UHC-HW-H", "KSCms-UHC-HW-V",
    "UniKS-UCS2-H", "UniKS-UCS2-V",

#define END_PDF13_CMAP_NAMES_INDEX 39
    /* The following were added in PDF 1.2. */

    "GB-EUC-H", "GB-EUC-V",
    "GBpc-EUC-H",

    "B5pc-H", "B5pc-V",
    "ETen-B5-H", "ETen-B5-V",
    "CNS-EUC-H", "CNS-EUC-V",

    "83pv-RKSJ-H",
    "90ms-RKSJ-H", "90ms-RKSJ-V",
    "90pv-RKSJ-H",
    "Add-RKSJ-H", "Add-RKSJ-V",
    "Ext-RKSJ-H", "Ext-RKSJ-V",
    "H", "V",

    "KSC-EUC-H", "KSC-EUC-V",
    "KSCms-UHC-H", "KSCms-UHC-V",
    "KSCpc-EUC-H",

    "Identity-H", "Identity-V",

    0	/* list terminator */
};
264 
265 static int
attach_cmap_resource(gx_device_pdf * pdev,pdf_font_resource_t * pdfont,const gs_cmap_t * pcmap,int font_index_only)266 attach_cmap_resource(gx_device_pdf *pdev, pdf_font_resource_t *pdfont,
267                 const gs_cmap_t *pcmap, int font_index_only)
268 {
269     const char *const *pcmn =
270         standard_cmap_names +
271         (pdev->CompatibilityLevel < 1.3 ? END_PDF13_CMAP_NAMES_INDEX :
272          pdev->CompatibilityLevel < 1.4 ? END_PDF14_CMAP_NAMES_INDEX :
273          pdev->CompatibilityLevel < 1.5 ? END_PDF15_CMAP_NAMES_INDEX : 0);
274     bool is_identity = false;
275     pdf_resource_t *pcmres = 0;	/* CMap */
276     int code;
277 
278     /* Make sure cmap names is properly initialised. Silences Coverity warning */
279     if (!pcmn)
280         return_error(gs_error_unknownerror);
281 
282     /*
283      * If the CMap isn't standard, write it out if necessary.
284      */
285     for (; *pcmn != 0; ++pcmn)
286         if (pcmap->CMapName.size == strlen(*pcmn) &&
287             !memcmp(*pcmn, pcmap->CMapName.data, pcmap->CMapName.size))
288             break;
289 
290     /* For PDF/A we need to write out all non-identity CMaps
291      * first force the identity check.
292      */
293     if (*pcmn == 0 || pdev->PDFA != 0) {
294         /*
295          * PScript5.dll Version 5.2 creates identity CMaps with
296          * instandard name. Check this specially here
297          * and later replace with a standard name.
298          * This is a temporary fix for SF bug #615994 "CMAP is corrupt".
299          */
300         is_identity = gs_cmap_is_identity(pcmap, font_index_only);
301     }
302     /* If the CMap is non-standard, or we are producing PDF/A, and its not
303      * an Identity CMap, then we need to emit it.
304      */
305     if ((*pcmn == 0  || pdev->PDFA != 0) && !is_identity) {		/* not standard */
306         pcmres = pdf_find_resource_by_gs_id(pdev, resourceCMap, pcmap->id + font_index_only);
307         if (pcmres == 0) {
308             /* Create and write the CMap object. */
309             code = pdf_cmap_alloc(pdev, pcmap, &pcmres, font_index_only);
310             if (code < 0)
311                 return code;
312         }
313     }
314     if (pcmap->from_Unicode) {
315         gs_cmap_ranges_enum_t renum;
316 
317         gs_cmap_ranges_enum_init(pcmap, &renum);
318         if (gs_cmap_enum_next_range(&renum) == 0 && renum.range.size == 2 &&
319             gs_cmap_enum_next_range(&renum) == 1) {
320             /*
321              * Exactly one code space range, of size 2.  Add an identity
322              * ToUnicode CMap.
323              */
324             if (!pdev->Identity_ToUnicode_CMaps[pcmap->WMode]) {
325                 /* Create and write an identity ToUnicode CMap now. */
326                 gs_cmap_t *pidcmap;
327 
328                 code = gs_cmap_create_char_identity(&pidcmap, 2, pcmap->WMode,
329                                                     pdev->memory);
330                 if (code < 0)
331                     return code;
332                 pidcmap->CMapType = 2;	/* per PDF Reference */
333                 pidcmap->ToUnicode = true;
334                 code = pdf_cmap_alloc(pdev, pidcmap,
335                                 &pdev->Identity_ToUnicode_CMaps[pcmap->WMode], -1);
336                 if (code < 0)
337                     return code;
338             }
339             pdfont->res_ToUnicode = pdev->Identity_ToUnicode_CMaps[pcmap->WMode];
340         }
341     }
342     if (pcmres || is_identity) {
343         uint size = pcmap->CMapName.size;
344         byte *chars = gs_alloc_string(pdev->pdf_memory, size,
345                                       "pdf_font_resource_t(CMapName)");
346 
347         if (chars == 0)
348             return_error(gs_error_VMerror);
349         memcpy(chars, pcmap->CMapName.data, size);
350         if (is_identity)
351             strcpy(pdfont->u.type0.Encoding_name,
352                     (pcmap->WMode ? "/Identity-V" : "/Identity-H"));
353         else
354             gs_sprintf(pdfont->u.type0.Encoding_name, "%ld 0 R",
355                     pdf_resource_id(pcmres));
356         pdfont->u.type0.CMapName.data = chars;
357         pdfont->u.type0.CMapName.size = size;
358     } else {
359         if (!*pcmn)
360             /* Should not be possible, if *pcmn is NULL then either
361              * is_identity is true or we create pcmres.
362              */
363             return_error(gs_error_invalidfont);
364 
365         gs_sprintf(pdfont->u.type0.Encoding_name, "/%s", *pcmn);
366         pdfont->u.type0.CMapName.data = (const byte *)*pcmn;
367         pdfont->u.type0.CMapName.size = strlen(*pcmn);
368         pdfont->u.type0.cmap_is_standard = true;
369     }
370     pdfont->u.type0.WMode = pcmap->WMode;
371     return 0;
372 }
373 
/*
 * Estimate the bounding box of text set in 'font' at the enumerator's
 * current origin by transforming the font's FontBBox.
 *
 * pte       - text enumerator; supplies the graphics state (CTM) and the
 *             text origin.
 * font      - font whose FontBBox is used.
 * pfmat     - font matrix to apply, or NULL to use font->FontMatrix.
 * text_bbox - receives the axis-aligned box that encloses the four
 *             transformed FontBBox corners.
 *
 * Returns 0 on success, or gs_error_undefined when the FontBBox has zero
 * width or zero height (so no estimate can be made).
 */
static int estimate_fontbbox(pdf_text_enum_t *pte, gs_font_base *font,
                          const gs_matrix *pfmat,
                          gs_rect *text_bbox)
{
    gs_matrix m;
    gs_point p0, p1, p2, p3;

    if (font->FontBBox.p.x == font->FontBBox.q.x ||
        font->FontBBox.p.y == font->FontBBox.q.y)
        return_error(gs_error_undefined);
    if (pfmat == 0)
        pfmat = &font->FontMatrix;
    /* Use the CTM, but translate to the text origin instead. */
    m = ctm_only(pte->pgs);
    m.tx = fixed2float(pte->origin.x);
    m.ty = fixed2float(pte->origin.y);
    gs_matrix_multiply(pfmat, &m, &m);

    /* Transform all four corners of the FontBBox. */
    gs_point_transform(font->FontBBox.p.x, font->FontBBox.p.y, &m, &p0);
    gs_point_transform(font->FontBBox.p.x, font->FontBBox.q.y, &m, &p1);
    gs_point_transform(font->FontBBox.q.x, font->FontBBox.p.y, &m, &p2);
    gs_point_transform(font->FontBBox.q.x, font->FontBBox.q.y, &m, &p3);
    /*
     * Take the extremes over all four corners.  With a rotating or
     * skewing matrix any corner, including p3, can be the extreme one.
     */
    text_bbox->p.x = min(min(p0.x, p1.x), min(p2.x, p3.x));
    text_bbox->p.y = min(min(p0.y, p1.y), min(p2.y, p3.y));
    text_bbox->q.x = max(max(p0.x, p1.x), max(p2.x, p3.x));
    text_bbox->q.y = max(max(p0.y, p1.y), max(p2.y, p3.y));

    return 0;
}
402 
403 /* Record widths and CID => GID mappings. */
404 static int
scan_cmap_text(pdf_text_enum_t * pte,void * vbuf)405 scan_cmap_text(pdf_text_enum_t *pte, void *vbuf)
406 {
407     gx_device_pdf *pdev = (gx_device_pdf *)pte->dev;
408     /* gs_font_type0 *const font = (gs_font_type0 *)pte->current_font;*/ /* Type 0, fmap_CMap */
409     gs_font_type0 *const font = (gs_font_type0 *)pte->orig_font; /* Type 0, fmap_CMap */
410     /* Not sure. Changed for CDevProc callout. Was pte->current_font */
411     gs_text_enum_t scan = *(gs_text_enum_t *)pte;
412     int wmode = font->WMode, code, rcode = 0;
413     pdf_font_resource_t *pdsubf0 = NULL;
414     gs_font *subfont0 = NULL, *saved_subfont = NULL;
415     uint index = scan.index, xy_index = scan.xy_index, start_index = index;
416     uint font_index0 = 0x7badf00d;
417     bool done = false;
418     pdf_char_glyph_pairs_t p;
419     gs_glyph *type1_glyphs = (gs_glyph *)vbuf;
420     int num_type1_glyphs = 0;
421 
422     p.num_all_chars = 1;
423     p.num_unused_chars = 1;
424     p.unused_offset = 0;
425     pte->returned.total_width.x = pte->returned.total_width.y = 0;;
426     for (;;) {
427         uint break_index, break_xy_index;
428         uint font_index = 0x7badf00d;
429         gs_const_string str;
430         pdf_text_process_state_t text_state;
431         pdf_font_resource_t *pdsubf;
432         gs_font *subfont = NULL;
433         gs_point wxy;
434         bool font_change = 0;
435 
436         code = gx_path_current_point(pte->path, &pte->origin);
437         if (code < 0)
438             return code;
439         do {
440             gs_char chr;
441             gs_glyph glyph;
442             pdf_font_descriptor_t *pfd;
443             byte *glyph_usage;
444             double *real_widths, *w, *v, *w0;
445             int char_cache_size, width_cache_size;
446             gs_char cid;
447 
448             break_index = scan.index;
449             break_xy_index = scan.xy_index;
450             code = font->procs.next_char_glyph(&scan, &chr, &glyph);
451             if (code == 2) {		/* end of string */
452                 done = true;
453                 break;
454             }
455             if (code < 0)
456                 return code;
457             subfont = scan.fstack.items[scan.fstack.depth].font;
458             font_index = scan.fstack.items[scan.fstack.depth - 1].index;
459             scan.xy_index++;
460             if (glyph == GS_NO_GLYPH)
461                 glyph = GS_MIN_CID_GLYPH;
462             cid = glyph - GS_MIN_CID_GLYPH;
463             switch (subfont->FontType) {
464                 case ft_encrypted:
465                 case ft_encrypted2:{
466                     if (glyph == GS_MIN_CID_GLYPH) {
467                         glyph = subfont->procs.encode_char(subfont, chr, GLYPH_SPACE_NAME);
468                     }
469                     type1_glyphs[num_type1_glyphs] = glyph;
470                     num_type1_glyphs++;
471                     break;
472                 }
473                 case ft_CID_encrypted:
474                 case ft_CID_TrueType: {
475                     p.s[0].glyph = glyph;
476                     p.s[0].chr = cid;
477                     code = pdf_obtain_cidfont_resource(pdev, subfont, &pdsubf, &p);
478                     if (code < 0)
479                         return code;
480                     break;
481                 }
482                 case ft_user_defined:
483                 case ft_PDF_user_defined:
484                 {
485                     gs_string str1;
486 
487                     str1.data = NULL;
488                     str1.size = 0;
489                     pte->current_font = subfont;
490                     code = pdf_obtain_font_resource(pte, &str1, &pdsubf);
491                     if (code < 0)
492                         return code;
493                     cid = pdf_find_glyph(pdsubf, glyph);
494                     if (cid == GS_NO_CHAR) {
495                         code = pdf_make_font3_resource(pdev, subfont, &pdsubf);
496                         if (code < 0)
497                             return code;
498                         code = pdf_attach_font_resource(pdev, subfont, pdsubf);
499                         if (code < 0)
500                             return code;
501                         cid = 0;
502                     }
503                     break;
504                 }
505                 default:
506                     /* An unsupported case, fall back to default implementation. */
507                     return_error(gs_error_rangecheck);
508             }
509             code = pdf_attached_font_resource(pdev, (gs_font *)subfont, &pdsubf,
510                                        &glyph_usage, &real_widths, &char_cache_size, &width_cache_size);
511             if (code < 0)
512                 return code;
513             if (break_index > start_index && pdev->charproc_just_accumulated)
514                 break;
515             if ((subfont->FontType == ft_user_defined || subfont->FontType == ft_PDF_user_defined )&&
516                 (break_index > start_index || !pdev->charproc_just_accumulated) &&
517                 !(pdsubf->u.simple.s.type3.cached[cid >> 3] & (0x80 >> (cid & 7)))) {
518                 if (subfont0 && subfont0->FontType != ft_user_defined && subfont0->FontType != ft_PDF_user_defined)
519                     /* This is hacky. By pretending to be in a type 3 font doing a charpath we force
520                      * text handling to fall right back to bitmap glyphs. This is because we can't handle
521                      * CIDFonts with mixed type 1/3 descendants. Ugly but it produces correct output for
522                      * what is after all a dumb setup.
523                      */
524                     pdev->type3charpath = 1;
525                 pte->current_font = subfont;
526                 return_error(gs_error_undefined);
527             }
528             if (subfont->FontType == ft_encrypted || subfont->FontType == ft_encrypted2) {
529                 font_change = (subfont != subfont0 && subfont0 != NULL);
530                 if (font_change) {
531                     saved_subfont = subfont;
532                     subfont = subfont0;
533                     num_type1_glyphs--;
534                 }
535             } else
536                 font_change = (pdsubf != pdsubf0 && pdsubf0 != NULL);
537             if (!font_change) {
538                 pdsubf0 = pdsubf;
539                 font_index0 = font_index;
540                 subfont0 = subfont;
541             }
542             if (subfont->FontType != ft_encrypted && subfont->FontType != ft_encrypted2) {
543                 pfd = pdsubf->FontDescriptor;
544                 code = pdf_resize_resource_arrays(pdev, pdsubf, cid + 1);
545                 if (code < 0)
546                     return code;
547                 if (subfont->FontType == ft_CID_encrypted || subfont->FontType == ft_CID_TrueType) {
548                     if (cid >=width_cache_size) {
549                         /* fixme: we add the CID=0 glyph as CID=cid glyph to the output font.
550                            Really it must not add and leave the CID undefined. */
551                         cid = 0; /* notdef. */
552                     }
553                 }
554                 if (cid >= char_cache_size || cid >= width_cache_size)
555                     return_error(gs_error_unregistered); /* Must not happen */
556                 if (pdsubf->FontType == ft_user_defined  || pdsubf->FontType == ft_PDF_user_defined  || pdsubf->FontType == ft_encrypted ||
557                                 pdsubf->FontType == ft_encrypted2) {
558                 } else {
559                     pdf_font_resource_t *pdfont;
560                     bool notdef_subst = false;
561 
562                     code = pdf_obtain_cidfont_widths_arrays(pdev, pdsubf, wmode, &w, &w0, &v);
563                     if (code < 0)
564                         return code;
565                     code = pdf_obtain_parent_type0_font_resource(pdev, pdsubf, font_index,
566                         &font->data.CMap->CMapName, &pdfont);
567                     if (code < 0)
568                         return code;
569                     if (pdf_is_CID_font(subfont)) {
570                         /* Some Pscript5 output has non-identity mappings between character code and CID
571                          * and the GlyphNames2Unicode dictionary uses character codes, not glyph names. So
572                          * if we detect ths condition we cheat and claim not to be a CIDFont, so that the
573                          * decode_glyph procedure can use the character code to look up the GlyphNames2Unicode
574                          * dictionary. See bugs #696021, #688768 and #687954 for examples of the various ways
575                          * this code can be exercised.
576                          */
577                         if (chr == glyph - GS_MIN_CID_GLYPH)
578                             code = subfont->procs.decode_glyph((gs_font *)subfont, glyph, -1, NULL, 0);
579                         else
580                             code = subfont->procs.decode_glyph((gs_font *)subfont, glyph, chr, NULL, 0);
581                         if (code != 0)
582                             /* Since PScript5.dll creates GlyphNames2Unicode with character codes
583                                instead CIDs, and with the WinCharSetFFFF-H2 CMap
584                                character codes appears different than CIDs (Bug 687954),
585                                pass the character code intead the CID. */
586                             code = pdf_add_ToUnicode(pdev, subfont, pdfont,
587                                 chr + GS_MIN_CID_GLYPH, chr, NULL);
588                         else {
589                             /* If we interpret a PDF document, ToUnicode
590                                CMap may be attached to the Type 0 font. */
591                             code = pdf_add_ToUnicode(pdev, pte->orig_font, pdfont,
592                                 chr + GS_MIN_CID_GLYPH, chr, NULL);
593                         }
594                     }
595                     else
596                         code = pdf_add_ToUnicode(pdev, subfont, pdfont, glyph, cid, NULL);
597                     if (code < 0)
598                         return code;
599                     /*  We can't check pdsubf->used[cid >> 3] here,
600                         because it mixed data for different values of WMode.
601                         Perhaps pdf_font_used_glyph returns fast with reused glyphs.
602                      */
603                     code = pdf_font_used_glyph(pfd, glyph, (gs_font_base *)subfont);
604                     if (code == gs_error_rangecheck) {
605                         if (!(pdsubf->used[cid >> 3] & (0x80 >> (cid & 7)))) {
606                             char buf[gs_font_name_max + 1];
607                             int l = min(sizeof(buf) - 1, subfont->font_name.size);
608 
609                             memcpy(buf, subfont->font_name.chars, l);
610                             buf[l] = 0;
611                             emprintf3(pdev->memory,
612                                       "Missing glyph CID=%d, glyph=%04x in the font %s . The output PDF may fail with some viewers.\n",
613                                       (int)cid,
614                                       (unsigned int)(glyph - GS_MIN_CID_GLYPH),
615                                       buf);
616                             pdsubf->used[cid >> 3] |= 0x80 >> (cid & 7);
617                             if (pdev->PDFA != 0) {
618                                 switch (pdev->PDFACompatibilityPolicy) {
619                                     /* Default behaviour matches Adobe Acrobat, warn and continue,
620                                      * output file will not be PDF/A compliant
621                                      */
622                                     case 0:
623                                     case 1:
624                                     case 3:
625                                         emprintf(pdev->memory,
626                                              "All used glyphs mst be present in fonts for PDF/A, reverting to normal PDF output.\n");
627                                         pdev->AbortPDFAX = true;
628                                         pdev->PDFA = 0;
629                                         break;
630                                     case 2:
631                                         emprintf(pdev->memory,
632                                              "All used glyphs mst be present in fonts for PDF/A, aborting conversion.\n");
633                                         return_error(gs_error_invalidfont);
634                                         break;
635                                     default:
636                                         emprintf(pdev->memory,
637                                              "All used glyphs mst be present in fonts for PDF/A, unrecognised PDFACompatibilityLevel,\nreverting to normal PDF output\n");
638                                         pdev->AbortPDFAX = true;
639                                         pdev->PDFA = 0;
640                                         break;
641                                 }
642                             }
643                         }
644                         cid = 0, code = 1;  /* undefined glyph. */
645                         notdef_subst = true;
646                         /* If this is the first use of CID=0, get its width */
647                         if (pdsubf->Widths[cid] == 0) {
648                             pdf_glyph_widths_t widths;
649 
650                             code = pdf_glyph_widths(pdsubf, wmode, glyph, (gs_font *)subfont, &widths,
651                                 pte->cdevproc_callout ? pte->cdevproc_result : NULL);
652                         }
653                     } else if (code < 0)
654                         return code;
655                     if (glyph == GS_MIN_CID_GLYPH && pdev->PDFA != 0) {
656                         switch (pdev->PDFACompatibilityPolicy) {
657                             case 0:
658                             case 1:
659                             case 3:
660                                 emprintf(pdev->memory,
661                                      "A CIDFont uses CID 0, which is not legal for PDF/A, reverting to normal PDF output.\n");
662                                 pdev->AbortPDFAX = true;
663                                 pdev->PDFA = 0;
664                                 break;
665                             case 2:
666                                 emprintf(pdev->memory,
667                                      "A CIDFont uses CID 0, which is not legal for PDF/A, aborting conversion.\n");
668                                 return_error(gs_error_invalidfont);
669                                 break;
670                             default:
671                                 emprintf(pdev->memory,
672                                      "A CIDFont uses CID 0, which is not legal for PDF/A, unrecognised PDFACompatibilityLevel,\nreverting to normal PDF output\n");
673                                 pdev->AbortPDFAX = true;
674                                 pdev->PDFA = 0;
675                                 break;
676                         }
677                     }
678                     if ((code == 0 /* just copied */ || pdsubf->Widths[cid] == 0) && !notdef_subst) {
679                         pdf_glyph_widths_t widths;
680 
681                     code = pdf_glyph_widths(pdsubf, wmode, glyph, (gs_font *)subfont, &widths,
682                         pte->cdevproc_callout ? pte->cdevproc_result : NULL);
683                     if (code < 0)
684                         return code;
685                     if (code == TEXT_PROCESS_CDEVPROC) {
686                         pte->returned.current_glyph = glyph;
687                         pte->current_font = subfont;
688                         rcode = TEXT_PROCESS_CDEVPROC;
689                         break;
690                     }
691                     if (code >= 0) {
692                         if (cid > pdsubf->count)
693                             return_error(gs_error_unregistered); /* Must not happen. */
694                         w[cid] = widths.Width.w;
695                         if (v != NULL) {
696                             v[cid * 2 + 0] = widths.Width.v.x;
697                             v[cid * 2 + 1] = widths.Width.v.y;
698                         }
699                         real_widths[cid] = widths.real_width.w;
700                     }
701                     if (wmode) {
                        /* Since AR5 uses W or DW to compute the x-coordinate of the
                           v-vector, compute and store the glyph width for WMode 0. */
                        /* fixme : skip computing real_width here. */
705                         code = pdf_glyph_widths(pdsubf, 0, glyph, (gs_font *)subfont, &widths,
706                             pte->cdevproc_callout ? pte->cdevproc_result : NULL);
707                         if (code < 0)
708                             return code;
709                         w0[cid] = widths.Width.w;
710                     }
711                     if (pdsubf->u.cidfont.CIDToGIDMap != 0) {
712                         uint gid = 0;
713                         gs_font_cid2 *subfont2 = (gs_font_cid2 *)subfont;
714 
715                         gid = subfont2->cidata.CIDMap_proc(subfont2, glyph);
716 
717                         /* If this is a TrueType CIDFont, check the GSUB table to see if there's
718                          * a suitable substitute glyph.
719                          */
720                         if (subfont2->FontType == ft_CID_TrueType)
721                             gid = subfont2->data.substitute_glyph_index_vertical((gs_font_type42 *)subfont, gid, subfont2->WMode, glyph);
722                         pdsubf->u.cidfont.CIDToGIDMap[cid] = gid;
723                     }
724                 }
725                 if (wmode)
726                     pdsubf->u.cidfont.used2[cid >> 3] |= 0x80 >> (cid & 7);
727                 }
728                 pdsubf->used[cid >> 3] |= 0x80 >> (cid & 7);
729             }
730             if (pte->cdevproc_callout) {
                /* Only handle a single character because its width is stored
                  into pte->cdevproc_result, and process_text_modify_width needs it.
                  fixme: next time take from w, v, real_widths. */
734                 break_index = scan.index;
735                 break_xy_index = scan.xy_index;
736                 break;
737             }
738         } while (!font_change);
739         if (break_index > index) {
740             pdf_font_resource_t *pdfont;
741             gs_matrix m3;
742             int xy_index_step = (!(pte->text.operation & TEXT_REPLACE_WIDTHS) ? 0 :
743                                  pte->text.x_widths == pte->text.y_widths ? 2 : 1);
744             gs_text_params_t save_text;
745 
746             if (!subfont && num_type1_glyphs != 0)
747                 subfont = subfont0;
748             if (subfont && (subfont->FontType == ft_encrypted || subfont->FontType == ft_encrypted2)) {
749                 int save_op = pte->text.operation;
750                 gs_font *save_font = pte->current_font;
751                 const gs_glyph *save_data = pte->text.data.glyphs;
752 
753                 pte->current_font = subfont;
754                 pte->text.operation |= TEXT_FROM_GLYPHS;
755                 pte->text.data.glyphs = type1_glyphs;
756                 str.data = ((const byte *)vbuf) + ((pte->text.size - pte->index) * sizeof(gs_glyph));
757                 str.size = num_type1_glyphs;
758                 code = pdf_obtain_font_resource_unencoded(pte, (const gs_string *)&str, &pdsubf0,
759                     type1_glyphs);
760                 if (code < 0) {
761                     /* Replace the modified values, fall back to default implementation
762                      * (type 3 bitmap image font)
763                      */
764                     pte->current_font = save_font;
765                     pte->text.operation |= save_op;
766                     pte->text.data.glyphs = save_data;
767                     return(code);
768                 }
769                 memcpy((void *)scan.text.data.bytes, (void *)str.data, str.size);
770                 str.data = scan.text.data.bytes;
771                 pdsubf = pdsubf0;
772                 pte->text.operation = save_op;
773             }
774             pte->current_font = subfont0;
775             if (!subfont0 || !pdsubf0)
776                 /* This should be impossible */
777                 return_error(gs_error_invalidfont);
778 
779             code = gs_matrix_multiply(&subfont0->FontMatrix, &font->FontMatrix, &m3);
780             /* We thought that it should be gs_matrix_multiply(&font->FontMatrix, &subfont0->FontMatrix, &m3); */
781             if (code < 0)
782                 return code;
783             if (pdsubf0->FontType == ft_user_defined  || pdsubf0->FontType == ft_PDF_user_defined  || pdsubf->FontType == ft_encrypted ||
784                 pdsubf->FontType == ft_encrypted2)
785                     pdfont = pdsubf0;
786             else {
787                 code = pdf_obtain_parent_type0_font_resource(pdev, pdsubf0, font_index0,
788                             &font->data.CMap->CMapName, &pdfont);
789                 if (code < 0)
790                     return code;
791                 if (!pdfont->u.type0.Encoding_name[0]) {
792                     /*
793                     * If pdfont->u.type0.Encoding_name is set,
794                     * a CMap resource is already attached.
795                     * See attach_cmap_resource.
796                     */
797                     code = attach_cmap_resource(pdev, pdfont, font->data.CMap, font_index0);
798                     if (code < 0)
799                         return code;
800                 }
801             }
802             pdf_set_text_wmode(pdev, font->WMode);
803             code = pdf_update_text_state(&text_state, (pdf_text_enum_t *)pte, pdfont, &m3);
804             if (code < 0)
805                 return code;
806             /* process_text_modify_width breaks text parameters.
807                We would like to improve it someday.
808                Now save them locally and restore after the call. */
809             save_text = pte->text;
810             if (subfont && (subfont->FontType != ft_encrypted &&
811                 subfont->FontType != ft_encrypted2)) {
812                 /* If we are a type 1 descendant, we already sorted this out above */
813                 str.data = scan.text.data.bytes + index;
814                 str.size = break_index - index;
815             }
816             if (pte->text.operation & TEXT_REPLACE_WIDTHS) {
817                 if (pte->text.x_widths != NULL)
818                     pte->text.x_widths += xy_index * xy_index_step;
819                 if (pte->text.y_widths != NULL)
820                     pte->text.y_widths += xy_index * xy_index_step;
821             }
822             pte->xy_index = 0;
823             if (subfont && (subfont->FontType == ft_encrypted ||
824                 subfont->FontType == ft_encrypted2)) {
825                 gs_font *f = pte->orig_font;
826 
827                 adjust_first_last_char(pdfont, (byte *)str.data, str.size);
828 
829                 /* Make sure we use the descendant font, not the original type 0 ! */
830                 pte->orig_font = subfont;
831                 code = process_text_modify_width((pdf_text_enum_t *)pte,
832                     (gs_font *)subfont, &text_state, &str, &wxy, type1_glyphs, false, scan.index - index);
833                 if (code < 0)
834                     return(code);
835                 if(font_change) {
836                     type1_glyphs[0] = type1_glyphs[num_type1_glyphs];
837                     num_type1_glyphs = 1;
838                     subfont = saved_subfont;
839                 } else {
840                     num_type1_glyphs = 0;
841                 }
842                 pte->orig_font = f;
843             } else {
844                 code = process_text_modify_width((pdf_text_enum_t *)pte, (gs_font *)font,
845                     &text_state, &str, &wxy, NULL, true, scan.index - index);
846             }
847             if (pte->text.operation & TEXT_REPLACE_WIDTHS) {
848                 if (pte->text.x_widths != NULL)
849                     pte->text.x_widths -= xy_index * xy_index_step;
850                 if (pte->text.y_widths != NULL)
851                     pte->text.y_widths -= xy_index * xy_index_step;
852             }
853             pte->text = save_text;
854             pte->cdevproc_callout = false;
855             if (code < 0) {
856                 pte->index = index;
857                 pte->xy_index = xy_index;
858                 return code;
859             }
860             pte->index = break_index;
861             pte->xy_index = break_xy_index;
862             if (pdev->Eps2Write) {
863                 gs_rect text_bbox;
864                 gx_device_clip cdev;
865                 gx_drawing_color devc;
866                 fixed x0, y0, bx2, by2;
867 
868                 text_bbox.q.x = text_bbox.p.y = text_bbox.q.y = 0;
869                 estimate_fontbbox(pte, (gs_font_base *)font, NULL, &text_bbox);
870                 text_bbox.p.x = fixed2float(pte->origin.x);
871                 text_bbox.q.x = text_bbox.p.x + wxy.x;
872 
873                 x0 = float2fixed(text_bbox.p.x);
874                 y0 = float2fixed(text_bbox.p.y);
875                 bx2 = float2fixed(text_bbox.q.x) - x0;
876                 by2 = float2fixed(text_bbox.q.y) - y0;
877 
878                 pdev->AccumulatingBBox++;
879                 gx_make_clip_device_on_stack(&cdev, pte->pcpath, (gx_device *)pdev);
880                 set_nonclient_dev_color(&devc, gx_device_black((gx_device *)pdev));  /* any non-white color will do */
881                 gx_default_fill_triangle((gx_device *) pdev, x0, y0,
882                                          float2fixed(text_bbox.p.x) - x0,
883                                          float2fixed(text_bbox.q.y) - y0,
884                                          bx2, by2, &devc, lop_default);
885                 gx_default_fill_triangle((gx_device *) & cdev, x0, y0,
886                                          float2fixed(text_bbox.q.x) - x0,
887                                          float2fixed(text_bbox.p.y) - y0,
888                                          bx2, by2, &devc, lop_default);
889                 pdev->AccumulatingBBox--;
890             }
891             code = pdf_shift_text_currentpoint(pte, &wxy);
892             if (code < 0)
893                 return code;
894         }
895         pdf_text_release_cgp(pte);
896         index = break_index;
897         xy_index = break_xy_index;
898         if (done || rcode != 0)
899             break;
900         pdsubf0 = pdsubf;
901         font_index0 = font_index;
902         subfont0 = subfont;
903     }
904     pte->index = index;
905     pte->xy_index = xy_index;
906     return rcode;
907 }
908 
909 int
process_cmap_text(gs_text_enum_t * penum,void * vbuf,uint bsize)910 process_cmap_text(gs_text_enum_t *penum, void *vbuf, uint bsize)
911 {
912     int code;
913     pdf_text_enum_t *pte = (pdf_text_enum_t *)penum;
914     byte *save;
915     uint start = pte->index;
916 
917     if (pte->text.operation &
918         (TEXT_FROM_ANY - (TEXT_FROM_STRING | TEXT_FROM_BYTES))
919         )
920         return_error(gs_error_rangecheck);
921     if (pte->text.operation & TEXT_INTERVENE) {
922         /* Not implemented.  (PostScript doesn't allow TEXT_INTERVENE.) */
923         return_error(gs_error_rangecheck);
924     }
925     /* scan_cmap_text has the unfortunate side effect of meddling with the
926      * text data in the enumerator. In general that's OK but in the case where
927      * the string is (eg) in a bound procedure, and we run that procedure more
928      * than once, the string is corrupted on the first use and then produces
929      * incorrect output for the subsequent use(s).
930      * The routine is, sadly, extremely convoluted so instead of trying to fix
931      * it so that it doesn't corrupt the string (which looks likely to be impossible
932      * without copying the string at some point) I've chosen to take a copy of the
933      * string here, and restore it after the call to scan_cmap_text.
934      * See bug #695322 and test file Bug691680.ps
935      */
936     save = (byte *)pte->text.data.bytes;
937     pte->text.data.bytes = gs_alloc_string(pte->memory, pte->text.size, "pdf_text_process");
938     memcpy((byte *)pte->text.data.bytes, save, pte->text.size);
939     code = scan_cmap_text(pte, vbuf);
940     gs_free_string(pte->memory, (byte *)pte->text.data.bytes,  pte->text.size, "pdf_text_process");
941     pte->text.data.bytes = save;
942     pte->bytes_decoded = pte->index - start;
943 
944     if (code == TEXT_PROCESS_CDEVPROC)
945         pte->cdevproc_callout = true;
946     else
947         pte->cdevproc_callout = false;
948     return code;
949 }
950 
951 /* ---------------- CIDFont ---------------- */
952 
953 /*
954  * Process a text string in a CIDFont.  (Only glyphshow is supported.)
955  */
956 int
process_cid_text(gs_text_enum_t * pte,void * vbuf,uint bsize)957 process_cid_text(gs_text_enum_t *pte, void *vbuf, uint bsize)
958 {
959     pdf_text_enum_t *penum = (pdf_text_enum_t *)pte;
960     uint operation = pte->text.operation;
961     gs_text_enum_t save;
962     gs_font *scaled_font = pte->current_font; /* CIDFont */
963     gs_font *font;		/* unscaled font (CIDFont) */
964     const gs_glyph *glyphs;
965     gs_matrix scale_matrix;
966     pdf_font_resource_t *pdsubf; /* CIDFont */
967     gs_font_type0 *font0 = NULL;
968     uint size;
969     int code;
970 
971     if (operation & TEXT_FROM_GLYPHS) {
972         glyphs = pte->text.data.glyphs;
973         size = pte->text.size - pte->index;
974     } else if (operation & TEXT_FROM_SINGLE_GLYPH) {
975         glyphs = &pte->text.data.d_glyph;
976         size = 1;
977     } else if (operation & TEXT_FROM_STRING) {
978         glyphs = &pte->outer_CID;
979         size = 1;
980     } else
981         return_error(gs_error_rangecheck);
982 
983     /*
984      * PDF doesn't support glyphshow directly: we need to create a Type 0
985      * font with an Identity CMap.  Make sure all the glyph numbers fit
986      * into 16 bits.  (Eventually we should support wider glyphs too,
987      * but this would require a different CMap.)
988      */
989     if (bsize < size * 2)
990         return_error(gs_error_unregistered); /* Must not happen. */
991     {
992         int i;
993         byte *pchars = vbuf;
994 
995         for (i = 0; i < size; ++i) {
996             ulong gnum = glyphs[i] - GS_MIN_CID_GLYPH;
997 
998             if (gnum & ~0xffffL)
999                 return_error(gs_error_rangecheck);
1000             *pchars++ = (byte)(gnum >> 8);
1001             *pchars++ = (byte)gnum;
1002         }
1003     }
1004 
1005     /* Find the original (unscaled) version of this font. */
1006 
1007     for (font = scaled_font; font->base != font; )
1008         font = font->base;
1009     /* Compute the scaling matrix. */
1010     code = gs_matrix_invert(&font->FontMatrix, &scale_matrix);
1011     if (code < 0)
1012         return code;
1013     gs_matrix_multiply(&scale_matrix, &scaled_font->FontMatrix, &scale_matrix);
1014 
1015     /* Find or create the CIDFont resource. */
1016 
1017     code = pdf_obtain_font_resource(penum, NULL, &pdsubf);
1018     if (code < 0)
1019         return code;
1020 
1021     /* Create the CMap and Type 0 font if they don't exist already. */
1022 
1023     if (pdsubf->u.cidfont.glyphshow_font_id != 0)
1024         font0 = (gs_font_type0 *)gs_find_font_by_id(font->dir,
1025                     pdsubf->u.cidfont.glyphshow_font_id, &scaled_font->FontMatrix);
1026     if (font0 == NULL) {
1027         code = gs_font_type0_from_cidfont(&font0, font, font->WMode,
1028                                           &scale_matrix, font->memory);
1029         if (code < 0)
1030             return code;
1031         pdsubf->u.cidfont.glyphshow_font_id = font0->id;
1032     }
1033 
1034     /* Now handle the glyphshow as a show in the Type 0 font. */
1035 
1036     save = *pte;
1037     pte->current_font = pte->orig_font = (gs_font *)font0;
1038     /* Patch the operation temporarily for init_fstack. */
1039     pte->text.operation = (operation & ~TEXT_FROM_ANY) | TEXT_FROM_BYTES;
1040     /* Patch the data for process_cmap_text. */
1041     pte->text.data.bytes = vbuf;
1042     pte->text.size = size * 2;
1043     pte->index = 0;
1044     gs_type0_init_fstack(pte, pte->current_font);
1045     code = process_cmap_text(pte, vbuf, bsize);
1046     pte->current_font = scaled_font;
1047     pte->orig_font = save.orig_font;
1048     pte->text = save.text;
1049     pte->index = save.index + pte->index / 2;
1050     pte->fstack = save.fstack;
1051     return code;
1052 }
1053