1 /* Copyright (C) 2000-2012 by George Williams */
2 /* 2012nov01, many fixes added, Jose Da Silva */
3 /*
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions are met:
6
7 * Redistributions of source code must retain the above copyright notice, this
8 * list of conditions and the following disclaimer.
9
10 * Redistributions in binary form must reproduce the above copyright notice,
11 * this list of conditions and the following disclaimer in the documentation
12 * and/or other materials provided with the distribution.
13
14 * The name of the author may not be used to endorse or promote products
15 * derived from this software without specific prior written permission.
16
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
18 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
19 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
20 * EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
22 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
23 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
24 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
25 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
26 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
29 #include <fontforge-config.h>
30
31 #include "parsepdf.h"
32
33 #include "chardata.h"
34 #include "cvimages.h"
35 #include "dumppfa.h"
36 #include "encoding.h"
37 #include "fontforge.h"
38 #include "gfile.h"
39 #include "gwidget.h"
40 #include "namelist.h"
41 #include "parsepfa.h"
42 #include "parsettf.h"
43 #include "psfont.h"
44 #include "psread.h"
45 #include "sd.h"
46 #include "splineutil.h"
47 #include "splineutil2.h"
48 #include "ustring.h"
49 #include "utype.h"
50
51 #include <locale.h>
52 #include <math.h>
53
54 #ifdef HAVE_IEEEFP_H
55 # include <ieeefp.h> /* Solaris defines finite in ieeefp rather than math.h */
56 #endif
57
/* Parsing state shared by the pdf reading routines in this file. */
struct pdfcontext {
    char *tokbuf;               /* growable scratch buffer for names and dictionary values */
    int tblen;                  /* number of bytes currently allocated for tokbuf */
    FILE *pdf;                  /* the pdf file being parsed */
    FILE *compressed;           /* when non-NULL, the decoded stream that is read instead of pdf */
    struct psdict pdfdict;      /* key/value pairs of the dictionary most recently read */
    long *objs;                 /* per object: file offset, or containing object number for */
                                /*  compressed objects, or -1 when the object is unused */
    long *subindex;             /* per object: third xref-stream field of a compressed entry, */
                                /*  else -1; NULL when the file has no xref stream */
    int ocnt;                   /* number of entries in objs (and subindex) */
    long *fontobjs;             /* object numbers of the fonts that were found */
    char **fontnames;           /* theoretically in utf-8 */
    long *cmapobjs;             /* object number taken from each font's ToUnicode entry */
    int *cmap_from_cid;         /* 1 when the CMap was found via a Type0 (CID-keyed) font */
    int fcnt;                   /* number of fonts found */
    enum openflags openflags;
    int encrypted;              /* set when an /Encrypt entry was parsed */
    int enc_dict;               /* first number of the /Encrypt reference */
    int pcnt;                   /* number of entries used in pages */
    long *pages;                /* object numbers of the /Page objects, in page tree order */
    int root;                   /* first number of the /Root reference, 0 while unknown */
};
79
static long FindXRef(FILE *pdf) {
/* Find 'startxref' near the end of FILE pdf and return the byte offset that */
/* follows it. Return -1 if the keyword is missing, the number can't be read, */
/* the number is negative, or the file can't be read.                        */
    long xrefpos;
    /* From end of file, back up over expected trailer: */
    /* CR/LF, '%%EOF', CR/LF, byte offset number, CR/LF, 'startxref', */
    /* plus a little more. Have observed offset numbers of 8 decimal  */
    /* digits, so allow for 10 digit numbers.                         */
    char buffer[40];
    /* Keep the count signed: negating a size_t yields a huge unsigned value */
    /* and only reaches fseek() as a negative offset by conversion luck.     */
    const long fillcnt = (long) sizeof(buffer) - 1;
    char *pt;

    if ( fseek(pdf,-fillcnt,SEEK_END)!=0 )
        return( -1 );

    if ( fread(buffer,1,(size_t) fillcnt,pdf)!=(size_t) fillcnt )
        return( -1 );

    buffer[fillcnt] = '\0';

    if ( (pt=strstr(buffer,"startxref"))==NULL )
        return( -1 );

    /* A negative offset can never be a valid file position */
    if ( sscanf(pt,"startxref %ld",&xrefpos)!=1 || xrefpos<0 )
        return( -1 );

    return( xrefpos );
}
107
static int findkeyword(FILE *pdf, char *keyword, char *end) {
/* Scan forward in pdf for 'keyword' using a sliding window as long as the   */
/* keyword. Returns true with the file positioned just after the keyword.    */
/* Returns false on EOF or read error, when the window starts with 'end' (if */
/* one was given), or when either string is too long for the window.         */
    char window[60];
    int klen = strlen( keyword );
    int elen = (end!=NULL) ? strlen(end) : 0;
    int filled, c;

    if ( klen >= sizeof(window) || elen >= sizeof(window) )
        return( false );

    /* Prime the window with the first klen characters */
    for ( filled=0; filled<klen; ++filled ) {
        c = getc(pdf);
        if ( c<0 )
            return( false );
        window[filled] = c;
    }
    window[klen] = '\0';

    for (;;) {
        if ( strcmp(window,keyword)==0 )
            return( true );
        if ( elen!=0 && strncmp(window,end,elen)==0 )
            return( false );
        /* Slide the window one character to the right */
        c = getc(pdf);
        if ( c<0 )
            return( false );
        memmove(window,window+1,klen-1);
        window[klen-1] = c;
    }
}
139
seektrailer(FILE * pdf,long * start,long * num,struct pdfcontext * pc)140 static int seektrailer(FILE *pdf, long *start, long *num, struct pdfcontext *pc) {
141 /* seek 'trailer' and then return values for 'start' and 'num'. Exit if error. */
142 long prev_xref;
143 long pos;
144
145 /* find 'trailer' and point 'pos' to the next char location after it */
146 if ( !findkeyword(pdf,"trailer",NULL) || (pos=ftell(pdf))==-1 )
147 return( false );
148
149 /* check if there's encryption and toggle-on the encrypt flag if yes */
150 if ( findkeyword(pdf,"/Encrypt",">>") ) {
151 long bar;
152 if ( fscanf(pdf,"%d %ld",&pc->enc_dict,&bar)==2 )
153 pc->encrypted = true;
154 }
155
156 if ( pc->root == 0 ) {
157 if ( fseek(pdf,pos,SEEK_SET)!=0 ) return( false );
158 if ( findkeyword(pdf,"/Root",">>") ) {
159 long bar;
160 fscanf(pdf,"%d %ld",&pc->root,&bar);
161 }
162 }
163
164 /* find '/Prev' and return values for 'start' & 'num', Exit if error */
165 if ( fseek(pdf,pos,SEEK_SET)!=0 || \
166 !findkeyword(pdf,"/Prev",">>") || \
167 fscanf(pdf,"%ld",&prev_xref)!=1 || \
168 fseek(pdf,prev_xref,SEEK_SET )!=0 || \
169 fscanf(pdf,"xref %ld %ld",start,num)!=2 )
170 return( false );
171
172 return( true ); /* Done! We now have 'start' and 'num' */
173 }
174
175 static long *FindObjectsFromXREFObject(struct pdfcontext *pc, long prev_xref);
176
FindObjects(struct pdfcontext * pc)177 static long *FindObjects(struct pdfcontext *pc) {
178 /* Find and return a list of file pointers to XREFObjects in this pdf file. */
179 /* Return NULL if any file-reading error encountered, or if lack of memory. */
180 FILE *pdf = pc->pdf;
181 long xrefpos;
182 long *ret, *ret_old;
183 int *gen, *gen_old;
184 int ch; long cnt, i, start, num;
185 long offset; int gennum; char f;
186
187 /* find the XREF location and point to that position. Exit if error */
188 if ( (xrefpos=FindXRef(pdf))==-1 || fseek(pdf,xrefpos,SEEK_SET)!=0 )
189 return( NULL );
190
191 /* initialize 'start' and 'num' values if we have them here */
192 if ( fscanf(pdf,"xref %ld %ld",&start,&num)!=2 ) {
193 /* otherwise, check if it is an 'obj' and try there instead */
194 long foo, bar;
195 if ( fseek(pdf,xrefpos,SEEK_SET)!=0 || \
196 fscanf(pdf,"%ld %ld",&foo,&bar)!=2 )
197 return( NULL );
198 while ( isspace(ch=getc(pdf)));
199 if ( ch=='o' && \
200 getc(pdf)=='b' && \
201 getc(pdf)=='j' && \
202 isspace(getc(pdf)) )
203 return( FindObjectsFromXREFObject(pc,xrefpos));
204
205 return( NULL );
206 }
207
208 cnt=0; ret=NULL; gen=NULL; /* no objects to return yet */
209 while ( 1 ) {
210 if ( start < 0 || start > 10000000 || num < 0 || num > 10000000 ||
211 start+num > 10000000 ) {
212 free(ret); free(gen);
213 pc->ocnt = 0;
214 return( NULL );
215 }
216 if ( start+num>cnt ) {
217 /* increase memory needed for XREFs. Mark last location = -2 */
218 ret_old=ret; gen_old=gen; pc->ocnt=(int)(start+num);
219 ret = realloc(ret,(start+num+1)*sizeof(long));
220 gen = realloc(gen,(start+num)*sizeof(int));
221 if ( ret==NULL || gen==NULL || pc->ocnt!=start+num ) {
222 free(ret); free(ret_old);
223 free(gen); free(gen_old);
224 NoMoreMemMessage(); pc->ocnt = 0;
225 return( NULL );
226 }
227 memset(ret+cnt,-1,sizeof(long)*(start+num-cnt));
228 memset(gen+cnt,-1,sizeof(int)*(start+num-cnt));
229 cnt = start+num;
230 ret[cnt] = -2;
231 }
232 for ( i=start; i<start+num; ++i ) {
233 if ( fscanf(pdf,"%ld %d %c",&offset,&gennum,&f)!=3 ) {
234 free(gen);
235 return( ret );
236 }
237 if ( f=='f' ) {
238 if ( gennum > gen[i] ) {
239 ret[i] = -1;
240 gen[i] = gennum;
241 }
242 } else if ( f=='n' ) {
243 if ( gennum > gen[i] ) {
244 ret[i] = offset;
245 gen[i] = gennum;
246 }
247 } else {
248 free(gen);
249 return( ret );
250 }
251 }
252 /* load the next 'start' and 'num' values and continue. */
253 /* if can't get more 'start' and 'num' then we're done. */
254 if ( fscanf(pdf,"%ld %ld",&start,&num)!=2 && \
255 !seektrailer(pdf,&start,&num,pc) ) {
256 free(gen);
257 return( ret );
258 }
259 }
260 }
261
/* pdf_space(ch): true for the pdf white-space characters NUL, HT, LF, CR, FF, SP */
/* pdf_oper(ch):  true for the pdf delimiter characters ( ) < > [ ] { } / %       */
/* The argument is parenthesized so compound expressions expand correctly, but it */
/* is still evaluated several times: don't pass expressions with side effects.    */
#define pdf_space(ch) ((ch)=='\0' || (ch)=='\t' || (ch)=='\n' || (ch)=='\r' || (ch)=='\f' || (ch)==' ' )
#define pdf_oper(ch) ((ch)=='(' || (ch)==')' || (ch)=='<' || (ch)=='>' || (ch)=='[' || (ch)==']' || (ch)=='{' || (ch)=='}' || (ch)=='/' || (ch)=='%' )
264
static int pdf_peekch(FILE *pdf) {
/* Return the next character of pdf without consuming it. When getc() */
/* reports EOF or an error, that negative value is returned as is.    */
    int next = getc(pdf);

    if ( next<0 )
        return( next );
    ungetc(next,pdf);
    return( next );
}
272
pdf_skipwhitespace(struct pdfcontext * pc)273 static int pdf_skipwhitespace(struct pdfcontext *pc) {
274 /* Skip pdf white spaces. Return -1 if EOF or get file error. */
275 FILE *pdf = pc->compressed ? pc->compressed : pc->pdf;
276 int ch;
277
278 /* get next char and loop forever until EOF or file error */
279 while ( (ch=getc(pdf))>=0 ) {
280 if( ch=='%' )
281 /* skip everything after '%' upto '\n' or '\r' */
282 while ( (ch=getc(pdf))>=0 && ch!='\n' && ch!='\r' );
283 else
284 if ( !pdf_space(ch) ) break;
285 }
286 /* Done! Now if not EOF or a file error, then unget ch */
287 if ( ch<0 || ungetc(ch,pdf)<0 ) return( -1 );
288 return( 0 );
289 }
290
pdf_getname(struct pdfcontext * pc)291 static char *pdf_getname(struct pdfcontext *pc) {
292 /* return name. return NULL if errors found */
293 FILE *pdf = pc->compressed ? pc->compressed : pc->pdf;
294 int ch;
295 char *pt = pc->tokbuf, *end = pc->tokbuf+pc->tblen;
296
297 /* first, skip any white spaces in front of name */
298 if ( pdf_skipwhitespace(pc) ) return( NULL );
299
300 if ( (ch=getc(pdf))!='/' ) {
301 ungetc(ch,pdf);
302 return( NULL );
303 }
304
305 for ( ch=getc(pdf) ;; ch=getc(pdf) ) {
306 if ( pt>=end ) {
307 char *temp;
308 if ( (temp=realloc(pc->tokbuf,(pc->tblen+=300)))==NULL ) {
309 /* error, but don't need to free realloc memory */
310 NoMoreMemMessage();
311 return( NULL );
312 }
313 pt = temp + (pt-pc->tokbuf);
314 pc->tokbuf = temp;
315 end = temp+pc->tblen;
316 }
317 if ( pdf_space(ch) || pdf_oper(ch) ) {
318 ungetc(ch,pdf);
319 *pt = '\0';
320 return( pc->tokbuf );
321 }
322 *pt++ = ch;
323 }
324 }
325
pdf_getdictvalue(struct pdfcontext * pc)326 static char *pdf_getdictvalue(struct pdfcontext *pc) {
327 FILE *pdf = pc->compressed ? pc->compressed : pc->pdf;
328 int ch;
329 char *pt = pc->tokbuf, *end = pc->tokbuf+pc->tblen;
330 int dnest=0, anest=0, strnest;
331
332 pdf_skipwhitespace(pc);
333 ch = getc(pdf);
334 for (;;) {
335 if ( pt>=end ) {
336 char *temp = realloc(pc->tokbuf,(pc->tblen+=300));
337 pt = temp + (pt-pc->tokbuf);
338 pc->tokbuf = temp;
339 end = temp+pc->tblen;
340 }
341 *pt++ = ch;
342 if ( ch=='(' ) {
343 strnest = 0;
344 while ( (ch=getc(pdf))!=EOF ) {
345 if ( pt>=end ) {
346 char *temp = realloc(pc->tokbuf,(pc->tblen+=300));
347 pt = temp + (pt-pc->tokbuf);
348 pc->tokbuf = temp;
349 end = temp+pc->tblen;
350 }
351 *pt++ = ch;
352 if ( ch=='(' ) ++strnest;
353 else if ( ch==')' && strnest==0 )
354 break;
355 else if ( ch==')' ) --strnest;
356 }
357 } else if ( ch=='[' )
358 ++ anest;
359 else if ( ch==']' && anest>0 )
360 -- anest;
361 else if ( ch=='<' && pdf_peekch(pdf)=='<' )
362 ++dnest;
363 else if ( ch=='>' && pdf_peekch(pdf)=='>' ) {
364 if ( dnest==0 ) {
365 ungetc(ch,pdf);
366 pt[-1] = '\0';
367 if ( pt>pc->tokbuf+1 && pt[-2]==' ' ) pt[-2] = '\0';
368 return( pc->tokbuf );
369 }
370 --dnest;
371 } else if ( ch=='/' && anest==0 && dnest==0 && pt!=pc->tokbuf+1 ) {
372 /* A name token may be a value if it is the first thing */
373 /* otherwise it is the start of the next key */
374 ungetc(ch,pdf);
375 pt[-1] = '\0';
376 if ( pt>pc->tokbuf+1 && pt[-2]==' ' ) pt[-2] = '\0';
377 return( pc->tokbuf );
378 } else if ( ch=='%' || pdf_space(ch) ) {
379 pt[-1] = ' ';
380 ungetc(ch,pdf);
381 pdf_skipwhitespace(pc);
382 } else if ( ch==EOF ) {
383 pt[-1] = '\0';
384 return( pc->tokbuf );
385 }
386 ch = getc(pdf);
387 }
388 }
389
PSDictClear(struct psdict * dict)390 static void PSDictClear(struct psdict *dict) {
391 /* Clear all psdict keys[] and values[] */
392 int i;
393
394 for ( i=0; i<dict->next; ++i ) {
395 free(dict->keys[i]);
396 free(dict->values[i]);
397 }
398 dict->next = 0;
399 }
400
pdf_readdict(struct pdfcontext * pc)401 static int pdf_readdict(struct pdfcontext *pc) {
402 FILE *pdf = pc->compressed ? pc->compressed : pc->pdf;
403 char *key, *value;
404 int ch;
405
406 PSDictClear(&pc->pdfdict);
407
408 if ( pdf_skipwhitespace(pc) ) return( false );
409 ch = getc(pdf);
410 if ( ch!='<' || pdf_peekch(pdf)!='<' )
411 return( false );
412 getc(pdf); /* Eat the second '<' */
413
414 for (;;) {
415 key = copy(pdf_getname(pc));
416 if ( key==NULL ) {
417 if ( pc->compressed!=NULL ) { /* We've read the whole object*/
418 fclose(pc->compressed); /* so close the compressed */
419 pc->compressed = NULL; /* stream in which it lives */
420 }
421 return( true );
422 }
423 value = pdf_getdictvalue(pc);
424 if ( value==NULL || strcmp(value,"null")==0 )
425 free(key);
426 else {
427 if ( pc->pdfdict.next>=pc->pdfdict.cnt ) {
428 pc->pdfdict.keys = realloc(pc->pdfdict.keys,(pc->pdfdict.cnt+=20)*sizeof(char *));
429 pc->pdfdict.values = realloc(pc->pdfdict.values,pc->pdfdict.cnt*sizeof(char *));
430 }
431 pc->pdfdict.keys [pc->pdfdict.next] = key;
432 pc->pdfdict.values[pc->pdfdict.next] = copy(value);
433 ++pc->pdfdict.next;
434 }
435 }
436 }
437
pdf_skipobjectheader(struct pdfcontext * pc)438 static void pdf_skipobjectheader(struct pdfcontext *pc) {
439
440 fscanf( pc->pdf, "%*d %*d obj" );
441 }
442
static int hex_digit_value(int ch) {
/* Value (0..15) of one hexadecimal digit character, or -1 if ch is not one */
    if ( ch>='0' && ch<='9' )
        return( ch-'0' );
    if ( ch>='A' && ch<='F' )
        return( ch-'A'+10 );
    if ( ch>='a' && ch<='f' )
        return( ch-'a'+10 );
    return( -1 );
}

static int hex(int ch1, int ch2) {
/* Combine two hexadecimal digit characters (high digit first) into one */
/* byte value. Returns -1 if either character is not a hex digit.       */
/* NOTE: FIXME: parsepfa has an identical routine that can be merged.   */
    int high = hex_digit_value(ch1);
    int low = hex_digit_value(ch2);

    if ( high<0 || low<0 )
        return( -1 );
    return( (high<<4)|low );
}
459
static int pdf_getprotectedtok(FILE *stream, char *tokbuf) {
/* Read the next token from stream into tokbuf. Leading white space is     */
/* skipped; a token ends at white space, EOF or one of [ ] { } < >. Such a */
/* delimiter is returned as a one character token when it comes first,     */
/* otherwise it is pushed back. At most 98 characters are stored, extra    */
/* ones are dropped. Returns 1 if a token was stored, -1 if EOF was hit    */
/* before any character, else 0.                                           */
    char *out = tokbuf;
    char *limit = tokbuf+100-2;
    int c, stop;

    do {
        c = getc(stream);
    } while ( isspace(c) );

    for (;;) {
        stop = ( c<0 || isspace(c) );
        switch ( c ) {
          case '[': case ']': case '{': case '}': case '<': case '>':
            stop = 1;
          break;
          default:
          break;
        }
        if ( stop )
            break;
        if ( out<limit )
            *out++ = c;
        c = getc(stream);
    }

    if ( c>=0 ) {
        /* a real character stopped us: keep it if it is all we have */
        if ( out==tokbuf )
            *out++ = c;
        else
            ungetc(c,stream);
    }
    *out = '\0';

    if ( out!=tokbuf )
        return( 1 );
    return( c<0 ? -1 : 0 );
}
478
static int pdf_skip_brackets(FILE *stream, char *tokbuf) {
/* Expect "<token>" next in stream (after optional white space). The token */
/* is left in tokbuf. Returns non-zero only if the opening '<', a token    */
/* and the closing '>' were all found.                                     */
    int c, got;

    do {
        c = getc(stream);
    } while ( isspace(c) );
    /* first ch should be '<', else return 0 as not found */
    if ( c!='<' )
        return( 0 );

    got = pdf_getprotectedtok(stream, tokbuf);
    c = getc(stream);           /* always consumed, whatever 'got' says */
    if ( !got )
        return( 0 );
    return( c=='>' );
}
491
492 static FILE *pdf_defilterstream(struct pdfcontext *pc);
493 static int pdf_getinteger(char *pt,struct pdfcontext *pc);
494
pdf_findobject(struct pdfcontext * pc,int num)495 static int pdf_findobject(struct pdfcontext *pc, int num) {
496 int first_offset, n, i, o, offset, container;
497 FILE *data;
498 char *pt;
499
500 if ( pc->compressed!=NULL ) {
501 fclose( pc->compressed );
502 pc->compressed = NULL;
503 }
504 if ( num<0 || num>=pc->ocnt )
505 return( false );
506 if ( pc->subindex==NULL || pc->subindex[num]==-1 ) {
507 if ( pc->objs[num]==-1 )
508 return( false );
509 fseek(pc->pdf,pc->objs[num],SEEK_SET);
510 pdf_skipobjectheader(pc);
511 return( true );
512 } else {
513 container = pc->objs[num];
514 while ( container!=-1 ) {
515 if ( pc->subindex[container]!=-1 ) {
516 LogError(_("Compressed object container is itself a compressed object"));
517 return( false );
518 }
519 fseek(pc->pdf,pc->objs[container],SEEK_SET);
520 pdf_skipobjectheader(pc);
521 if ( !pdf_readdict(pc) )
522 return( false );
523 if ( (pt=PSDictHasEntry(&pc->pdfdict,"Type"))==NULL || strcmp(pt,"/ObjStm")!=0 )
524 return( false );
525 if ( (pt=PSDictHasEntry(&pc->pdfdict,"N"))==NULL )
526 return( false );
527 n = pdf_getinteger(pt,pc);
528 if ( (pt=PSDictHasEntry(&pc->pdfdict,"First"))==NULL )
529 return( false );
530 first_offset = pdf_getinteger(pt,pc);
531 container = -1;
532 if ( (pt=PSDictHasEntry(&pc->pdfdict,"Extends"))!=NULL )
533 container = strtol(pt,NULL,0);
534 data = pdf_defilterstream(pc);
535 if ( data==NULL )
536 return( false );
537 rewind(data);
538 for ( i=0; i<n; ++i ) {
539 fscanf( data, "%d %d", &o, &offset );
540 if ( o==num )
541 break;
542 }
543 if ( i<n ) {
544 fseek( data, first_offset+offset, SEEK_SET );
545 pc->compressed = data;
546 return( true );
547 }
548 fclose(data);
549 }
550 /* Not found in any extents */
551 return( false );
552 }
553 }
554
pdf_getdescendantfont(struct pdfcontext * pc,int num)555 static int pdf_getdescendantfont(struct pdfcontext *pc, int num) {
556 char *pt;
557 int nnum;
558
559 if ( pdf_findobject(pc,num) && pdf_readdict(pc) ) {
560 if ( (pt=PSDictHasEntry(&pc->pdfdict,"Type"))!=NULL && strcmp(pt,"/Font")==0 &&
561 PSDictHasEntry(&pc->pdfdict,"FontDescriptor")!=NULL &&
562 (pt=PSDictHasEntry(&pc->pdfdict,"BaseFont"))!=NULL ) {
563 return( num );
564 }
565 }
566 if ( (pt = pdf_getdictvalue(pc)) != NULL && sscanf(pt,"%d",&nnum) && nnum > 0 && nnum < pc->ocnt )
567 return( pdf_getdescendantfont(pc, nnum) );
568
569 return( -1 );
570 }
571
pdf_findfonts(struct pdfcontext * pc)572 static int pdf_findfonts(struct pdfcontext *pc) {
573 int i, j, k=0, dnum, cnum;
574 char *pt, *tpt, *cmap, *desc;
575
576 pc->fontobjs = malloc(pc->ocnt*sizeof(long));
577 pc->cmapobjs = malloc(pc->ocnt*sizeof(long));
578 pc->cmap_from_cid = calloc(pc->ocnt,sizeof(int));
579 memset(pc->cmapobjs,-1,sizeof(long));
580 pc->fontnames = malloc(pc->ocnt*sizeof(char *));
581 /* First look for CID-keyed fonts with a pointer to a ToUnicode CMap */
582 for ( i=1; i<pc->ocnt; ++i ) if ( pc->objs[i]!=-1 ) { /* Object 0 is always unused */
583 if ( pdf_findobject(pc,i) && pdf_readdict(pc) ) {
584 if ((pt=PSDictHasEntry(&pc->pdfdict,"Type"))!=NULL && strcmp(pt,"/Font")==0 &&
585 (pt=PSDictHasEntry(&pc->pdfdict,"Subtype"))!=NULL && strcmp(pt,"/Type0")==0 &&
586 (cmap=PSDictHasEntry(&pc->pdfdict,"ToUnicode"))!=NULL &&
587 (desc=PSDictHasEntry(&pc->pdfdict,"DescendantFonts"))!=NULL &&
588 (pt=PSDictHasEntry(&pc->pdfdict,"BaseFont"))!=NULL) {
589
590 if (*cmap == '[') cmap++;
591 if (*desc == '[') desc++;
592 sscanf(cmap, "%d", &cnum);
593 sscanf(desc, "%d", &dnum);
594 if ( *pt=='/' || *pt=='(' )
595 ++pt;
596 tpt = copy(pt);
597
598 dnum = pdf_getdescendantfont( pc,dnum );
599 if ( dnum > 0 ) {
600 pc->fontobjs[k] = dnum;
601 pc->cmapobjs[k] = cnum;
602 pc->fontnames[k] = tpt;
603 /* Store a flag indicating this particular CMap comes from a CID-keyed */
604 /* font. We can't determine this later just by examining sf->subfontcnt, */
605 /* as FF flattens TTF CID fonts at the time they are loaded, so that */
606 /* they no longer look as CID-keyed fonts */
607 pc->cmap_from_cid[k] = 1;
608 k++;
609 } else {
610 free(tpt);
611 }
612 }
613 }
614 }
615
616 /* List other fonts, skipping those detected at the first pass */
617 for ( i=1; i<pc->ocnt; ++i ) if ( pc->objs[i]!=-1 ) { /* Object 0 is always unused */
618 if ( pdf_findobject(pc,i) && pdf_readdict(pc) ) {
619 if ( (pt=PSDictHasEntry(&pc->pdfdict,"Type"))!=NULL && strcmp(pt,"/Font")==0 &&
620 (PSDictHasEntry(&pc->pdfdict,"FontDescriptor")!=NULL ||
621 ((pt=PSDictHasEntry(&pc->pdfdict,"Subtype"))!=NULL && strcmp(pt,"/Type3")==0)) &&
622 ((pt=PSDictHasEntry(&pc->pdfdict,"BaseFont"))!=NULL ||
623 /* Type3 fonts are named by "Name" rather than BaseFont */
624 (pt=PSDictHasEntry(&pc->pdfdict,"Name"))!=NULL) ) {
625
626 for (j=0; j<k && pc->fontobjs[j] != i; j++);
627 if (j < k )
628 continue;
629
630 if ((cmap=PSDictHasEntry(&pc->pdfdict,"ToUnicode"))!=NULL) {
631 if (*cmap == '[') cmap++;
632 sscanf(cmap, "%d", &cnum);
633 pc->cmapobjs[k] = cnum;
634 }
635 pc->fontobjs[k] = i;
636 if ( *pt=='/' || *pt=='(' )
637 ++pt;
638 pc->fontnames[k++] = tpt = copy(pt);
639 for ( pt=tpt; *pt; ++pt ) {
640 if ( *pt=='#' && ishexdigit(pt[1]) && ishexdigit(pt[2])) {
641 *tpt++ = hex(pt[1],pt[2]);
642 pt += 2;
643 } else
644 *tpt++ = *pt;
645 }
646 *tpt = '\0';
647 }
648 }
649 }
650 pc->fcnt = k;
651 return( k>0 );
652 }
653
pdf_getinteger(char * pt,struct pdfcontext * pc)654 static int pdf_getinteger(char *pt,struct pdfcontext *pc) {
655 int val,ret;
656 long here;
657 FILE *pdf;
658
659 if ( pt==NULL )
660 return( 0 );
661 val = strtol(pt,NULL,10);
662 if ( pt[strlen(pt)-1]!='R' )
663 return( val );
664 if ( val<0 || val>=pc->ocnt || pc->objs[val]==-1 )
665 return( 0 );
666 here = ftell(pc->pdf);
667 if ( here < 0 )
668 return( 0 );
669 if ( !pdf_findobject(pc,val))
670 return( 0 );
671 pdf = pc->compressed ? pc->compressed : pc->pdf;
672 ret = fscanf(pdf,"%d",&val);
673 if ( pc->compressed ) {
674 fclose(pc->compressed );
675 pc->compressed = NULL;
676 }
677 fseek(pc->pdf,here,SEEK_SET);
678 if ( ret!=1 )
679 return( 0 );
680 return( val );
681 }
682
pdf_addpages(struct pdfcontext * pc,int obj)683 static void pdf_addpages(struct pdfcontext *pc, int obj) {
684 /* Object is either a page or a page catalog */
685 char *pt, *end;
686
687 if ( pdf_findobject(pc,obj) && pdf_readdict(pc) ) {
688 if ( (pt=PSDictHasEntry(&pc->pdfdict,"Type"))!=NULL ) {
689 if ( strcmp(pt,"/Page")==0 ) {
690 pc->pages[pc->pcnt++] = obj;
691 } else if ( strcmp(pt,"/Pages")==0 ) {
692 if ( (pt=PSDictHasEntry(&pc->pdfdict,"Kids"))!=NULL ) {
693 char *kids = copy(pt);
694 for ( pt = kids; *pt!=']' && *pt!='\0' ; ) {
695 if ( *pt=='[' || isspace(*pt)) {
696 ++pt;
697 } else {
698 int o = strtol(pt,&end,10);
699 int r;
700 r = strtol(end,&end,10);
701 if ( pt==end )
702 return;
703 pt = end;
704 while ( isspace( *pt )) ++pt;
705 if ( *pt=='R' )
706 ++pt;
707 pdf_addpages(pc,o);
708 }
709 }
710 free(kids);
711 }
712 }
713 }
714 }
715 }
716
pdf_findpages(struct pdfcontext * pc)717 static int pdf_findpages(struct pdfcontext *pc) {
718 FILE *pdf = pc->pdf;
719 long top_ref;
720 /* I could just find all the Page objects, but they would not be in order then */
721
722 if ( pc->root==0 ) return( 0 );
723
724 if ( fseek(pdf,pc->objs[pc->root],SEEK_SET)==0 || \
725 !findkeyword(pdf,"/Pages",">>") || \
726 fscanf(pdf,"%ld",&top_ref)!=1 )
727 return( 0 );
728
729 if ( (pc->pages = malloc(pc->ocnt*sizeof(long)))==NULL ) {
730 NoMoreMemMessage();
731 return( 0 );
732 }
733 pdf_addpages(pc,top_ref);
734 return( pc->pcnt );
735 }
736
737 /* ************************************************************************** */
738 /* *********************** Simplistic filter decoders *********************** */
739 /* ************************************************************************** */
static void pdf_hexfilter(FILE *to,FILE *from) {
/* ASCIIHexDecode: read pairs of hex digits from 'from' (rewound first) and */
/* write the bytes they encode to 'to'. Characters that are not hex digits  */
/* are skipped. Decoding stops at the '>' end-of-data marker or at EOF; if  */
/* that happens after an odd number of digits the last digit is treated as  */
/* if a '0' followed it.                                                    */
    int ch1, ch2;

    rewind(from);
    for (;;) {
        /* high digit */
        while ( (ch1=getc(from))!=EOF && ch1!='>' && !ishexdigit(ch1));
        if ( ch1==EOF || ch1=='>' )
            break;
        /* low digit */
        while ( (ch2=getc(from))!=EOF && ch2!='>' && !ishexdigit(ch2));
        if ( ch2==EOF || ch2=='>' ) {
            putc(hex(ch1,'0'),to);
            break;
        }
        putc(hex(ch1,ch2),to);
    }
}
752
static void pdf_85filter(FILE *to,FILE *from) {
/* ASCII85Decode: read base-85 text from 'from' (rewound first) and write   */
/* the decoded bytes to 'to'. White space is ignored, 'z' stands for four   */
/* zero bytes, and decoding stops at '~' or EOF. A final group of k (2..4)  */
/* characters yields k-1 bytes.                                             */
    int digit[5];
    int ch, have, i;
    unsigned int val;

    rewind(from);
    for (;;) {
        while ( isspace(ch=getc(from)));
        if ( ch==EOF || ch=='~' )
            break;
        if ( ch=='z' ) {
            putc(0,to);
            putc(0,to);
            putc(0,to);
            putc(0,to);
            continue;
        }
        digit[0] = ch;
        for ( have=1; have<5; ++have ) {
            while ( isspace(ch=getc(from)));
            if ( ch==EOF || ch=='~' )
                break;
            digit[have] = ch;
        }
        if ( have==1 )
            break;              /* a lone trailing character encodes no byte */
        /* A short final group must be padded with the largest digit ('u'); */
        /* padding with '!' can make the last bytes come out one too small  */
        for ( i=have; i<5; ++i )
            digit[i] = 'u';
        /* unsigned arithmetic: five digits can exceed INT_MAX */
        val = 0;
        for ( i=0; i<5; ++i )
            val = val*85 + (unsigned int) (digit[i]-'!');
        putc((val>>24)&0xff,to);
        if ( have>2 )
            putc((val>>16)&0xff,to);
        if ( have>3 )
            putc((val>>8)&0xff,to);
        if ( have>4 )
            putc(val&0xff,to);
        if ( have!=5 )
            break;
    }
}
797
798
799 # include <zlib.h>
800
801 #define Z_CHUNK 65536
802 /* Copied with few mods from the zlib howto */
pdf_zfilter(FILE * to,FILE * from)803 static int pdf_zfilter(FILE *to,FILE *from) {
804 char *in;
805 char *out;
806 z_stream strm;
807 int ret;
808
809 /* Initialize */
810 rewind(from);
811 memset(&strm,0,sizeof(strm));
812 strm.zalloc = Z_NULL;
813 strm.zfree = Z_NULL;
814 strm.opaque = Z_NULL;
815 strm.avail_in = 0;
816 strm.next_in = Z_NULL;
817 ret = inflateInit(&strm);
818 if (ret != Z_OK) {
819 LogError( _("Flate decompression failed.\n") );
820 return ret;
821 }
822 in = malloc(Z_CHUNK); out = malloc(Z_CHUNK);
823
824 do {
825 strm.avail_in = fread(in,1,Z_CHUNK,from);
826 if ( strm.avail_in==0 )
827 break;
828 strm.next_in = (uint8 *) in;
829 do {
830 strm.avail_out = Z_CHUNK;
831 strm.next_out = (uint8 *) out;
832 ret = inflate(&strm, Z_NO_FLUSH);
833 if ( ret==Z_NEED_DICT || ret==Z_DATA_ERROR || ret==Z_MEM_ERROR ) {
834 (void)inflateEnd(&strm);
835 LogError( _("Flate decompression failed.\n") );
836 return ret;
837 }
838 fwrite(out,1,Z_CHUNK-strm.avail_out,to);
839 } while ( strm.avail_out == 0 );
840 } while ( ret != Z_STREAM_END );
841 (void)inflateEnd(&strm);
842 free(in); free(out);
843 return( ret == Z_STREAM_END ? Z_OK : Z_DATA_ERROR );
844 }
845
static void pdf_rlefilter(FILE *to,FILE *from) {
/* RunLengthDecode: expand the run-length data in 'from' (rewound first)  */
/* into 'to'. A length byte of 0..127 is followed by that many plus one   */
/* literal bytes; 129..255 repeats the next byte 257-length times; 128 or */
/* EOF ends the data.                                                     */
    int len, byte, remaining;

    rewind(from);
    for (;;) {
        len = getc(from);
        if ( len==EOF || len==0x80 )    /* 0x80 => EOD */
            return;
        if ( len<0x80 ) {
            /* literal run of len+1 bytes */
            for ( remaining=len+1; remaining>0; --remaining ) {
                byte = getc(from);
                if ( byte!=EOF )
                    putc(byte,to);
            }
        } else {
            /* one byte repeated 257-len times */
            byte = getc(from);
            if ( byte==EOF )
                continue;
            for ( remaining=257-len; remaining>0; --remaining )
                putc(byte,to);
        }
    }
}
865
866 /* Filters I shall support: ASCIIHexDecode ASCII85Decode FlateDecode RunLengthDecode */
pdf_defilterstream(struct pdfcontext * pc)867 static FILE *pdf_defilterstream(struct pdfcontext *pc) {
868 /* First copy the stream data into a file. This isn't efficient, but */
869 /* we can live with that */
870 /* Then apply each de-filter sequentially reading from one file, writing */
871 /* to another */
872 FILE *res, *old, *pdf = pc->pdf;
873 int i,length,ch;
874 char *pt, *end, *ptDecodeParms;
875
876 if ( pc->compressed!=NULL ) {
877 LogError( _("A pdf stream object may not be a compressed object"));
878 return( NULL );
879 }
880 if ( (pt=PSDictHasEntry(&pc->pdfdict,"Length"))==NULL ) {
881 LogError( _("A pdf stream object is missing a Length attribute"));
882 return( NULL );
883 }
884 length = pdf_getinteger(pt,pc);
885
886 while ( (ch=getc(pdf))!=EOF && ch!='m' ); /* Skip over >>\nstream */
887 if ( (ch=getc(pdf))=='\r' ) ch = getc(pdf); /* Skip the newline */
888
889 res = GFileTmpfile();
890 for ( i=0; i<length; ++i ) {
891 if ( (ch=getc(pdf))!=EOF )
892 putc(ch,res);
893 }
894 rewind(res);
895
896 if ( (pt=PSDictHasEntry(&pc->pdfdict,"Filter"))==NULL )
897 return( res );
898 ptDecodeParms = PSDictHasEntry(&pc->pdfdict,"DecodeParms");
899 while ( *pt==' ' || *pt=='[' || *pt==']' || *pt=='/' ) ++pt; /* Yes, I saw a null array once */
900 while ( *pt!='\0' ) {
901 for ( end=pt; isalnum(*end); ++end );
902 ch = *end; *end = '\0';
903 old = res;
904 res = GFileTmpfile();
905 if ( strmatch("ASCIIHexDecode",pt)==0 ) {
906 pdf_hexfilter(res,old);
907 pt += strlen("ASCIIHexDecode");
908 } else if ( strmatch("ASCII85Decode",pt)==0 ) {
909 pdf_85filter(res,old);
910 pt += strlen("ASCII85Decode");
911 } else if ( strmatch("FlateDecode",pt)==0) {
912 if ( ptDecodeParms!=NULL ) {
913 LogError( _("Unsupported decode filter parameters : %s"), ptDecodeParms );
914 fclose(old); fclose(res);
915 return( NULL );
916 }
917 pdf_zfilter(res,old);
918 pt += strlen("FlateDecode");
919 } else if ( strmatch("RunLengthDecode",pt)==0 ) {
920 pdf_rlefilter(res,old);
921 pt += strlen("RunLengthDecode");
922 } else {
923 LogError( _("Unsupported filter: %s"), pt );
924 fclose(old); fclose(res);
925 return( NULL );
926 }
927 *end = ch;
928 pt = end;
929 while ( *pt==' ' || *pt==']' || *pt=='/' ) ++pt;
930 fclose(old);
931 }
932 return( res );
933 }
934 /* ************************************************************************** */
935 /* ****************************** End filters ******************************* */
936 /* ************************************************************************** */
937
938 /* ************************************************************************** */
939 /* ****************************** xref streams ****************************** */
940 /* ************************************************************************** */
static int getuvalue(FILE *f, int len, long *val) {
/* Get a big endian binary value of 'len' bytes from file into *val.       */
/* Return 0 if okay, and 1 if EOF or a read error cut the value short (in  */
/* which case *val holds the bytes read so far). A len of 0 or less reads  */
/* nothing and yields 0.                                                   */
    unsigned long acc = 0;
    int ch;

    *val = 0;
    while ( --len>=0 ) {
        if ( (ch=getc(f))<0 ) return( 1 );
        /* shift as unsigned: shifting bits out of a signed long is undefined */
        acc = (acc<<8) | (unsigned long) ch;
        *val = (long) acc;
    }
    return( 0 );
}
952
FindObjectsFromXREFObject(struct pdfcontext * pc,long prev_xref)953 static long *FindObjectsFromXREFObject(struct pdfcontext *pc, long prev_xref) {
954 char *pt;
955 long *ret, *ret_old, *sub_old;
956 int *gen, *gen_old;
957 long cnt = 0, i, start, num;
958 int bar;
959 int typewidth, offwidth, genwidth;
960 long type, offset, gennum;
961 FILE *xref_stream, *pdf = pc->pdf;
962
963 while ( prev_xref!=-1 ) {
964 if ( fseek(pdf,prev_xref,SEEK_SET)!=0 ) return( NULL );
965 pdf_skipobjectheader(pc);
966 if ( !pdf_readdict(pc))
967 return( NULL );
968 if ( (pt=PSDictHasEntry(&pc->pdfdict,"Type"))==NULL || strcmp(pt,"/XRef")!=0 )
969 return( NULL );
970 if ( (pt=PSDictHasEntry(&pc->pdfdict,"Size"))==NULL )
971 return( NULL );
972 else {
973 start = 0;
974 num = pdf_getinteger(pt,pc);
975 }
976 if ( (pt=PSDictHasEntry(&pc->pdfdict,"Index"))!=NULL ) {
977 if ( sscanf(pt,"[%ld %ld]",&start,&num)!=2 )
978 return( NULL );
979 }
980 if ( (pt=PSDictHasEntry(&pc->pdfdict,"W"))==NULL )
981 return( NULL );
982 else {
983 if ( sscanf(pt,"[%d %d %d]",&typewidth,&offwidth,&genwidth )!=3 )
984 return( NULL );
985 }
986 if ( (pt=PSDictHasEntry(&pc->pdfdict,"Encrypt"))!=NULL ) {
987 if ( sscanf( pt, "%d %d", &pc->enc_dict, &bar )==2 )
988 pc->encrypted = true;
989 }
990 if ( pc->root == 0 && (pt=PSDictHasEntry(&pc->pdfdict,"Root"))!=NULL ) {
991 fscanf( pdf, "%d %d", &pc->root, &bar );
992 }
993 prev_xref = -1;
994 if ( (pt=PSDictHasEntry(&pc->pdfdict,"Prev"))!=NULL ) {
995 prev_xref = strtol(pt,NULL,0);
996 }
997 /* I ignore Info */
998
999 cnt=0; ret=NULL; gen=NULL; /* no objects to return yet */
1000 if ( start+num>cnt ) {
1001 /* increase memory needed for objects. Mark last location = -2 */
1002 ret_old=ret; gen_old=gen; sub_old=pc->subindex;
1003 pc->ocnt=(int)(start+num);
1004 ret = realloc(ret,(start+num+1)*sizeof(long));
1005 pc->subindex = realloc(pc->subindex,(start+num+1)*sizeof(long));
1006 gen = realloc(gen,(start+num)*sizeof(int));
1007 if ( ret==NULL || gen==NULL || pc->subindex==NULL || pc->ocnt!=start+num ) {
1008 if ( ret==NULL ) ret=ret_old;
1009 if ( pc->subindex==NULL ) pc->subindex=sub_old;
1010 if ( gen==NULL ) gen=gen_old;
1011 NoMoreMemMessage();
1012 goto FindObjectsFromXREFObjectError_ReleaseMemAndExit;
1013 }
1014 memset(ret+cnt,-1,sizeof(long)*(start+num-cnt));
1015 memset(pc->subindex+cnt,-1,sizeof(long)*(start+num-cnt));
1016 memset(gen+cnt,-1,sizeof(int)*(start+num-cnt));
1017 cnt = start+num;
1018 ret[cnt] = -2;
1019 }
1020 /* Now gather the cross references from their stream */
1021 if ( (xref_stream=pdf_defilterstream(pc))==NULL )
1022 goto FindObjectsFromXREFObjectError_ReleaseMemAndExit;
1023 if ( fseek(xref_stream,0,SEEK_SET)!=0 ) {
1024 fclose(xref_stream); /* failed to rewind(xref_stream) */
1025 goto FindObjectsFromXREFObjectError_ReleaseMemAndExit;
1026 }
1027 for ( i=start; i<start+num; ++i ) {
1028 if ( getuvalue(xref_stream,typewidth,&type) || \
1029 getuvalue(xref_stream,offwidth,&offset) || \
1030 getuvalue(xref_stream,genwidth,&gennum) ) {
1031 fclose(xref_stream);
1032 goto FindObjectsFromXREFObjectError_ReleaseMemAndExit;
1033 }
1034 if ( type==0 ) {
1035 if ( gennum > gen[i] ) {
1036 ret[i] = -1;
1037 gen[i] = gennum;
1038 }
1039 } else if ( type==1 ) {
1040 if ( gennum > gen[i] ) {
1041 ret[i] = offset;
1042 gen[i] = gennum;
1043 }
1044 } else if ( type==2 ) {
1045 if ( 0 > gen[i] ) {
1046 ret[i] = offset; /* containing object # */
1047 pc->subindex[i] = gennum;
1048 gen[i] = 0;
1049 }
1050 }
1051 }
1052 fclose(xref_stream);
1053 }
1054 free( gen );
1055 return( ret );
1056
1057 FindObjectsFromXREFObjectError_ReleaseMemAndExit:
1058 /* error occurred, therefore release objects and return with NULL */
1059 free(ret); free(pc->subindex); pc->subindex=NULL; free(gen);
1060 pc->ocnt = 0;
1061 return( NULL );
1062 }
1063 /* ************************************************************************** */
1064 /* **************************** End xref streams **************************** */
1065 /* ************************************************************************** */
1066
1067 /* ************************************************************************** */
1068 /* *********************** pdf graphics interpreter ************************* */
1069 /* ************************************************************************** */
1070
1071 /* Stolen from the PS interpreter and then modified beyond all recognition */
/* Token codes produced by nextpdftoken(). Apart from pt_eof, the operator   */
/* codes are also indices into toknames[] (the tokenizer returns the index   */
/* of the matching spelling), so the two lists have to stay in step.         */
enum pstoks {
    pt_eof = -1,

    /* path construction */
    pt_moveto,
    pt_lineto,
    pt_curveto,
    pt_vcurveto,
    pt_ycurveto,
    pt_closepath,
    pt_rect,

    /* graphics state */
    pt_gsave,
    pt_grestore,
    pt_concat,
    pt_setlinewidth,
    pt_setlinecap,
    pt_setlinejoin,
    pt_setmiterlimit,
    pt_setdash,

    /* path painting */
    pt_stroke,
    pt_closestroke,
    pt_fillnz,
    pt_filleo,
    pt_fillstrokenz,
    pt_fillstrokeeo,
    pt_closefillstrokenz,
    pt_closefillstrokeeo,
    pt_paintnoop,

    /* type3 glyph metrics */
    pt_setcachedevice,
    pt_setcharwidth,

    /* colour */
    pt_strokecolor,
    pt_fillcolor,
    pt_setgreystroke,
    pt_setgreyfill,
    pt_setrgbstroke,
    pt_setrgbfill,

    /* boolean literals */
    pt_true,
    pt_false,

    /* lexical tokens which are not operators */
    pt_opencurly,
    pt_closecurly,
    pt_openarray,
    pt_closearray,
    pt_string,
    pt_number,
    pt_unknown,
    pt_namelit
};
1087
/* Spelling of every operator, indexed by enum pstoks: nextpdftoken() returns */
/* the index of the entry that matches, so entry N must be the operator for   */
/* the enumerator whose value is N.                                           */
/* In PDF "J" sets the line cap and "j" sets the line join; they therefore    */
/* sit at the positions of pt_setlinecap and pt_setlinejoin respectively.     */
/* The trailing "=" and "==" entries have no enumerator of their own; the     */
/* interpreter's switch treats their indices like any other unhandled token.  */
static char *toknames[] = {
    /* path construction */
    "m", "l", "c", "v",
    "y", "h", "re",
    /* graphics state: gsave, grestore, concat, width, cap, join, miter, dash */
    "q", "Q", "cm", "w", "J", "j",
    "M", "d",
    /* path painting */
    "S", "s", "f" /* "F" is an alternate form for "f"*/, "f*", "B",
    "B*", "b", "b*", "n",
    /* type3 glyph metrics */
    "d1", "d0",
    /* colour */
    "SC", "sc", "G", "g",
    "RG", "rg",

    "true", "false",

    "opencurly", "closecurly", "openarray", "closearray", "string",
    "number", "unknown", "namelit", "=", "==",
    NULL
};
1103
/* Read the next token of a content stream from file.                        */
/*  file    stream to read from; one character of push-back is used          */
/*  val     receives the value when pt_number is returned                    */
/*  tokbuf  receives the text of the token (truncated to fit, always         */
/*          NUL terminated)                                                  */
/*  tbsize  size of tokbuf; must be at least 2 because the first character   */
/*          and a terminator are stored unconditionally                      */
/* Returns pt_eof at end of input, pt_string for (...) <...> and <~...~>     */
/* strings (tokbuf keeps both delimiters so that callers can strip the first */
/* and the last character), pt_namelit for /name (tokbuf holds the name      */
/* without the slash), pt_number for numbers, one of the bracket tokens, the */
/* index into toknames[] of a recognised operator, or pt_unknown.            */
static int nextpdftoken(FILE *file, real *val, char *tokbuf, int tbsize) {
    int ch, i;
    char *pt, *end;

    /* Eat whitespace and comments. Comments last to eol */
    while ( 1 ) {
        while ( isspace(ch = getc(file)) );
        if ( ch!='%' )
            break;
        while ( (ch=getc(file))!=EOF && ch!='\r' && ch!='\n' );
    }

    if ( ch==EOF )
        return( pt_eof );

    pt = tokbuf;
    end = pt+tbsize-1;
    *pt++ = ch; *pt='\0';

    if ( ch=='(' ) {
        /* Literal string: parentheses nest, backslash protects the next char */
        int nest=1, quote=0;
        while ( (ch=getc(file))!=EOF ) {
            if ( pt<end ) *pt++ = ch;
            if ( quote )
                quote=0;
            else if ( ch=='(' )
                ++nest;
            else if ( ch==')' ) {
                if ( --nest==0 )
                    break;
            } else if ( ch=='\\' )
                quote = 1;
        }
        *pt='\0';
        return( pt_string );
    } else if ( ch=='<' ) {
        ch = getc(file);
        if ( ch==EOF )          /* a lone '<' at the very end is no string */
            return( pt_unknown );
        if ( pt<end ) *pt++ = ch;
        if ( ch=='>' )
            /* Done */;
        else if ( ch!='~' ) {
            /* Hex string, runs to the next '>' */
            while ( (ch=getc(file))!=EOF && ch!='>' )
                if ( pt<end ) *pt++ = ch;
            /* Keep the closing delimiter, just as the other string forms  */
            /*  do, so that stripping first and last char yields the data  */
            if ( ch=='>' && pt<end ) *pt++ = ch;
        } else {
            /* <~ ... ~> string */
            int twiddle=0;
            while ( (ch=getc(file))!=EOF ) {
                if ( pt<end ) *pt++ = ch;
                if ( ch=='~' ) twiddle = 1;
                else if ( twiddle && ch=='>' )
                    break;
                else twiddle = 0;
            }
        }
        *pt='\0';
        return( pt_string );
    } else if ( ch==')' || ch=='>' || ch=='[' || ch==']' || ch=='{' || ch=='}' ) {
        if ( ch=='{' )
            return( pt_opencurly );
        else if ( ch=='}' )
            return( pt_closecurly );
        if ( ch=='[' )
            return( pt_openarray );
        else if ( ch==']' )
            return( pt_closearray );

        return( pt_unknown );       /* single character token */
    } else if ( ch=='/' ) {
        /* Restart at the beginning of the buffer: the slash is not kept */
        pt = tokbuf;
        while ( (ch=getc(file))!=EOF && !isspace(ch) && ch!='%' &&
                ch!='(' && ch!=')' && ch!='<' && ch!='>' && ch!='[' && ch!=']' &&
                ch!='{' && ch!='}' && ch!='/' )
            if ( pt<tokbuf+tbsize-2 )
                *pt++ = ch;
        *pt = '\0';
        ungetc(ch,file);            /* the delimiter starts the next token */
        return( pt_namelit );       /* name literal */
    } else {
        long num;
        char *numend;

        while ( (ch=getc(file))!=EOF && !isspace(ch) && ch!='%' &&
                ch!='(' && ch!=')' && ch!='<' && ch!='>' && ch!='[' && ch!=']' &&
                ch!='{' && ch!='}' && ch!='/' ) {
            if ( pt<tokbuf+tbsize-2 )
                *pt++ = ch;
        }
        *pt = '\0';
        ungetc(ch,file);            /* the delimiter starts the next token */
        num = strtol(tokbuf,&numend,10);
        if ( *numend=='\0' ) {      /* It's a normal integer */
            *val = num;
            return( pt_number );
        } else if ( *numend=='#' ) {
            /* radix#digits. strtol is only defined for bases 2 to 36 */
            if ( num>=2 && num<=36 ) {
                char *radend;
                long radval = strtol(numend+1,&radend,(int) num);
                if ( *radend=='\0' ) {  /* It's a radix integer */
                    *val = radval;
                    return( pt_number );
                }
            }
        } else {
            *val = strtod(tokbuf,&numend);
            if ( !isfinite(*val) ) {
/* GT: NaN is a concept in IEEE floating point which means "Not a Number" */
/* GT: it is used to represent errors like 0/0 or sqrt(-1). */
                LogError( _("Bad number, infinity or nan: %s\n"), tokbuf );
                *val = 0;
            }
            if ( *numend=='\0' )    /* It's a real */
                return( pt_number );
        }
        /* It's not a number */
        for ( i=0; toknames[i]!=NULL; ++i )
            if ( strcmp(tokbuf,toknames[i])==0 )
                return( i );

        return( pt_unknown );
    }
}
1219
/* Map *from through the affine matrix trans and store the result in *to.   */
/* With trans = [a b c d e f] this computes x' = a*x + c*y + e and          */
/* y' = b*x + d*y + f. The x result is written before y is computed, so     */
/* to and from are expected to be distinct objects.                         */
static void Transform(BasePoint *to, BasePoint *from, real trans[6]) {
    const real a = trans[0], b = trans[1];
    const real c = trans[2], d = trans[3];
    const real e = trans[4], f = trans[5];

    to->x = a*from->x + c*from->y + e;
    to->y = b*from->x + d*from->y + f;
}
1224
EntityCreate(SplinePointList * head,int linecap,int linejoin,real linewidth,real * transform)1225 static Entity *EntityCreate(SplinePointList *head,int linecap,int linejoin,
1226 real linewidth, real *transform) {
1227 Entity *ent = calloc(1,sizeof(Entity));
1228 ent->type = et_splines;
1229 ent->u.splines.splines = head;
1230 ent->u.splines.cap = linecap;
1231 ent->u.splines.join = linejoin;
1232 ent->u.splines.stroke_width = linewidth;
1233 ent->u.splines.miterlimit = 10.0; // PostScript Spec Default
1234 ent->u.splines.fill.col = 0xffffffff;
1235 ent->u.splines.stroke.col = 0xffffffff;
1236 ent->u.splines.fill.opacity = 1.0;
1237 ent->u.splines.stroke.opacity = 1.0;
1238 memcpy(ent->u.splines.transform,transform,6*sizeof(real));
1239 return( ent );
1240 }
1241
/* Run SPLCategorizePoints over the contours of every spline entity of ec; */
/* entities of any other type are skipped.                                  */
static void ECCategorizePoints( EntityChar *ec ) {
    Entity *layer = ec->splines;

    while ( layer!=NULL ) {
        if ( layer->type==et_splines )
            SPLCategorizePoints( layer->u.splines.splines );
        layer = layer->next;
    }
}
1249
dictfree(struct pskeydict * dict)1250 static void dictfree(struct pskeydict *dict) {
1251 int i;
1252
1253 for ( i=0; i<dict->cnt; ++i ) {
1254 if ( dict->entries[i].type==ps_string || dict->entries[i].type==ps_instr ||
1255 dict->entries[i].type==ps_lit )
1256 free(dict->entries[i].u.str);
1257 else if ( dict->entries[i].type==ps_array || dict->entries[i].type==ps_dict )
1258 dictfree(&dict->entries[i].u.dict);
1259 }
1260 }
1261
freestuff(struct psstack * stack,int sp)1262 static void freestuff(struct psstack *stack, int sp) {
1263 int i;
1264
1265 for ( i=0; i<sp; ++i ) {
1266 if ( stack[i].type==ps_string || stack[i].type==ps_instr ||
1267 stack[i].type==ps_lit )
1268 free(stack[i].u.str);
1269 else if ( stack[i].type==ps_array || stack[i].type==ps_dict )
1270 dictfree(&stack[i].u.dict);
1271 }
1272 }
1273
/* Interpret the content stream in and collect its graphics in ec.           */
/*  in  stream holding the (already decoded) page or glyph description       */
/*  pc  pdf context; not consulted by the interpreter itself                 */
/*  ec  receives the result: every painting operator turns the path built so */
/*      far into an Entity which is put at the front of ec->splines, and the */
/*      d0/d1 operators set ec->width (d1 also ec->vwidth)                   */
/* Operands are collected on a fixed 100 slot stack, q/Q save and restore    */
/* up to 30 graphics states. Operators which are not handled simply clear    */
/* the operand stack. Numbers are parsed in the C locale; the previous       */
/* locale is restored before returning.                                      */
static void _InterpretPdf(FILE *in, struct pdfcontext *pc, EntityChar *ec) {
    SplinePointList *cur=NULL, *head=NULL;
    BasePoint current;
    int tok, i, j;
    struct psstack stack[100];
    real dval;
    int sp=0;
    SplinePoint *pt;
    real transform[6], t[6];
    struct graphicsstate {
        real transform[6];
        BasePoint current;
        real linewidth;
        int linecap, linejoin;
        Color fore_stroke, fore_fill;
        DashType dashes[DASH_MAX];
    } gsaves[30];
    int gsp = 0;
    Color fore_stroke=COLOR_INHERITED, fore_fill=COLOR_INHERITED;
    int linecap=lc_inherited, linejoin=lj_inherited; real linewidth=WIDTH_INHERITED;
    DashType dashes[DASH_MAX];
    int dash_offset = 0;
    Entity *ent;
    char tokbuf[100];
    const int tokbufsize = 100;

    locale_t tmplocale; locale_t oldlocale; // Declare temporary locale storage.
    switch_to_c_locale(&tmplocale, &oldlocale); // Switch to the C locale temporarily and cache the old locale.

    /* Start from the identity matrix, at the origin, with no dash pattern */
    transform[0] = transform[3] = 1.0;
    transform[1] = transform[2] = transform[4] = transform[5] = 0;
    current.x = current.y = 0;
    dashes[0] = 0; dashes[1] = DASH_INHERITED;

    while ( (tok = nextpdftoken(in,&dval,tokbuf,tokbufsize))!=pt_eof ) {
        switch ( tok ) {
          /* Operands are pushed; when the stack is full they are dropped */
          case pt_number:
            if ( sp<sizeof(stack)/sizeof(stack[0]) ) {
                stack[sp].type = ps_num;
                stack[sp++].u.val = dval;
            }
            break;
          case pt_string:
            if ( sp<sizeof(stack)/sizeof(stack[0]) ) {
                /* Drop the two delimiters. An unterminated string may be */
                /*  a single character long, never ask for a negative len */
                long len = strlen(tokbuf);
                stack[sp].type = ps_string;
                stack[sp++].u.str = copyn(tokbuf+1,len>=2 ? len-2 : 0);
            }
            break;
          case pt_namelit:
            if ( sp<sizeof(stack)/sizeof(stack[0]) ) {
                stack[sp].type = ps_lit;
                stack[sp++].u.str = copy(tokbuf);
            }
            break;
          case pt_true: case pt_false:
            if ( sp<sizeof(stack)/sizeof(stack[0]) ) {
                stack[sp].type = ps_bool;
                stack[sp++].u.tf = tok==pt_true;
            }
            break;
          case pt_openarray:
            if ( sp<sizeof(stack)/sizeof(stack[0]) ) {
                stack[sp++].type = ps_mark;
            }
            break;
          case pt_closearray:
            /* i becomes the number of operands above the nearest mark */
            for ( i=0; i<sp; ++i )
                if ( stack[sp-1-i].type==ps_mark )
                    break;
            if ( i==sp )
                LogError( _("No mark in ] (close array)\n") );
            else {
                struct pskeydict dict;
                dict.cnt = dict.max = i;
                dict.entries = calloc(i,sizeof(struct pskeyval));
                dict.is_executable = false;
                for ( j=0; j<i; ++j ) {
                    dict.entries[j].type = stack[sp-i+j].type;
                    dict.entries[j].u = stack[sp-i+j].u;
                    /* don't need to copy because the things on the stack */
                    /* are being popped (don't need to free either) */
                }
                /* The array takes the place of the mark */
                sp = sp-i;
                stack[sp-1].type = ps_array;
                stack[sp-1].u.dict = dict;
            }
            break;
          case pt_setcachedevice:
            if ( sp>=6 ) {
                ec->width = stack[sp-6].u.val;
                ec->vwidth = stack[sp-5].u.val;
                /* I don't care about the bounding box */
                sp-=6;
            }
            break;
          case pt_setcharwidth:
            if ( sp>=2 )
                ec->width = stack[sp-=2].u.val;
            break;
          case pt_concat:
            /* NOTE(review): only a six element array operand is accepted */
            /*  here; PDF's cm is normally written with six bare numbers, */
            /*  which this handler leaves untouched on the stack - confirm */
            if ( sp>=1 ) {
                if ( stack[sp-1].type==ps_array ) {
                    if ( stack[sp-1].u.dict.cnt==6 && stack[sp-1].u.dict.entries[0].type==ps_num ) {
                        --sp;
                        t[5] = stack[sp].u.dict.entries[5].u.val;
                        t[4] = stack[sp].u.dict.entries[4].u.val;
                        t[3] = stack[sp].u.dict.entries[3].u.val;
                        t[2] = stack[sp].u.dict.entries[2].u.val;
                        t[1] = stack[sp].u.dict.entries[1].u.val;
                        t[0] = stack[sp].u.dict.entries[0].u.val;
                        dictfree(&stack[sp].u.dict);
                        MatMultiply(t,transform,transform);
                    }
                }
            }
            break;
          case pt_setmiterlimit:
            /* don't interpret, just ignore (but release the operands) */
            freestuff(stack,sp);
            sp = 0;
            break;
          case pt_setlinecap:
            if ( sp>=1 )
                linecap = stack[--sp].u.val;
            break;
          case pt_setlinejoin:
            if ( sp>=1 )
                linejoin = stack[--sp].u.val;
            break;
          case pt_setlinewidth:
            if ( sp>=1 )
                linewidth = stack[--sp].u.val;
            break;
          case pt_setdash:
            if ( sp>=2 && stack[sp-1].type==ps_num && stack[sp-2].type==ps_array ) {
                sp -= 2;
                dash_offset = stack[sp+1].u.val;
                for ( i=0; i<DASH_MAX && i<stack[sp].u.dict.cnt; ++i )
                    dashes[i] = stack[sp].u.dict.entries[i].u.val;
                dictfree(&stack[sp].u.dict);
            }
            break;
          case pt_setgreystroke:
            if ( sp>=1 ) {
                /* Replicate the grey level into all three colour bytes */
                fore_stroke = stack[--sp].u.val*255;
                fore_stroke *= 0x010101;
            }
            break;
          case pt_setgreyfill:
            if ( sp>=1 ) {
                fore_fill = stack[--sp].u.val*255;
                fore_fill *= 0x010101;
            }
            break;
          case pt_setrgbstroke:
            if ( sp>=3 ) {
                fore_stroke = (((int) (stack[sp-3].u.val*255))<<16) +
                        (((int) (stack[sp-2].u.val*255))<<8) +
                        (int) (stack[sp-1].u.val*255);
                sp -= 3;
            }
            break;
          case pt_setrgbfill:
            if ( sp>=3 ) {
                fore_fill = (((int) (stack[sp-3].u.val*255))<<16) +
                        (((int) (stack[sp-2].u.val*255))<<8) +
                        (int) (stack[sp-1].u.val*255);
                sp -= 3;
            }
            break;
          case pt_lineto:
          case pt_moveto:
            if ( sp>=2 ) {
                current.x = stack[sp-2].u.val;
                current.y = stack[sp-1].u.val;
                sp -= 2;
                pt = chunkalloc(sizeof(SplinePoint));
                Transform(&pt->me,&current,transform);
                pt->noprevcp = true; pt->nonextcp = true;
                if ( tok==pt_moveto ) {
                    /* Start a new contour and append it to the path */
                    SplinePointList *spl = chunkalloc(sizeof(SplinePointList));
                    spl->first = spl->last = pt;
                    if ( cur!=NULL )
                        cur->next = spl;
                    else
                        head = spl;
                    cur = spl;
                } else {
                    /* Only extend a contour which is still open */
                    if ( cur!=NULL && cur->first!=NULL && (cur->first!=cur->last || cur->first->next==NULL) ) {
                        SplineMake3(cur->last,pt);
                        cur->last = pt;
                    } else
                        SplinePointFree(pt);    /* nothing to attach it to */
                }
            } else {
                freestuff(stack,sp);
                sp = 0;
            }
            break;
          case pt_curveto: case pt_vcurveto: case pt_ycurveto:
            if ( (sp>=6 && tok==pt_curveto) || (sp>=4 && tok!=pt_curveto)) {
                BasePoint ncp, pcp, to;
                to.x = stack[sp-2].u.val;
                to.y = stack[sp-1].u.val;
                if ( tok==pt_curveto ) {
                    ncp.x = stack[sp-6].u.val;
                    ncp.y = stack[sp-5].u.val;
                    pcp.x = stack[sp-4].u.val;
                    pcp.y = stack[sp-3].u.val;
                } else if ( tok==pt_vcurveto ) {
                    /* first control point is the current point */
                    ncp = current;
                    pcp.x = stack[sp-4].u.val;
                    pcp.y = stack[sp-3].u.val;
                } else if ( tok==pt_ycurveto ) {
                    /* second control point is the end point */
                    pcp = to;
                    ncp.x = stack[sp-4].u.val;
                    ncp.y = stack[sp-3].u.val;
                }
                current = to;
                if ( cur!=NULL && cur->first!=NULL && (cur->first!=cur->last || cur->first->next==NULL) ) {
                    Transform(&cur->last->nextcp,&ncp,transform);
                    cur->last->nonextcp = false;
                    pt = chunkalloc(sizeof(SplinePoint));
                    Transform(&pt->prevcp,&pcp,transform);
                    Transform(&pt->me,&current,transform);
                    pt->nonextcp = true;
                    SplineMake3(cur->last,pt);
                    cur->last = pt;
                }
            }
            freestuff(stack,sp);
            sp = 0;
            break;
          case pt_rect:
            /* operands are: x y width height */
            if ( sp>=4 ) {
                SplinePointList *spl = chunkalloc(sizeof(SplinePointList));
                SplinePoint *first, *second, *third, *fourth;
                BasePoint temp1, temp2;
                if ( cur!=NULL )
                    cur->next = spl;
                else
                    head = spl;
                cur = spl;
                temp1.x = stack[sp-4].u.val; temp1.y = stack[sp-3].u.val;
                Transform(&temp2,&temp1,transform);
                first = SplinePointCreate(temp2.x,temp2.y);
                temp1.x += stack[sp-2].u.val;           /* + width */
                Transform(&temp2,&temp1,transform);
                second = SplinePointCreate(temp2.x,temp2.y);
                temp1.y += stack[sp-1].u.val;           /* + height */
                Transform(&temp2,&temp1,transform);
                third = SplinePointCreate(temp2.x,temp2.y);
                temp1.x = stack[sp-4].u.val;
                Transform(&temp2,&temp1,transform);
                fourth = SplinePointCreate(temp2.x,temp2.y);
                cur->first = cur->last = first;
                SplineMake3(first,second);
                SplineMake3(second,third);
                SplineMake3(third,fourth);
                SplineMake3(fourth,first);
                current = temp1;
            }
            freestuff(stack,sp);
            sp = 0;
            break;
          case pt_closepath:
          case pt_stroke: case pt_closestroke: case pt_fillnz: case pt_filleo:
          case pt_fillstrokenz: case pt_fillstrokeeo: case pt_closefillstrokenz:
          case pt_closefillstrokeeo: case pt_paintnoop:
            if ( tok==pt_closepath || tok==pt_closestroke ||
                    tok==pt_closefillstrokenz || tok==pt_closefillstrokeeo ) {
                if ( cur!=NULL && cur->first!=NULL && cur->first!=cur->last ) {
                    if ( cur->first->me.x==cur->last->me.x && cur->first->me.y==cur->last->me.y ) {
                        /* The last point sits on top of the first one:  */
                        /*  merge it into the first rather than adding a */
                        /*  zero length closing segment                  */
                        SplinePoint *oldlast = cur->last;
                        cur->first->prevcp = oldlast->prevcp;
                        cur->first->noprevcp = false;
                        oldlast->prev->from->next = NULL;
                        cur->last = oldlast->prev->from;
                        SplineFree(oldlast->prev);
                        SplinePointFree(oldlast);
                    }
                    SplineMake3(cur->last,cur->first);
                    cur->last = cur->first;
                }
            }
            if ( tok==pt_closepath )
                break;
            else if ( tok==pt_paintnoop ) {
                /* End the path without painting it: throw it away */
                SplinePointListsFree(head);
                head = cur = NULL;
                break;
            }
            /* A painting operator: the path becomes a new entity at the */
            /*  front of the result list                                 */
            ent = EntityCreate(head,linecap,linejoin,linewidth,transform);
            ent->next = ec->splines;
            ec->splines = ent;
            if ( tok==pt_stroke || tok==pt_closestroke || tok==pt_fillstrokenz ||
                    tok==pt_fillstrokeeo || tok==pt_closefillstrokenz ||
                    tok==pt_closefillstrokeeo )
                ent->u.splines.stroke.col = fore_stroke;
            if ( tok==pt_fillnz || tok==pt_filleo || tok==pt_fillstrokenz ||
                    tok==pt_fillstrokeeo || tok==pt_closefillstrokenz ||
                    tok==pt_closefillstrokeeo )
                ent->u.splines.fill.col = fore_fill;
            head = NULL; cur = NULL;
            break;
          case pt_gsave:
            if ( gsp<30 ) {
                memcpy(gsaves[gsp].transform,transform,sizeof(transform));
                gsaves[gsp].current = current;
                gsaves[gsp].linewidth = linewidth;
                gsaves[gsp].linecap = linecap;
                gsaves[gsp].linejoin = linejoin;
                gsaves[gsp].fore_stroke = fore_stroke;
                gsaves[gsp].fore_fill = fore_fill;
                ++gsp;
                /* Unlike PS does not! save current path */
            }
            break;
          case pt_grestore:
            if ( gsp>0 ) {
                --gsp;
                memcpy(transform,gsaves[gsp].transform,sizeof(transform));
                current = gsaves[gsp].current;
                linewidth = gsaves[gsp].linewidth;
                linecap = gsaves[gsp].linecap;
                linejoin = gsaves[gsp].linejoin;
                fore_stroke = gsaves[gsp].fore_stroke;
                fore_fill = gsaves[gsp].fore_fill;
            }
            break;
          default:
            /* Anything else is ignored together with its operands */
            freestuff(stack,sp);
            sp=0;
            break;
        }
    }
    freestuff(stack,sp);
    /* A path which was never painted is still returned */
    if ( head!=NULL ) {
        ent = EntityCreate(head,linecap,linejoin,linewidth,transform);
        ent->next = ec->splines;
        ec->splines = ent;
    }
    ECCategorizePoints(ec);
    switch_to_old_locale(&tmplocale, &oldlocale); // Switch to the cached locale.
}
1612
pdf_InterpretSC(struct pdfcontext * pc,char * glyphname,char * objnum,int * flags)1613 static SplineChar *pdf_InterpretSC(struct pdfcontext *pc,char *glyphname,
1614 char *objnum, int *flags) {
1615 int gn = strtol(objnum,NULL,10);
1616 EntityChar ec;
1617 FILE *glyph_stream;
1618 SplineChar *sc;
1619
1620 if ( gn<=0 || gn>=pc->ocnt || pc->objs[gn]==-1 )
1621 goto fail;
1622 if ( pdf_findobject(pc,gn) && !pdf_readdict(pc) )
1623 goto fail;
1624 glyph_stream = pdf_defilterstream(pc);
1625 if ( glyph_stream==NULL )
1626 return( NULL );
1627 rewind(glyph_stream);
1628
1629 memset(&ec,'\0',sizeof(ec));
1630 ec.fromtype3 = true;
1631 ec.sc = sc = SplineCharCreate(2);
1632 sc->name = copy(glyphname);
1633
1634 _InterpretPdf(glyph_stream,pc,&ec);
1635 sc->width = ec.width;
1636 sc->layer_cnt = 1;
1637 SCAppendEntityLayers(sc,ec.splines,ImportParamsState());
1638 if ( sc->layer_cnt==1 ) ++sc->layer_cnt;
1639
1640 fclose(glyph_stream);
1641 return( sc );
1642
1643 fail:
1644 LogError( _("Syntax error while parsing type3 glyph: %s"), glyphname );
1645 return( NULL );
1646 }
1647
pdf_InterpretEntity(struct pdfcontext * pc,int page_num)1648 static Entity *pdf_InterpretEntity(struct pdfcontext *pc,int page_num) {
1649 EntityChar ec;
1650 SplineChar dummy;
1651 FILE *glyph_stream;
1652 char *pt;
1653 int content;
1654
1655 if ( !pdf_findobject(pc,pc->pages[page_num]) || !pdf_readdict(pc) ) {
1656 LogError( _("Syntax error while parsing pdf graphics"));
1657 return( NULL );
1658 }
1659 if ( (pt=PSDictHasEntry(&pc->pdfdict,"Contents"))==NULL ||
1660 sscanf(pt,"%d",&content)!=1 ) {
1661 LogError( _("Syntax error while parsing pdf graphics: Page with no Contents"));
1662 return( NULL );
1663 }
1664 if ( !pdf_findobject(pc,content) || !pdf_readdict(pc) ) {
1665 LogError( _("Syntax error while parsing pdf graphics"));
1666 return( NULL );
1667 }
1668 glyph_stream = pdf_defilterstream(pc);
1669 if ( glyph_stream==NULL )
1670 return( NULL );
1671 rewind(glyph_stream);
1672
1673 memset(&ec,'\0',sizeof(ec));
1674 memset(&dummy,0,sizeof(dummy));
1675 ec.fromtype3 = true;
1676 ec.sc = &dummy;
1677 dummy.name = "Nameless glyph";
1678
1679 _InterpretPdf(glyph_stream,pc,&ec);
1680
1681 fclose(glyph_stream);
1682 return( ec.splines );
1683 }
1684
1685 /* ************************************************************************** */
1686 /* ****************************** End graphics ****************************** */
1687 /* ************************************************************************** */
1688
/* Rename and re-encode one glyph according to a ToUnicode CMap entry.       */
/*  basesf         the font (for a cid keyed font its first subfont is used) */
/*  mappings       the identifiers of all CMap entries seen so far;          */
/*                 mappings[cur] is the one being processed                  */
/*  uvals, nuni    the unicode value(s) the glyph maps to (nuni>1 means a    */
/*                 sequence, which yields a name of the form a_b_c)          */
/*  gid            glyph id from the CMap                                    */
/*  cmap_from_cid  when set gid is used directly as the glyph position,      */
/*                 otherwise it is looked up in the font's encoding map      */
/*  cur            index of the current entry in mappings                    */
/* When earlier entries carry the same identifier the name gets a ".altN"    */
/* suffix. Glyph ids which do not lead to an existing glyph are ignored.     */
static void add_mapping(SplineFont *basesf, long *mappings, int *uvals, int nuni, int gid, int cmap_from_cid, int cur) {
    int i, ndups, pos;
    char suffix[24], *name, *grown, buffer[400];
    SplineFont *sf = basesf->subfontcnt > 0 ? basesf->subfonts[0] : basesf;
    struct altuni *altuni, *prev;
    SplineChar *sc;

    /* embedded TTF fonts may contain a 8-bit cmap table, denoted as platform ID 1 format 0 */
    /* (Apple). In fact this mapping has nothing to do both with Unicode and Apple, and rather */
    /* stores a custom order used to refer to glyphs from this particular PDF. */
    /* If such a mapping is present, then GIDs used in the ToUnicode Cmap array will correspond */
    /* to "Unicode" values it specifies rather than to the real order in which the glyphs are */
    /* stored in the file */
    if ( cmap_from_cid || sf->map == NULL )
        pos = gid;
    else if ( gid >= 0 && gid < sf->map->enccount )
        pos = sf->map->map[gid];
    else
        return;                 /* gid lies outside the encoding map */

    /* The gid comes straight from the file: check it before indexing */
    if ( pos < 0 || pos >= sf->glyphcnt || (sc = sf->glyphs[pos]) == NULL )
        return;
    if ( sc->unicodeenc == uvals[0] && nuni <= 1 )
        return;                 /* already encoded as requested */

    /* How many earlier entries map to the same thing? */
    ndups = 0;
    for (i=0; i < cur; i++) {
        if (mappings[i] == mappings[cur]) ndups++;
    }
    suffix[0] = '\0';
    if (ndups)
        snprintf(suffix, sizeof(suffix), ".alt%d", ndups);

    /* Build the name: the components joined by '_', then the suffix */
    name = copy(StdGlyphName(buffer,uvals[0],sf->uni_interp,sf->for_new_glyphs));
    if ( name == NULL )
        return;
    for (i = 1; i<nuni; i++) {
        const char *piece = StdGlyphName(buffer,uvals[i],sf->uni_interp,sf->for_new_glyphs);
        grown = realloc(name,strlen(name)+strlen(piece)+2);
        if ( grown == NULL ) {
            free(name);
            return;
        }
        name = grown;
        strcat(name, "_");
        strcat(name, piece);
    }
    if ( suffix[0] != '\0' ) {
        grown = realloc(name,strlen(name)+strlen(suffix)+1);
        if ( grown == NULL ) {
            free(name);
            return;
        }
        name = grown;
        strcat(name, suffix);
    }

    /* Sometimes FF instead of assigning proper Unicode values to TTF glyphs keeps */
    /* them encoded to the same codepoint, but creates for each glyph an alternate */
    /* encoding, corresponding to the position this glyph has in the font's encoding */
    /* map. As we are going to reencode the glyph anyway, we should remove those weird */
    /* AltUni's first */
    if (!cmap_from_cid) {
        for ( altuni = sc->altuni, prev = NULL; altuni!=NULL; prev = altuni, altuni = altuni->next ) {
            if ( altuni->vs == -1 && altuni->unienc == gid)
                break;
        }
        if ( altuni ) {
            if ( prev==NULL )
                sc->altuni = altuni->next;
            else
                prev->next = altuni->next;
            altuni->next = NULL;
            AltUniFree(altuni);
        }
    }
    /* The glyph takes over the freshly built name */
    free(sc->name);
    sc->name = name;
    sc->unicodeenc = UniFromName(name,sf->uni_interp,&custom);
}
1750
/* Make sure that (*mappings)[idx] exists, doubling the table as often as   */
/* needed. The table is not grown once it holds more than 0x10000 entries:  */
/* the limit is arbitrary, but a file exceeding it is probably garbage.     */
/* New entries are zeroed. Returns 1 when idx is usable, 0 when the limit   */
/* was reached and -1 when memory ran out (the old table is then intact).   */
static int pdf_cmap_reserve(long **mappings, long *length, long idx) {
    while ( idx>=*length && *length<=0x10000 ) {
        long newlen = *length>0 ? 2 * *length : 16;
        long *bigger = calloc(newlen,sizeof(long));

        if ( bigger==NULL )
            return( -1 );
        if ( *length>0 )
            memcpy(bigger,*mappings,*length*sizeof(long));
        free(*mappings);
        *mappings = bigger;
        *length = newlen;
    }
    return( idx<*length );
}

/* Parse the ToUnicode CMap of font font_num and use it to name and encode  */
/* the glyphs of basesf (through add_mapping). Both "beginbfchar" and       */
/* "beginbfrange" sections are handled; each must be preceded by its entry  */
/* count. On a syntax error (or when memory runs out) a message is logged   */
/* and the glyphs processed so far keep their new names. When everything    */
/* parsed and the font is not cid keyed its encoding map is rebuilt.        */
static void pdf_getcmap(struct pdfcontext *pc, SplineFont *basesf, int font_num) {
    FILE *file;
    int i, j, gid, start, end, uni, cur=0, nuni, nhex, nchars, lo, fits, *uvals;
    long *mappings = NULL;
    long mappings_length;
    long tmap;
    char tok[200], *ccval, prevtok[200]="";
    SplineFont *sf = basesf->subfontcnt > 0 ? basesf->subfonts[0] : basesf;

    if ( !pdf_findobject(pc,pc->cmapobjs[font_num]) || !pdf_readdict(pc) )
        return;
    file = pdf_defilterstream(pc);
    if ( file==NULL )
        return;
    rewind(file);

    /* One entry per glyph to start with; the table grows on demand */
    mappings_length = sf->glyphcnt>0 ? sf->glyphcnt : 0;
    if ( mappings_length>0 ) {
        mappings = calloc(mappings_length,sizeof(long));
        if ( mappings==NULL )
            goto fail;
    }

    while ( pdf_getprotectedtok(file,tok) >= 0 ) {
        /* sscanf may return EOF, so success has to be tested explicitly */
        if ( strcmp(tok,"beginbfchar") == 0 && sscanf(prevtok,"%d",&nchars)==1 ) {
            for (i=0; i<nchars; i++) {
                if ( pdf_skip_brackets(file,tok) < 0 || sscanf(tok,"%x",&gid)!=1 ||
                        pdf_skip_brackets(file,tok) < 0 || sscanf(tok,"%lx",&tmap)!=1 )
                    goto fail;
                /* Values we store in the 'mappings' array are just unique identifiers, */
                /* so they should not necessarily correspond to any valid Unicode codepoints. */
                /* In order to get the real Unicode value mapped to a glyph we should parse the */
                /* hex string once again, dividing it into hex quartets */
                nhex = (strlen(tok))/4;
                if ( nhex<1 )
                    nhex = 1;       /* a short string still yields one value */
                nuni = 1;
                uvals = calloc(nhex,sizeof(int));
                if ( uvals==NULL )
                    goto fail;
                sscanf(tok,"%4x", &uvals[0]);
                ccval = tok + 4;    /* only looked at when nhex>1 */
                /* If a single glyph is mapped to a sequence of Unicode characters, then the */
                /* CMap mapping will contain two or more hex quartets. However a pair of such */
                /* quartets may also represent a single Unicode character encoded with */
                /* a surrogate pair */
                for (j = 1; j<nhex && strlen(ccval) >= 4; j++) {
                    if ( sscanf(ccval,"%4x", &lo)!=1 )
                        break;
                    if (uvals[nuni-1] >= 0xD800 && uvals[nuni-1] <= 0xDBFF && lo >= 0xDC00 && lo <= 0xDFFF )
                        uvals[nuni-1] = 0x10000 + (uvals[nuni-1] - 0xD800) * 0x400 + (lo - 0xDC00);
                    else
                        uvals[nuni++] = lo;
                    ccval += 4;
                }
                fits = pdf_cmap_reserve(&mappings,&mappings_length,cur);
                if ( fits<0 ) {
                    free(uvals);
                    goto fail;
                }
                if ( fits ) {
                    mappings[cur] = tmap;
                    add_mapping(basesf, mappings, uvals, nuni, gid, pc->cmap_from_cid[font_num], cur);
                }
                free(uvals);
                cur++;
            }
            if ( pdf_getprotectedtok(file,tok) <= 0 || strcmp(tok,"endbfchar") != 0 )
                goto fail;
        } else if ( strcmp(tok,"beginbfrange") == 0 && sscanf(prevtok,"%d",&nchars)==1 ) {
            for (i=0; i<nchars; i++) {
                if ( pdf_skip_brackets(file,tok) < 0 || sscanf(tok,"%x",&start)!=1 ||
                        pdf_skip_brackets(file,tok) < 0 || sscanf(tok,"%x",&end)!=1 ||
                        pdf_skip_brackets(file,tok) < 0 || sscanf(tok,"%lx",&tmap)!=1 )
                    goto fail;

                uni = 0;
                sscanf(tok,"%4x", &uni);
                /* For CMap values defining a character range we assume they should always */
                /* correspond to a single Unicode character (either a BMP character or a surrogate pair) */
                if (strlen(tok) >= 8 && sscanf(tok+4,"%4x", &lo)==1 ) {
                    if (uni >= 0xD800 && uni <= 0xDBFF && lo >= 0xDC00 && lo <= 0xDFFF )
                        uni = 0x10000 + (uni - 0xD800) * 0x400 + (lo - 0xDC00);
                }

                for (gid=start; gid<=end; gid++) {
                    int uval = uni++;

                    /* The range comes from the file: never write past the table */
                    fits = pdf_cmap_reserve(&mappings,&mappings_length,cur);
                    if ( fits<0 )
                        goto fail;
                    if ( !fits )
                        break;
                    mappings[cur] = uval;
                    add_mapping(basesf, mappings, &uval, 1, gid, pc->cmap_from_cid[font_num], cur);
                    cur++;
                }
            }
            if ( pdf_getprotectedtok(file,tok) <= 0 || strcmp(tok,"endbfrange") != 0 )
                goto fail;
        } else
            memcpy(prevtok,tok,200);
    }
    fclose(file);
    /* If this is not a cid font, then regenerate the font encoding (so that it is no */
    /* longer identified as MacRoman) */
    if ( sf->map != NULL && basesf == sf ) {
        EncMapFree( sf->map );
        sf->map = EncMapFromEncoding(sf,FindOrMakeEncoding("Original"));
    }
    free(mappings);
    return;
  fail:
    free(mappings);
    fclose(file);
    LogError( _("Syntax errors while parsing ToUnicode CMap") );
}
1861
pdf_getcharprocs(struct pdfcontext * pc,char * charprocs)1862 static int pdf_getcharprocs(struct pdfcontext *pc,char *charprocs) {
1863 int cp = strtol(charprocs,NULL,10);
1864 FILE *temp, *pdf = pc->pdf;
1865 int ret;
1866
1867 /* An indirect reference? */
1868 if ( cp!=0 ) {
1869 if ( !pdf_findobject(pc,cp) )
1870 return( false );
1871 return( pdf_readdict(pc));
1872 }
1873 temp = GFileTmpfile();
1874 if ( temp==NULL )
1875 return( false );
1876 while ( *charprocs ) {
1877 putc(*charprocs,temp);
1878 ++charprocs;
1879 }
1880 rewind(temp);
1881 pc->pdf = temp;
1882 ret = pdf_readdict(pc);
1883 pc->pdf = pdf;
1884 fclose(temp);
1885 return( ret );
1886 }
1887
pdf_loadtype3(struct pdfcontext * pc)1888 static SplineFont *pdf_loadtype3(struct pdfcontext *pc) {
1889 char *enc, *cp, *fontmatrix, *name;
1890 double emsize;
1891 SplineFont *sf;
1892 int flags = -1;
1893 int i;
1894 struct psdict *charprocdict;
1895
1896 name=PSDictHasEntry(&pc->pdfdict,"Name");
1897 if ( name==NULL )
1898 name=PSDictHasEntry(&pc->pdfdict,"BaseFont");
1899 if ( (enc=PSDictHasEntry(&pc->pdfdict,"Encoding"))==NULL )
1900 goto fail;
1901 if ( (cp=PSDictHasEntry(&pc->pdfdict,"CharProcs"))==NULL )
1902 goto fail;
1903 if ( (fontmatrix=PSDictHasEntry(&pc->pdfdict,"FontMatrix"))==NULL )
1904 goto fail;
1905 if ( sscanf(fontmatrix,"[%lg",&emsize)!=1 || emsize==0 )
1906 goto fail;
1907 if ( !pdf_getcharprocs(pc,cp))
1908 goto fail;
1909
1910 emsize = 1.0/emsize;
1911 charprocdict = PSDictCopy(&pc->pdfdict);
1912
1913 sf = SplineFontBlank(charprocdict->next);
1914 if ( name!=NULL ) {
1915 name = copy(name+1);
1916 free(sf->fontname); free(sf->fullname); free(sf->familyname);
1917 sf->fontname = name;
1918 sf->familyname = copy(name);
1919 sf->fullname = copy(name);
1920 }
1921 free(sf->copyright); sf->copyright = NULL;
1922 free(sf->comments); sf->comments = NULL;
1923 sf->ascent = .8*emsize;
1924 sf->descent = emsize - sf->ascent;
1925 sf->multilayer = true;
1926
1927 for ( i=0; i<charprocdict->next; ++i ) {
1928 sf->glyphs[i] = pdf_InterpretSC(pc,charprocdict->keys[i],
1929 charprocdict->values[i],&flags);
1930 if ( sf->glyphs[i]!=NULL ) {
1931 sf->glyphs[i]->orig_pos = i;
1932 sf->glyphs[i]->parent = sf;
1933 sf->glyphs[i]->vwidth = emsize;
1934 sf->glyphs[i]->unicodeenc = UniFromName(sf->glyphs[i]->name,sf->uni_interp,&custom);
1935 }
1936 }
1937 sf->glyphcnt = charprocdict->next;
1938 PSDictFree(charprocdict);
1939
1940 /* I'm going to ignore the encoding vector for now, and just return original */
1941 sf->map = EncMapFromEncoding(sf,FindOrMakeEncoding("Original"));
1942
1943 return( sf );
1944
1945 fail:
1946 LogError( _("Syntax errors while parsing Type3 font headers") );
1947 return( NULL );
1948 }
1949
/* Placeholder: hands the stream back untouched.                            */
/* I don't need this. Type1 fonts provide us with the same info about      */
/* cleartext length, binary length, cleartext length that the pfb section  */
/* headings do. But the info isn't important in parsing the pfb file, so   */
/* we can just ignore it.                                                   */
static FILE *pdf_insertpfbsections(FILE *file,struct pdfcontext *pc) {
    (void) pc;
    return file;
}
1957
pdf_loadfont(struct pdfcontext * pc,int font_num)1958 static SplineFont *pdf_loadfont(struct pdfcontext *pc,int font_num) {
1959 char *pt;
1960 int fd, type, ff;
1961 FILE *file;
1962 SplineFont *sf;
1963
1964 if ( !pdf_findobject(pc,pc->fontobjs[font_num]) || !pdf_readdict(pc) )
1965 return( NULL );
1966
1967 if ( (pt=PSDictHasEntry(&pc->pdfdict,"Subtype"))!=NULL && strcmp(pt,"/Type3")==0 )
1968 return( pdf_loadtype3(pc));
1969
1970 if ( (pt=PSDictHasEntry(&pc->pdfdict,"FontDescriptor"))==NULL )
1971 goto fail;
1972 fd = strtol(pt,NULL,10);
1973
1974 if ( !pdf_findobject(pc,fd) || !pdf_readdict(pc) )
1975 goto fail;
1976
1977 if ( (pt=PSDictHasEntry(&pc->pdfdict,"FontFile"))!=NULL )
1978 type = 1;
1979 else if ( (pt=PSDictHasEntry(&pc->pdfdict,"FontFile2"))!=NULL )
1980 type = 2;
1981 else if ( (pt=PSDictHasEntry(&pc->pdfdict,"FontFile3"))!=NULL )
1982 type = 3;
1983 else {
1984 LogError( _("The font %s is one of the standard fonts. It isn't actually in the file."), pc->fontnames[font_num]);
1985 return( NULL );
1986 }
1987 ff = strtol(pt,NULL,10);
1988 if ( !pdf_findobject(pc,ff) || !pdf_readdict(pc) )
1989 goto fail;
1990 if ( type==3 && (pt=PSDictHasEntry(&pc->pdfdict, "Subtype"))!=NULL && strcmp(pt, "/OpenType")==0 )
1991 type = 2;
1992 file = pdf_defilterstream(pc);
1993 if ( file==NULL )
1994 return( NULL );
1995 rewind(file);
1996 if ( type==1 ) {
1997 FontDict *fd;
1998 file = pdf_insertpfbsections(file,pc);
1999 fd = _ReadPSFont(file);
2000 if ( fd==NULL)
2001 return( NULL );
2002 sf = SplineFontFromPSFont(fd);
2003 PSFontFree(fd);
2004 } else if ( type==2 ) {
2005 sf = _SFReadTTF(file,0,pc->openflags,pc->fontnames[font_num],NULL,NULL);
2006 } else {
2007 int len;
2008 fseek(file,0,SEEK_END);
2009 len = ftell(file);
2010 rewind(file);
2011 sf = _CFFParse(file,len,pc->fontnames[font_num]);
2012 }
2013 fclose(file);
2014 if (sf == NULL)
2015 goto fail;
2016 /* Don't attempt to parse CMaps for Type 1 fonts: they already have glyph names */
2017 /* which are usually more meaningful */
2018 if (pc->cmapobjs[font_num] != -1 && type > 1)
2019 pdf_getcmap(pc, sf, font_num);
2020 return( sf );
2021
2022 fail:
2023 LogError( _("Unable to parse the pdf objects that make up %s"),pc->fontnames[font_num]);
2024 return( NULL );
2025 }
2026
pcFree(struct pdfcontext * pc)2027 static void pcFree(struct pdfcontext *pc) {
2028 /* Free any memory that may have been allocatted earlier */
2029 int i;
2030
2031 PSDictClear(&pc->pdfdict);
2032 free(pc->pdfdict.keys);
2033 free(pc->pdfdict.values);
2034 free(pc->objs);
2035 for ( i=0; i<pc->fcnt; ++i ) free(pc->fontnames[i]);
2036 free(pc->fontnames);
2037 free(pc->fontobjs);
2038 free(pc->cmapobjs);
2039 free(pc->cmap_from_cid);
2040 free(pc->pages);
2041 free(pc->tokbuf);
2042 }
2043
NamesReadPDF(char * filename)2044 char **NamesReadPDF(char *filename) {
2045 struct pdfcontext pc;
2046 int i;
2047 char **list;
2048
2049 locale_t tmplocale; locale_t oldlocale; // Declare temporary locale storage.
2050 switch_to_c_locale(&tmplocale, &oldlocale); // Switch to the C locale temporarily and cache the old locale.
2051 memset(&pc,0,sizeof(pc));
2052 if ( (pc.pdf=fopen(filename,"r"))==NULL )
2053 return( NULL );
2054 if ( (pc.objs=FindObjects(&pc))==NULL ) {
2055 LogError( _("Doesn't look like a valid pdf file, couldn't find xref section") );
2056 goto NamesReadPDF_error;
2057 }
2058 if ( pc.encrypted ) {
2059 LogError( _("This pdf file contains an /Encrypt dictionary, and FontForge does not currently\nsupport pdf encryption" ));
2060 goto NamesReadPDF_error;
2061 }
2062 if ( pdf_findfonts(&pc)==0 ) {
2063 goto NamesReadPDF_error;
2064 }
2065 if ( (list=malloc((pc.fcnt+1)*sizeof(char *)))==NULL )
2066 goto NamesReadPDF_error;
2067 for ( i=0; i<pc.fcnt; ++i )
2068 if ( (list[i]=copy(pc.fontnames[i]))==NULL )
2069 goto NamesReadPDFlist_error;
2070 list[i]=NULL;
2071 fclose(pc.pdf);
2072 pcFree(&pc);
2073 switch_to_old_locale(&tmplocale, &oldlocale); // Switch to the cached locale.
2074 return( list );
2075
2076 /* if errors, then free memory, close files, and return a NULL */
2077 NamesReadPDFlist_error:
2078 while ( --i>=0 ) free(list[i]);
2079 free(list);
2080 NamesReadPDF_error:
2081 pcFree(&pc);
2082 fclose(pc.pdf);
2083 switch_to_old_locale(&tmplocale, &oldlocale); // Switch to the cached locale.
2084 return( NULL );
2085 }
2086
_SFReadPdfFont(FILE * pdf,char * filename,enum openflags openflags)2087 SplineFont *_SFReadPdfFont(FILE *pdf,char *filename, enum openflags openflags) {
2088 char *select_this_font = NULL, *pt;
2089 struct pdfcontext pc;
2090 SplineFont *sf = NULL;
2091 int i;
2092
2093 locale_t tmplocale; locale_t oldlocale; // Declare temporary locale storage.
2094 switch_to_c_locale(&tmplocale, &oldlocale); // Switch to the C locale temporarily and cache the old locale.
2095 memset(&pc,0,sizeof(pc));
2096 pc.pdf = pdf;
2097 pc.openflags = openflags;
2098 if ( (pc.objs = FindObjects(&pc))==NULL ) {
2099 LogError( _("Doesn't look like a valid pdf file, couldn't find xref section") );
2100 pcFree(&pc);
2101 switch_to_old_locale(&tmplocale, &oldlocale); // Switch to the cached locale.
2102 return( NULL );
2103 }
2104 if ( pc.encrypted ) {
2105 LogError( _("This pdf file contains an /Encrypt dictionary, and FontForge does not currently\nsupport pdf encryption" ));
2106 pcFree(&pc);
2107 switch_to_old_locale(&tmplocale, &oldlocale); // Switch to the cached locale.
2108 return( NULL );
2109 }
2110 if ( pdf_findfonts(&pc)==0 ) {
2111 LogError( _("This pdf file has no fonts"));
2112 pcFree(&pc);
2113 switch_to_old_locale(&tmplocale, &oldlocale); // Switch to the cached locale.
2114 return( NULL );
2115 }
2116 // parse the chosen font name
2117 if((pt = strchr(filename, '(')) != NULL) {
2118 select_this_font = copy(pt+1);
2119 if((pt = strchr(select_this_font, ')')) != NULL)
2120 *pt = '\0';
2121 }
2122 if ( pc.fcnt==1 ) {
2123 sf = pdf_loadfont(&pc,0);
2124 } else if ( select_this_font!=NULL ) {
2125 for ( i=0; i<pc.fcnt; ++i ) {
2126 if ( strcmp(pc.fontnames[i],select_this_font)==0 )
2127 break;
2128 }
2129 if ( i<pc.fcnt )
2130 sf = pdf_loadfont(&pc,i);
2131 else
2132 ff_post_error(_("Not in Collection"),_("%s is not in %.100s"),
2133 select_this_font, filename);
2134 } else {
2135 char **names;
2136 int choice;
2137 names = malloc((pc.fcnt+1)*sizeof(unichar_t *));
2138 for ( i=0; i<pc.fcnt; ++i )
2139 names[i] = copy(pc.fontnames[i]);
2140 names[i] = NULL;
2141 if ( no_windowing_ui )
2142 choice = 0;
2143 else
2144 choice = ff_choose(_("Pick a font, any font..."),(const char **) names,pc.fcnt,0,_("There are multiple fonts in this file, pick one"));
2145 for ( i=0; i<pc.fcnt; ++i )
2146 free(names[i]);
2147 free(names);
2148 if ( choice!=-1 )
2149 sf = pdf_loadfont(&pc,choice);
2150 }
2151 switch_to_old_locale(&tmplocale, &oldlocale); // Switch to the cached locale.
2152 pcFree(&pc);
2153 free(select_this_font);
2154 return( sf );
2155 }
2156
SFReadPdfFont(char * filename,enum openflags openflags)2157 SplineFont *SFReadPdfFont(char *filename,enum openflags openflags) {
2158 SplineFont *sf;
2159 FILE *pdf;
2160
2161 pdf = fopen(filename,"r");
2162 if ( pdf==NULL )
2163 sf = NULL;
2164 else {
2165 sf = _SFReadPdfFont(pdf,filename,openflags);
2166 fclose(pdf);
2167 }
2168 return( sf );
2169 }
2170
EntityInterpretPDFPage(FILE * pdf,int select_page)2171 Entity *EntityInterpretPDFPage(FILE *pdf,int select_page) {
2172 struct pdfcontext pc;
2173 char oldloc[24];
2174 Entity *ent;
2175 char *ret;
2176 int choice;
2177
2178 locale_t tmplocale; locale_t oldlocale; // Declare temporary locale storage.
2179 switch_to_c_locale(&tmplocale, &oldlocale); // Switch to the C locale temporarily and cache the old locale.
2180 memset(&pc,0,sizeof(pc));
2181 pc.pdf = pdf;
2182 pc.openflags = 0;
2183 if ( (pc.objs = FindObjects(&pc))==NULL ) {
2184 LogError( _("Doesn't look like a valid pdf file, couldn't find xref section") );
2185 pcFree(&pc);
2186 switch_to_old_locale(&tmplocale, &oldlocale); // Switch to the cached locale.
2187 return( NULL );
2188 }
2189 if ( pc.encrypted ) {
2190 LogError( _("This pdf file contains an /Encrypt dictionary, and FontForge does not currently\nsupport pdf encryption" ));
2191 pcFree(&pc);
2192 switch_to_old_locale(&tmplocale, &oldlocale); // Switch to the cached locale.
2193 return( NULL );
2194 }
2195 if ( pdf_findpages(&pc)==0 ) {
2196 LogError( _("This pdf file has no pages"));
2197 pcFree(&pc);
2198 switch_to_old_locale(&tmplocale, &oldlocale); // Switch to the cached locale.
2199 return( NULL );
2200 }
2201 if ( pc.pcnt==1 ) {
2202 ent = pdf_InterpretEntity(&pc,0);
2203 } else if ( select_page>=0 && select_page<pc.pcnt ) {
2204 ent = pdf_InterpretEntity(&pc,select_page);
2205 } else {
2206 if ( no_windowing_ui )
2207 choice = 0;
2208 else {
2209 char buffer[200];
2210 snprintf( buffer, sizeof(buffer), _("There are %d pages in this file, which do you want?"), pc.pcnt );
2211 ret = ff_ask_string(_("Pick a page"),"1",buffer);
2212 if ( ret==NULL ) {
2213 pcFree(&pc);
2214 switch_to_old_locale(&tmplocale, &oldlocale); // Switch to the cached locale.
2215 return( NULL );
2216 }
2217 choice = strtol(ret,NULL,10)-1;
2218 if ( choice<0 || choice>=pc.pcnt ) {
2219 pcFree(&pc);
2220 switch_to_old_locale(&tmplocale, &oldlocale); // Switch to the cached locale.
2221 return( NULL );
2222 }
2223 }
2224 ent = pdf_InterpretEntity(&pc,choice);
2225 }
2226 switch_to_old_locale(&tmplocale, &oldlocale); // Switch to the cached locale.
2227 pcFree(&pc);
2228 return( ent );
2229 }
2230
2231