1 /* This is dvipdfmx, an eXtended version of dvipdfm by Mark A. Wicks.
2 
3     Copyright (C) 2002-2014 by Jin-Hwan Cho and Shunsaku Hirata,
4     the dvipdfmx project team.
5 
6     This program is free software; you can redistribute it and/or modify
7     it under the terms of the GNU General Public License as published by
8     the Free Software Foundation; either version 2 of the License, or
9     (at your option) any later version.
10 
11     This program is distributed in the hope that it will be useful,
12     but WITHOUT ANY WARRANTY; without even the implied warranty of
13     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14     GNU General Public License for more details.
15 
16     You should have received a copy of the GNU General Public License
17     along with this program; if not, write to the Free Software
18     Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
19 */
20 
21 #ifdef HAVE_CONFIG_H
22 #include <config.h>
23 #endif
24 
25 #include "system.h"
26 #include "error.h"
27 #include "mem.h"
28 #include "dpxfile.h"
29 #include "dpxutil.h"
30 
31 #include "pdfobj.h"
32 #include "pdfparse.h"
33 
34 #include "agl.h"
35 
36 #include "otl_conf.h"
37 
/* Diagnostics are printed only while `verbose` is above this threshold. */
#define VERBOSE_LEVEL_MIN 0

/* File-local verbosity counter; zero means no diagnostics. */
static int verbose = 0;

/*
 * Raise the verbosity of this module by one step.
 *
 * Each call increments the file-local counter that gates the
 * "otl_conf>>" diagnostic messages.
 */
void
otl_conf_set_verbose (void)
{
  verbose += 1;
}
45 
46 static pdf_obj *
parse_uc_coverage(pdf_obj * gclass,const char ** pp,const char * endptr)47 parse_uc_coverage (pdf_obj *gclass, const char **pp, const char *endptr)
48 {
49   pdf_obj *coverage;
50   pdf_obj *value;
51   long     ucv = 0;
52   char    *glyphname, *glyphclass;
53 
54   if (*pp + 1 >= endptr)
55     return NULL;
56 
57   if (**pp == '[')
58     (*pp)++;
59 
60   coverage = pdf_new_array();
61 
62   while (*pp < endptr) {
63     skip_white(pp, endptr);
64     switch (**pp) {
65     case ']': case ';':
66       (*pp)++;
67       return coverage;
68     case ',':
69       (*pp)++;
70       break;
71     case '@':
72       {
73 	pdf_obj *cvalues;
74 	int      i, size;
75 
76 	(*pp)++;
77 	glyphclass = parse_c_ident(pp, endptr);
78 	cvalues = pdf_lookup_dict(gclass, glyphclass);
79 	if (!cvalues)
80 	  ERROR("%s not defined...", glyphclass);
81 	size    = pdf_array_length(cvalues);
82 	for (i = 0; i < size; i++) {
83 	  pdf_add_array(coverage,
84 			pdf_link_obj(pdf_get_array(cvalues, i)));
85 	}
86       }
87       break;
88     default:
89       glyphname  = parse_c_ident(pp, endptr);
90       if (!glyphname)
91 	ERROR("Invalid Unicode character specified.");
92 
93       skip_white(pp, endptr);
94       if (*pp + 1 < endptr && **pp == '-') {
95 	value = pdf_new_array();
96 
97 	if (agl_get_unicodes(glyphname, &ucv, 1) != 1)
98 	  ERROR("Invalid Unicode char: %s", glyphname);
99 	pdf_add_array(value, pdf_new_number(ucv));
100 	RELEASE(glyphname);
101 
102 	(*pp)++; skip_white(pp, endptr);
103 	glyphname = parse_c_ident(pp, endptr);
104 	if (!glyphname)
105 	  ERROR("Invalid Unicode char: %s", glyphname);
106 	if (agl_get_unicodes(glyphname, &ucv, 1) != 1)
107 	  ERROR("Invalid Unicode char: %s", glyphname);
108 	pdf_add_array(value, pdf_new_number(ucv));
109 	RELEASE(glyphname);
110 
111       } else {
112 	if (agl_get_unicodes(glyphname, &ucv, 1) != 1)
113 	  ERROR("Invalid Unicode char: %s", glyphname);
114 	value = pdf_new_number(ucv);
115 	RELEASE(glyphname);
116       }
117       pdf_add_array(coverage, value);
118       break;
119     }
120     skip_white(pp, endptr);
121   }
122 
123   return coverage;
124 }
125 
126 static pdf_obj *parse_block (pdf_obj *gclass, const char **pp, const char *endptr);
127 
128 static void
add_rule(pdf_obj * rule,pdf_obj * gclass,char * first,char * second,char * suffix)129 add_rule (pdf_obj *rule, pdf_obj *gclass,
130 	  char *first, char *second, char *suffix)
131 {
132   pdf_obj *glyph1, *glyph2;
133 #define MAX_UNICODES 16
134   long     unicodes[MAX_UNICODES];
135   int      i, n_unicodes;
136 
137   if (first[0] == '@') {
138     glyph1 = pdf_lookup_dict(gclass, &first[1]);
139     if (!glyph1) {
140       WARN("No glyph class \"%s\" found.", &first[1]);
141       return;
142     }
143     pdf_link_obj(glyph1);
144 
145     if (verbose > VERBOSE_LEVEL_MIN) {
146       MESG("otl_conf>> Output glyph sequence: %s\n", first);
147     }
148 
149   } else {
150     n_unicodes = agl_get_unicodes(first, unicodes, MAX_UNICODES);
151     if (n_unicodes < 1) {
152       WARN("Failed to convert glyph \"%s\" to Unicode sequence.",
153 	   first);
154       return;
155     }
156     glyph1 = pdf_new_array();
157 
158     if (verbose > VERBOSE_LEVEL_MIN) {
159       MESG("otl_conf>> Output glyph sequence: %s ->", first);
160     }
161 
162     for (i = 0; i < n_unicodes; i++) {
163       pdf_add_array(glyph1, pdf_new_number(unicodes[i]));
164 
165       if (verbose > VERBOSE_LEVEL_MIN) {
166 	if (unicodes[i] < 0x10000) {
167 	  MESG(" U+%04X", unicodes[i]);
168 	} else {
169 	  MESG(" U+%06X", unicodes[i]);
170 	}
171       }
172     }
173 
174     if (verbose > VERBOSE_LEVEL_MIN) {
175       MESG("\n");
176     }
177   }
178 
179   if (second[0] == '@') {
180     glyph2 = pdf_lookup_dict(gclass, &second[1]);
181     if (!glyph2) {
182       WARN("No glyph class \"%s\" found.", &second[1]);
183       return;
184     }
185     pdf_link_obj(glyph2);
186 
187     if (verbose > VERBOSE_LEVEL_MIN) {
188       MESG("otl_conf>> Input glyph sequence: %s (%s)\n", second, suffix);
189     }
190 
191   } else {
192     n_unicodes = agl_get_unicodes(second, unicodes, 16);
193     if (n_unicodes < 1) {
194       WARN("Failed to convert glyph \"%s\" to Unicode sequence.",
195 	   second);
196       return;
197     }
198 
199     if (verbose > VERBOSE_LEVEL_MIN) {
200       if (suffix)
201 	MESG("otl_conf>> Input glyph sequence: %s.%s ->", second, suffix);
202       else
203 	MESG("otl_conf>> Input glyph sequence: %s ->", second);
204     }
205 
206     glyph2 = pdf_new_array();
207     for (i = 0; i < n_unicodes; i++) {
208       pdf_add_array(glyph2, pdf_new_number(unicodes[i]));
209 
210       if (verbose > VERBOSE_LEVEL_MIN) {
211 	if (unicodes[i] < 0x10000) {
212 	  MESG(" U+%04X", unicodes[i]);
213 	} else {
214 	  MESG(" U+%06X", unicodes[i]);
215 	}
216       }
217     }
218     if (verbose > VERBOSE_LEVEL_MIN) {
219       MESG(" (%s)\n", suffix);
220     }
221   }
222 
223   /* OK */
224   if (suffix) {
225     pdf_add_array(rule, pdf_new_string(suffix, strlen(suffix)));
226   } else {
227     pdf_add_array(rule, pdf_new_null());
228   }
229   pdf_add_array(rule, glyph1);
230   pdf_add_array(rule, glyph2);
231 }
232 
233 static pdf_obj *
parse_substrule(pdf_obj * gclass,const char ** pp,const char * endptr)234 parse_substrule (pdf_obj *gclass, const char **pp, const char *endptr)
235 {
236   pdf_obj *substrule;
237   char    *token;
238 
239   skip_white(pp, endptr);
240   if (*pp < endptr && **pp == '{')
241     (*pp)++;
242 
243   skip_white(pp, endptr);
244   if (*pp >= endptr)
245     return NULL;
246 
247   substrule = pdf_new_array();
248   while (*pp < endptr && **pp != '}') {
249     skip_white(pp, endptr);
250     if (*pp >= endptr)
251       break;
252 
253     if (**pp == '#') {
254       while (*pp < endptr) {
255 	if (**pp == '\r' || **pp == '\n') {
256 	  (*pp)++;
257 	  break;
258 	}
259 	(*pp)++;
260       }
261       continue;
262     } else if (**pp == ';') {
263       (*pp)++;
264       continue;
265     }
266 
267     skip_white(pp, endptr);
268     token = parse_c_ident(pp, endptr);
269     if (!token)
270       break;
271 
272     if (!strcmp(token, "assign") || !strcmp(token, "substitute")) {
273       char *tmp, *first, *second, *suffix;
274 
275       skip_white(pp, endptr);
276 
277       first = parse_c_ident(pp, endptr);
278       if (!first)
279 	ERROR("Syntax error (1)");
280 
281       skip_white(pp, endptr);
282       tmp = parse_c_ident(pp, endptr);
283       if (strcmp(tmp, "by") && strcmp(tmp, "to"))
284 	ERROR("Syntax error (2): %s", *pp);
285 
286       skip_white(pp, endptr);
287       second = parse_c_ident(pp, endptr); /* allows @ */
288       if (!second)
289 	ERROR("Syntax error (3)");
290 
291       /* (assign|substitute) tag dst src */
292       pdf_add_array(substrule, pdf_new_name(token));
293       if (*pp + 1 < endptr && **pp == '.') {
294 	(*pp)++;
295 	suffix = parse_c_ident(pp, endptr);
296       } else {
297 	suffix = NULL;
298       }
299       add_rule(substrule, gclass, first, second, suffix);
300 
301       RELEASE(first);
302       RELEASE(tmp);
303       RELEASE(second);
304       if (suffix)
305 	RELEASE(suffix);
306     } else {
307       ERROR("Unkown command %s.", token);
308     }
309     RELEASE(token);
310     skip_white(pp, endptr);
311   }
312 
313   if (*pp < endptr && **pp == '}')
314     (*pp)++;
315   return substrule;
316 }
317 
318 static pdf_obj *
parse_block(pdf_obj * gclass,const char ** pp,const char * endptr)319 parse_block (pdf_obj *gclass, const char **pp, const char *endptr)
320 {
321   pdf_obj *rule;
322   char    *token, *tmp;
323 
324   skip_white(pp, endptr);
325   if (*pp < endptr && **pp == '{')
326     (*pp)++;
327 
328   skip_white(pp, endptr);
329   if (*pp >= endptr)
330     return NULL;
331 
332   rule   = pdf_new_dict();
333   while (*pp < endptr && **pp != '}') {
334     skip_white(pp, endptr);
335     if (*pp >= endptr)
336       break;
337     if (**pp == '#') {
338       while (*pp < endptr) {
339 	if (**pp == '\r' || **pp == '\n') {
340 	  (*pp)++;
341 	  break;
342 	}
343 	(*pp)++;
344       }
345       continue;
346     } else if (**pp == ';') {
347       (*pp)++;
348       continue;
349     }
350 
351     skip_white(pp, endptr);
352     token = parse_c_ident(pp, endptr);
353     if (!token)
354       break;
355 
356     if (!strcmp(token, "script") ||
357 	!strcmp(token, "language")) {
358       int  i, len;
359 
360       skip_white(pp, endptr);
361       len = 0;
362       while (*pp + len < endptr && *(*pp + len) != ';') {
363 	len++;
364       }
365       if (len > 0) {
366 	tmp = NEW(len+1, char);
367 	memset(tmp, 0, len+1);
368 	for (i = 0; i < len; i++) {
369 	  if (!isspace((unsigned char)**pp))
370 	    tmp[i] = **pp;
371 	  (*pp)++;
372 	}
373 	pdf_add_dict(rule,
374 		     pdf_new_name(token),
375 		     pdf_new_string(tmp, strlen(tmp)));
376 
377 	if (verbose > VERBOSE_LEVEL_MIN) {
378 	  MESG("otl_conf>> Current %s set to \"%s\"\n", token, tmp);
379 	}
380 
381 	RELEASE(tmp);
382       }
383     } else if (!strcmp(token, "option")) {
384       pdf_obj *opt_dict, *opt_rule;
385 
386       opt_dict = pdf_lookup_dict(rule, "option");
387       if (!opt_dict) {
388 	opt_dict = pdf_new_dict();
389 	pdf_add_dict(rule,
390 		     pdf_new_name("option"), opt_dict);
391       }
392 
393       skip_white(pp, endptr);
394       tmp = parse_c_ident(pp, endptr);
395 
396       if (verbose > VERBOSE_LEVEL_MIN) {
397 	MESG("otl_conf>> Reading option \"%s\"\n", tmp);
398       }
399 
400       skip_white(pp, endptr);
401       opt_rule = parse_block(gclass, pp, endptr);
402       pdf_add_dict(opt_dict, pdf_new_name(tmp), opt_rule);
403 
404       RELEASE(tmp);
405     } else if (!strcmp(token, "prefered") ||
406 	       !strcmp(token, "required") ||
407 	       !strcmp(token, "optional")) {
408       pdf_obj *subst, *rule_block;
409 
410       if (verbose > VERBOSE_LEVEL_MIN) {
411 	MESG("otl_conf>> Reading block (%s)\n", token);
412       }
413 
414       skip_white(pp, endptr);
415       if (*pp >= endptr || **pp != '{')
416 	ERROR("Syntax error (1)");
417 
418       rule_block = parse_substrule(gclass, pp, endptr);
419       subst = pdf_lookup_dict(rule, "rule");
420       if (!subst) {
421 	subst = pdf_new_array();
422 	pdf_add_dict(rule, pdf_new_name("rule"), subst);
423       }
424       pdf_add_array(subst, pdf_new_number(token[0]));
425       pdf_add_array(subst, rule_block);
426     } else if (token[0] == '@') {
427       pdf_obj *coverage;
428 
429       skip_white(pp, endptr);
430       (*pp)++; /* = */
431       skip_white(pp, endptr);
432 
433       if (verbose > VERBOSE_LEVEL_MIN) {
434 	MESG("otl_conf>> Glyph class \"%s\"\n", token);
435       }
436 
437       coverage = parse_uc_coverage(gclass, pp, endptr);
438       if (!coverage)
439 	ERROR("No valid Unicode characters...");
440 
441       pdf_add_dict(gclass,
442 		   pdf_new_name(&token[1]), coverage);
443     }
444     RELEASE(token);
445     skip_white(pp, endptr);
446   }
447 
448   if (*pp < endptr && **pp == '}')
449     (*pp)++;
450   return rule;
451 }
452 
453 
454 static pdf_obj *
otl_read_conf(const char * conf_name)455 otl_read_conf (const char *conf_name)
456 {
457   pdf_obj *rule;
458   pdf_obj *gclass;
459   FILE    *fp;
460   char    *filename, *wbuf, *p, *endptr;
461   const char *pp;
462   long     size, len;
463 
464   filename = NEW(strlen(conf_name)+strlen(".otl")+1, char);
465   strcpy(filename, conf_name);
466   strcat(filename, ".otl");
467 
468   fp = DPXFOPEN(filename, DPX_RES_TYPE_TEXT);
469   if (!fp) {
470     RELEASE(filename);
471     return NULL;
472   }
473 
474   size = file_size(fp);
475 
476   if (verbose > VERBOSE_LEVEL_MIN) {
477     MESG("\n");
478     MESG("otl_conf>> Layout config. \"%s\" found: file=\"%s\" (%ld bytes)\n",
479 	 conf_name, filename, size);
480   }
481   RELEASE(filename);
482   if (size < 1)
483     return NULL;
484 
485   wbuf = NEW(size, char);
486   p = wbuf; endptr = p + size;
487   while (size > 0 && p < endptr) {
488     len = fread(p, sizeof(char), size, fp);
489     p    += len;
490     size -= len;
491   }
492 
493   pp     = wbuf;
494   gclass = pdf_new_dict();
495   rule   = parse_block(gclass, &pp, endptr);
496   pdf_release_obj(gclass);
497 
498   RELEASE(wbuf);
499 
500   return rule;
501 }
502 
503 static pdf_obj *otl_confs = NULL;
504 
505 pdf_obj *
otl_find_conf(const char * conf_name)506 otl_find_conf (const char *conf_name)
507 {
508   pdf_obj *rule;
509   pdf_obj *script, *language;
510   pdf_obj *options;
511 
512   return  NULL;
513 
514   if (otl_confs)
515     rule = pdf_lookup_dict(otl_confs, conf_name);
516   else {
517     otl_confs = pdf_new_dict();
518     rule = NULL;
519   }
520 
521   if (!rule) {
522     rule = otl_read_conf(conf_name);
523     if (rule) {
524       pdf_add_dict(otl_confs,
525 		   pdf_new_name(conf_name), rule);
526       script   = pdf_lookup_dict(rule, "script");
527       language = pdf_lookup_dict(rule, "language");
528       options  = pdf_lookup_dict(rule, "option");
529       if (!script) {
530 	script = pdf_new_string("*", 1);
531 	pdf_add_dict(rule,
532 		     pdf_new_name("script"),
533 		     script);
534 	WARN("Script unspecified in \"%s\"...", conf_name);
535       }
536       if (!language) {
537 	language = pdf_new_string("dflt", 4);
538 	pdf_add_dict(rule,
539 		     pdf_new_name("language"),
540 		     language);
541 	WARN("Language unspecified in \"%s\"...", conf_name);
542       }
543 
544       if (options) {
545 	pdf_obj *optkeys, *opt, *key;
546 	long     i, num_opts;
547 
548 	optkeys  = pdf_dict_keys(options);
549 	num_opts = pdf_array_length(optkeys);
550 	for (i = 0; i < num_opts; i++) {
551 	  key = pdf_get_array(optkeys, i);
552 	  opt = pdf_lookup_dict(options, pdf_name_value(key));
553 	  if (!pdf_lookup_dict(opt, "script"))
554 	    pdf_add_dict(opt,
555 			 pdf_new_name("script"),
556 			 pdf_link_obj(script));
557 	  if (!pdf_lookup_dict(opt, "language"))
558 	    pdf_add_dict(opt,
559 			 pdf_new_name("language"),
560 			 pdf_link_obj(language));
561 	}
562 	pdf_release_obj(optkeys);
563       }
564 
565     }
566   }
567 
568   return rule;
569 }
570 
571 
572 char *
otl_conf_get_script(pdf_obj * conf)573 otl_conf_get_script (pdf_obj *conf)
574 {
575   pdf_obj *script;
576 
577   ASSERT(conf);
578 
579   script = pdf_lookup_dict(conf, "script");
580 
581   return pdf_string_value(script);
582 }
583 
584 char *
otl_conf_get_language(pdf_obj * conf)585 otl_conf_get_language (pdf_obj *conf)
586 {
587   pdf_obj *language;
588 
589   ASSERT(conf);
590 
591   language = pdf_lookup_dict(conf, "language");
592 
593   return pdf_string_value(language);
594 }
595 
596 pdf_obj *
otl_conf_get_rule(pdf_obj * conf)597 otl_conf_get_rule (pdf_obj *conf)
598 {
599   ASSERT(conf);
600   return pdf_lookup_dict(conf, "rule");
601 }
602 
603 pdf_obj *
otl_conf_find_opt(pdf_obj * conf,const char * opt_tag)604 otl_conf_find_opt (pdf_obj *conf, const char *opt_tag)
605 {
606   pdf_obj *opt_conf = NULL;
607   pdf_obj *options;
608 
609   ASSERT(conf);
610 
611   options = pdf_lookup_dict(conf, "option");
612   if (options && opt_tag)
613     opt_conf = pdf_lookup_dict(options, opt_tag);
614   else
615     opt_conf = NULL;
616 
617   return opt_conf;
618 }
619 
620 void
otl_init_conf(void)621 otl_init_conf (void)
622 {
623   if (otl_confs)
624     pdf_release_obj(otl_confs);
625   otl_confs = pdf_new_dict();
626 
627   if (verbose > VERBOSE_LEVEL_MIN + 10) {
628     pdf_release_obj(pdf_ref_obj(otl_confs));
629   }
630 }
631 
632 void
otl_close_conf(void)633 otl_close_conf (void)
634 {
635   pdf_release_obj(otl_confs);
636   otl_confs = NULL;
637 }
638