1 /* Copyright (C) 1997, 2000 artofcode LLC. All rights reserved.
2
3 This program is free software; you can redistribute it and/or modify it
4 under the terms of the GNU General Public License as published by the
5 Free Software Foundation; either version 2 of the License, or (at your
6 option) any later version.
7
8 This program is distributed in the hope that it will be useful, but
9 WITHOUT ANY WARRANTY; without even the implied warranty of
10 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 General Public License for more details.
12
13 You should have received a copy of the GNU General Public License along
14 with this program; if not, write to the Free Software Foundation, Inc.,
15 59 Temple Place, Suite 330, Boston, MA, 02111-1307.
16
17 */
18
19 /*$Id: gsfcmap.c,v 1.14.2.2.2.1 2003/01/17 00:49:02 giles Exp $ */
20 /* CMap character decoding */
21 #include "memory_.h"
22 #include "errors.h"
23 #include "gx.h"
24 #include "gserrors.h"
25 #include "gsstruct.h"
26 #include "gsutil.h" /* for gs_next_ids */
27 #include "gxfcmap.h"
28
29 /* GC descriptors */
30 public_st_cmap();
31 /* Because lookup ranges can be elements of arrays, */
32 /* their enum_ptrs procedure must never return 0 prematurely. */
33 private
34 ENUM_PTRS_WITH(code_lookup_range_enum_ptrs,
35 gx_code_lookup_range_t *pclr) return 0;
36 case 0:
37 if (pclr->value_type == CODE_VALUE_GLYPH) {
38 const byte *pv = pclr->values.data;
39 int k;
40
41 for (k = 0; k < pclr->num_keys; ++k) {
42 gs_glyph glyph = 0;
43 int i;
44
45 for (i = 0; i < pclr->value_size; ++i)
46 glyph = (glyph << 8) + *pv++;
47 pclr->cmap->mark_glyph(glyph, pclr->cmap->mark_glyph_data);
48 }
49 }
50 return ENUM_OBJ(pclr->cmap);
51 case 1: return ENUM_STRING(&pclr->keys);
52 case 2: return ENUM_STRING(&pclr->values);
53 ENUM_PTRS_END
54 private
55 RELOC_PTRS_WITH(code_lookup_range_reloc_ptrs, gx_code_lookup_range_t *pclr)
56 RELOC_VAR(pclr->cmap);
57 RELOC_STRING_VAR(pclr->keys);
58 RELOC_STRING_VAR(pclr->values);
59 RELOC_PTRS_END
60 public_st_code_lookup_range();
61 public_st_code_lookup_range_element();
62
63 /* ---------------- Procedures ---------------- */
64
65 /*
66 * Initialize a just-allocated CMap, to ensure that all pointers are clean
67 * for the GC.
68 */
69 void
gs_cmap_init(gs_cmap_t * pcmap)70 gs_cmap_init(gs_cmap_t *pcmap)
71 {
72 memset(pcmap, 0, sizeof(*pcmap));
73 pcmap->id = gs_next_ids(1);
74 uid_set_invalid(&pcmap->uid);
75 }
76
77 /*
78 * Create an Identity CMap.
79 */
80 int
gs_cmap_create_identity(gs_cmap_t ** ppcmap,int num_bytes,int wmode,gs_memory_t * mem)81 gs_cmap_create_identity(gs_cmap_t **ppcmap, int num_bytes, int wmode,
82 gs_memory_t *mem)
83 {
84 gs_cmap_t *pcmap =
85 gs_alloc_struct(mem, gs_cmap_t, &st_cmap,
86 "gs_cmap_create_identity(CMap)");
87 gx_code_space_range_t *range = (gx_code_space_range_t *)
88 gs_alloc_bytes(mem, sizeof(gx_code_space_range_t),
89 "gs_cmap_create_identity(code space range)");
90 gx_code_lookup_range_t *lookup =
91 gs_alloc_struct_array(mem, 1, gx_code_lookup_range_t,
92 &st_code_lookup_range,
93 "gs_cmap_create_identity(lookup range)");
94 /* We allocate CIDSystemInfo dynamically only for the sake of the GC. */
95 gs_cid_system_info_t *pcidsi =
96 gs_alloc_struct(mem, gs_cid_system_info_t, &st_cid_system_info,
97 "gs_cmap_create_identity(CIDSystemInfo)");
98 static const byte key_data[8] = {
99 0, 0, 0, 0, 0xff, 0xff, 0xff, 0xff
100 };
101 static const gs_cid_system_info_t identity_cidsi = {
102 { (const byte *)"Adobe", 5 },
103 { (const byte *)"Identity", 8 },
104 0
105 };
106
107 if (pcmap == 0 || range == 0 || lookup == 0 || pcidsi == 0)
108 return_error(gs_error_VMerror);
109 if (num_bytes != 2) /* for now */
110 return_error(gs_error_rangecheck);
111 gs_cmap_init(pcmap);
112 pcmap->CMapType = 1;
113 pcmap->CMapName.data = (const byte *)
114 (wmode ? "Identity-V" : "Identity-H");
115 pcmap->CMapName.size = 10;
116 *pcidsi = identity_cidsi;
117 pcmap->CIDSystemInfo = pcidsi;
118 pcmap->num_fonts = 1;
119 pcmap->CMapVersion = 1.0;
120 /* no uid, UIDOffset */
121 pcmap->WMode = wmode;
122 memset(range->first, 0, num_bytes);
123 memset(range->last, 0xff, num_bytes);
124 range->size = num_bytes;
125 pcmap->code_space.ranges = range;
126 pcmap->code_space.num_ranges = 1;
127 memset(lookup, 0, sizeof(*lookup));
128 lookup->cmap = pcmap;
129 lookup->key_size = num_bytes;
130 lookup->num_keys = 1;
131 lookup->key_is_range = true;
132 /*
133 * It's OK to break const here, because the strings are never
134 * freed, and the GC can handle strings outside the heap.
135 */
136 lookup->keys.data = (byte*) (key_data + 4 - num_bytes);
137 lookup->keys.size = num_bytes * 2;
138 lookup->value_type = CODE_VALUE_CID;
139 lookup->value_size = num_bytes;
140 /* ditto */
141 lookup->values.data = (byte*) key_data;
142 lookup->values.size = num_bytes;
143 pcmap->def.lookup = lookup;
144 pcmap->def.num_lookup = 1;
145 /* no notdef */
146 /* no mark_glyph, mark_glyph_data, glyph_name, glyph_name_data */
147 *ppcmap = pcmap;
148 return 0;
149 }
150
151 /*
152 * multi-dimensional range comparator
153 */
154
155 private void
print_msg_str_in_range(const byte * str,const byte * key_lo,const byte * key_hi,int key_size)156 print_msg_str_in_range(const byte *str,
157 const byte *key_lo, const byte *key_hi,
158 int key_size)
159 {
160 debug_print_string_hex(str, key_size);
161 dlprintf(" in ");
162 debug_print_string_hex(key_lo, key_size);
163 dlprintf(" - ");
164 debug_print_string_hex(key_hi, key_size);
165 dlprintf("\n");
166 }
167
168 private int
gs_cmap_get_shortest_chr(const gx_code_map_t * pcmap,uint * pfidx)169 gs_cmap_get_shortest_chr(const gx_code_map_t * pcmap, uint *pfidx)
170 {
171 int i;
172 int len_shortest = MAX_CMAP_CODE_SIZE;
173 uint fidx_shortest = 0; /* font index for this fallback */
174
175 for (i = pcmap->num_lookup - 1; i >= 0; --i) {
176 const gx_code_lookup_range_t *pclr = &pcmap->lookup[i];
177 if ((pclr->key_prefix_size + pclr->key_size) <= len_shortest) {
178 len_shortest = (pclr->key_prefix_size + pclr->key_size);
179 fidx_shortest = pclr->font_index;
180 }
181 }
182
183 *pfidx = fidx_shortest;
184 return len_shortest;
185 }
186
187 /*
188 * multi-dimensional relative position calculator
189 *
190 * Returns offset of the given CID, considering CID range
191 * as array of CIDs (the last index changes fastest).
192 */
193 private int
gs_multidim_CID_offset(const byte * key_str,const byte * key_lo,const byte * key_hi,int key_size)194 gs_multidim_CID_offset(const byte *key_str,
195 const byte *key_lo, const byte *key_hi,
196 int key_size)
197 {
198
199 int i; /* index for current dimension */
200 int CID_offset = 0;
201
202 if (gs_debug_c('J')) {
203 dlprintf("[J]gmCo() calc CID_offset for 0x");
204 print_msg_str_in_range(key_str, key_lo, key_hi, key_size);
205 }
206
207 for (i = 0; i < key_size; i++)
208 CID_offset = CID_offset * (key_hi[i] - key_lo[i] + 1) +
209 key_str[i] - key_lo[i];
210
211 if_debug1('J', "[J]gmCo() CID_offset = %d\n", CID_offset);
212 return CID_offset;
213 }
214
215 /* Get a big-endian integer. */
216 private uint
bytes2int(const byte * p,int n)217 bytes2int(const byte *p, int n)
218 {
219 uint v = 0;
220 int i;
221
222 for (i = 0; i < n; ++i)
223 v = (v << 8) + p[i];
224 return v;
225 }
226
227 /*
228 * Decode a character from a string using a code map, updating the index.
229 * Return 0 for a CID or name, N > 0 for a character code where N is the
230 * number of bytes in the code, or an error. Shift the decoded bytes into
231 * *pchr. For undefined characters, set *pglyph = gs_no_glyph and return 0.
232 */
233 private int
code_map_decode_next(const gx_code_map_t * pcmap,const gs_const_string * pstr,uint * pindex,uint * pfidx,gs_char * pchr,gs_glyph * pglyph)234 code_map_decode_next(const gx_code_map_t * pcmap, const gs_const_string * pstr,
235 uint * pindex, uint * pfidx,
236 gs_char * pchr, gs_glyph * pglyph)
237 {
238 const byte *str = pstr->data + *pindex;
239 uint ssize = pstr->size - *pindex;
240 /*
241 * The keys are not sorted due to 'usecmap'. Possible optimization :
242 * merge and sort keys in 'zbuildcmap', then use binary search here.
243 * This would be valuable for UniJIS-UTF8-H, which contains about 7000
244 * keys.
245 */
246 int i;
247
248 for (i = pcmap->num_lookup - 1; i >= 0; --i) { /* reverse scan order due to 'usecmap' */
249 const gx_code_lookup_range_t *pclr = &pcmap->lookup[i];
250 int pre_size = pclr->key_prefix_size, key_size = pclr->key_size,
251 chr_size = pre_size + key_size;
252
253 if (ssize < chr_size)
254 continue;
255 if (memcmp(str, pclr->key_prefix, pre_size))
256 continue;
257 /* Search the lookup range. We could use binary search. */
258 {
259 const byte *key = pclr->keys.data;
260 int step = key_size;
261 int k;
262 const byte *pvalue;
263
264 if (pclr->key_is_range) {
265 step <<= 1;
266 for (k = 0; k < pclr->num_keys; ++k, key += step)
267 if (memcmp(str + pre_size, key, key_size) >= 0 &&
268 memcmp(str + pre_size, key + key_size, key_size) <= 0)
269 break;
270 } else {
271 for (k = 0; k < pclr->num_keys; ++k, key += step)
272 if (!memcmp(str + pre_size, key, key_size))
273 break;
274 }
275 if (k == pclr->num_keys)
276 continue;
277 /* We have a match. Return the result. */
278 *pchr = (*pchr << (chr_size * 8)) + bytes2int(str, chr_size);
279 *pindex += chr_size;
280 *pfidx = pclr->font_index;
281 pvalue = pclr->values.data + k * pclr->value_size;
282 switch (pclr->value_type) {
283 case CODE_VALUE_CID:
284 *pglyph = gs_min_cid_glyph +
285 bytes2int(pvalue, pclr->value_size) +
286 bytes2int(str + pre_size, key_size) -
287 bytes2int(key, key_size);
288 return 0;
289 case CODE_VALUE_GLYPH:
290 *pglyph = bytes2int(pvalue, pclr->value_size);
291 return 0;
292 case CODE_VALUE_CHARS:
293 *pglyph =
294 bytes2int(pvalue, pclr->value_size) +
295 bytes2int(str + pre_size, key_size) -
296 bytes2int(key, key_size);
297 return pclr->value_size;
298 default: /* shouldn't happen */
299 return_error(gs_error_rangecheck);
300 }
301 }
302 }
303 /* No mapping. */
304 *pglyph = gs_no_glyph;
305 return 0;
306 }
307
308 private int
code_map_decode_next_multidim_regime(const gx_code_map_t * pcmap,const gs_const_string * pstr,uint * pindex,uint * pfidx,gs_char * pchr,gs_glyph * pglyph)309 code_map_decode_next_multidim_regime(const gx_code_map_t * pcmap,
310 const gs_const_string * pstr,
311 uint * pindex, uint * pfidx,
312 gs_char * pchr, gs_glyph * pglyph)
313 {
314 const byte *str = pstr->data + *pindex;
315 uint ssize = pstr->size - *pindex;
316 /*
317 * The keys are not sorted due to 'usecmap'. Possible optimization :
318 * merge and sort keys in 'zbuildcmap', then use binary search here.
319 * This would be valuable for UniJIS-UTF8-H, which contains about 7000
320 * keys.
321 */
322 int i;
323
324 /*
325 * In the fallback of CMap decoding procedure, there is "partial matching".
326 * For detail, refer PostScript Ref. Manual v3 at the end of Fonts chapter.
327 */
328
329 /* "pm" stands for partial match (not pointer), temporal use. */
330 int pm_maxlen = 0; /* partial match: max length */
331 int pm_index = *pindex; /* partial match: ptr index (in str) */
332 uint pm_fidx = *pfidx; /* partial match: ptr font index */
333 gs_char pm_chr = *pchr; /* partial match: ptr character */
334
335 *pchr = '\0';
336
337 if (gs_debug_c('J')) {
338 dlprintf("[J]CMDNmr() is called: str=(");
339 debug_print_string_hex(str, ssize);
340 dlprintf3(") @ 0x%lx ssize=%d, %d ranges to check\n",
341 str, ssize, pcmap->num_lookup);
342 }
343
344 for (i = pcmap->num_lookup - 1; i >= 0; --i) {
345 /* main loop - scan the map passed via pcmap */
346 /* reverse scan order due to 'usecmap' */
347
348 const gx_code_lookup_range_t *pclr = &pcmap->lookup[i];
349 int pre_size = pclr->key_prefix_size, key_size = pclr->key_size,
350 chr_size = pre_size + key_size;
351
352 int j = 0;
353 /* length of the given byte stream is shorter than
354 * chr-length of current range, no need for further check,
355 * skip to the next range.
356 */
357 if (ssize < chr_size)
358 continue;
359
360 if (0 < pre_size) {
361 const byte * prefix = pclr->key_prefix;
362 /* check partial match in prefix */
363 for (j = 0; j < pre_size; j++)
364 if (prefix[j] != str[j])
365 break;
366
367 if (0 == j) /* no match, skip to next i */
368 continue;
369 else if (j < pre_size) { /* not exact, partial match */
370 if (gs_debug_c('J')) {
371 dlprintf("[J]CMDNmr() partial match with prefix:");
372 print_msg_str_in_range(str, prefix,
373 prefix, pre_size);
374 }
375
376 if (pm_maxlen < j) {
377 pm_maxlen = chr_size;
378 pm_chr = bytes2int(str, chr_size);
379 pm_index = (*pindex) + chr_size;
380 pm_fidx = pclr->font_index;
381 }
382 continue ; /* no need to check key, skip to next i */
383 }
384
385 if (gs_debug_c('J')) {
386 dlprintf("[J]CMDNmr() full match with prefix:");
387 print_msg_str_in_range(str, prefix, prefix, pre_size);
388 }
389
390 } /* if (0 < pre_size) */
391
392 /* full match in prefix. check key */
393 {
394 const byte *key = pclr->keys.data;
395 int step = key_size;
396 int k, l, m;
397 const byte *pvalue = NULL;
398
399 /* when range is "range", 2 keys for lo-end and hi-end
400 * are stacked. So twice the step. current "key" points
401 * lo-end of current range, and the pointer for hi-end
402 * is calculated by (key + step - key_size).
403 */
404
405 if (pclr->key_is_range)
406 step <<=1; /* step = step * 2; */
407
408 for (k = 0; k < pclr->num_keys; ++k, key += step) {
409
410 if_debug0('j', "[j]CMDNmr() check key:");
411 if (gs_debug_c('j'))
412 print_msg_str_in_range(str + pre_size,
413 key, key + step - key_size, key_size) ;
414
415 for (l = 0; l < key_size; l++) {
416 byte c = str[l + pre_size];
417 if (c < key[l] || c > key[step - key_size + l])
418 break;
419 }
420
421 if (pm_maxlen < pre_size + l) {
422 pm_maxlen = chr_size;
423 pm_chr = bytes2int(str, chr_size);
424 pm_index = (*pindex) + chr_size;
425 pm_fidx = pclr->font_index;
426 }
427 if (l == key_size)
428 break;
429 }
430
431 /* all keys are tried, but found no match. */
432 /* go to next prefix. */
433 if (k == pclr->num_keys)
434 continue;
435
436 /* We have a match. Return the result. */
437 *pchr = bytes2int(str, chr_size);
438 *pindex += chr_size;
439 *pfidx = pclr->font_index;
440 pvalue = pclr->values.data + k * pclr->value_size;
441
442 if (gs_debug_c('J')) {
443 dlprintf("[J]CMDNmr() full matched pvalue=(");
444 debug_print_string_hex(pvalue, pclr->value_size);
445 dlprintf(")\n");
446 }
447
448 switch (pclr->value_type) {
449 case CODE_VALUE_CID:
450 *pglyph = gs_min_cid_glyph +
451 bytes2int(pvalue, pclr->value_size) +
452 gs_multidim_CID_offset(str + pre_size,
453 key, key + step - key_size, key_size);
454 return 0;
455 case CODE_VALUE_NOTDEF:
456 *pglyph = gs_min_cid_glyph +
457 bytes2int(pvalue, pclr->value_size);
458 return 0;
459 case CODE_VALUE_GLYPH:
460 *pglyph = bytes2int(pvalue, pclr->value_size);
461 return 0;
462 case CODE_VALUE_CHARS:
463 *pglyph =
464 bytes2int(pvalue, pclr->value_size) +
465 bytes2int(str + pre_size, key_size) -
466 bytes2int(key, key_size);
467 return pclr->value_size;
468 default: /* shouldn't happen */
469 return_error(gs_error_rangecheck);
470 }
471 }
472 }
473 /* No mapping. */
474 *pchr = pm_chr;
475 *pindex = pm_index;
476 *pfidx = pm_fidx;
477 *pglyph = gs_no_glyph;
478 if (gs_debug_c('J')) {
479 dlprintf("[J]CMDNmr() no full match, use partial match for (");
480 debug_print_string_hex(str, pm_maxlen);
481 dlprintf(")\n");
482 }
483 return 0;
484 }
485
486 /*
487 * Decode a character from a string using a CMap.
488 * Return like code_map_decode_next.
489 * At present, the range specification by (begin|end)codespacerange
490 * is not used in this function. Therefore, this function accepts
491 * some invalid CMap which def & undef maps exceed the codespacerange.
492 * It should be checked in this function, or some procedure in gs_cmap.ps.
493 */
494 int
gs_cmap_decode_next(const gs_cmap_t * pcmap,const gs_const_string * pstr,uint * pindex,uint * pfidx,gs_char * pchr,gs_glyph * pglyph)495 gs_cmap_decode_next(const gs_cmap_t * pcmap, const gs_const_string * pstr,
496 uint * pindex, uint * pfidx,
497 gs_char * pchr, gs_glyph * pglyph)
498 {
499 uint save_index = *pindex;
500 int code;
501
502 uint pm_index;
503 uint pm_fidx;
504 gs_char pm_chr;
505
506 /* For first, check defined map */
507 if_debug0('J', "[J]GCDN() check def CMap\n");
508 code =
509 code_map_decode_next_multidim_regime(&pcmap->def, pstr, pindex, pfidx, pchr, pglyph);
510
511 /* This is defined character */
512 if (code != 0 || *pglyph != gs_no_glyph)
513 return code;
514
515 /* In here, this is NOT defined character */
516 /* save partially matched results */
517 pm_index = *pindex;
518 pm_fidx = *pfidx;
519 pm_chr = *pchr;
520
521 /* check notdef map. */
522 if_debug0('J', "[J]GCDN() check notdef CMap\n");
523 *pindex = save_index;
524 code =
525 code_map_decode_next_multidim_regime(&pcmap->notdef, pstr, pindex, pfidx, pchr, pglyph);
526
527 /* This is defined "notdef" character. */
528 if (code != 0 || *pglyph != gs_no_glyph)
529 return code;
530
531 /*
532 * This is undefined in def & undef maps,
533 * use partially matched result with default notdef (CID = 0).
534 */
535 if (save_index < pm_index) {
536
537 /* there was some partially matched */
538
539 *pglyph = gs_min_cid_glyph; /* CID = 0 */
540 *pindex = pm_index;
541 *pfidx = pm_fidx;
542 *pchr = '\0';
543 return 0; /* should return some error for partial matched .notdef? */
544 }
545 else {
546 /* no match */
547
548 /* Even partial match is failed.
549 * Getting the shortest length from defined characters,
550 * and take the leading bytes (with same length of the shortest
551 * defined chr) as an unidentified character: CID = 0.
552 * Also this procedure is specified in PS Ref. Manual v3,
553 * at the end of Fonts chapter.
554 */
555
556 const byte *str = pstr->data + save_index;
557 uint ssize = pstr->size - save_index;
558 int chr_size_shortest =
559 gs_cmap_get_shortest_chr(&pcmap->def, pfidx);
560
561 if (chr_size_shortest <= ssize) {
562 *pglyph = gs_min_cid_glyph; /* CID = 0, this is CMap fallback */
563 *pindex = save_index + chr_size_shortest;
564 *pchr = '\0';
565 if (gs_debug_c('J')) {
566 dlprintf1("[J]GCDN() no partial match, skip %d byte (",
567 chr_size_shortest);
568 debug_print_string_hex(str, chr_size_shortest);
569 dlprintf(")\n");
570 }
571 return 0; /* should return some error for fallback .notdef? */
572 }
573 else {
574 /* Undecodable string is shorter than the shortest character,
575 * there's no way except to return error.
576 */
577 *pglyph = gs_no_glyph;
578 return -1;
579 if (gs_debug_c('J')) {
580 dlprintf2("[J]GCDN() left data in buffer (%d) is shorter than shortest defined character (%d)\n",
581 ssize, chr_size_shortest);
582 }
583 *pglyph = gs_no_glyph;
584 return_error(e_rangecheck);
585 }
586 }
587 }
588