1 /*
2 * Copyright 1998, Gisle Aas.
3 *
4 * This library is free software; you can redistribute it and/or
5 * modify it under the same terms as Perl itself.
6 */
7
8 #include "map8.h"
9
10 #include <memory.h>
11 #include <stdlib.h>
12
13 static U16* nochar_map = 0;
14 static int num_maps = 0;
15
16
17
18 Map8*
map8_new()19 map8_new()
20 {
21 Map8* m;
22 int i;
23 m = (Map8*)malloc(sizeof(Map8));
24 if (!m) abort(); /* out of memory */
25
26 if (!nochar_map) {
27 /* initialize the shared array for second level u16 mapping */
28 nochar_map = (U16*)malloc(sizeof(U16)*256);
29 if (!nochar_map) abort(); /* out of memory */
30 for (i = 0; i < 256; i++)
31 nochar_map[i] = NOCHAR;
32 }
33
34 for (i = 0; i < 256; i++) {
35 m->to_16[i] = NOCHAR;
36 m->to_8[i] = nochar_map;
37 }
38
39 m->def_to8 = NOCHAR;
40 m->def_to16 = NOCHAR;
41 m->cb_to8 = 0;
42 m->cb_to16 = 0;
43 m->obj = 0;
44
45 num_maps++;
46 /* fprintf(stderr, "New %p (%d created)\n", m, num_maps); */
47 return m;
48 }
49
50
51
52 void
map8_addpair(Map8 * m,U8 u8,U16 u16)53 map8_addpair(Map8* m, U8 u8, U16 u16)
54 {
55 U8 hi = u16 >> 8;
56 U8 lo = u16 & 0xFF;
57 U16* himap = m->to_8[hi];
58 if (himap == nochar_map) {
59 int i;
60 U16* map = (U16*)malloc(sizeof(U16)*256);
61 if (!map) abort(); /* out of memory */
62 for (i = 0; i < 256; i++) {
63 map[i] = NOCHAR;
64 }
65 map[lo] = u8;
66 m->to_8[hi] = map;
67 } else if (himap[lo] == NOCHAR)
68 himap[lo] = u8;
69 if (m->to_16[u8] == NOCHAR)
70 m->to_16[u8] = htons(u16);
71 }
72
73
74
75 void
map8_nostrict(Map8 * m)76 map8_nostrict(Map8* m)
77 {
78 int i;
79 if (!m) return;
80 for (i = 0; i < 256; i++) {
81 if (map8_to_char8(m, i) != NOCHAR)
82 continue;
83 if (map8_to_char16(m, i) != NOCHAR)
84 continue;
85 map8_addpair(m, i, i);
86 }
87 }
88
89
90 static char*
my_fgets(char * buf,int len,PerlIO * f)91 my_fgets(char* buf, int len, PerlIO* f)
92 {
93 int pos = 0;
94 int ch;
95 while (1) {
96 ch = PerlIO_getc(f);
97 if (ch == EOF)
98 break;
99 if (pos < len - 1)
100 buf[pos++] = ch;
101 if (ch == '\n')
102 break;
103 }
104 buf[pos] = '\0';
105 return pos ? buf : 0;
106 }
107
108
109 Map8*
map8_new_txtfile(const char * file)110 map8_new_txtfile(const char *file)
111 {
112 dTHX;
113 Map8* m;
114 int count = 0;
115 PerlIO* f;
116 char buf[512];
117
118 f = PerlIO_open(file, "r");
119 if (!f)
120 return 0;
121
122 m = map8_new();
123
124 while (my_fgets(buf, sizeof(buf), f)) {
125 char *c1 = buf;
126 char *c2;
127 long from;
128 long to;
129
130 from = strtol(buf, &c1, 0);
131 if (buf == c1 || from < 0 || from > 255)
132 continue; /* not a valid number */
133
134 to = strtol(c1, &c2, 0);
135 if (c1 == c2 || to < 0 || to > 0xFFFF)
136 continue; /* not a valid second number */
137
138 if (0 && from == to)
139 continue;
140
141 map8_addpair(m, from, to);
142 count++;
143 }
144 PerlIO_close(f);
145
146 if (!count) /* no mappings found */ {
147 map8_free(m);
148 return 0;
149 }
150
151 return m;
152 }
153
154
155
156 Map8*
map8_new_binfile(const char * file)157 map8_new_binfile(const char *file)
158 {
159 dTHX;
160 Map8* m;
161 int count = 0;
162 int n;
163 int i;
164 PerlIO* f;
165 struct map8_filerec pair[256];
166
167 f = PerlIO_open(file, "rb");
168 if (!f)
169 return 0;
170
171 if (PerlIO_read(f, pair, sizeof(pair[0])) != sizeof(pair[0]) ||
172 pair[0].u8 != htons(MAP8_BINFILE_MAGIC_HI) ||
173 pair[0].u16 != htons(MAP8_BINFILE_MAGIC_LO))
174 {
175 /* fprintf(stderr, "Bad magic\n"); */
176 PerlIO_close(f);
177 return 0;
178 }
179
180 m = map8_new();
181
182 while ( (n = PerlIO_read(f, pair, sizeof(pair))) > 0)
183 {
184 n /= sizeof(pair[0]);
185 for (i = 0; i < n; i++) {
186 U16 u8 = ntohs(pair[i].u8);
187 U16 u16 = ntohs(pair[i].u16);
188 if (u8 > 255) continue;
189 count++;
190 map8_addpair(m, (U8)u8, u16);
191 }
192 }
193 PerlIO_close(f);
194
195 if (!count) /* no mappings found */ {
196 map8_free(m);
197 return 0;
198 }
199
200 return m;
201 }
202
203
204
205 void
map8_free(Map8 * m)206 map8_free(Map8* m)
207 {
208 int i;
209 if (!m) return;
210 for (i = 0; i < 256; i++) {
211 if (m->to_8[i] != nochar_map)
212 free(m->to_8[i]);
213 }
214 free(m);
215 if (--num_maps == 0) {
216 free(nochar_map);
217 nochar_map = 0;
218 }
219 /* fprintf(stderr, "Freeing %p (%d left)\n", m, num_maps); */
220 }
221
222
223 #ifndef PERL
224
/*
 * Translate an 8-bit string to 16-bit characters using map m.
 *
 * str8   input; 0 makes the function return 0.
 * str16  output buffer, or 0 to have one of len+1 elements malloc()ed
 *        (abort() on out of memory).
 * len    number of input bytes, or negative to use strlen(str8).
 * rlen   if non-NULL, receives the number of U16 values written (not
 *        counting the terminator).
 *
 * For each input byte the value from map8_to_char16() is used; if that
 * is NOCHAR the map's def_to16 is used; if that is NOCHAR too, the
 * cb_to16 callback (when set) is asked.  Only single-character callback
 * results are stored; bytes that end up without a translation are
 * dropped.  The output is terminated with a 0x0000 element.
 *
 * Returns the output buffer.
 */
U16*
map8_to_str16(Map8* m, U8* str8, U16* str16, int len, int* rlen)
{
  U16* out;
  if (str8 == 0)
    return 0;
  if (len < 0)
    len = strlen(str8);
  if (str16 == 0) {
    str16 = (U16*)malloc(sizeof(U16)*(len+1));
    if (!str16) abort();
  }

  for (out = str16; len--; str8++) {
    U16 c = map8_to_char16(m, *str8);
    if (c != NOCHAR) {
      *out++ = c;
      continue;
    }
    if (m->def_to16 != NOCHAR) {
      *out++ = m->def_to16;
      continue;
    }
    if (m->cb_to16) {
      STRLEN cb_len;
      U16* cb_buf = (m->cb_to16)(*str8, m, &cb_len);
      if (cb_buf && cb_len > 0) {
        if (cb_len == 1)
          *out++ = *cb_buf;
        else
          fprintf(stderr, "one-to-many mapping not implemented yet\n");
      }
    }
  }

  *out = 0x0000;  /* NUL16 terminate */
  if (rlen)
    *rlen = out - str16;
  return str16;
}
263
264
265
266
/*
 * Translate a 16-bit string to 8-bit characters using map m.
 *
 * str16  input; each element is passed through ntohs() before lookup.
 *        0 makes the function return 0.
 * str8   output buffer, or 0 to have one of len+1 bytes malloc()ed
 *        (abort() on out of memory).
 * len    number of input elements, or negative to count the elements
 *        of str16 up to (not including) its 0x0000 terminator.
 * rlen   if non-NULL, receives the number of bytes written (not
 *        counting the terminator).
 *
 * For each input character the value from map8_to_char8() is used; if
 * there is none the map's def_to8 is used; if that is NOCHAR too, the
 * cb_to8 callback (when set) is asked.  Only single-byte callback
 * results are stored; characters that end up without a translation are
 * dropped.  The output is NUL terminated.
 *
 * Returns the output buffer.
 */
U8*
map8_to_str8(Map8* m, U16* str16, U8* str8, int len, int* rlen)
{
  U8* tmp8;
  if (str16 == 0)
    return 0;
  if (len < 0) {
    /* Measure the INPUT string.  The length must not be taken from
       str8: that is the output buffer and may legitimately be NULL. */
    const U16* end = str16;
    while (*end)
      end++;
    len = (int)(end - str16);
  }
  if (str8 == 0) {
    str8 = (U8*)malloc(sizeof(U8)*(len+1));
    if (!str8) abort();
  }
  tmp8 = str8;
  while (len--) {
    U16 c = map8_to_char8(m, ntohs(*str16));
    if (c != NOCHAR && c <= 0xFF) {
      *tmp8++ = (U8)c;
    } else if (m->def_to8 != NOCHAR) {
      *tmp8++ = (U8)m->def_to8;
    } else if (m->cb_to8) {
      U8* cb_buf;
      STRLEN cb_len;
      cb_buf = (m->cb_to8)(ntohs(*str16), m, &cb_len);
      if (cb_buf && cb_len > 0) {
        if (cb_len == 1) {
          *tmp8++ = *cb_buf;
        } else {
          fprintf(stderr, "one-to-many mapping not implemented yet\n");
        }
      }
    }
    str16++;
  }
  *tmp8 = '\0';  /* NUL terminate */
  if (rlen) {
    *rlen = tmp8 - str8;
  }
  return str8;
}
306
307 #endif /* !PERL */
308
309
/*
 * Recode an 8-bit string from the encoding described by m1 to the
 * encoding described by m2, going through the 16-bit representation.
 *
 * from  input; 0 makes the function return 0.
 * to    output buffer, or 0 to have one of len+1 bytes malloc()ed
 *       (abort() on out of memory).
 * len   number of input bytes, or negative to use strlen(from).
 * rlen  if non-NULL, receives the number of bytes written (not
 *       counting the terminator).
 *
 * Step 1 (8 -> 16, via m1): table lookup, then m1->def_to16, then the
 * m1->cb_to16 callback (single-character results only; the callback
 * value is passed through htons()).
 * Step 2 (16 -> 8, via m2): table lookup, then m2->def_to8, then the
 * m2->cb_to8 callback (single-byte results only).
 * Input bytes for which either step fails are dropped.  A one-to-many
 * result from cb_to16 triggers a warning on stderr, once per call.
 *
 * Returns the NUL-terminated output buffer.
 */
U8*
map8_recode8(Map8* m1, Map8* m2, U8* from, U8* to, int len, int* rlen)
{
  dTHX;
  U8* tmp;
  U16 uc;
  U16 u8;  /* need U16 to represent NOCHAR */
  int didwarn = 0;

  if (from == 0)
    return 0;
  if (len < 0) {
    len = strlen(from);
  }
  if (to == 0) {
    to = (U8*)malloc(sizeof(U8)*(len+1));
    if (!to) abort();
  }

  tmp = to;
  while (len--) {
    /* First translate to common Unicode representation */
    uc = map8_to_char16(m1, *from);

    if (uc != NOCHAR)
      goto got_16;

    if (m1->def_to16 != NOCHAR) {
      uc = m1->def_to16;
      goto got_16;
    }

    if (m1->cb_to16) {
      U16 *cb_buf;
      /* start at 0 so the "> 1" test below never reads an
         indeterminate value if the callback leaves it untouched */
      STRLEN cb_len = 0;
      cb_buf = (m1->cb_to16)(*from, m1, &cb_len);
      if (cb_buf && cb_len == 1) {
        uc = htons(*cb_buf);
        goto got_16;
      }

      if (cb_len > 1 && !didwarn++)
        PerlIO_printf(PerlIO_stderr(), "one-to-many mapping not implemented yet\n");
    }

    /* Never managed to find a mapping to Unicode, skip it */
    from++;
    continue;

  got_16:
    from++;  /* 'uc' char translated now */

    /* Then map 'uc' back to the second 8-bit encoding */
    u8 = map8_to_char8(m2, ntohs(uc));
    if (u8 == NOCHAR || u8 > 0xFF) {
      if (m2->def_to8 != NOCHAR)
        u8 = m2->def_to8;
      else if (m2->cb_to8) {
        U8* cb_buf;
        STRLEN cb_len = 0;
        cb_buf = (m2->cb_to8)(ntohs(uc), m2, &cb_len);
        if (!cb_buf || cb_len != 1)
          continue;  /* no mapping exists for this char */
        /* use the byte the callback produced; without this the
           truncated NOCHAR/out-of-range value would be written */
        u8 = *cb_buf;
      }
      else
        continue;
    }

    *tmp++ = (U8)u8;
  }

  *tmp = '\0';  /* NUL terminate */
  if (rlen) {
    *rlen = tmp - to;
  }
  return to;
}
386
387
/*
 * Return non-zero when the given high-byte block of the 16-bit -> 8-bit
 * table still points at the shared all-NOCHAR block, i.e. no pair with
 * that high byte has been added to m.
 */
int
map8_empty_block(Map8* m, U8 block)
{
  U16* row = m->to_8[block];
  if (row == nochar_map)
    return 1;
  return 0;
}
392
393
394 #ifdef MAP8_DEBUGGING
395
396 void
map8_print(Map8 * m)397 map8_print(Map8* m)
398 {
399 map8_fprint(m, stdout);
400 }
401
402 void
map8_fprint(Map8 * m,FILE * f)403 map8_fprint(Map8* m, FILE* f)
404 {
405 int i, j;
406 long size = 0;
407 int num_ident = 0;
408 int num_nomap = 0;
409
410 if (!m) {
411 fprintf(f, "NULL mapping\n");
412 return;
413 }
414 size += sizeof(Map8);
415
416 fprintf(f, "MAP8 %p\n", m);
417 fprintf(f, " U8-U16\n");
418 for (i = 0; i < 256; i++) {
419 U16 u = m->to_16[i];
420 if (i == u) {
421 num_ident++;
422 } else if (u == NOCHAR) {
423 num_nomap++;
424 } else {
425 fprintf(f, " %02x U+%04x (%d --> %d)\n", i, u, i, u);
426 }
427 }
428 if (num_ident)
429 fprintf(f, " +%d identity mappings\n", num_ident);
430 if (num_nomap) {
431 fprintf(f, " +%d nochar mappings", num_nomap);
432 if (m->nomap8)
433 fprintf(f, " (mapping func %p)", m->nomap8);
434 fprintf(f, "\n");
435 }
436
437 for (i = 0; i < 256; i++) {
438 num_ident = 0;
439 num_nomap = 0;
440 if (m->to_8[i] == 0) {
441 fprintf(f, " U16-U8: block %d NULL (should not happen)\n", i);
442 } else if (m->to_8[i] != nochar_map) {
443 size += sizeof(U16)*256;
444 fprintf(f, " U16-U8: block %d %p\n", i, m->to_8[i]);
445 for (j = 0; j < 256; j++) {
446 int from = i*256+j;
447 int to = m->to_8[i][j];
448 if (from == to) {
449 num_ident++;
450 } else if (to == NOCHAR) {
451 num_nomap++;
452 /* fprintf(f, " NOMAP %d\n", from); */
453 } else {
454 fprintf(f, " U+%04x %02x (%d --> %d)\n", from, to, from, to);
455 }
456 }
457 if (num_ident)
458 fprintf(f, " +%d identity mappings\n", num_ident);
459 if (num_nomap)
460 fprintf(f, " +%d nochar mappings\n", num_nomap);
461 }
462 }
463 if (m->nomap16)
464 fprintf(f, " U16-U8: nochar mapping func %p\n", m->nomap16);
465 fprintf(f, " (%d bytes allocated)\n", size);
466 }
467 #endif
468