1 /* Part of XPCE --- The SWI-Prolog GUI toolkit
2
3 Author: Jan Wielemaker and Anjo Anjewierden
4 E-mail: jan@swi.psy.uva.nl
5 WWW: http://www.swi.psy.uva.nl/projects/xpce/
6 Copyright (c) 2005-2013, University of Amsterdam
7 All rights reserved.
8
9 Redistribution and use in source and binary forms, with or without
10 modification, are permitted provided that the following conditions
11 are met:
12
13 1. Redistributions of source code must retain the above copyright
14 notice, this list of conditions and the following disclaimer.
15
16 2. Redistributions in binary form must reproduce the above copyright
17 notice, this list of conditions and the following disclaimer in
18 the documentation and/or other materials provided with the
19 distribution.
20
21 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24 FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
25 COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26 INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
27 BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
29 CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30 LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
31 ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
32 POSSIBILITY OF SUCH DAMAGE.
33 */
34
35 #include <h/kernel.h>
36 #include <h/utf8.h>
37
/* utf8_get_char() variant for unsigned char pointers: reads one character
   from `s`, stores its code in `*chr` and yields the position following
   it as `unsigned char *`.  The expansion is fully parenthesized so the
   cast applies to the call result even when the invocation is followed
   by a postfix operator.
*/
#define utf8_get_uchar(s, chr) \
	((unsigned char *)utf8_get_char((char *)(s), (chr)))

/* Fallback for systems whose headers do not provide MB_LEN_MAX */
#ifndef MB_LEN_MAX
#define MB_LEN_MAX 6
#endif
43
/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
These functions translate a CharArray (PceString, Name) into a format
suitable for operating-system or window-system calls, such as accessing
filenames, window titles, etc.

Neither UTF-8 nor locale-defined multibyte strings are designed to deal
with embedded 0-bytes, and APIs generally accept 0-terminated strings.
Only for wide-character arrays do we work with explicit sizes.

	* MB
	CTYPE Locale defined translation

	* UTF-8
	Well known UTF-8 encoding of UNICODE

	* WC
	wchar_t representation of UNICODE

The strings returned by this library are stored in a ring of RING_SIZE
buffers.  A buffer is handed out again, and its contents overwritten,
after RING_SIZE further ring allocations.
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
65
66 /*******************************
67 * RING *
68 *******************************/
69
/* Number of buffers that are cycled through for returned strings */
#define RING_SIZE 16

/* One buffer of the ring.  `bufp` is the write position inside `data`
   and `limitp` points just behind the `allocated` bytes of `data`.
*/
typedef struct rcell rcell;

struct rcell
{ char   *data;				/* start of the buffer */
  char   *bufp;				/* current write position */
  char   *limitp;			/* one past the last byte */
  size_t  allocated;			/* size of data in bytes */
};

/* All cells start zeroed: `allocated == 0` marks a cell without buffer */
static rcell ring[RING_SIZE] = { { NULL, NULL, NULL, 0 } };
static int   ring_index = 0;		/* cell to hand out next */
81
82 static rcell *
find_ring()83 find_ring()
84 { rcell *c = &ring[ring_index++];
85
86 if ( ring_index == RING_SIZE )
87 ring_index = 0;
88
89 if ( c->allocated == 0 )
90 { c->allocated = 256;
91 c->data = pceMalloc(c->allocated);
92 } else if ( c->allocated >= 4096 )
93 { c->allocated = 256;
94 pceFree(c->data);
95 c->data = pceMalloc(c->allocated);
96 }
97 c->bufp = c->data;
98 c->limitp = &c->data[c->allocated];
99
100 return c;
101 }
102
103
104 static void
roomBuffer(rcell * c,size_t room)105 roomBuffer(rcell *c, size_t room)
106 { while ( c->bufp + room > c->limitp )
107 { size_t size = c->bufp - c->data;
108
109 c->allocated *= 2;
110 c->data = pceRealloc(c->data, c->allocated);
111 c->limitp = &c->data[c->allocated];
112 c->bufp = &c->data[size];
113 }
114 }
115
116
117 static void
addByte(rcell * c,int byte)118 addByte(rcell *c, int byte)
119 { roomBuffer(c, 1);
120
121 *c->bufp++ = byte;
122 }
123
124
125 /*******************************
126 * CHARARRAY --> *
127 *******************************/
128
/* Read-only views on 8-bit and wide character text */
typedef unsigned char const cuchar;
typedef wchar_t const       cwchar;
131
132 static char *
stringToUTF8(PceString str)133 stringToUTF8(PceString str)
134 { rcell *out;
135
136 if ( isstrA(str) )
137 { cuchar *s = (cuchar*) str->s_textA;
138 cuchar *e = &s[str->s_size];
139
140 for( ; s<e; s++ ) /* do we need conversion */
141 { if ( *s & 0x80 )
142 break;
143 }
144 if ( s == e )
145 return (char *)str->s_textA; /* no */
146
147 out = find_ring();
148 for(s = (cuchar*) str->s_textA; s<e; s++ )
149 { roomBuffer(out, 2); /* max bytes per UTF-8 < 256 */
150
151 out->bufp = utf8_put_char(out->bufp, *s);
152 }
153 } else
154 { cwchar *s = str->s_textW;
155 cwchar *e = &s[str->s_size];
156
157 out = find_ring();
158 for( ; s<e; s++ )
159 { roomBuffer(out, 6); /* max bytes per UTF-8 */
160
161 out->bufp = utf8_put_char(out->bufp, *s);
162 }
163 }
164
165 addByte(out, 0);
166
167 return out->data;
168 }
169
170
171 static char *
stringToMB(PceString str)172 stringToMB(PceString str)
173 { rcell *out;
174 mbstate_t mbs;
175 char b[MB_LEN_MAX];
176 size_t rc;
177
178 memset(&mbs, 0, sizeof(mbs));
179
180 if ( isstrA(str) )
181 { cuchar *s = (cuchar*) str->s_textA;
182 cuchar *e = &s[str->s_size];
183
184 for( ; s<e; s++ ) /* do we need conversion? */
185 { if ( (rc=wcrtomb(b, *s, &mbs)) == 1 && b[0] == *s )
186 continue;
187 if ( rc == (size_t)-1 )
188 return NULL; /* cannot convert */
189 }
190 if ( s == e )
191 return (char *)str->s_textA; /* no */
192
193 memset(&mbs, 0, sizeof(mbs));
194 out = find_ring();
195 for( ; s <= e; s++ ) /* <=: also 0-byte! */
196 { roomBuffer(out, MB_LEN_MAX);
197
198 if ( (rc=wcrtomb(out->bufp, *s, &mbs)) == (size_t)-1 )
199 return NULL;
200 out->bufp += rc;
201 }
202 } else
203 { cwchar *s = str->s_textW;
204 cwchar *e = &s[str->s_size];
205
206 out = find_ring();
207 for( ; s<e; s++ )
208 { roomBuffer(out, MB_LEN_MAX);
209
210 if ( (rc=wcrtomb(out->bufp, *s, &mbs)) == (size_t)-1 )
211 return NULL;
212 out->bufp += rc;
213 }
214 }
215
216 roomBuffer(out, MB_LEN_MAX+1); /* add restore state + 0-byte */
217 if ( wcrtomb(out->bufp, 0, &mbs) == (size_t)-1 )
218 return NULL;
219
220 return out->data;
221 }
222
223
224 wchar_t *
charArrayToWC(CharArray ca,size_t * len)225 charArrayToWC(CharArray ca, size_t *len)
226 { PceString str = &ca->data;
227
228 if ( len )
229 *len = str->s_size;
230
231 if ( isstrA(str) )
232 { rcell *out = find_ring();
233 cuchar *s = (cuchar*) str->s_textA;
234 cuchar *e = &s[str->s_size];
235 wchar_t *o;
236
237 roomBuffer(out, (str->s_size+1)*sizeof(wchar_t));
238
239 for(o=(wchar_t*)out->data ; s<e; )
240 { *o++ = *s++;
241 }
242 *o = 0;
243
244 return (wchar_t *)out->data;
245 } else
246 return str->s_textW;
247 }
248
249
250 char *
charArrayToUTF8(CharArray ca)251 charArrayToUTF8(CharArray ca)
252 { return stringToUTF8(&ca->data);
253 }
254
255
256 char *
charArrayToMB(CharArray ca)257 charArrayToMB(CharArray ca)
258 { return stringToMB(&ca->data);
259 }
260
261
262 char *
nameToMB(Name nm)263 nameToMB(Name nm)
264 { return stringToMB(&nm->data);
265 }
266
267
268 char *
nameToUTF8(Name nm)269 nameToUTF8(Name nm)
270 { return stringToUTF8(&nm->data);
271 }
272
273
274 wchar_t *
nameToWC(Name nm,size_t * len)275 nameToWC(Name nm, size_t *len)
276 { return charArrayToWC((CharArray)nm, len);
277 }
278
279
280 /*******************************
281 * <-- NAME *
282 *******************************/
283
284 Name
UTF8ToName(const char * utf8)285 UTF8ToName(const char *utf8)
286 { cuchar *in;
287 cuchar *e;
288 int len;
289 int wide;
290
291 for(in=(cuchar*)utf8; *in; in++)
292 { if ( (*in)&0x80 )
293 break;
294 }
295
296 if ( *in == EOS ) /* simple ASCII string */
297 return CtoName(utf8);
298
299 e = in + strlen((const char*)in);
300 for(in=(cuchar*)utf8, len=0, wide=FALSE; in < e; )
301 { int chr;
302
303 in = utf8_get_uchar(in, &chr);
304 if ( chr > 0xff )
305 wide = TRUE;
306 len++;
307 }
308
309 if ( wide )
310 { wchar_t *ws, *o;
311 int mlcd;
312 string s;
313 Name nm;
314
315 if ( len < 1024 )
316 { ws = alloca((len+1)*sizeof(wchar_t));
317 mlcd = FALSE;
318 } else
319 { ws = pceMalloc((len+1)*sizeof(wchar_t));
320 mlcd = TRUE;
321 }
322
323 for(in=(cuchar*)utf8, o=ws; in < e; )
324 { int chr;
325
326 in = utf8_get_uchar(in, &chr);
327 *o++ = chr;
328 }
329
330 str_set_n_wchar(&s, len, ws);
331 nm = StringToName(&s);
332
333 if ( mlcd )
334 pceFree(ws);
335
336 return nm;
337 } else
338 { char *as, *o;
339 int mlcd;
340 string s;
341 Name nm;
342
343 if ( len < 1024 )
344 { as = alloca((len+1));
345 mlcd = FALSE;
346 } else
347 { as = pceMalloc((len+1));
348 mlcd = TRUE;
349 }
350
351 for(in=(cuchar*)utf8, o=as; in < e; )
352 { int chr;
353
354 in = utf8_get_uchar(in, &chr);
355 *o++ = (char)chr;
356 }
357
358 str_set_n_ascii(&s, len, as);
359 nm = StringToName(&s);
360
361 if ( mlcd )
362 pceFree(as);
363
364 return nm;
365 }
366 }
367
368
369 Name
MBToName(const char * mb)370 MBToName(const char *mb)
371 { size_t len;
372 mbstate_t mbs;
373 const char *in = mb;
374
375 memset(&mbs, 0, sizeof(mbs));
376 if ( (len = mbsrtowcs(NULL, &in, 0, &mbs)) != (size_t)(-1) )
377 { string s;
378 wchar_t *ws;
379 int mlcd;
380 Name nm;
381
382 if ( len < 1024 )
383 { ws = alloca((len+1)*sizeof(wchar_t));
384 mlcd = FALSE;
385 } else
386 { ws = pceMalloc((len+1)*sizeof(wchar_t));
387 mlcd = TRUE;
388 }
389
390 memset(&mbs, 0, sizeof(mbs));
391 in = mb;
392 mbsrtowcs(ws, &in, len+1, &mbs);
393 str_set_n_wchar(&s, len, ws);
394 nm = StringToName(&s);
395
396 if ( mlcd )
397 pceFree(ws);
398
399 return nm;
400 }
401
402 return NULL;
403 }
404
405
406 Name
WCToName(const wchar_t * wc,size_t len)407 WCToName(const wchar_t *wc, size_t len)
408 { if ( wc )
409 { string s;
410
411 if ( len == (size_t)-1 )
412 len = wcslen(wc);
413
414 str_set_n_wchar(&s, len, (wchar_t *)wc);
415
416 return StringToName(&s);
417 }
418
419 return NULL;
420 }
421
422
423 StringObj
WCToString(const wchar_t * wc,size_t len)424 WCToString(const wchar_t *wc, size_t len)
425 { if ( wc )
426 { string s;
427
428 str_set_n_wchar(&s, len, (wchar_t *)wc);
429
430 return StringToString(&s);
431 }
432
433 return NULL;
434 }
435
436
437 /*******************************
438 * FILE-NAMES *
439 *******************************/
440
/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Turn an OS filename into an XPCE name.  With XOS, the filename
representation is always UTF-8; otherwise filenames are treated as
multibyte strings in the encoding of the current locale.
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
445
446 Name
FNToName(const char * name)447 FNToName(const char *name)
448 { Name rc;
449 #ifdef O_XOS
450 rc = UTF8ToName(name);
451 #else
452 rc = MBToName(name);
453 #endif
454
455 if ( !rc ) /* Illegal Multibyte; use plain */
456 rc = CtoName(name);
457
458 return rc;
459 }
460
461
462 char *
charArrayToFN(CharArray ca)463 charArrayToFN(CharArray ca)
464 {
465 #ifdef O_XOS
466 return charArrayToUTF8(ca);
467 #else
468 return charArrayToMB(ca);
469 #endif
470 }
471
472
473 char *
stringToFN(PceString s)474 stringToFN(PceString s)
475 {
476 #ifdef O_XOS
477 return stringToUTF8(s);
478 #else
479 return stringToMB(s);
480 #endif
481 }
482
483
484 char *
nameToFN(Name nm)485 nameToFN(Name nm)
486 { return stringToFN(&nm->data);
487 }
488