1 /*  Part of XPCE --- The SWI-Prolog GUI toolkit
2 
3     Author:        Jan Wielemaker and Anjo Anjewierden
4     E-mail:        jan@swi.psy.uva.nl
5     WWW:           http://www.swi.psy.uva.nl/projects/xpce/
6     Copyright (c)  2005-2013, University of Amsterdam
7     All rights reserved.
8 
9     Redistribution and use in source and binary forms, with or without
10     modification, are permitted provided that the following conditions
11     are met:
12 
13     1. Redistributions of source code must retain the above copyright
14        notice, this list of conditions and the following disclaimer.
15 
16     2. Redistributions in binary form must reproduce the above copyright
17        notice, this list of conditions and the following disclaimer in
18        the documentation and/or other materials provided with the
19        distribution.
20 
21     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22     "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23     LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24     FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
25     COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26     INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
27     BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28     LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
29     CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30     LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
31     ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
32     POSSIBILITY OF SUCH DAMAGE.
33 */
34 
35 #include <h/kernel.h>
36 #include <h/utf8.h>
37 
/* Variant of utf8_get_char() for callers that walk `unsigned char *`
   data: the input pointer is cast to `char *` for the call and the
   result is cast back to `unsigned char *`.  The whole expansion and the
   `chr` argument are parenthesised so the cast always applies to the
   call result, whatever expression the macro is embedded in.
*/
#define utf8_get_uchar(s, chr) \
	((unsigned char *)utf8_get_char((char *)(s), (chr)))

/* Fallback used only when no header has defined MB_LEN_MAX yet. */
#ifndef MB_LEN_MAX
#define MB_LEN_MAX 6
#endif
43 
44 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
45 These  functions  translate  CharArray  (PceString,  Name)  into  a  format
46 suitable to drive operating- or windowsystem   calls,  such as accessing
47 filenames, window titles, etc.
48 
Neither UTF-8 nor locale-defined multibyte strings are designed to deal
with embedded 0-bytes, and APIs generally accept 0-terminated strings.
Sizes are only used for wide-character arrays.
52 
53 	* MB
54 	CTYPE Locale defined translation
55 
56 	* UTF-8
57 	Well known UTF-8 encoding of UNICODE
58 
59 	* WC
60 	wchar_t representation of UNICODE
61 
62 The returned strings of this library are   stored in a ring of RING_SIZE
63 fields.
64 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
65 
66 		 /*******************************
67 		 *	      RING		*
68 		 *******************************/
69 
#define RING_SIZE 16			/* number of cells in the ring */

/* One cell of the result ring: a growable byte buffer. */
typedef struct rcell
{ char   *data;				/* start of the buffer */
  char   *bufp;				/* next position to write */
  char   *limitp;			/* one past the allocated area */
  size_t  allocated;			/* size of the buffer in bytes */
} rcell;

/* Objects with static storage duration start out zeroed, so every cell
   begins with data == NULL and allocated == 0.
*/
static rcell ring[RING_SIZE];
static int   ring_index;		/* cell handed out by the next request */
81 
82 static rcell *
find_ring()83 find_ring()
84 { rcell *c = &ring[ring_index++];
85 
86   if ( ring_index == RING_SIZE )
87     ring_index = 0;
88 
89   if ( c->allocated == 0 )
90   { c->allocated = 256;
91     c->data = pceMalloc(c->allocated);
92   } else if ( c->allocated >= 4096 )
93   { c->allocated = 256;
94     pceFree(c->data);
95     c->data = pceMalloc(c->allocated);
96   }
97   c->bufp   = c->data;
98   c->limitp = &c->data[c->allocated];
99 
100   return c;
101 }
102 
103 
104 static void
roomBuffer(rcell * c,size_t room)105 roomBuffer(rcell *c, size_t room)
106 { while ( c->bufp + room > c->limitp )
107   { size_t size = c->bufp - c->data;
108 
109     c->allocated *= 2;
110     c->data   = pceRealloc(c->data, c->allocated);
111     c->limitp = &c->data[c->allocated];
112     c->bufp   = &c->data[size];
113   }
114 }
115 
116 
117 static void
addByte(rcell * c,int byte)118 addByte(rcell *c, int byte)
119 { roomBuffer(c, 1);
120 
121   *c->bufp++ = byte;
122 }
123 
124 
125 		 /*******************************
126 		 *	  CHARARRAY --> 	*
127 		 *******************************/
128 
/* Read-only element types used when scanning 8-bit and wide text. */
typedef unsigned char const cuchar;
typedef wchar_t const       cwchar;
131 
132 static char *
stringToUTF8(PceString str)133 stringToUTF8(PceString str)
134 { rcell *out;
135 
136   if ( isstrA(str) )
137   { cuchar *s = (cuchar*) str->s_textA;
138     cuchar *e = &s[str->s_size];
139 
140     for( ; s<e; s++ )			/* do we need conversion */
141     { if ( *s & 0x80 )
142 	break;
143     }
144     if ( s == e )
145       return (char *)str->s_textA;	/* no */
146 
147     out = find_ring();
148     for(s = (cuchar*) str->s_textA; s<e; s++ )
149     { roomBuffer(out, 2);		/* max bytes per UTF-8 < 256 */
150 
151       out->bufp = utf8_put_char(out->bufp, *s);
152     }
153   } else
154   { cwchar *s = str->s_textW;
155     cwchar *e = &s[str->s_size];
156 
157     out = find_ring();
158     for( ; s<e; s++ )
159     { roomBuffer(out, 6);		/* max bytes per UTF-8 */
160 
161       out->bufp = utf8_put_char(out->bufp, *s);
162     }
163   }
164 
165   addByte(out, 0);
166 
167   return out->data;
168 }
169 
170 
171 static char *
stringToMB(PceString str)172 stringToMB(PceString str)
173 { rcell *out;
174   mbstate_t mbs;
175   char b[MB_LEN_MAX];
176   size_t rc;
177 
178   memset(&mbs, 0, sizeof(mbs));
179 
180   if ( isstrA(str) )
181   { cuchar *s = (cuchar*) str->s_textA;
182     cuchar *e = &s[str->s_size];
183 
184     for( ; s<e; s++ )			/* do we need conversion? */
185     { if ( (rc=wcrtomb(b, *s, &mbs)) == 1 && b[0] == *s )
186 	continue;
187       if ( rc == (size_t)-1 )
188 	return NULL;			/* cannot convert */
189     }
190     if ( s == e )
191       return (char *)str->s_textA;		/* no */
192 
193     memset(&mbs, 0, sizeof(mbs));
194     out = find_ring();
195     for( ; s <= e; s++ )		/* <=: also 0-byte! */
196     { roomBuffer(out, MB_LEN_MAX);
197 
198       if ( (rc=wcrtomb(out->bufp, *s, &mbs)) == (size_t)-1 )
199 	return NULL;
200       out->bufp += rc;
201     }
202   } else
203   { cwchar *s = str->s_textW;
204     cwchar *e = &s[str->s_size];
205 
206     out = find_ring();
207     for( ; s<e; s++ )
208     { roomBuffer(out, MB_LEN_MAX);
209 
210       if ( (rc=wcrtomb(out->bufp, *s, &mbs)) == (size_t)-1 )
211 	return NULL;
212       out->bufp += rc;
213     }
214   }
215 
216   roomBuffer(out, MB_LEN_MAX+1);	/* add restore state + 0-byte */
217   if ( wcrtomb(out->bufp, 0, &mbs) ==  (size_t)-1 )
218     return NULL;
219 
220   return out->data;
221 }
222 
223 
224 wchar_t *
charArrayToWC(CharArray ca,size_t * len)225 charArrayToWC(CharArray ca, size_t *len)
226 { PceString str = &ca->data;
227 
228   if ( len )
229     *len = str->s_size;
230 
231   if ( isstrA(str) )
232   { rcell *out = find_ring();
233     cuchar *s = (cuchar*) str->s_textA;
234     cuchar *e = &s[str->s_size];
235     wchar_t *o;
236 
237     roomBuffer(out, (str->s_size+1)*sizeof(wchar_t));
238 
239     for(o=(wchar_t*)out->data ; s<e; )
240     { *o++ = *s++;
241     }
242     *o = 0;
243 
244     return (wchar_t *)out->data;
245   } else
246     return str->s_textW;
247 }
248 
249 
250 char *
charArrayToUTF8(CharArray ca)251 charArrayToUTF8(CharArray ca)
252 { return stringToUTF8(&ca->data);
253 }
254 
255 
256 char *
charArrayToMB(CharArray ca)257 charArrayToMB(CharArray ca)
258 { return stringToMB(&ca->data);
259 }
260 
261 
262 char *
nameToMB(Name nm)263 nameToMB(Name nm)
264 { return stringToMB(&nm->data);
265 }
266 
267 
268 char *
nameToUTF8(Name nm)269 nameToUTF8(Name nm)
270 { return stringToUTF8(&nm->data);
271 }
272 
273 
274 wchar_t *
nameToWC(Name nm,size_t * len)275 nameToWC(Name nm, size_t *len)
276 { return charArrayToWC((CharArray)nm, len);
277 }
278 
279 
280 		 /*******************************
281 		 *	    <-- NAME	  	*
282 		 *******************************/
283 
284 Name
UTF8ToName(const char * utf8)285 UTF8ToName(const char *utf8)
286 { cuchar *in;
287   cuchar *e;
288   int len;
289   int wide;
290 
291   for(in=(cuchar*)utf8; *in; in++)
292   { if ( (*in)&0x80 )
293       break;
294   }
295 
296   if ( *in == EOS )			/* simple ASCII string */
297     return CtoName(utf8);
298 
299   e = in + strlen((const char*)in);
300   for(in=(cuchar*)utf8, len=0, wide=FALSE; in < e; )
301   { int chr;
302 
303     in = utf8_get_uchar(in, &chr);
304     if ( chr > 0xff )
305       wide = TRUE;
306     len++;
307   }
308 
309   if ( wide )
310   { wchar_t *ws, *o;
311     int mlcd;
312     string s;
313     Name nm;
314 
315     if ( len < 1024 )
316     { ws = alloca((len+1)*sizeof(wchar_t));
317       mlcd = FALSE;
318     } else
319     { ws = pceMalloc((len+1)*sizeof(wchar_t));
320       mlcd = TRUE;
321     }
322 
323     for(in=(cuchar*)utf8, o=ws; in < e; )
324     { int chr;
325 
326       in = utf8_get_uchar(in, &chr);
327       *o++ = chr;
328     }
329 
330     str_set_n_wchar(&s, len, ws);
331     nm = StringToName(&s);
332 
333     if ( mlcd )
334       pceFree(ws);
335 
336     return nm;
337   } else
338   { char *as, *o;
339     int mlcd;
340     string s;
341     Name nm;
342 
343     if ( len < 1024 )
344     { as = alloca((len+1));
345       mlcd = FALSE;
346     } else
347     { as = pceMalloc((len+1));
348       mlcd = TRUE;
349     }
350 
351     for(in=(cuchar*)utf8, o=as; in < e; )
352     { int chr;
353 
354       in = utf8_get_uchar(in, &chr);
355       *o++ = (char)chr;
356     }
357 
358     str_set_n_ascii(&s, len, as);
359     nm = StringToName(&s);
360 
361     if ( mlcd )
362       pceFree(as);
363 
364     return nm;
365   }
366 }
367 
368 
369 Name
MBToName(const char * mb)370 MBToName(const char *mb)
371 { size_t len;
372   mbstate_t mbs;
373   const char *in = mb;
374 
375   memset(&mbs, 0, sizeof(mbs));
376   if ( (len = mbsrtowcs(NULL, &in, 0, &mbs)) != (size_t)(-1) )
377   { string s;
378     wchar_t *ws;
379     int mlcd;
380     Name nm;
381 
382     if ( len < 1024 )
383     { ws = alloca((len+1)*sizeof(wchar_t));
384       mlcd = FALSE;
385     } else
386     { ws = pceMalloc((len+1)*sizeof(wchar_t));
387       mlcd = TRUE;
388     }
389 
390     memset(&mbs, 0, sizeof(mbs));
391     in = mb;
392     mbsrtowcs(ws, &in, len+1, &mbs);
393     str_set_n_wchar(&s, len, ws);
394     nm = StringToName(&s);
395 
396     if ( mlcd )
397       pceFree(ws);
398 
399     return nm;
400   }
401 
402   return NULL;
403 }
404 
405 
406 Name
WCToName(const wchar_t * wc,size_t len)407 WCToName(const wchar_t *wc, size_t len)
408 { if ( wc )
409   { string s;
410 
411     if ( len == (size_t)-1 )
412       len = wcslen(wc);
413 
414     str_set_n_wchar(&s, len, (wchar_t *)wc);
415 
416     return StringToName(&s);
417   }
418 
419   return NULL;
420 }
421 
422 
423 StringObj
WCToString(const wchar_t * wc,size_t len)424 WCToString(const wchar_t *wc, size_t len)
425 { if ( wc )
426   { string s;
427 
428     str_set_n_wchar(&s, len, (wchar_t *)wc);
429 
430     return StringToString(&s);
431   }
432 
433   return NULL;
434 }
435 
436 
437 		 /*******************************
438 		 *	     FILE-NAMES		*
439 		 *******************************/
440 
441 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
442 Turn  an  OS  filename  into  an  XPCE  name.  With  XOS,  the  filename
443 representation is always UTF-8
444 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
445 
446 Name
FNToName(const char * name)447 FNToName(const char *name)
448 { Name rc;
449 #ifdef O_XOS
450   rc = UTF8ToName(name);
451 #else
452   rc = MBToName(name);
453 #endif
454 
455   if ( !rc )				/* Illegal Multibyte; use plain */
456     rc = CtoName(name);
457 
458   return rc;
459 }
460 
461 
462 char *
charArrayToFN(CharArray ca)463 charArrayToFN(CharArray ca)
464 {
465 #ifdef O_XOS
466    return charArrayToUTF8(ca);
467 #else
468    return charArrayToMB(ca);
469 #endif
470 }
471 
472 
473 char *
stringToFN(PceString s)474 stringToFN(PceString s)
475 {
476 #ifdef O_XOS
477    return stringToUTF8(s);
478 #else
479    return stringToMB(s);
480 #endif
481 }
482 
483 
484 char *
nameToFN(Name nm)485 nameToFN(Name nm)
486 { return stringToFN(&nm->data);
487 }
488