1 /* locale.c -- locale module.
2 Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012
3 National Institute of Advanced Industrial Science and Technology (AIST)
4 Registration Number H15PRO112
5
6 This file is part of the m17n library.
7
8 The m17n library is free software; you can redistribute it and/or
9 modify it under the terms of the GNU Lesser General Public License
10 as published by the Free Software Foundation; either version 2.1 of
11 the License, or (at your option) any later version.
12
13 The m17n library is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
17
18 You should have received a copy of the GNU Lesser General Public
19 License along with the m17n library; if not, write to the Free
20 Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
21 Boston, MA 02110-1301 USA. */
22
23 /***en
24 @addtogroup m17nLocale
25 @brief Locale objects and API for them.
26
27 The m17n library represents locale related information as objects
28 of type #MLocale. */
29
30 /***ja
31 @addtogroup m17nLocale
32 @brief ロケールオブジェクトとそれに関する API.
33
34 m17n ライブラリはロケール関連情報を #MLocale 型のオブジェクトで表現する。 */
35
36 /*=*/
37
38 #if !defined (FOR_DOXYGEN) || defined (DOXYGEN_INTERNAL_MODULE)
39 /*** @addtogroup m17nInternal
40 @{ */
41
42 #define _GNU_SOURCE
43
44 #include <config.h>
45 #include <stdio.h>
46 #include <stdlib.h>
47 #include <string.h>
48 #include <locale.h>
49 #ifdef HAVE_LANGINFO_H
50 #include <langinfo.h>
51 #endif
52 #include <time.h>
53
54 #include "m17n.h"
55 #include "m17n-misc.h"
56 #include "internal.h"
57 #include "symbol.h"
58 #include "coding.h"
59 #include "textprop.h"
60 #include "mlocale.h"
61
62 static MSymbol M_locale;
63 static MSymbol M_xfrm;
64
65
66 /** Structure of locales. */
67
68 struct MLocale
69 {
70 M17NObject control;
71 MSymbol name;
72 MSymbol language;
73 MSymbol territory;
74 MSymbol modifier;
75 MSymbol codeset;
76 MSymbol coding;
77 };
78
79
80 /** The current locales of each category. */
81 MLocale *mlocale__collate, *mlocale__ctype;
82 MLocale *mlocale__messages, *mlocale__time;
83
84 /* These are currently not used. */
85 #if 0
86 MLocale *mlocale_monetary, *mlocale_numeric, ;
87 #endif
88
89 /** Parse locale name NAME and return a newly created MLocale object. */
90
91 static MLocale *
make_locale(const char * name)92 make_locale (const char *name)
93 {
94 char *str;
95 int len;
96 MLocale *locale;
97 char c;
98
99 M17N_OBJECT (locale, NULL, MERROR_LOCALE);
100 locale->name = msymbol (name);
101 msymbol_put (locale->name, M_locale, (void *) locale);
102 M17N_OBJECT_UNREF (locale);
103
104 len = strlen (name) + 1;
105 str = alloca (len);
106 memcpy (str, name, len);
107
108 c = '\0';
109 while (1)
110 {
111 char c1;
112 int i;
113
114 for (i = 0; str[i]; i++)
115 if (str[i] == '_' || str[i] == '.' || str[i] == '@')
116 break;
117 c1 = str[i];
118 str[i] = '\0';
119 if (c == '\0')
120 /* The first field is for language. */
121 locale->language = msymbol (str);
122 else if (c == '_')
123 /* The field following '_' is for territory. */
124 locale->territory = msymbol (str);
125 else if (c == '.')
126 /* The field following '.' is for codeset. */
127 locale->codeset = msymbol (str);
128 else
129 /* The other field is for modifier. */
130 locale->modifier = msymbol (str);
131 if (! c1)
132 break;
133 c = c1;
134 str += i + 1;
135 }
136
137 #ifdef HAVE_NL_LANGINFO
138 #ifdef CODESET
139 /* If we can use nl_langinfo () to retrieve a codeset name, respect
140 it over the codeset name extracted from the locale name. */
141 locale->codeset = msymbol (nl_langinfo (CODESET));
142 #endif
143 #endif
144
145 /* If the locale name specifies a codeset, get the corresponding
146 coding system. */
147 if (locale->codeset != Mnil)
148 {
149 locale->coding = mconv_resolve_coding (locale->codeset);
150 if (locale->coding == Mnil)
151 locale->coding = Mcoding_us_ascii;
152 }
153 else
154 locale->coding = Mcoding_us_ascii;
155 return locale;
156 }
157
158
159 /** Decode the byte sequence at BUF of length SIZE bytes by the coding
160 system associated with LOCALE, and return a generated M-text. */
161
162 static MText *
decode_locale(unsigned char * buf,int size,MLocale * locale)163 decode_locale (unsigned char *buf, int size, MLocale *locale)
164 {
165 return mconv_decode_buffer (locale->coding, buf, size);
166 }
167
168
169 /** Encode the M-text MT by the coding system associated with LOCALE,
170 and store the resulting bytes in the memory area at BUF of *SIZE
171 bytes. If the area is too short, allocate a new and wider area.
172 Store the length of the generated bytes in the place pointed by
173 SIZE, and return the address of those bytes. */
174
175 static unsigned char *
encode_locale(MText * mt,unsigned char * buf,int * size,MLocale * locale)176 encode_locale (MText *mt, unsigned char *buf, int *size, MLocale *locale)
177 {
178 int nbytes = mconv_encode_buffer (locale->coding, mt, buf, *size - 1);
179
180 if (nbytes < 0)
181 {
182 buf = NULL;
183 *size *= 2;
184 do {
185 MTABLE_REALLOC (buf, *size, MERROR_LOCALE);
186 nbytes = mconv_encode_buffer (mlocale__ctype->coding, mt, buf,
187 *size - 1);
188 } while (nbytes < 0);
189 }
190 buf[nbytes] = '\0';
191 *size = nbytes;
192 return buf;
193 }
194
195
196 /** Structure of transformed strings. The function mtext_coll ()
197 caches this object in an M-text as a text property. */
198
199 typedef struct {
200 /* Common header for a managed object. */
201 M17NObject control;
202
203 /* Locale corresponding to <str>. */
204 MLocale *locale;
205
206 /** Result of strxfrm. */
207 char *str;
208 } MXfrm;
209
210
211 static void
free_xfrm(void * object)212 free_xfrm (void *object)
213 {
214 MXfrm *xfrm = (MXfrm *) object;
215
216 M17N_OBJECT_UNREF (xfrm->locale);
217 free (xfrm->str);
218 }
219
220 static char *
get_xfrm(MText * mt)221 get_xfrm (MText *mt)
222 {
223 MTextProperty *prop = mtext_get_property (mt, 0, M_xfrm);
224 MXfrm *xfrm;
225 int size;
226 unsigned char *buf, *newbuf;
227 int request;
228
229 if (prop)
230 {
231 if (prop->end == mt->nchars)
232 {
233 xfrm = (MXfrm *) prop->val;
234 if (xfrm->locale == mlocale__ctype)
235 return xfrm->str;
236 }
237 mtext_detach_property (prop);
238 }
239
240 size = mt->nbytes;
241 buf = alloca (size);
242 newbuf = encode_locale (mt, buf, &size, mlocale__ctype);
243 M17N_OBJECT (xfrm, free_xfrm, MERROR_MTEXT);
244 xfrm->str = malloc (size);
245 request = strxfrm (xfrm->str, (char *) newbuf, size);
246 if (request >= size)
247 {
248 xfrm->str = realloc (xfrm->str, request);
249 strxfrm (xfrm->str, (char *) newbuf, size);
250 }
251 if (buf != newbuf)
252 free (newbuf);
253 prop = mtext_property (M_xfrm, xfrm, MTEXTPROP_VOLATILE_WEAK);
254 mtext_attach_property (mt, 0, mt->nchars, prop);
255 M17N_OBJECT_UNREF (prop);
256 return xfrm->str;
257 }
258
259
260 /* Internal API */
261
262 int
mlocale__init()263 mlocale__init ()
264 {
265 M_locale = msymbol_as_managing_key (" locale");
266
267 Mterritory = msymbol ("territory");
268 Mcodeset = msymbol ("codeset");
269
270 mlocale__collate = mlocale_set (LC_COLLATE, NULL);
271 M17N_OBJECT_REF (mlocale__collate);
272 mlocale__ctype = mlocale_set (LC_CTYPE, NULL);
273 M17N_OBJECT_REF (mlocale__ctype);
274 mlocale__messages = mlocale_set (LC_MESSAGES, NULL);
275 M17N_OBJECT_REF (mlocale__messages);
276 mlocale__time = mlocale_set (LC_TIME, NULL);
277 M17N_OBJECT_REF (mlocale__time);
278
279 M_xfrm = msymbol_as_managing_key (" xfrm");
280 return 0;
281 }
282
283 void
mlocale__fini()284 mlocale__fini ()
285 {
286 M17N_OBJECT_UNREF (mlocale__collate);
287 M17N_OBJECT_UNREF (mlocale__ctype);
288 M17N_OBJECT_UNREF (mlocale__messages);
289 M17N_OBJECT_UNREF (mlocale__time);
290 }
291
292 /*** @} */
293 #endif /* !FOR_DOXYGEN || DOXYGEN_INTERNAL_MODULE */
294
295
296 /* External API */
297 /*** @addtogroup m17nLocale */
298 /*** @{ */
299
300 /*=*/
301 /***en The symbol whose name is "territory". */
302 /***ja "territory" という名前を持つシンボル. */
303 MSymbol Mterritory;
304
305 /*=*/
306 /***en The symbol whose name is "modifier". */
307 /***ja "modifier" という名前を持つシンボル. */
308 MSymbol Mmodifier;
309
310 /*=*/
311 /***en The symbol whose name is "codeset". */
312 /***ja "codeset" という名前を持つシンボル. */
313 MSymbol Mcodeset;
314
315 /*=*/
316
317 /***en
318 @brief Set the current locale.
319
320 The mlocale_set () function sets or queries a part of the current
321 locale. The part is specified by $CATEGORY which must be a valid
322 first argument to <tt>setlocale ()</tt>.
323
324 If $LOCALE is not NULL, the locale of the specified part is set to
325 $LOCALE. If $LOCALE is not supported by the system, the current
326 locale is not changed.
327
328 If $LOCALE is NULL, the current locale of the specified part is
329 queried.
330
331 @return
332 If the call is successful, mlocale_set () returns an opaque locale
333 object that corresponds to the locale. The name of the locale can
334 be acquired by the function mlocale_get_prop ().
335 Otherwise, it returns NULL. */
336
337 /***ja
338 @brief ���ߤΥ���������ꤹ��.
339
340 �ؿ� mlocale_set () �ϸ��ߤΥ�����ΰ��������ꤷ�����䤤��碌���ꤹ�롣�����ǰ����Ȥ�
341 $CATEGORY �ǻ��ꤵ�졢<tt>setlocale ()</tt> ��ͭ�����������Ȥʤ��ΤǤʤ��ƤϤʤ�ʤ���
342
343 $LOCALE �� NULL �Ǥʤ���С����ꤷ����ʬ�Υ����뤬$LOCALE �����ꤵ��롣
344 $LOCALE �������ƥ�˥��ݡ��Ȥ���Ƥ��ʤ���С�����ϹԤ�줺�����ߤΥ�������Ѥ��ʤ���
345
346 $LOCALE �� NULL �ʤ�С����ߤΥ�����λ��ꤷ����ʬ���䤤��碌�롣
347
348 @return
349
350 �ƤӽФ�����������С�mlocale_set () �ϥ�������б����� opaque
351 �����륪�֥������Ȥ��֤����������̾���ϴؿ�
352 mlocale_get_prop () �ˤ�ä����뤳�Ȥ��Ǥ��롣
353 �����Ǥʤ���� NULL ���֤���
354 */
355
356 /***
357 @errors
358 @c MERROR_LOCALE */
359
360 MLocale *
mlocale_set(int category,const char * name)361 mlocale_set (int category, const char *name)
362 {
363 char *new;
364 MLocale *locale;
365
366 new = setlocale (category, name);
367 if (! new)
368 return NULL;
369
370 locale = (MLocale *) msymbol_get (msymbol (new), M_locale);
371 if (! locale)
372 locale = make_locale (new);
373 if (! locale)
374 return NULL;
375 if (name && (category == LC_ALL || category == LC_COLLATE))
376 {
377 M17N_OBJECT_REF (locale);
378 M17N_OBJECT_UNREF (mlocale__collate);
379 mlocale__collate = locale;
380 }
381 else if (name && (category == LC_ALL || category == LC_CTYPE))
382 {
383 M17N_OBJECT_REF (locale);
384 M17N_OBJECT_UNREF (mlocale__ctype);
385 mlocale__ctype = locale;
386 }
387 if (name && (category == LC_ALL || category == LC_MESSAGES))
388 {
389 M17N_OBJECT_REF (locale);
390 M17N_OBJECT_UNREF (mlocale__messages);
391 mlocale__messages = locale;
392 }
393 if (name && (category == LC_ALL || category == LC_TIME))
394 {
395 M17N_OBJECT_REF (locale);
396 M17N_OBJECT_UNREF (mlocale__time);
397 mlocale__time = locale;
398 }
399 return locale;
400 }
401
402 /*=*/
403
404 /***en
405 @brief Get the value of a locale property.
406
407 The mlocale_get_prop () function returns the value of a property
408 $KEY of locale $LOCALE. $KEY must be #Mname, #Mlanguage,
409 #Mterritory, #Mcodeset, #Mmodifier, or #Mcoding. */
410
411 /***ja
412 @brief ������ץ�ѥƥ����ͤ�����.
413
414 �ؿ� mlocale_get_prop () �ϡ������� $LOCALE �� $KEY �ץ�ѥƥ����ͤ��֤���
415 $KEY �� #Mname, #Mlanguage, #Mterritory, #Mcodeset, #Mmodifier,
416 #Mcoding �Τ����줫�Ǥ��롣 */
417
418 MSymbol
mlocale_get_prop(MLocale * locale,MSymbol key)419 mlocale_get_prop (MLocale *locale, MSymbol key)
420 {
421 if (key == Mcoding)
422 return locale->coding;
423 if (key == Mname)
424 return locale->name;
425 if (key == Mlanguage)
426 return locale->language;
427 if (key == Mterritory)
428 return locale->territory;
429 if (key == Mcodeset)
430 return locale->codeset;
431 if (key == Mmodifier)
432 return locale->modifier;
433 return Mnil;
434 }
435
436 /*=*/
437 /***en
438 @brief Format date and time.
439
440 The mtext_ftime () function formats the broken-down time $TM
441 according to the format specification $FORMAT and appends the
442 result to the M-text $MT. The formatting is done according to the
443 locale $LOCALE (if not NULL) or the current locale (LC_TIME).
444
445 The meanings of the arguments $TM and $FORMAT are the same as those
446 of strftime ().
447
448 @seealso
449 strftime ().
450 */
451 /***ja
452 @brief ���դȻ��֤�ե����ޥåȤ���.
453
454 �ؿ� mtext_ftime () �ϻ���ǡ��� (broken-down time) $TM ��$FORMAT
455 �ǻ��ꤵ�줿������������̤�M-text $MT ���ղä��롣�ե����ޥåȤ�
456 NULL �Ǥʤ���� ������ $LOCALE �ˡ��ޤ��ϸ��ߤΥ�����(LC_TIME) �˽�����
457
458 ���� $TM �� $FORMAT �ΰ�̣�� strftime () �ξ���Ʊ����
459
460 @seealso
461 strftime ().
462 */
463
464 int
mtext_ftime(MText * mt,const char * format,const struct tm * tm,MLocale * locale)465 mtext_ftime (MText *mt, const char *format, const struct tm *tm,
466 MLocale *locale)
467 {
468 int bufsize;
469 unsigned char *buf;
470 size_t nbytes, nchars;
471 char *current_locale = NULL;
472
473 if (locale)
474 {
475 char *str = setlocale (LC_TIME, NULL);
476 int len = strlen (str) + 1;
477
478 current_locale = alloca (len);
479 memcpy (current_locale, str, len);
480 mlocale_set (LC_TIME, msymbol_name (locale->name));
481 }
482
483 bufsize = 1024;
484 while (1)
485 {
486 MTABLE_ALLOCA (buf, bufsize, MERROR_MTEXT);
487 buf[0] = 1;
488 nbytes = strftime ((char *) buf, bufsize, format, tm);
489 if (nbytes > 0
490 || ! buf[0])
491 break;
492 bufsize *= 2;
493 }
494
495 if (nbytes > 0)
496 {
497 MText *work = decode_locale (buf, nbytes, mlocale__time);
498
499 if (work)
500 {
501 nchars = work->nchars;
502 mtext_cat (mt, work);
503 M17N_OBJECT_UNREF (work);
504 }
505 else
506 nchars = 0;
507 }
508 else
509 nchars = 0;
510
511 if (current_locale)
512 mlocale_set (LC_TIME, current_locale);
513
514 return nchars;
515 }
516
517 /*=*/
518
519 /***en
520 @brief Get an environment variable.
521
522 The mtext_getenv () function searches the environment variable
523 list for a string that matches the string pointed to by $NAME.
524
525 If there is a match, the function decodes the value according to
526 the current locale (LC_CTYPE) into an M-text, and returns that
527 M-text.
528
529 If there is no match, the function returns NULL. */
530 /***ja
531 @brief �Ķ��ѿ�������.
532
533 �ؿ� mtext_getenv () �� $NAME
534 �ǻؤ����ʸ����ȹ��פ���ʸ�����Ķ��ѿ��Υꥹ���椫��õ����
535
536 ���Ĥ��ä����ˤϡ������ͤߤΥ�����(LC_CTYPE) �˽��ä�
537 M-text �˥ǥ����ɤ�������M-text ���֤���
538
539 ���Ĥ���ʤ���С�NULL ���֤��� */
540
541 MText *
mtext_getenv(const char * name)542 mtext_getenv (const char *name)
543 {
544 char *p = getenv (name);
545
546 if (!p)
547 return NULL;
548 return decode_locale ((unsigned char *) p, strlen (p), mlocale__ctype);
549 }
550
551 /*=*/
552
553 /***en
554 @brief Change or add an environment variable.
555
556 The mtext_putenv () function changes or adds the value of
557 environment variables according to M-text $MT. It calls the
558 function <tt>putenv</tt> with an argument generated by encoding
559 $MT according to the current locale (LC_CTYPE).
560
561 @return
562 This function returns zero on success, or -1 if an error
563 occurs. */
564 /***ja
565 @brief �Ķ��ѿ����ѹ����ɲä���.
566
567 �ؿ� mtext_putenv () �� M-text $MT
568 �˽��äơ��Ķ��ѿ����ͤ��ѹ��������ɲä����ꤹ�롣���δؿ��ϡ����ߤΥ�����
569 (LC_CTYPE) �˽��ä�$MT ���ɤ������������Ȥ��ƴؿ� <tt>putenv</tt> ��Ƥ֡�
570
571 @return
572 ���δؿ��ϡ������������ˤ� 0 ���顼��������� -1 ���֤���
573 */
574
575
576 int
mtext_putenv(MText * mt)577 mtext_putenv (MText *mt)
578 {
579 unsigned char buf[1024];
580 int size = 1024;
581 unsigned char *newbuf;
582 int result;
583
584 newbuf = encode_locale (mt, buf, &size, mlocale__ctype);
585 result = putenv ((char *) newbuf);
586 if (buf != newbuf)
587 free (newbuf);
588 return result;
589 }
590
591 /*=*/
592
593 /***en
594 @brief Compare two M-texts using the current locale.
595
596 The mtext_coll () function compares the two M-texts $MT1 and $MT2.
597 It returns an integer less than, equal to, or greater than zero if
598 $MT1 is found, respectively, to be less than, to match, or to be
599 greater than $MT2. The comparison is based on texts as
600 appropriate for the current locale (LC_COLLATE).
601
602 This function makes use of information that is automatically
603 cached in the M-texts as a text property. So, the second call of
604 this function with $MT1 or $MT2 finishes faster than the first
605 call. */
606 /***ja
607 @brief ���ߤΥ�������Ѥ��ƣ��Ĥ� M-text ����Ӥ���.
608
609 �ؿ� mtext_coll () �ϣ��Ĥ� M-text $MT1 �� $MT2
610 ����Ӥ��롣����ͤ����������, 0, ���������ͤΤ����줫�Ǥ��ꡢ���줾��
611 $MT1 �� $MT2 ��꾮������Ʊ�����礭�������������롣��Ӥϸ��ߤΥ�����
612 (LC_COLLATE) �˴�Ť��ƹԤ��롣
613
614 ���δؿ��� M-text
615 �Υƥ����ȥץ�ѥƥ��Ȥ��Ƽ�ưŪ�˥���å��夵����������Ѥ���Τǡ������ܰʹߤ�Ʊ����Ӥϣ����ܤ��®���¹Ԥ���롣 */
616
617 int
mtext_coll(MText * mt1,MText * mt2)618 mtext_coll (MText *mt1, MText *mt2)
619 {
620 char *str1, *str2;
621
622 if (mt1->nchars == 0)
623 return (mt2->nchars == 0 ? 0 : -1);
624 else if (mt2->nchars == 0)
625 return 1;
626
627 str1 = get_xfrm (mt1);
628 str2 = get_xfrm (mt2);
629 return strcoll (str1, str2);
630 }
631
632 /*** @} */
633
634 /*
635 Local Variables:
636 coding: euc-japan
637 End:
638 */
639