1 /* locale.c -- locale module.
2    Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012
3      National Institute of Advanced Industrial Science and Technology (AIST)
4      Registration Number H15PRO112
5 
6    This file is part of the m17n library.
7 
8    The m17n library is free software; you can redistribute it and/or
9    modify it under the terms of the GNU Lesser General Public License
10    as published by the Free Software Foundation; either version 2.1 of
11    the License, or (at your option) any later version.
12 
13    The m17n library is distributed in the hope that it will be useful,
14    but WITHOUT ANY WARRANTY; without even the implied warranty of
15    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
16    Lesser General Public License for more details.
17 
18    You should have received a copy of the GNU Lesser General Public
19    License along with the m17n library; if not, write to the Free
20    Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
21    Boston, MA 02110-1301 USA.  */
22 
23 /***en
24     @addtogroup m17nLocale
25     @brief Locale objects and API for them.
26 
27     The m17n library represents locale related information as objects
28     of type #MLocale.  */
29 
30 /***ja
31     @addtogroup m17nLocale
32     @brief �����륪�֥������ȤȤ���˴ؤ��� API.
33 
34     m17n �饤�֥��ϥ������Ϣ����� #MLocale ���Υ��֥������Ȥ�ɽ�����롣  */
35 
36 /*=*/
37 
38 #if !defined (FOR_DOXYGEN) || defined (DOXYGEN_INTERNAL_MODULE)
39 /*** @addtogroup m17nInternal
40      @{ */
41 
42 #define _GNU_SOURCE
43 
44 #include <config.h>
45 #include <stdio.h>
46 #include <stdlib.h>
47 #include <string.h>
48 #include <locale.h>
49 #ifdef HAVE_LANGINFO_H
50 #include <langinfo.h>
51 #endif
52 #include <time.h>
53 
54 #include "m17n.h"
55 #include "m17n-misc.h"
56 #include "internal.h"
57 #include "symbol.h"
58 #include "coding.h"
59 #include "textprop.h"
60 #include "mlocale.h"
61 
62 static MSymbol M_locale;
63 static MSymbol M_xfrm;
64 
65 
66 /** Structure of locales.  */
67 
68 struct MLocale
69 {
70   M17NObject control;
71   MSymbol name;
72   MSymbol language;
73   MSymbol territory;
74   MSymbol modifier;
75   MSymbol codeset;
76   MSymbol coding;
77 };
78 
79 
80 /** The current locales of each category.  */
81 MLocale *mlocale__collate, *mlocale__ctype;
82 MLocale *mlocale__messages, *mlocale__time;
83 
84 /* These are currently not used.  */
85 #if 0
86 MLocale *mlocale_monetary, *mlocale_numeric, ;
87 #endif
88 
89 /** Parse locale name NAME and return a newly created MLocale object.  */
90 
91 static MLocale *
make_locale(const char * name)92 make_locale (const char *name)
93 {
94   char *str;
95   int len;
96   MLocale *locale;
97   char c;
98 
99   M17N_OBJECT (locale, NULL, MERROR_LOCALE);
100   locale->name = msymbol (name);
101   msymbol_put (locale->name, M_locale, (void *) locale);
102   M17N_OBJECT_UNREF (locale);
103 
104   len = strlen (name) + 1;
105   str = alloca (len);
106   memcpy (str, name, len);
107 
108   c = '\0';
109   while (1)
110     {
111       char c1;
112       int i;
113 
114       for (i = 0; str[i]; i++)
115 	if (str[i] == '_' || str[i] == '.' || str[i] == '@')
116 	  break;
117       c1 = str[i];
118       str[i] = '\0';
119       if (c == '\0')
120 	/* The first field is for language.  */
121 	locale->language = msymbol (str);
122       else if (c == '_')
123 	/* The field following '_' is for territory.  */
124 	locale->territory = msymbol (str);
125       else if (c == '.')
126 	/* The field following '.' is for codeset.  */
127 	locale->codeset = msymbol (str);
128       else
129 	/* The other field is for modifier.  */
130 	locale->modifier = msymbol (str);
131       if (! c1)
132 	break;
133       c = c1;
134       str += i + 1;
135     }
136 
137 #ifdef HAVE_NL_LANGINFO
138 #ifdef CODESET
139   /* If we can use nl_langinfo () to retrieve a codeset name, respect
140      it over the codeset name extracted from the locale name.  */
141   locale->codeset = msymbol (nl_langinfo (CODESET));
142 #endif
143 #endif
144 
145   /* If the locale name specifies a codeset, get the corresponding
146      coding system.  */
147   if (locale->codeset != Mnil)
148     {
149       locale->coding = mconv_resolve_coding (locale->codeset);
150       if (locale->coding == Mnil)
151 	locale->coding = Mcoding_us_ascii;
152     }
153   else
154     locale->coding = Mcoding_us_ascii;
155   return locale;
156 }
157 
158 
159 /** Decode the byte sequence at BUF of length SIZE bytes by the coding
160     system associated with LOCALE, and return a generated M-text.  */
161 
162 static MText *
decode_locale(unsigned char * buf,int size,MLocale * locale)163 decode_locale (unsigned char *buf, int size, MLocale *locale)
164 {
165   return mconv_decode_buffer (locale->coding, buf, size);
166 }
167 
168 
169 /** Encode the M-text MT by the coding system associated with LOCALE,
170     and store the resulting bytes in the memory area at BUF of *SIZE
171     bytes.  If the area is too short, allocate a new and wider area.
172     Store the length of the generated bytes in the place pointed by
173     SIZE, and return the address of those bytes.  */
174 
175 static unsigned char *
encode_locale(MText * mt,unsigned char * buf,int * size,MLocale * locale)176 encode_locale (MText *mt, unsigned char *buf, int *size, MLocale *locale)
177 {
178   int nbytes = mconv_encode_buffer (locale->coding, mt, buf, *size - 1);
179 
180   if (nbytes < 0)
181     {
182       buf = NULL;
183       *size *= 2;
184       do {
185 	MTABLE_REALLOC (buf, *size, MERROR_LOCALE);
186 	nbytes = mconv_encode_buffer (mlocale__ctype->coding, mt, buf,
187 				      *size - 1);
188       } while (nbytes < 0);
189     }
190   buf[nbytes] = '\0';
191   *size = nbytes;
192   return buf;
193 }
194 
195 
196 /** Structure of transformed strings.  The function mtext_coll ()
197     caches this object in an M-text as a text property.  */
198 
199 typedef struct {
200   /* Common header for a managed object.  */
201   M17NObject control;
202 
203   /* Locale corresponding to <str>.  */
204   MLocale *locale;
205 
206   /** Result of strxfrm.  */
207   char *str;
208 } MXfrm;
209 
210 
211 static void
free_xfrm(void * object)212 free_xfrm (void *object)
213 {
214   MXfrm *xfrm = (MXfrm *) object;
215 
216   M17N_OBJECT_UNREF (xfrm->locale);
217   free (xfrm->str);
218 }
219 
220 static char *
get_xfrm(MText * mt)221 get_xfrm (MText *mt)
222 {
223   MTextProperty *prop = mtext_get_property (mt, 0, M_xfrm);
224   MXfrm *xfrm;
225   int size;
226   unsigned char *buf, *newbuf;
227   int request;
228 
229   if (prop)
230     {
231       if (prop->end == mt->nchars)
232 	{
233 	  xfrm = (MXfrm *) prop->val;
234 	  if (xfrm->locale == mlocale__ctype)
235 	    return xfrm->str;
236 	}
237       mtext_detach_property (prop);
238     }
239 
240   size = mt->nbytes;
241   buf = alloca (size);
242   newbuf = encode_locale (mt, buf, &size, mlocale__ctype);
243   M17N_OBJECT (xfrm, free_xfrm, MERROR_MTEXT);
244   xfrm->str = malloc (size);
245   request = strxfrm (xfrm->str, (char *) newbuf, size);
246   if (request >= size)
247     {
248       xfrm->str = realloc (xfrm->str, request);
249       strxfrm (xfrm->str, (char *) newbuf, size);
250     }
251   if (buf != newbuf)
252     free (newbuf);
253   prop = mtext_property (M_xfrm, xfrm, MTEXTPROP_VOLATILE_WEAK);
254   mtext_attach_property (mt, 0, mt->nchars, prop);
255   M17N_OBJECT_UNREF (prop);
256   return xfrm->str;
257 }
258 
259 
260 /* Internal API */
261 
262 int
mlocale__init()263 mlocale__init ()
264 {
265   M_locale = msymbol_as_managing_key ("  locale");
266 
267   Mterritory = msymbol ("territory");
268   Mcodeset = msymbol ("codeset");
269 
270   mlocale__collate = mlocale_set (LC_COLLATE, NULL);
271   M17N_OBJECT_REF (mlocale__collate);
272   mlocale__ctype = mlocale_set (LC_CTYPE, NULL);
273   M17N_OBJECT_REF (mlocale__ctype);
274   mlocale__messages = mlocale_set (LC_MESSAGES, NULL);
275   M17N_OBJECT_REF (mlocale__messages);
276   mlocale__time = mlocale_set (LC_TIME, NULL);
277   M17N_OBJECT_REF (mlocale__time);
278 
279   M_xfrm = msymbol_as_managing_key ("  xfrm");
280   return 0;
281 }
282 
283 void
mlocale__fini()284 mlocale__fini ()
285 {
286   M17N_OBJECT_UNREF (mlocale__collate);
287   M17N_OBJECT_UNREF (mlocale__ctype);
288   M17N_OBJECT_UNREF (mlocale__messages);
289   M17N_OBJECT_UNREF (mlocale__time);
290 }
291 
292 /*** @} */
293 #endif /* !FOR_DOXYGEN || DOXYGEN_INTERNAL_MODULE */
294 
295 
296 /* External API */
297 /*** @addtogroup m17nLocale */
298 /*** @{ */
299 
300 /*=*/
301 /***en The symbol whose name is "territory".  */
302 /***ja  "territory" �Ȥ���̾������ĥ���ܥ�. */
303 MSymbol Mterritory;
304 
305 /*=*/
306 /***en The symbol whose name is "modifier".  */
307 /***ja  "modifier" �Ȥ���̾������ĥ���ܥ�. */
308 MSymbol Mmodifier;
309 
310 /*=*/
311 /***en The symbol whose name is "codeset".  */
312 /***ja  "codeset" �Ȥ���̾������ĥ���ܥ�. */
313 MSymbol Mcodeset;
314 
315 /*=*/
316 
317 /***en
318     @brief Set the current locale.
319 
320     The mlocale_set () function sets or queries a part of the current
321     locale.  The part is specified by $CATEGORY which must be a valid
322     first argument to <tt>setlocale ()</tt>.
323 
324     If $LOCALE is not NULL, the locale of the specified part is set to
325     $LOCALE.  If $LOCALE is not supported by the system, the current
326     locale is not changed.
327 
328     If $LOCALE is NULL, the current locale of the specified part is
329     queried.
330 
331     @return
332     If the call is successful, mlocale_set () returns an opaque locale
333     object that corresponds to the locale.  The name of the locale can
334     be acquired by the function mlocale_get_prop ().
335     Otherwise, it returns NULL.  */
336 
337 /***ja
338     @brief ���ߤΥ���������ꤹ��.
339 
340     �ؿ� mlocale_set () �ϸ��ߤΥ�����ΰ��������ꤷ�����䤤��碌���ꤹ�롣�����ǰ����Ȥ�
341     $CATEGORY �ǻ��ꤵ�졢<tt>setlocale ()</tt> ��ͭ�����������Ȥʤ��ΤǤʤ��ƤϤʤ�ʤ���
342 
343     $LOCALE �� NULL �Ǥʤ���С����ꤷ����ʬ�Υ����뤬$LOCALE �����ꤵ��롣
344     $LOCALE �������ƥ�˥��ݡ��Ȥ���Ƥ��ʤ���С�����ϹԤ�줺�����ߤΥ�������Ѥ��ʤ���
345 
346     $LOCALE �� NULL �ʤ�С����ߤΥ�����λ��ꤷ����ʬ���䤤��碌�롣
347 
348     @return
349 
350     �ƤӽФ�����������С�mlocale_set () �ϥ�������б����� opaque
351     �����륪�֥������Ȥ��֤����������̾���ϴؿ�
352     mlocale_get_prop () �ˤ�ä����뤳�Ȥ��Ǥ��롣
353     �����Ǥʤ���� NULL ���֤���
354      */
355 
356 /***
357     @errors
358     @c MERROR_LOCALE  */
359 
360 MLocale *
mlocale_set(int category,const char * name)361 mlocale_set (int category, const char *name)
362 {
363   char *new;
364   MLocale *locale;
365 
366   new = setlocale (category, name);
367   if (! new)
368     return NULL;
369 
370   locale = (MLocale *) msymbol_get (msymbol (new), M_locale);
371   if (! locale)
372     locale = make_locale (new);
373   if (! locale)
374     return NULL;
375   if (name && (category == LC_ALL || category == LC_COLLATE))
376     {
377       M17N_OBJECT_REF (locale);
378       M17N_OBJECT_UNREF (mlocale__collate);
379       mlocale__collate = locale;
380     }
381   else if (name && (category == LC_ALL || category == LC_CTYPE))
382     {
383       M17N_OBJECT_REF (locale);
384       M17N_OBJECT_UNREF (mlocale__ctype);
385       mlocale__ctype = locale;
386     }
387   if (name && (category == LC_ALL || category == LC_MESSAGES))
388     {
389       M17N_OBJECT_REF (locale);
390       M17N_OBJECT_UNREF (mlocale__messages);
391       mlocale__messages = locale;
392     }
393   if (name && (category == LC_ALL || category == LC_TIME))
394     {
395       M17N_OBJECT_REF (locale);
396       M17N_OBJECT_UNREF (mlocale__time);
397       mlocale__time = locale;
398     }
399   return locale;
400 }
401 
402 /*=*/
403 
404 /***en
405     @brief Get the value of a locale property.
406 
407     The mlocale_get_prop () function returns the value of a property
408     $KEY of locale $LOCALE.  $KEY must be #Mname, #Mlanguage,
409     #Mterritory, #Mcodeset, #Mmodifier, or #Mcoding.  */
410 
411 /***ja
412     @brief ������ץ�ѥƥ����ͤ�����.
413 
414     �ؿ� mlocale_get_prop () �ϡ������� $LOCALE �� $KEY �ץ�ѥƥ����ͤ��֤���
415     $KEY �� #Mname, #Mlanguage, #Mterritory, #Mcodeset, #Mmodifier,
416     #Mcoding �Τ����줫�Ǥ��롣 */
417 
418 MSymbol
mlocale_get_prop(MLocale * locale,MSymbol key)419 mlocale_get_prop (MLocale *locale, MSymbol key)
420 {
421   if (key == Mcoding)
422     return locale->coding;
423   if (key == Mname)
424     return locale->name;
425   if (key == Mlanguage)
426     return locale->language;
427   if (key == Mterritory)
428     return locale->territory;
429   if (key == Mcodeset)
430     return locale->codeset;
431   if (key == Mmodifier)
432     return locale->modifier;
433   return Mnil;
434 }
435 
436 /*=*/
437 /***en
438     @brief Format date and time.
439 
440     The mtext_ftime () function formats the broken-down time $TM
441     according to the format specification $FORMAT and appends the
442     result to the M-text $MT.  The formatting is done according to the
443     locale $LOCALE (if not NULL) or the current locale (LC_TIME).
444 
445     The meanings of the arguments $TM and $FORMAT are the same as those
446     of strftime ().
447 
448     @seealso
449     strftime ().
450 */
451 /***ja
452     @brief ���դȻ��֤�ե����ޥåȤ���.
453 
454     �ؿ� mtext_ftime () �ϻ���ǡ��� (broken-down time) $TM ��$FORMAT
455     �ǻ��ꤵ�줿��������������̤�M-text $MT ���ղä��롣�ե����ޥåȤ�
456     NULL �Ǥʤ���� ������ $LOCALE �ˡ��ޤ��ϸ��ߤΥ�����(LC_TIME) �˽�����
457 
458     ���� $TM �� $FORMAT �ΰ�̣�� strftime () �ξ���Ʊ����
459 
460     @seealso
461     strftime ().
462 */
463 
464 int
mtext_ftime(MText * mt,const char * format,const struct tm * tm,MLocale * locale)465 mtext_ftime (MText *mt, const char *format, const struct tm *tm,
466 	     MLocale *locale)
467 {
468   int bufsize;
469   unsigned char *buf;
470   size_t nbytes, nchars;
471   char *current_locale = NULL;
472 
473   if (locale)
474     {
475       char *str = setlocale (LC_TIME, NULL);
476       int len = strlen (str) + 1;
477 
478       current_locale = alloca (len);
479       memcpy (current_locale, str, len);
480       mlocale_set (LC_TIME, msymbol_name (locale->name));
481     }
482 
483   bufsize = 1024;
484   while (1)
485     {
486       MTABLE_ALLOCA (buf, bufsize, MERROR_MTEXT);
487       buf[0] = 1;
488       nbytes = strftime ((char *) buf, bufsize, format, tm);
489       if (nbytes > 0
490 	  || ! buf[0])
491 	break;
492       bufsize *= 2;
493     }
494 
495   if (nbytes > 0)
496     {
497       MText *work = decode_locale (buf, nbytes, mlocale__time);
498 
499       if (work)
500 	{
501 	  nchars = work->nchars;
502 	  mtext_cat (mt, work);
503 	  M17N_OBJECT_UNREF (work);
504 	}
505       else
506 	nchars = 0;
507     }
508   else
509     nchars = 0;
510 
511   if (current_locale)
512     mlocale_set (LC_TIME, current_locale);
513 
514   return nchars;
515 }
516 
517 /*=*/
518 
519 /***en
520     @brief Get an environment variable.
521 
522     The mtext_getenv () function searches the environment variable
523     list for a string that matches the string pointed to by $NAME.
524 
525     If there is a match, the function decodes the value according to
526     the current locale (LC_CTYPE) into an M-text, and returns that
527     M-text.
528 
529     If there is no match, the function returns NULL.  */
530 /***ja
531     @brief �Ķ��ѿ�������.
532 
533     �ؿ� mtext_getenv () �� $NAME
534     �ǻؤ����ʸ����ȹ��פ���ʸ�����Ķ��ѿ��Υꥹ���椫��õ����
535 
536     ���Ĥ��ä����ˤϡ������ͤ��ߤΥ�����(LC_CTYPE) �˽��ä�
537     M-text �˥ǥ����ɤ�������M-text ���֤���
538 
539     ���Ĥ���ʤ���С�NULL ���֤���  */
540 
541 MText *
mtext_getenv(const char * name)542 mtext_getenv (const char *name)
543 {
544   char *p = getenv (name);
545 
546   if (!p)
547     return NULL;
548   return decode_locale ((unsigned char *) p, strlen (p), mlocale__ctype);
549 }
550 
551 /*=*/
552 
553 /***en
554     @brief Change or add an environment variable.
555 
556     The mtext_putenv () function changes or adds the value of
557     environment variables according to M-text $MT.  It calls the
558     function <tt>putenv</tt> with an argument generated by encoding
559     $MT according to the current locale (LC_CTYPE).
560 
561     @return
562     This function returns zero on success, or -1 if an error
563     occurs.  */
564 /***ja
565     @brief �Ķ��ѿ����ѹ����ɲä���.
566 
567     �ؿ� mtext_putenv () �� M-text $MT
568     �˽��äơ��Ķ��ѿ����ͤ��ѹ��������ɲä����ꤹ�롣���δؿ��ϡ����ߤΥ�����
569     (LC_CTYPE) �˽��ä�$MT �������ɤ������������Ȥ��ƴؿ� <tt>putenv</tt> ��Ƥ֡�
570 
571     @return
572     ���δؿ��ϡ������������ˤ� 0 �����顼��������� -1 ���֤���
573     */
574 
575 
576 int
mtext_putenv(MText * mt)577 mtext_putenv (MText *mt)
578 {
579   unsigned char buf[1024];
580   int size = 1024;
581   unsigned char *newbuf;
582   int result;
583 
584   newbuf = encode_locale (mt, buf, &size, mlocale__ctype);
585   result = putenv ((char *) newbuf);
586   if (buf != newbuf)
587     free (newbuf);
588   return result;
589 }
590 
591 /*=*/
592 
593 /***en
594     @brief Compare two M-texts using the current locale.
595 
596     The mtext_coll () function compares the two M-texts $MT1 and $MT2.
597     It returns an integer less than, equal to, or greater than zero if
598     $MT1 is found, respectively, to be less than, to match, or to be
599     greater than $MT2.  The comparison is based on texts as
600     appropriate for the current locale (LC_COLLATE).
601 
602     This function makes use of information that is automatically
603     cached in the M-texts as a text property.  So, the second call of
604     this function with $MT1 or $MT2 finishes faster than the first
605     call.  */
606 /***ja
607     @brief ���ߤΥ�������Ѥ��ƣ��Ĥ� M-text ����Ӥ���.
608 
609     �ؿ� mtext_coll () �ϣ��Ĥ� M-text $MT1 �� $MT2
610     ����Ӥ��롣����ͤ����������, 0, ���������ͤΤ����줫�Ǥ��ꡢ���줾��
611     $MT1 �� $MT2 ��꾮������Ʊ�����礭�������������롣��Ӥϸ��ߤΥ�����
612     (LC_COLLATE) �˴�Ť��ƹԤ��롣
613 
614     ���δؿ��� M-text
615     �Υƥ����ȥץ�ѥƥ��Ȥ��Ƽ�ưŪ�˥���å��夵����������Ѥ���Τǡ������ܰʹߤ�Ʊ����Ӥϣ����ܤ��®���¹Ԥ���롣  */
616 
617 int
mtext_coll(MText * mt1,MText * mt2)618 mtext_coll (MText *mt1, MText *mt2)
619 {
620   char *str1, *str2;
621 
622   if (mt1->nchars == 0)
623     return (mt2->nchars == 0 ? 0 : -1);
624   else if (mt2->nchars == 0)
625     return 1;
626 
627   str1 = get_xfrm (mt1);
628   str2 = get_xfrm (mt2);
629   return strcoll (str1, str2);
630 }
631 
632 /*** @} */
633 
634 /*
635   Local Variables:
636   coding: euc-japan
637   End:
638 */
639