1 /* Load needed message catalogs.
2    Copyright (C) 1995-1999, 2000, 2001 Free Software Foundation, Inc.
3 
4    This program is free software; you can redistribute it and/or modify it
5    under the terms of the GNU Library General Public License as published
6    by the Free Software Foundation; either version 2, or (at your option)
7    any later version.
8 
9    This program is distributed in the hope that it will be useful,
10    but WITHOUT ANY WARRANTY; without even the implied warranty of
11    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
12    Library General Public License for more details.
13 
14    You should have received a copy of the GNU Library General Public
15    License along with this program; if not, write to the Free Software
16    Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
17    USA.  */
18 
19 /* Tell glibc's <string.h> to provide a prototype for mempcpy().
20    This must come before <config.h> because <config.h> may include
21    <features.h>, and once <features.h> has been included, it's too late.  */
22 #ifndef _GNU_SOURCE
23 # define _GNU_SOURCE    1
24 #endif
25 
26 #ifdef HAVE_CONFIG_H
27 # include <config.h>
28 #endif
29 
30 #include <ctype.h>
31 #include <errno.h>
32 #include <fcntl.h>
33 #include <sys/types.h>
34 #include <sys/stat.h>
35 
36 #ifdef __GNUC__
37 # define alloca __builtin_alloca
38 # define HAVE_ALLOCA 1
39 #else
40 # if defined HAVE_ALLOCA_H || defined _LIBC
41 #  include <alloca.h>
42 # else
43 #  ifdef _AIX
44  #pragma alloca
45 #  else
46 #   ifndef alloca
47 char *alloca ();
48 #   endif
49 #  endif
50 # endif
51 #endif
52 
53 #include <stdlib.h>
54 #include <string.h>
55 
56 #if defined HAVE_UNISTD_H || defined _LIBC
57 # include <unistd.h>
58 #endif
59 
60 #ifdef _LIBC
61 # include <langinfo.h>
62 # include <locale.h>
63 #endif
64 
65 #if (defined HAVE_MMAP && defined HAVE_MUNMAP && !defined DISALLOW_MMAP) \
66     || (defined _LIBC && defined _POSIX_MAPPED_FILES)
67 # include <sys/mman.h>
68 # undef HAVE_MMAP
69 # define HAVE_MMAP	1
70 #else
71 # undef HAVE_MMAP
72 #endif
73 
74 #include "gettext.h"
75 #include "gettextP.h"
76 
77 #ifdef _LIBC
78 # include "../locale/localeinfo.h"
79 #endif
80 
81 /* @@ end of prolog @@ */
82 
83 #ifdef _LIBC
84 /* Rename the non ISO C functions.  This is required by the standard
85    because some ISO C functions will require linking with this object
86    file and the name space must not be polluted.  */
87 # define open   __open
88 # define close  __close
89 # define read   __read
90 # define mmap   __mmap
91 # define munmap __munmap
92 #endif
93 
/* Names for the libintl functions are a problem.  They must not clash
   with existing names and they should follow ANSI C.  But this source
   code is also used in the GNU C Library, where the names have a __
   prefix.  So we have to distinguish the two cases here.  */
98 #ifdef _LIBC
99 # define PLURAL_PARSE __gettextparse
100 #else
101 # define PLURAL_PARSE gettextparse__
102 #endif
103 
104 /* For those losing systems which don't have `alloca' we have to add
105    some additional code emulating it.  */
106 #ifdef HAVE_ALLOCA
107 # define freea(p) /* nothing */
108 #else
109 # define alloca(n) malloc (n)
110 # define freea(p) free (p)
111 #endif
112 
113 /* For systems that distinguish between text and binary I/O.
114    O_BINARY is usually declared in <fcntl.h>. */
115 #if !defined O_BINARY && defined _O_BINARY
116   /* For MSC-compatible compilers.  */
117 # define O_BINARY _O_BINARY
118 # define O_TEXT _O_TEXT
119 #endif
120 #ifdef __BEOS__
121   /* BeOS 5 has O_BINARY and O_TEXT, but they have no effect.  */
122 # undef O_BINARY
123 # undef O_TEXT
124 #endif
125 /* On reasonable systems, binary I/O is the default.  */
126 #ifndef O_BINARY
127 # define O_BINARY 0
128 #endif
129 
/* We need an indicator of whether a new catalog has been loaded, one
   that can be associated with all translations.  This is important if
   the translations are cached by one of GCC's features.  */
133 int _nl_msg_cat_cntr;
134 
135 #if (defined __GNUC__ && !defined __APPLE_CC__) \
136     || (defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901L)
137 
138 /* These structs are the constant expression for the germanic plural
139    form determination.  It represents the expression  "n != 1".  */
140 static const struct expression plvar =
141 {
142   .nargs = 0,
143   .operation = var,
144 };
145 static const struct expression plone =
146 {
147   .nargs = 0,
148   .operation = num,
149   .val =
150   {
151     .num = 1
152   }
153 };
154 static struct expression germanic_plural =
155 {
156   .nargs = 2,
157   .operation = not_equal,
158   .val =
159   {
160     .args =
161     {
162       [0] = (struct expression *) &plvar,
163       [1] = (struct expression *) &plone
164     }
165   }
166 };
167 
168 # define INIT_GERMANIC_PLURAL()
169 
170 #else
171 
172 /* For compilers without support for ISO C 99 struct/union initializers:
173    Initialization at run-time.  */
174 
175 static struct expression plvar;
176 static struct expression plone;
177 static struct expression germanic_plural;
178 
179 static void
init_germanic_plural()180 init_germanic_plural ()
181 {
182   if (plone.val.num == 0)
183     {
184       plvar.nargs = 0;
185       plvar.operation = var;
186 
187       plone.nargs = 0;
188       plone.operation = num;
189       plone.val.num = 1;
190 
191       germanic_plural.nargs = 2;
192       germanic_plural.operation = not_equal;
193       germanic_plural.val.args[0] = &plvar;
194       germanic_plural.val.args[1] = &plone;
195     }
196 }
197 
198 # define INIT_GERMANIC_PLURAL() init_germanic_plural ()
199 
200 #endif
201 
202 
203 /* Initialize the codeset dependent parts of an opened message catalog.
204    Return the header entry.  */
205 const char *
206 internal_function
_nl_init_domain_conv(domain_file,domain,domainbinding)207 _nl_init_domain_conv (domain_file, domain, domainbinding)
208      struct loaded_l10nfile *domain_file;
209      struct loaded_domain *domain;
210      struct binding *domainbinding;
211 {
212   /* Find out about the character set the file is encoded with.
213      This can be found (in textual form) in the entry "".  If this
214      entry does not exist or if this does not contain the `charset='
215      information, we will assume the charset matches the one the
216      current locale and we don't have to perform any conversion.  */
217   char *nullentry;
218   size_t nullentrylen;
219 
220   /* Preinitialize fields, to avoid recursion during _nl_find_msg.  */
221   domain->codeset_cntr =
222     (domainbinding != NULL ? domainbinding->codeset_cntr : 0);
223 #ifdef _LIBC
224   domain->conv = (__gconv_t) -1;
225 #else
226 # if HAVE_ICONV
227   domain->conv = (iconv_t) -1;
228 # endif
229 #endif
230   domain->conv_tab = NULL;
231 
232   /* Get the header entry.  */
233   nullentry = _nl_find_msg (domain_file, domainbinding, "", &nullentrylen);
234 
235   if (nullentry != NULL)
236     {
237 #if defined _LIBC || HAVE_ICONV
238       const char *charsetstr;
239 
240       charsetstr = strstr (nullentry, "charset=");
241       if (charsetstr != NULL)
242 	{
243 	  size_t len;
244 	  char *charset;
245 	  const char *outcharset;
246 
247 	  charsetstr += strlen ("charset=");
248 	  len = strcspn (charsetstr, " \t\n");
249 
250 	  charset = (char *) alloca (len + 1);
251 # if defined _LIBC || HAVE_MEMPCPY
252 	  *((char *) mempcpy (charset, charsetstr, len)) = '\0';
253 # else
254 	  memcpy (charset, charsetstr, len);
255 	  charset[len] = '\0';
256 # endif
257 
258 	  /* The output charset should normally be determined by the
259 	     locale.  But sometimes the locale is not used or not correctly
260 	     set up, so we provide a possibility for the user to override
261 	     this.  Moreover, the value specified through
262 	     bind_textdomain_codeset overrides both.  */
263 	  if (domainbinding != NULL && domainbinding->codeset != NULL)
264 	    outcharset = domainbinding->codeset;
265 	  else
266 	    {
267 	      outcharset = getenv ("OUTPUT_CHARSET");
268 	      if (outcharset == NULL || outcharset[0] == '\0')
269 		{
270 # ifdef _LIBC
271 		  outcharset = (*_nl_current[LC_CTYPE])->values[_NL_ITEM_INDEX (CODESET)].string;
272 # else
273 #  if HAVE_ICONV
274 		  extern const char *locale_charset (void);
275 		  outcharset = locale_charset ();
276 #  endif
277 # endif
278 		}
279 	    }
280 
281 # ifdef _LIBC
282 	  /* We always want to use transliteration.  */
283 	  outcharset = norm_add_slashes (outcharset, "TRANSLIT");
284 	  charset = norm_add_slashes (charset, NULL);
285 	  if (__gconv_open (outcharset, charset, &domain->conv,
286 			    GCONV_AVOID_NOCONV)
287 	      != __GCONV_OK)
288 	    domain->conv = (__gconv_t) -1;
289 # else
290 #  if HAVE_ICONV
291 	  /* When using GNU libiconv, we want to use transliteration.  */
292 #   if _LIBICONV_VERSION >= 0x0105
293 	  len = strlen (outcharset);
294 	  {
295 	    char *tmp = (char *) alloca (len + 10 + 1);
296 	    memcpy (tmp, outcharset, len);
297 	    memcpy (tmp + len, "//TRANSLIT", 10 + 1);
298 	    outcharset = tmp;
299 	  }
300 #   endif
301 	  domain->conv = iconv_open (outcharset, charset);
302 #   if _LIBICONV_VERSION >= 0x0105
303 	  freea (outcharset);
304 #   endif
305 #  endif
306 # endif
307 
308 	  freea (charset);
309 	}
310 #endif /* _LIBC || HAVE_ICONV */
311     }
312 
313   return nullentry;
314 }
315 
316 /* Frees the codeset dependent parts of an opened message catalog.  */
317 void
318 internal_function
_nl_free_domain_conv(domain)319 _nl_free_domain_conv (domain)
320      struct loaded_domain *domain;
321 {
322   if (domain->conv_tab != NULL && domain->conv_tab != (char **) -1)
323     free (domain->conv_tab);
324 
325 #ifdef _LIBC
326   if (domain->conv != (__gconv_t) -1)
327     __gconv_close (domain->conv);
328 #else
329 # if HAVE_ICONV
330   if (domain->conv != (iconv_t) -1)
331     iconv_close (domain->conv);
332 # endif
333 #endif
334 }
335 
336 /* Load the message catalogs specified by FILENAME.  If it is no valid
337    message catalog do nothing.  */
338 void
339 internal_function
_nl_load_domain(domain_file,domainbinding)340 _nl_load_domain (domain_file, domainbinding)
341      struct loaded_l10nfile *domain_file;
342      struct binding *domainbinding;
343 {
344   int fd;
345   size_t size;
346 #ifdef _LIBC
347   struct stat64 st;
348 #else
349   struct stat st;
350 #endif
351   struct mo_file_header *data = (struct mo_file_header *) -1;
352   int use_mmap = 0;
353   struct loaded_domain *domain;
354   const char *nullentry;
355 
356   domain_file->decided = 1;
357   domain_file->data = NULL;
358 
359   /* Note that it would be useless to store domainbinding in domain_file
360      because domainbinding might be == NULL now but != NULL later (after
361      a call to bind_textdomain_codeset).  */
362 
363   /* If the record does not represent a valid locale the FILENAME
364      might be NULL.  This can happen when according to the given
365      specification the locale file name is different for XPG and CEN
366      syntax.  */
367   if (domain_file->filename == NULL)
368     return;
369 
370   /* Try to open the addressed file.  */
371   fd = open (domain_file->filename, O_RDONLY | O_BINARY);
372   if (fd == -1)
373     return;
374 
375   /* We must know about the size of the file.  */
376   if (
377 #ifdef _LIBC
378       __builtin_expect (fstat64 (fd, &st) != 0, 0)
379 #else
380       __builtin_expect (fstat (fd, &st) != 0, 0)
381 #endif
382       || __builtin_expect ((size = (size_t) st.st_size) != st.st_size, 0)
383       || __builtin_expect (size < sizeof (struct mo_file_header), 0))
384     {
385       /* Something went wrong.  */
386       close (fd);
387       return;
388     }
389 
390 #ifdef HAVE_MMAP
391   /* Now we are ready to load the file.  If mmap() is available we try
392      this first.  If not available or it failed we try to load it.  */
393   data = (struct mo_file_header *) mmap (NULL, size, PROT_READ,
394 					 MAP_PRIVATE, fd, 0);
395 
396   if (__builtin_expect (data != (struct mo_file_header *) -1, 1))
397     {
398       /* mmap() call was successful.  */
399       close (fd);
400       use_mmap = 1;
401     }
402 #endif
403 
404   /* If the data is not yet available (i.e. mmap'ed) we try to load
405      it manually.  */
406   if (data == (struct mo_file_header *) -1)
407     {
408       size_t to_read;
409       char *read_ptr;
410 
411       data = (struct mo_file_header *) malloc (size);
412       if (data == NULL)
413 	return;
414 
415       to_read = size;
416       read_ptr = (char *) data;
417       do
418 	{
419 	  long int nb = (long int) read (fd, read_ptr, to_read);
420 	  if (nb <= 0)
421 	    {
422 #ifdef EINTR
423 	      if (nb == -1 && errno == EINTR)
424 		continue;
425 #endif
426 	      close (fd);
427 	      return;
428 	    }
429 	  read_ptr += nb;
430 	  to_read -= nb;
431 	}
432       while (to_read > 0);
433 
434       close (fd);
435     }
436 
437   /* Using the magic number we can test whether it really is a message
438      catalog file.  */
439   if (__builtin_expect (data->magic != _MAGIC && data->magic != _MAGIC_SWAPPED,
440 			0))
441     {
442       /* The magic number is wrong: not a message catalog file.  */
443 #ifdef HAVE_MMAP
444       if (use_mmap)
445 	munmap ((caddr_t) data, size);
446       else
447 #endif
448 	free (data);
449       return;
450     }
451 
452   domain = (struct loaded_domain *) malloc (sizeof (struct loaded_domain));
453   if (domain == NULL)
454     return;
455   domain_file->data = domain;
456 
457   domain->data = (char *) data;
458   domain->use_mmap = use_mmap;
459   domain->mmap_size = size;
460   domain->must_swap = data->magic != _MAGIC;
461 
462   /* Fill in the information about the available tables.  */
463   switch (W (domain->must_swap, data->revision))
464     {
465     case 0:
466       domain->nstrings = W (domain->must_swap, data->nstrings);
467       domain->orig_tab = (struct string_desc *)
468 	((char *) data + W (domain->must_swap, data->orig_tab_offset));
469       domain->trans_tab = (struct string_desc *)
470 	((char *) data + W (domain->must_swap, data->trans_tab_offset));
471       domain->hash_size = W (domain->must_swap, data->hash_tab_size);
472       domain->hash_tab = (nls_uint32 *)
473 	((char *) data + W (domain->must_swap, data->hash_tab_offset));
474       break;
475     default:
476       /* This is an invalid revision.  */
477 #ifdef HAVE_MMAP
478       if (use_mmap)
479 	munmap ((caddr_t) data, size);
480       else
481 #endif
482 	free (data);
483       free (domain);
484       domain_file->data = NULL;
485       return;
486     }
487 
488   /* Now initialize the character set converter from the character set
489      the file is encoded with (found in the header entry) to the domain's
490      specified character set or the locale's character set.  */
491   nullentry = _nl_init_domain_conv (domain_file, domain, domainbinding);
492 
493   /* Also look for a plural specification.  */
494   if (nullentry != NULL)
495     {
496       const char *plural;
497       const char *nplurals;
498 
499       plural = strstr (nullentry, "plural=");
500       nplurals = strstr (nullentry, "nplurals=");
501       if (plural == NULL || nplurals == NULL)
502 	goto no_plural;
503       else
504 	{
505 	  /* First get the number.  */
506 	  char *endp;
507 	  unsigned long int n;
508 	  struct parse_args args;
509 
510 	  nplurals += 9;
511 	  while (*nplurals != '\0' && isspace (*nplurals))
512 	    ++nplurals;
513 #if defined HAVE_STRTOUL || defined _LIBC
514 	  n = strtoul (nplurals, &endp, 10);
515 #else
516 	  for (endp = nplurals, n = 0; *endp >= '0' && *endp <= '9'; endp++)
517 	    n = n * 10 + (*endp - '0');
518 #endif
519 	  domain->nplurals = n;
520 	  if (nplurals == endp)
521 	    goto no_plural;
522 
523 	  /* Due to the restrictions bison imposes onto the interface of the
524 	     scanner function we have to put the input string and the result
525 	     passed up from the parser into the same structure which address
526 	     is passed down to the parser.  */
527 	  plural += 7;
528 	  args.cp = plural;
529 	  if (PLURAL_PARSE (&args) != 0)
530 	    goto no_plural;
531 	  domain->plural = args.res;
532 	}
533     }
534   else
535     {
536       /* By default we are using the Germanic form: singular form only
537          for `one', the plural form otherwise.  Yes, this is also what
538          English is using since English is a Germanic language.  */
539     no_plural:
540       INIT_GERMANIC_PLURAL ();
541       domain->plural = &germanic_plural;
542       domain->nplurals = 2;
543     }
544 }
545 
546 
#ifdef _LIBC
/* Release everything held by DOMAIN: the parsed plural expression
   (unless it is the shared germanic_plural default), the codeset
   conversion state, the catalog image (unmapped or freed, depending on
   DOMAIN->use_mmap) and finally DOMAIN itself.  */
void
internal_function
_nl_unload_domain (domain)
     struct loaded_domain *domain;
{
  if (domain->plural != &germanic_plural)
    __gettext_free_exp (domain->plural);

  _nl_free_domain_conv (domain);

  /* Test the same macro that guards the mmap call in _nl_load_domain,
     so that an image that was mapped is never handed to free.  */
# ifdef HAVE_MMAP
  if (domain->use_mmap)
    munmap ((caddr_t) domain->data, domain->mmap_size);
  else
# endif	/* HAVE_MMAP */
    free ((void *) domain->data);

  free (domain);
}
#endif
568