1 /* Localization of proper names. 2 Copyright (C) 2006-2015 Free Software Foundation, Inc. 3 Written by Bruno Haible <bruno@clisp.org>, 2006. 4 5 This program is free software: you can redistribute it and/or modify 6 it under the terms of the GNU General Public License as published by 7 the Free Software Foundation; either version 3 of the License, or 8 (at your option) any later version. 9 10 This program is distributed in the hope that it will be useful, 11 but WITHOUT ANY WARRANTY; without even the implied warranty of 12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 GNU General Public License for more details. 14 15 You should have received a copy of the GNU General Public License 16 along with this program. If not, see <http://www.gnu.org/licenses/>. */ 17 18 /* Without this pragma, gcc 4.7.0 20111124 mistakenly suggests that 19 the proper_name function might be candidate for attribute 'const' */ 20 #if (__GNUC__ == 4 && 6 <= __GNUC_MINOR__) || 4 < __GNUC__ 21 # pragma GCC diagnostic ignored "-Wsuggest-attribute=const" 22 #endif 23 24 #include <config.h> 25 26 /* Specification. */ 27 #include "propername.h" 28 29 #include <ctype.h> 30 #include <stdbool.h> 31 #include <stdio.h> 32 #include <stdlib.h> 33 #include <string.h> 34 #if HAVE_ICONV 35 # include <iconv.h> 36 #endif 37 38 #include "trim.h" 39 #include "mbchar.h" 40 #include "mbuiter.h" 41 #include "localcharset.h" 42 #include "c-strcase.h" 43 #include "xstriconv.h" 44 #include "xalloc.h" 45 #include "gettext.h" 46 47 48 /* Tests whether STRING contains trim (SUB), starting and ending at word 49 boundaries. 50 Here, instead of implementing Unicode Standard Annex #29 for determining 51 word boundaries, we assume that trim (SUB) starts and ends with words and 52 only test whether the part before it ends with a non-word and the part 53 after it starts with a non-word. */ 54 static bool 55 mbsstr_trimmed_wordbounded (const char *string, const char *sub) 56 { 57 char *tsub = trim (sub); 58 bool found = false; 59 60 for (; *string != '\0';) 61 { 62 const char *tsub_in_string = mbsstr (string, tsub); 63 if (tsub_in_string == NULL) 64 break; 65 else 66 { 67 if (MB_CUR_MAX > 1) 68 { 69 mbui_iterator_t string_iter; 70 bool word_boundary_before; 71 bool word_boundary_after; 72 73 mbui_init (string_iter, string); 74 word_boundary_before = true; 75 if (mbui_cur_ptr (string_iter) < tsub_in_string) 76 { 77 mbchar_t last_char_before_tsub; 78 do 79 { 80 if (!mbui_avail (string_iter)) 81 abort (); 82 last_char_before_tsub = mbui_cur (string_iter); 83 mbui_advance (string_iter); 84 } 85 while (mbui_cur_ptr (string_iter) < tsub_in_string); 86 if (mb_isalnum (last_char_before_tsub)) 87 word_boundary_before = false; 88 } 89 90 mbui_init (string_iter, tsub_in_string); 91 { 92 mbui_iterator_t tsub_iter; 93 94 for (mbui_init (tsub_iter, tsub); 95 mbui_avail (tsub_iter); 96 mbui_advance (tsub_iter)) 97 { 98 if (!mbui_avail (string_iter)) 99 abort (); 100 mbui_advance (string_iter); 101 } 102 } 103 word_boundary_after = true; 104 if (mbui_avail (string_iter)) 105 { 106 mbchar_t first_char_after_tsub = mbui_cur (string_iter); 107 if (mb_isalnum (first_char_after_tsub)) 108 word_boundary_after = false; 109 } 110 111 if (word_boundary_before && word_boundary_after) 112 { 113 found = true; 114 break; 115 } 116 117 mbui_init (string_iter, tsub_in_string); 118 if (!mbui_avail (string_iter)) 119 break; 120 string = tsub_in_string + mb_len (mbui_cur (string_iter)); 121 } 122 else 123 { 124 bool word_boundary_before; 125 const char *p; 126 bool word_boundary_after; 127 128 word_boundary_before = true; 129 if (string < tsub_in_string) 130 if (isalnum ((unsigned char) tsub_in_string[-1])) 131 word_boundary_before = false; 132 133 p = tsub_in_string + strlen (tsub); 134 word_boundary_after = true; 135 if (*p != '\0') 136 if (isalnum ((unsigned char) *p)) 137 word_boundary_after = false; 138 139 if (word_boundary_before && word_boundary_after) 140 { 141 found = true; 142 break; 143 } 144 145 if (*tsub_in_string == '\0') 146 break; 147 string = tsub_in_string + 1; 148 } 149 } 150 } 151 free (tsub); 152 return found; 153 } 154 155 /* Return the localization of NAME. NAME is written in ASCII. */ 156 157 const char * 158 proper_name (const char *name) 159 { 160 /* See whether there is a translation. */ 161 const char *translation = gettext (name); 162 163 if (translation != name) 164 { 165 /* See whether the translation contains the original name. */ 166 if (mbsstr_trimmed_wordbounded (translation, name)) 167 return translation; 168 else 169 { 170 /* Return "TRANSLATION (NAME)". */ 171 char *result = 172 XNMALLOC (strlen (translation) + 2 + strlen (name) + 1 + 1, char); 173 174 sprintf (result, "%s (%s)", translation, name); 175 return result; 176 } 177 } 178 else 179 return name; 180 } 181 182 /* Return the localization of a name whose original writing is not ASCII. 183 NAME_UTF8 is the real name, written in UTF-8 with octal or hexadecimal 184 escape sequences. NAME_ASCII is a fallback written only with ASCII 185 characters. */ 186 187 const char * 188 proper_name_utf8 (const char *name_ascii, const char *name_utf8) 189 { 190 /* See whether there is a translation. */ 191 const char *translation = gettext (name_ascii); 192 193 /* Try to convert NAME_UTF8 to the locale encoding. */ 194 const char *locale_code = locale_charset (); 195 char *alloc_name_converted = NULL; 196 char *alloc_name_converted_translit = NULL; 197 const char *name_converted = NULL; 198 const char *name_converted_translit = NULL; 199 const char *name; 200 201 if (c_strcasecmp (locale_code, "UTF-8") != 0) 202 { 203 #if HAVE_ICONV 204 name_converted = alloc_name_converted = 205 xstr_iconv (name_utf8, "UTF-8", locale_code); 206 207 # if (((__GLIBC__ == 2 && __GLIBC_MINOR__ >= 2) || __GLIBC__ > 2) \ 208 && !defined __UCLIBC__) \ 209 || _LIBICONV_VERSION >= 0x0105 210 { 211 char *converted_translit; 212 213 size_t len = strlen (locale_code); 214 char *locale_code_translit = XNMALLOC (len + 10 + 1, char); 215 memcpy (locale_code_translit, locale_code, len); 216 memcpy (locale_code_translit + len, "//TRANSLIT", 10 + 1); 217 218 converted_translit = 219 xstr_iconv (name_utf8, "UTF-8", locale_code_translit); 220 221 free (locale_code_translit); 222 223 if (converted_translit != NULL) 224 { 225 # if !_LIBICONV_VERSION 226 /* Don't use the transliteration if it added question marks. 227 glibc's transliteration falls back to question marks; libiconv's 228 transliteration does not. 229 mbschr is equivalent to strchr in this case. */ 230 if (strchr (converted_translit, '?') != NULL) 231 free (converted_translit); 232 else 233 # endif 234 name_converted_translit = alloc_name_converted_translit = 235 converted_translit; 236 } 237 } 238 # endif 239 #endif 240 } 241 else 242 { 243 name_converted = name_utf8; 244 name_converted_translit = name_utf8; 245 } 246 247 /* The name in locale encoding. */ 248 name = (name_converted != NULL ? name_converted : 249 name_converted_translit != NULL ? name_converted_translit : 250 name_ascii); 251 252 /* See whether we have a translation. Some translators have not understood 253 that they should use the UTF-8 form of the name, if possible. So if the 254 translator provided a no-op translation, we ignore it. */ 255 if (strcmp (translation, name_ascii) != 0) 256 { 257 /* See whether the translation contains the original name. */ 258 if (mbsstr_trimmed_wordbounded (translation, name_ascii) 259 || (name_converted != NULL 260 && mbsstr_trimmed_wordbounded (translation, name_converted)) 261 || (name_converted_translit != NULL 262 && mbsstr_trimmed_wordbounded (translation, name_converted_translit))) 263 { 264 if (alloc_name_converted != NULL) 265 free (alloc_name_converted); 266 if (alloc_name_converted_translit != NULL) 267 free (alloc_name_converted_translit); 268 return translation; 269 } 270 else 271 { 272 /* Return "TRANSLATION (NAME)". */ 273 char *result = 274 XNMALLOC (strlen (translation) + 2 + strlen (name) + 1 + 1, char); 275 276 sprintf (result, "%s (%s)", translation, name); 277 278 if (alloc_name_converted != NULL) 279 free (alloc_name_converted); 280 if (alloc_name_converted_translit != NULL) 281 free (alloc_name_converted_translit); 282 return result; 283 } 284 } 285 else 286 { 287 if (alloc_name_converted != NULL && alloc_name_converted != name) 288 free (alloc_name_converted); 289 if (alloc_name_converted_translit != NULL 290 && alloc_name_converted_translit != name) 291 free (alloc_name_converted_translit); 292 return name; 293 } 294 } 295 296 #ifdef TEST1 297 # include <locale.h> 298 int 299 main (int argc, char *argv[]) 300 { 301 setlocale (LC_ALL, ""); 302 if (mbsstr_trimmed_wordbounded (argv[1], argv[2])) 303 printf("found\n"); 304 return 0; 305 } 306 #endif 307 308 #ifdef TEST2 309 # include <locale.h> 310 # include <stdio.h> 311 int 312 main (int argc, char *argv[]) 313 { 314 setlocale (LC_ALL, ""); 315 printf ("%s\n", proper_name_utf8 ("Franc,ois Pinard", "Fran\303\247ois Pinard")); 316 return 0; 317 } 318 #endif 319