1 /* strgutil.c - string utilities
2 * Copyright (C) 1994, 1998, 1999, 2000, 2001,
3 * 2003, 2004, 2005, 2009 Free Software Foundation, Inc.
4 *
5 * This file is part of GnuPG.
6 *
7 * GnuPG is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * GnuPG is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, see <http://www.gnu.org/licenses/>.
19 */
20
21 #include <config.h>
22 #include <stdlib.h>
23 #include <string.h>
24 #include <ctype.h>
25 #include <errno.h>
26 #ifdef HAVE_LANGINFO_CODESET
27 #include <langinfo.h>
28 #endif
29
30 /* For W32 we use dynamic loading of the iconv dll and don't need any
31 * iconv headers at all. */
32 #ifndef _WIN32
33 # ifndef HAVE_ICONV
34 # undef USE_GNUPG_ICONV
35 # endif
36 #endif
37
38 #ifdef USE_GNUPG_ICONV
39 # include <limits.h>
40 # ifndef _WIN32
41 # include <iconv.h>
42 # endif
43 #endif
44
45 #include "types.h"
46 #include "util.h"
47 #include "memory.h"
48 #include "i18n.h"
49 #include "dynload.h"
50 #include "estream-printf.h"
51
52 /* Our xasprintf replacements are expected to work with our memory
53 allocator. Let's test for this here. */
54 #if !defined(_ESTREAM_PRINTF_MALLOC) || !defined(_ESTREAM_PRINTF_FREE)
55 #error Please define _ESTREAM_PRINTF_MALLOC and _FREE
56 #endif
57
58
59
60 #ifndef USE_GNUPG_ICONV
/* Unicode code points for the upper half (bytes 0x80..0xff) of the
 * KOI8-R charset.  The table is indexed with the low seven bits of the
 * byte (byte & 0x7f) and is installed as the active translation table
 * when "koi8-r" is selected.  Only compiled when iconv is not used. */
61 static ushort koi8_unicode[128] = {
62 0x2500,0x2502,0x250c,0x2510,0x2514,0x2518,0x251c,0x2524,
63 0x252c,0x2534,0x253c,0x2580,0x2584,0x2588,0x258c,0x2590,
64 0x2591,0x2592,0x2593,0x2320,0x25a0,0x2219,0x221a,0x2248,
65 0x2264,0x2265,0x00a0,0x2321,0x00b0,0x00b2,0x00b7,0x00f7,
66 0x2550,0x2551,0x2552,0x0451,0x2553,0x2554,0x2555,0x2556,
67 0x2557,0x2558,0x2559,0x255a,0x255b,0x255c,0x255d,0x255e,
68 0x255f,0x2560,0x2561,0x0401,0x2562,0x2563,0x2564,0x2565,
69 0x2566,0x2567,0x2568,0x2569,0x256a,0x256b,0x256c,0x00a9,
70 0x044e,0x0430,0x0431,0x0446,0x0434,0x0435,0x0444,0x0433,
71 0x0445,0x0438,0x0439,0x043a,0x043b,0x043c,0x043d,0x043e,
72 0x043f,0x044f,0x0440,0x0441,0x0442,0x0443,0x0436,0x0432,
73 0x044c,0x044b,0x0437,0x0448,0x044d,0x0449,0x0447,0x044a,
74 0x042e,0x0410,0x0411,0x0426,0x0414,0x0415,0x0424,0x0413,
75 0x0425,0x0418,0x0419,0x041a,0x041b,0x041c,0x041d,0x041e,
76 0x041f,0x042f,0x0420,0x0421,0x0422,0x0423,0x0416,0x0412,
77 0x042c,0x042b,0x0417,0x0428,0x042d,0x0429,0x0427,0x042a
78 };
79
/* Unicode code points for the upper half (bytes 0x80..0xff) of the
 * ISO-8859-2 (Latin-2) charset.  The table is indexed with the low
 * seven bits of the byte (byte & 0x7f) and is installed as the active
 * translation table when "iso-8859-2" is selected.  Only compiled when
 * iconv is not used. */
80 static ushort latin2_unicode[128] = {
81 0x0080,0x0081,0x0082,0x0083,0x0084,0x0085,0x0086,0x0087,
82 0x0088,0x0089,0x008A,0x008B,0x008C,0x008D,0x008E,0x008F,
83 0x0090,0x0091,0x0092,0x0093,0x0094,0x0095,0x0096,0x0097,
84 0x0098,0x0099,0x009A,0x009B,0x009C,0x009D,0x009E,0x009F,
85 0x00A0,0x0104,0x02D8,0x0141,0x00A4,0x013D,0x015A,0x00A7,
86 0x00A8,0x0160,0x015E,0x0164,0x0179,0x00AD,0x017D,0x017B,
87 0x00B0,0x0105,0x02DB,0x0142,0x00B4,0x013E,0x015B,0x02C7,
88 0x00B8,0x0161,0x015F,0x0165,0x017A,0x02DD,0x017E,0x017C,
89 0x0154,0x00C1,0x00C2,0x0102,0x00C4,0x0139,0x0106,0x00C7,
90 0x010C,0x00C9,0x0118,0x00CB,0x011A,0x00CD,0x00CE,0x010E,
91 0x0110,0x0143,0x0147,0x00D3,0x00D4,0x0150,0x00D6,0x00D7,
92 0x0158,0x016E,0x00DA,0x0170,0x00DC,0x00DD,0x0162,0x00DF,
93 0x0155,0x00E1,0x00E2,0x0103,0x00E4,0x013A,0x0107,0x00E7,
94 0x010D,0x00E9,0x0119,0x00EB,0x011B,0x00ED,0x00EE,0x010F,
95 0x0111,0x0144,0x0148,0x00F3,0x00F4,0x0151,0x00F6,0x00F7,
96 0x0159,0x016F,0x00FA,0x0171,0x00FC,0x00FD,0x0163,0x02D9
97 };
98 #endif /*!USE_GNUPG_ICONV*/
99
100
101 #ifndef MB_LEN_MAX
102 #define MB_LEN_MAX 16
103 #endif
104
105
/* State describing the native charset; all four variables are updated
 * together whenever the charset is (re)selected. */

/* Name of the native charset; Latin-1 until something else is set. */
106 static const char *active_charset_name = "iso-8859-1";
/* 128-entry table mapping bytes 0x80..0xff to Unicode, or NULL when no
 * table based translation is in use. */
107 static ushort *active_charset = NULL;
/* True if the native charset is utf-8, so strings are passed through. */
108 static int no_translation = 0;
/* True if conversions are done through iconv. */
109 static int use_iconv = 0;
110
111
112 #ifdef _WIN32
/* Minimal iconv declarations for W32, where no iconv header is
 * included.  The three function pointers start out as NULL and are
 * filled in by load_libiconv with symbols looked up in iconv.dll. */
113 typedef void* iconv_t;
114 #ifndef ICONV_CONST
115 #define ICONV_CONST const
116 #endif
117
118 iconv_t (* __stdcall iconv_open) (const char *tocode, const char *fromcode);
119 size_t (* __stdcall iconv) (iconv_t cd,
120 const char **inbuf, size_t *inbytesleft,
121 char **outbuf, size_t *outbytesleft);
122 int (* __stdcall iconv_close) (iconv_t cd);
123
124 #endif /*_WIN32*/
125
126
127
128 #ifdef _WIN32
129 static int
load_libiconv(void)130 load_libiconv (void)
131 {
132 static int done;
133
134 if (!done)
135 {
136 void *handle;
137
138 done = 1; /* Do it right now because we might get called recursivly
139 through gettext. */
140
141 handle = dlopen ("iconv.dll", RTLD_LAZY);
142 if (handle)
143 {
144 iconv_open = dlsym (handle, "libiconv_open");
145 if (iconv_open)
146 iconv = dlsym (handle, "libiconv");
147 if (iconv)
148 iconv_close = dlsym (handle, "libiconv_close");
149 }
150 if (!handle || !iconv_close)
151 {
152 log_info (_("error loading `%s': %s\n"),
153 "iconv.dll", dlerror ());
154 log_info(_("please see http://www.gnupg.org/download/iconv.html "
155 "for more information\n"));
156 iconv_open = NULL;
157 iconv = NULL;
158 iconv_close = NULL;
159 if (handle)
160 dlclose (handle);
161 }
162 }
163 return iconv_open? 0: -1;
164 }
165 #endif /* _WIN32 */
166
167
168
169
170 void
free_strlist(STRLIST sl)171 free_strlist( STRLIST sl )
172 {
173 STRLIST sl2;
174
175 for(; sl; sl = sl2 ) {
176 sl2 = sl->next;
177 xfree(sl);
178 }
179 }
180
181
182 STRLIST
add_to_strlist(STRLIST * list,const char * string)183 add_to_strlist( STRLIST *list, const char *string )
184 {
185 STRLIST sl;
186
187 sl = xmalloc( sizeof *sl + strlen(string));
188 sl->flags = 0;
189 strcpy(sl->d, string);
190 sl->next = *list;
191 *list = sl;
192 return sl;
193 }
194
195 /****************
196 * Same as add_to_strlist() but if is_utf8 is *not* set a conversion
197 * to UTF8 is done
198 */
199 STRLIST
add_to_strlist2(STRLIST * list,const char * string,int is_utf8)200 add_to_strlist2( STRLIST *list, const char *string, int is_utf8 )
201 {
202 STRLIST sl;
203
204 if( is_utf8 )
205 sl = add_to_strlist( list, string );
206 else {
207 char *p = native_to_utf8( string );
208 sl = add_to_strlist( list, p );
209 xfree( p );
210 }
211 return sl;
212 }
213
214 STRLIST
append_to_strlist(STRLIST * list,const char * string)215 append_to_strlist( STRLIST *list, const char *string )
216 {
217 STRLIST r, sl;
218
219 sl = xmalloc( sizeof *sl + strlen(string));
220 sl->flags = 0;
221 strcpy(sl->d, string);
222 sl->next = NULL;
223 if( !*list )
224 *list = sl;
225 else {
226 for( r = *list; r->next; r = r->next )
227 ;
228 r->next = sl;
229 }
230 return sl;
231 }
232
233 STRLIST
append_to_strlist2(STRLIST * list,const char * string,int is_utf8)234 append_to_strlist2( STRLIST *list, const char *string, int is_utf8 )
235 {
236 STRLIST sl;
237
238 if( is_utf8 )
239 sl = append_to_strlist( list, string );
240 else {
241 char *p = native_to_utf8( string );
242 sl = append_to_strlist( list, p );
243 xfree( p );
244 }
245 return sl;
246 }
247
248
249 STRLIST
strlist_prev(STRLIST head,STRLIST node)250 strlist_prev( STRLIST head, STRLIST node )
251 {
252 STRLIST n;
253
254 for(n=NULL; head && head != node; head = head->next )
255 n = head;
256 return n;
257 }
258
259 STRLIST
strlist_last(STRLIST node)260 strlist_last( STRLIST node )
261 {
262 if( node )
263 for( ; node->next ; node = node->next )
264 ;
265 return node;
266 }
267
268 char *
pop_strlist(STRLIST * list)269 pop_strlist( STRLIST *list )
270 {
271 char *str=NULL;
272 STRLIST sl=*list;
273
274 if(sl)
275 {
276 str=xmalloc(strlen(sl->d)+1);
277 strcpy(str,sl->d);
278
279 *list=sl->next;
280 xfree(sl);
281 }
282
283 return str;
284 }
285
/****************
 * Look for the substring SUB in the buffer BUF of length BUFLEN and
 * return a pointer to the first occurrence in BUF or NULL if it was
 * not found.  BUF does not need to be NUL terminated; SUB must be.
 * The comparison is case-insensitive using toupper().  An empty SUB
 * is never found.
 */
const char *
memistr( const char *buf, size_t buflen, const char *sub )
{
    const unsigned char *hay = (const unsigned char *)buf;
    const unsigned char *needle = (const unsigned char *)sub;
    size_t start, i;

    if( !*needle )
        return NULL;

    for( start = 0; start < buflen; start++ ) {
        /* Compare at this position.  Stop at the end of SUB first so
           that a NUL byte in the buffer can never carry the
           comparison beyond the terminator of SUB. */
        for( i = 0; needle[i] && start + i < buflen
                    && toupper( hay[start+i] ) == toupper( needle[i] ); i++ )
            ;
        if( !needle[i] )
            return buf + start;  /* All of SUB matched. */
    }

    return NULL;
}
309
/* Same as memistr() but the case folding is done with ascii_toupper()
 * and thus does not depend on the locale.  Returns a pointer to the
 * first occurrence of SUB in the BUFLEN bytes at BUF or NULL if it was
 * not found.  An empty SUB is never found. */
const char *
ascii_memistr( const char *buf, size_t buflen, const char *sub )
{
    const unsigned char *hay = (const unsigned char *)buf;
    const unsigned char *needle = (const unsigned char *)sub;
    size_t start, i;

    if( !*needle )
        return NULL;

    for( start = 0; start < buflen; start++ ) {
        /* Compare at this position.  Stop at the end of SUB first so
           that a NUL byte in the buffer can never carry the
           comparison beyond the terminator of SUB. */
        for( i = 0; needle[i] && start + i < buflen
                    && ascii_toupper( hay[start+i] )
                       == ascii_toupper( needle[i] ); i++ )
            ;
        if( !needle[i] )
            return buf + start;  /* All of SUB matched. */
    }

    return NULL;
}
328
329
/* Copy at most N-1 bytes from SRC to DEST, stopping at the first NUL
 * in SRC, and always terminate the result with a '\0'.  If N is 0
 * nothing is done at all and DEST is returned unchanged.  If DEST is
 * NULL a buffer of N bytes is allocated using xmalloc (note that
 * xmalloc is guaranteed to succeed or to abort the process).  Returns
 * the destination buffer. */
char *
mem2str( char *dest , const void *src , size_t n )
{
    const char *in = src;
    char *out;
    size_t room;

    if( !n )
        return dest;

    if( !dest )
        dest = xmalloc( n );

    out = dest;
    /* One byte is reserved for the terminator. */
    for( room = n - 1; room && *in; room-- )
        *out++ = *in++;
    *out = '\0';

    return dest;
}
352
353
/*
 * Strip leading and trailing white space from STR.  The string is
 * modified in place and STR is returned.
 */
char *
trim_spaces( char *str )
{
    const char *src = str;
    char *dst = str;
    char *trail = NULL;  /* Start of the current run of spaces. */

    /* Skip the leading white space. */
    while( *src && isspace( (unsigned char)*src ) )
        src++;

    /* Shift the remainder to the front and remember where the last
       run of white space begins. */
    while( (*dst = *src) != '\0' ) {
        if( !isspace( (unsigned char)*src ) )
            trail = NULL;
        else if( !trail )
            trail = dst;
        dst++;
        src++;
    }

    if( trail )
        *trail = '\0';  /* Cut off the trailing white space. */

    return str;
}
379
380
381
382 unsigned int
trim_trailing_chars(byte * line,unsigned len,const char * trimchars)383 trim_trailing_chars( byte *line, unsigned len, const char *trimchars )
384 {
385 byte *p, *mark;
386 unsigned n;
387
388 for(mark=NULL, p=line, n=0; n < len; n++, p++ ) {
389 if( strchr(trimchars, *p ) ) {
390 if( !mark )
391 mark = p;
392 }
393 else
394 mark = NULL;
395 }
396
397 if( mark ) {
398 *mark = 0;
399 return mark - line;
400 }
401 return len;
402 }
403
404 /****************
405 * Remove trailing white spaces and return the length of the buffer
406 */
407 unsigned
trim_trailing_ws(byte * line,unsigned len)408 trim_trailing_ws( byte *line, unsigned len )
409 {
410 return trim_trailing_chars( line, len, " \t\r\n" );
411 }
412
413
414 unsigned int
check_trailing_chars(const byte * line,unsigned int len,const char * trimchars)415 check_trailing_chars( const byte *line, unsigned int len,
416 const char *trimchars )
417 {
418 const byte *p, *mark;
419 unsigned int n;
420
421 for(mark=NULL, p=line, n=0; n < len; n++, p++ ) {
422 if( strchr(trimchars, *p ) ) {
423 if( !mark )
424 mark = p;
425 }
426 else
427 mark = NULL;
428 }
429
430 if( mark ) {
431 return mark - line;
432 }
433 return len;
434 }
435
436
437 /****************
438 * Remove trailing white spaces and return the length of the buffer
439 */
440 unsigned int
check_trailing_ws(const byte * line,unsigned int len)441 check_trailing_ws( const byte *line, unsigned int len )
442 {
443 return check_trailing_chars( line, len, " \t\r\n" );
444 }
445
446
447
/* Return the number of times the character C occurs in STRING. */
int
string_count_chr( const char *string, int c )
{
    const char *p = string;
    int hits = 0;

    while( *p ) {
        if( *p++ == c )
            hits++;
    }
    return hits;
}
457
458
/* Return true if the string S has characters which are not valid in
   an RFC-822 address.  Valid are letters, digits, '_', '-', '.' and
   '@'; a '+' is valid only in front of the first '@'.

   To cope with OpenPGP all non-ascii characters are ignored so that
   for example umlauts are legal in an email address.  An OpenPGP user
   ID must be utf-8 encoded but there is no strict requirement for
   RFC-822.  Thus to avoid IDNA encoding the address is put verbatim
   as utf-8 into the user ID under the assumption that mail programs
   handle IDNA at a lower level and take OpenPGP user IDs as utf-8.
   An utf-8 encoding check can't be done here because keygen.c calls
   this function with the native encoding and the native to utf-8
   conversion is only done later. */
int
has_invalid_email_chars (const char *s)
{
  static const char allowed[] =
    "01234567890_-.abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ";
  int in_domain = 0;  /* Set once an '@' has been seen. */

  for ( ; *s; s++ )
    {
      if ( (*s & 0x80) )
        continue;  /* We only care about ASCII. */
      if ( *s == '@' )
        {
          in_domain = 1;
          continue;
        }
      if ( strchr (allowed, *s) )
        continue;
      if ( !in_domain && *s == '+' )
        continue;  /* Allowed in the local part only. */
      return 1;
    }
  return 0;
}
489
490
/* Check whether NAME represents a valid mailbox according to RFC822.
   Returns true if so.  A valid mailbox is not empty, has only valid
   characters, has exactly one '@' which is neither the first nor the
   last character, does not end in a dot and has no two dots in a
   row. */
int
is_valid_mailbox (const char *name)
{
  size_t len;

  if (!name || !*name)
    return 0;
  if (has_invalid_email_chars (name))
    return 0;
  if (string_count_chr (name, '@') != 1)
    return 0;
  if (*name == '@')
    return 0;

  len = strlen (name);
  if (name[len-1] == '@' || name[len-1] == '.')
    return 0;
  if (strstr (name, ".."))
    return 0;

  return 1;
}
505
506
507
508 #ifdef USE_GNUPG_ICONV
509 static void
handle_iconv_error(const char * to,const char * from,int use_fallback)510 handle_iconv_error (const char *to, const char *from, int use_fallback)
511 {
512 if (errno == EINVAL)
513 {
514 static int shown1, shown2;
515 int x;
516
517 if (to && !strcmp (to, "utf-8"))
518 {
519 x = shown1;
520 shown1 = 1;
521 }
522 else
523 {
524 x = shown2;
525 shown2 = 1;
526 }
527
528 if (!x)
529 log_info (_("conversion from `%s' to `%s' not available\n"),
530 from, to);
531 }
532 else
533 {
534 static int shown;
535
536 if (!shown)
537 log_info (_("iconv_open failed: %s\n"), strerror (errno));
538 shown = 1;
539 }
540
541 if (use_fallback)
542 {
543 /* To avoid further error messages we fallback to Latin-1 for the
544 native encoding. This is justified as one can expect that on a
545 utf-8 enabled system nl_langinfo() will work and thus we won't
546 never get to here. Thus Latin-1 seems to be a reasonable
547 default. */
548 active_charset_name = "iso-8859-1";
549 no_translation = 0;
550 active_charset = NULL;
551 use_iconv = 0;
552 }
553 }
554 #endif /*USE_GNUPG_ICONV*/
555
/* Select NEWSET as the charset of the native encoding.
 *
 * If NEWSET is NULL the name is derived from the environment: on W32
 * from the console output code page (falling back to the system code
 * page) with a few code pages mapped to their usual names; elsewhere
 * from nl_langinfo(CODESET) or, if that is not available, from the
 * codeset part of LC_ALL, LC_CTYPE or LANG with Latin-1 as default.
 *
 * For the comparison a leading "iso", optionally followed by '-' or
 * '_', is skipped.  Latin-1 and plain ASCII select the built-in
 * Latin-1 code, utf-8 disables any translation.  Everything else is
 * handed to iconv if available; without iconv only iso-8859-2 and
 * koi8-r are supported through translation tables.
 *
 * Returns 0 on success or G10ERR_GENERAL if the charset is not
 * supported (with iconv on W32 also if iconv.dll could not be loaded).
 * On error the current settings are left unchanged. */
556 int
557 set_native_charset(const char * newset)557 set_native_charset( const char *newset )
558 {
559 const char *full_newset;
560
561 if (!newset) {
562 #ifdef _WIN32
563 static char codepage[30];
564 unsigned int cpno;
565 const char *aliases;
566
567 /* We are a console program thus we need to use the
568 GetConsoleOutputCP function and not the GetACP which
569 would give the codepage for a GUI program. Note this is
570 not a bulletproof detection because GetConsoleCP might
571 return a different one for console input. Not sure how to
572 cope with that. If the console Code page is not known we
573 fall back to the system code page. */
574 cpno = GetConsoleOutputCP ();
575 if (!cpno)
576 cpno = GetACP ();
577 sprintf (codepage, "CP%u", cpno );
578 /* Resolve alias. We use a long string and not the
579 usual array to optimize if the code is taken to a DSO.
580 Taken from libiconv 1.9.2. */
581 newset = codepage;
/* The table is a sequence of NUL terminated (code page, name) pairs;
   each iteration skips one pair.  NOTE(review): no entry of the table
   is a lone "*", so the wildcard test below looks unreachable here. */
582 for (aliases = ("CP936" "\0" "GBK" "\0"
583 "CP1361" "\0" "JOHAB" "\0"
584 "CP20127" "\0" "ASCII" "\0"
585 "CP20866" "\0" "KOI8-R" "\0"
586 "CP21866" "\0" "KOI8-RU" "\0"
587 "CP28591" "\0" "ISO-8859-1" "\0"
588 "CP28592" "\0" "ISO-8859-2" "\0"
589 "CP28593" "\0" "ISO-8859-3" "\0"
590 "CP28594" "\0" "ISO-8859-4" "\0"
591 "CP28595" "\0" "ISO-8859-5" "\0"
592 "CP28596" "\0" "ISO-8859-6" "\0"
593 "CP28597" "\0" "ISO-8859-7" "\0"
594 "CP28598" "\0" "ISO-8859-8" "\0"
595 "CP28599" "\0" "ISO-8859-9" "\0"
596 "CP28605" "\0" "ISO-8859-15" "\0"
597 "CP65001" "\0" "UTF-8" "\0");
598 *aliases;
599 aliases += strlen (aliases) + 1, aliases += strlen (aliases) + 1)
600 {
601 if (!strcmp (codepage, aliases) ||(*aliases == '*' && !aliases[1]))
602 {
603 newset = aliases + strlen (aliases) + 1;
604 break;
605 }
606 }
607
608 #else
609 #ifdef HAVE_LANGINFO_CODESET
610 newset = nl_langinfo (CODESET);
611 #else /* !HAVE_LANGINFO_CODESET */
612 /* Try to get the used charset from environment variables. */
613 static char codepage[30];
614 const char *lc, *dot, *mod;
615
616 strcpy (codepage, "iso-8859-1");
617 lc = getenv ("LC_ALL");
618 if (!lc || !*lc) {
619 lc = getenv ("LC_CTYPE");
620 if (!lc || !*lc)
621 lc = getenv ("LANG");
622 }
623 if (lc && *lc) {
/* The codeset is the part between the '.' and an optional '@modifier'. */
624 dot = strchr (lc, '.');
625 if (dot) {
626 mod = strchr (++dot, '@');
627 if (!mod)
628 mod = dot + strlen (dot);
/* Only take it if it is not empty and fits into the buffer. */
629 if (mod - dot < sizeof codepage && dot != mod) {
630 memcpy (codepage, dot, mod - dot);
631 codepage [mod - dot] = 0;
632 }
633 }
634 }
635 newset = codepage;
636 #endif /* !HAVE_LANGINFO_CODESET */
637 #endif
638 }
639
/* Keep the complete name for iconv; NEWSET is advanced behind an
   "iso", "iso-" or "iso_" prefix for the comparisons below. */
640 full_newset = newset;
641 if (strlen (newset) > 3 && !ascii_memcasecmp (newset, "iso", 3)) {
642 newset += 3;
643 if (*newset == '-' || *newset == '_')
644 newset++;
645 }
646
647 /* Note that we silently assume that plain ASCII is actually meant
648 as Latin-1. This makes sense because many Unix system don't
649 have their locale set up properly and thus would get annoying
650 error messages and we have to handle all the "bug"
651 reports. Latin-1 has always been the character set used for 8
652 bit characters on Unix systems. */
653 if( !*newset
654 || !ascii_strcasecmp (newset, "8859-1" )
655 || !ascii_strcasecmp (newset, "646" )
656 || !ascii_strcasecmp (newset, "ASCII" )
657 || !ascii_strcasecmp (newset, "ANSI_X3.4-1968" )
658 ) {
659 active_charset_name = "iso-8859-1";
660 no_translation = 0;
661 active_charset = NULL;
662 use_iconv = 0;
663 }
664 else if( !ascii_strcasecmp (newset, "utf8" )
665 || !ascii_strcasecmp(newset, "utf-8") ) {
666 active_charset_name = "utf-8";
667 no_translation = 1;
668 active_charset = NULL;
669 use_iconv = 0;
670 }
671 #ifdef USE_GNUPG_ICONV
672 else {
673 iconv_t cd;
674
675 #ifdef _WIN32
676 if (load_libiconv ())
677 return G10ERR_GENERAL;
678 #endif /*_WIN32*/
679
/* Make sure that iconv can convert in both directions before the
   charset is accepted. */
680 cd = iconv_open (full_newset, "utf-8");
681 if (cd == (iconv_t)-1) {
682 handle_iconv_error (full_newset, "utf-8", 0);
683 return G10ERR_GENERAL;
684 }
685 iconv_close (cd);
686 cd = iconv_open ("utf-8", full_newset);
687 if (cd == (iconv_t)-1) {
688 handle_iconv_error ("utf-8", full_newset, 0);
689 return G10ERR_GENERAL;
690 }
691 iconv_close (cd);
/* NOTE(review): the pointer itself is stored, no copy is made.  It
   refers to the caller's string, to the static codepage buffer or to
   the nl_langinfo result and thus needs to stay valid - confirm that
   all callers guarantee this. */
692 active_charset_name = full_newset;
693 no_translation = 0;
694 active_charset = NULL;
695 use_iconv = 1;
696 }
697 #else /*!USE_GNUPG_ICONV*/
698 else if( !ascii_strcasecmp( newset, "8859-2" ) ) {
699 active_charset_name = "iso-8859-2";
700 no_translation = 0;
701 active_charset = latin2_unicode;
702 use_iconv = 0;
703 }
704 else if( !ascii_strcasecmp( newset, "koi8-r" ) ) {
705 active_charset_name = "koi8-r";
706 no_translation = 0;
707 active_charset = koi8_unicode;
708 use_iconv = 0;
709 }
710 else
711 return G10ERR_GENERAL;
712 #endif /*!USE_GNUPG_ICONV*/
713 return 0;
714 }
715
716 const char*
get_native_charset()717 get_native_charset()
718 {
719 return active_charset_name;
720 }
721
722 /****************
723 * Convert string, which is in native encoding to UTF8 and return the
724 * new allocated UTF8 string.
725 */
726 char *
native_to_utf8(const char * string)727 native_to_utf8( const char *string )
728 {
729 const byte *s;
730 char *buffer;
731 byte *p;
732 size_t length=0;
733
734 if (no_translation)
735 { /* Already utf-8 encoded. */
736 buffer = xstrdup (string);
737 }
738 else if( !active_charset && !use_iconv) /* Shortcut implementation
739 for Latin-1. */
740 {
741 for(s=string; *s; s++ )
742 {
743 length++;
744 if( *s & 0x80 )
745 length++;
746 }
747 buffer = xmalloc( length + 1 );
748 for(p=buffer, s=string; *s; s++ )
749 {
750 if( *s & 0x80 )
751 {
752 *p++ = 0xc0 | ((*s >> 6) & 3);
753 *p++ = 0x80 | ( *s & 0x3f );
754 }
755 else
756 *p++ = *s;
757 }
758 *p = 0;
759 }
760 else /* Need to use a translation table. */
761 {
762 #ifdef USE_GNUPG_ICONV
763 iconv_t cd;
764 const char *inptr;
765 char *outptr;
766 size_t inbytes, outbytes;
767
768 cd = iconv_open ("utf-8", active_charset_name);
769 if (cd == (iconv_t)-1)
770 {
771 handle_iconv_error ("utf-8", active_charset_name, 1);
772 return native_to_utf8 (string);
773 }
774
775 for (s=string; *s; s++ )
776 {
777 length++;
778 if ((*s & 0x80))
779 length += 5; /* We may need up to 6 bytes for the utf8 output. */
780 }
781 buffer = xmalloc (length + 1);
782
783 inptr = string;
784 inbytes = strlen (string);
785 outptr = buffer;
786 outbytes = length;
787 if ( iconv (cd, (ICONV_CONST char **)&inptr, &inbytes,
788 &outptr, &outbytes) == (size_t)-1)
789 {
790 static int shown;
791
792 if (!shown)
793 log_info (_("conversion from `%s' to `%s' failed: %s\n"),
794 active_charset_name, "utf-8", strerror (errno));
795 shown = 1;
796 /* We don't do any conversion at all but use the strings as is. */
797 strcpy (buffer, string);
798 }
799 else /* Success. */
800 {
801 *outptr = 0;
802 /* We could realloc the buffer now but I doubt that it makes
803 much sense given that it will get freed anyway soon
804 after. */
805 }
806 iconv_close (cd);
807
808 #else /*!USE_GNUPG_ICONV*/
809 for(s=string; *s; s++ )
810 {
811 length++;
812 if( *s & 0x80 )
813 length += 2; /* We may need up to 3 bytes. */
814 }
815 buffer = xmalloc( length + 1 );
816 for(p=buffer, s=string; *s; s++ ) {
817 if( *s & 0x80 ) {
818 ushort val = active_charset[ *s & 0x7f ];
819 if( val < 0x0800 ) {
820 *p++ = 0xc0 | ( (val >> 6) & 0x1f );
821 *p++ = 0x80 | ( val & 0x3f );
822 }
823 else {
824 *p++ = 0xe0 | ( (val >> 12) & 0x0f );
825 *p++ = 0x80 | ( (val >> 6) & 0x3f );
826 *p++ = 0x80 | ( val & 0x3f );
827 }
828 }
829 else
830 *p++ = *s;
831 }
832 *p = 0;
833 #endif /*!USE_GNUPG_ICONV*/
834
835 }
836 return buffer;
837 }
838
839
840 /****************
 * Convert the LENGTH bytes at STRING, which are UTF-8 encoded, to the
 * native encoding and return the result as a newly allocated string.
 * Illegal encodings and characters without a native representation
 * are printed as "\xnn" escapes and all control characters are
 * quoted.  A character with value DELIM will always be quoted; it
 * must be a vanilla ASCII character.  If DELIM is not 0 the backslash
 * is quoted as well.  A DELIM value of -1 is special: it disables all
 * quoting of control characters.
 *
 * The string is processed twice by the same code: the first pass only
 * computes the required size in N, then the buffer is allocated and
 * the second pass writes the output through P.
 *
 * NOTE(review): a multi-byte sequence which is cut off by the end of
 * the input is never flushed from ENCBUF and thus appears to be
 * dropped silently - confirm that this is intended.
 */
847 char *
848 utf8_to_native(const char * string,size_t length,int delim)848 utf8_to_native( const char *string, size_t length, int delim )
849 {
/* Number of continuation bytes still expected for the current char. */
850 int nleft;
851 int i;
/* Raw bytes of the character being decoded and the number of them. */
852 byte encbuf[8];
853 int encidx;
854 const byte *s;
/* Number of output bytes needed or written so far in this pass. */
855 size_t n;
856 byte *buffer = NULL, *p = NULL;
/* Code point assembled so far. */
857 unsigned long val = 0;
858 size_t slen;
/* Set after an invalid byte: all bytes are escaped until one shows up
   which may start a new character. */
859 int resync = 0;
860
861 /* 1. pass (p==NULL): count the extended utf-8 characters */
862 /* 2. pass (p!=NULL): create string */
863 for( ;; ) {
864 for( slen=length, nleft=encidx=0, n=0, s=string; slen; s++, slen-- ) {
865 if( resync ) {
866 if( !(*s < 128 || (*s >= 0xc0 && *s <= 0xfd)) ) {
867 /* still invalid */
868 if( p ) {
869 sprintf(p, "\\x%02x", *s );
870 p += 4;
871 }
872 n += 4;
873 continue;
874 }
875 resync = 0;
876 }
/* Not inside a multi-byte sequence: expect ASCII or a lead byte. */
877 if( !nleft ) {
878 if( !(*s & 0x80) ) { /* plain ascii */
879 if( delim != -1
880 && (*s < 0x20 || *s == 0x7f || *s == delim
881 || (delim && *s=='\\'))) {
/* Quote it: a backslash followed by a letter or by "xNN". */
882 n++;
883 if( p )
884 *p++ = '\\';
885 switch( *s ) {
886 case '\n': n++; if( p ) *p++ = 'n'; break;
887 case '\r': n++; if( p ) *p++ = 'r'; break;
888 case '\f': n++; if( p ) *p++ = 'f'; break;
889 case '\v': n++; if( p ) *p++ = 'v'; break;
890 case '\b': n++; if( p ) *p++ = 'b'; break;
891 case 0 : n++; if( p ) *p++ = '0'; break;
892 default:
893 n += 3;
894 if ( p ) {
895 sprintf( p, "x%02x", *s );
896 p += 3;
897 }
898 break;
899 }
900 }
901 else {
902 if( p ) *p++ = *s;
903 n++;
904 }
905 }
/* Lead bytes: take the payload bits and note how many continuation
   bytes have to follow. */
906 else if( (*s & 0xe0) == 0xc0 ) { /* 110x xxxx */
907 val = *s & 0x1f;
908 nleft = 1;
909 encidx = 0;
910 encbuf[encidx++] = *s;
911 }
912 else if( (*s & 0xf0) == 0xe0 ) { /* 1110 xxxx */
913 val = *s & 0x0f;
914 nleft = 2;
915 encidx = 0;
916 encbuf[encidx++] = *s;
917 }
918 else if( (*s & 0xf8) == 0xf0 ) { /* 1111 0xxx */
919 val = *s & 0x07;
920 nleft = 3;
921 encidx = 0;
922 encbuf[encidx++] = *s;
923 }
924 else if( (*s & 0xfc) == 0xf8 ) { /* 1111 10xx */
925 val = *s & 0x03;
926 nleft = 4;
927 encidx = 0;
928 encbuf[encidx++] = *s;
929 }
930 else if( (*s & 0xfe) == 0xfc ) { /* 1111 110x */
931 val = *s & 0x01;
932 nleft = 5;
933 encidx = 0;
934 encbuf[encidx++] = *s;
935 }
936 else { /* invalid encoding: print as \xnn */
937 if( p ) {
938 sprintf(p, "\\x%02x", *s );
939 p += 4;
940 }
941 n += 4;
942 resync = 1;
943 }
944 }
/* A continuation byte was expected but something else shows up:
   escape the bytes collected so far as well as this one. */
945 else if( *s < 0x80 || *s >= 0xc0 ) { /* invalid */
946 if( p ) {
947 for(i=0; i < encidx; i++ ) {
948 sprintf(p, "\\x%02x", encbuf[i] );
949 p += 4;
950 }
951 sprintf(p, "\\x%02x", *s );
952 p += 4;
953 }
954 n += 4 + 4*encidx;
955 nleft = 0;
956 encidx = 0;
957 resync = 1;
958 }
/* Valid continuation byte: add its six payload bits to VAL. */
959 else {
960 encbuf[encidx++] = *s;
961 val <<= 6;
962 val |= *s & 0x3f;
963 if( !--nleft ) { /* ready */
964 if (no_translation) {
965 if( p ) {
966 for(i=0; i < encidx; i++ )
967 *p++ = encbuf[i];
968 }
969 n += encidx;
970 encidx = 0;
971 }
972 #ifdef USE_GNUPG_ICONV
973 else if(use_iconv) {
974 /* Our strategy for using iconv is a bit
975 * strange but it better keeps compatibility
976 * with previous versions in regard to how
977 * invalid encodings are displayed. What we
978 * do is to keep the utf-8 as is and have the
979 * real translation step then at the end.
980 * Yes, I know that this is ugly. However we
981 * are short of the 1.4 release and for this
982 * branch we should not mess too much around
983 * with iconv things. One reason for this is
984 * that we don't know enough about non-GNU
985 * iconv implementation and want to minimize
986 * the risk of breaking the code on too many
987 * platforms. */
988 if( p ) {
989 for(i=0; i < encidx; i++ )
990 *p++ = encbuf[i];
991 }
992 n += encidx;
993 encidx = 0;
994 }
995 #endif /*USE_GNUPG_ICONV*/
996 else if( active_charset ) { /* table lookup */
/* Reverse lookup: find the native byte which maps to VAL. */
997 for(i=0; i < 128; i++ ) {
998 if( active_charset[i] == val )
999 break;
1000 }
1001 if( i < 128 ) { /* we can print this one */
1002 if( p ) *p++ = i+128;
1003 n++;
1004 }
1005 else { /* we do not have a translation: print utf8 */
1006 if( p ) {
1007 for(i=0; i < encidx; i++ ) {
1008 sprintf(p, "\\x%02x", encbuf[i] );
1009 p += 4;
1010 }
1011 }
1012 n += encidx*4;
1013 encidx = 0;
1014 }
1015 }
1016 else { /* native set */
/* Latin-1: code points 0x80..0xff map to the byte of the same value. */
1017 if( val >= 0x80 && val < 256 ) {
1018 n++; /* we can simply print this character */
1019 if( p ) *p++ = val;
1020 }
1021 else { /* we do not have a translation: print utf8 */
1022 if( p ) {
1023 for(i=0; i < encidx; i++ ) {
1024 sprintf(p, "\\x%02x", encbuf[i] );
1025 p += 4;
1026 }
1027 }
1028 n += encidx*4;
1029 encidx = 0;
1030 }
1031 }
1032 }
1033
1034 }
1035 }
/* End of a pass.  After the first one the buffer is allocated and the
   outer loop runs the second pass; after the second one we return. */
1036 if( !buffer ) { /* allocate the buffer after the first pass */
1037 buffer = p = xmalloc( n + 1 );
1038 }
1039 #ifdef USE_GNUPG_ICONV
1040 else if(use_iconv) {
1041 /* Note: See above for comments. */
1042 iconv_t cd;
1043 const char *inptr;
1044 char *outbuf, *outptr;
1045 size_t inbytes, outbytes;
1046
1047 *p = 0; /* Terminate the buffer. */
1048
1049 cd = iconv_open (active_charset_name, "utf-8");
1050 if (cd == (iconv_t)-1)
1051 {
/* The error handler switches to the Latin-1 fallback and clears
   use_iconv, thus the retry will not get here again. */
1052 handle_iconv_error (active_charset_name, "utf-8", 1);
1053 xfree (buffer);
1054 return utf8_to_native (string, length, delim);
1055 }
1056
1057 /* Allocate a new buffer large enough to hold all possible
1058 * encodings. */
1059 n = p - buffer + 1;
1060 inbytes = n - 1;;
1061 inptr = buffer;
1062 outbytes = n * MB_LEN_MAX;
1063 if (outbytes / MB_LEN_MAX != n)
1064 BUG (); /* Actually an overflow. */
1065 outbuf = outptr = xmalloc (outbytes);
1066 if ( iconv (cd, (ICONV_CONST char **)&inptr, &inbytes,
1067 &outptr, &outbytes) == (size_t)-1) {
1068 static int shown;
1069
1070 if (!shown)
1071 log_info (_("conversion from `%s' to `%s' failed: %s\n"),
1072 "utf-8", active_charset_name, strerror (errno));
1073 shown = 1;
1074 /* That didn't work out. Temporarily disable the use of
1075 * iconv and fall back to our old code. */
1076 xfree (buffer);
1077 buffer = NULL;
1078 xfree (outbuf);
1079 use_iconv = 0;
1080 outbuf = utf8_to_native (string, length, delim);
1081 use_iconv = 1;
1082 }
1083 else { /* Success. */
1084 *outptr = 0;
1085 /* We could realloc the buffer now but I doubt that it makes
1086 much sense given that it will get freed anyway soon
1087 after. */
1088 xfree (buffer);
1089 }
1090 iconv_close (cd);
1091 return outbuf;
1092 }
1093 #endif /*USE_GNUPG_ICONV*/
1094 else {
1095 *p = 0; /* make a string */
1096 return buffer;
1097 }
1098 }
1099 }
1100
/* This is similar to native_to_utf8, except it can take any input
   (which may or may not be UTF8 encoded) and return something that is
   (almost) definitely UTF8.  This code is mostly borrowed from
   GPA.

   Returns NULL if STRING is NULL and a newly allocated string
   otherwise; the caller has to free it.  Only the first non-ascii
   character is inspected to decide on the encoding.  All bytes are
   handled as unsigned values so that the bit operations do not depend
   on the signedness of plain char. */

char *
string_to_utf8 (const char *string)
{
  const unsigned char *s;
  unsigned char *p;
  char *buffer;
  size_t length = 0;

  if (!string)
    return NULL;

  /* Due to a bug in old and not so old PGP versions user IDs have
     been copied verbatim into the key.  Thus many users with Umlauts
     et al. in their name will see their names garbled.  Although this
     is not an issue for me (;-)), I have a couple of friends with
     Umlauts in their name, so let's try to make their life easier by
     detecting invalid encodings and convert that to Latin-1.  We use
     this even for X.509 because it may make things even better given
     all the invalid encodings often found in X.509 certificates.  */
  for (s = (const unsigned char *)string; *s && !(*s & 0x80); s++)
    ;

  if (!*s)
    return xstrdup (string);  /* Plain ASCII is valid utf-8.  */

  if ((s[1] & 0xc0) == 0x80
      && ((*s & 0xe0) == 0xc0
          || (*s & 0xf0) == 0xe0
          || (*s & 0xf8) == 0xf0
          || (*s & 0xfc) == 0xf8
          || (*s & 0xfe) == 0xfc))
    {
      /* Possible utf-8 character followed by continuation byte.
         Although this might still be Latin-1 we better assume that it
         is valid utf-8.  */
      return xstrdup (string);
    }

  if (strchr (string, 0xc3))
    {
      /* Everything else is assumed to be UTF-8.  We do this even
         though we know that the encoding is not valid.  However as we
         only test the first non-ascii character, valid encodings
         might follow.  */
      return xstrdup (string);
    }

  /* No 0xC3 character in the string; assume that it is Latin-1.  Each
     character with the high bit set needs two bytes.  */
  for (s = (const unsigned char *)string; *s; s++)
    length += (*s & 0x80) ? 2 : 1;

  buffer = xmalloc (length + 1);
  p = (unsigned char *)buffer;
  for (s = (const unsigned char *)string; *s; s++)
    {
      if ((*s & 0x80))
        {
          *p++ = 0xc0 | ((*s >> 6) & 3);
          *p++ = 0x80 | (*s & 0x3f);
        }
      else
        *p++ = *s;
    }
  *p = 0;

  return buffer;
}
1172
/* Format the arguments according to FMT like asprintf does and return
   the result in an allocated buffer which is to be freed using xfree.
   If the formatting fails log_fatal is called with the error, thus
   the caller does not need to check the result.  */
char *
xasprintf (const char *fmt, ...)
{
  va_list args;
  char *result;
  int rc;

  va_start (args, fmt);
  rc = estream_vasprintf (&result, fmt, args);
  if (rc < 0)
    log_fatal ("estream_asprintf failed: %s\n", strerror (errno));
  va_end (args);

  return result;
}
1188
/* Same as xasprintf but return NULL if the formatting failed, for
   example due to a memory failure.  */
char *
xtryasprintf (const char *fmt, ...)
{
  va_list args;
  char *result;
  int rc;

  va_start (args, fmt);
  rc = estream_vasprintf (&result, fmt, args);
  va_end (args);

  return rc < 0 ? NULL : result;
}
1204
1205
/* Same as xtryasprintf but take the arguments from ARG_PTR.  Returns
   the allocated result or NULL if the formatting failed.  */
char *
xtryvasprintf (const char *fmt, va_list arg_ptr)
{
  char *result;

  if (estream_vasprintf (&result, fmt, arg_ptr) < 0)
    return NULL;
  return result;
}
1217
1218
/* Helper for strconcat: join S1 and all strings taken from ARG_PTR up
   to the terminating NULL pointer into one buffer obtained from
   xtrymalloc.  S1 must not be NULL.  Returns NULL if the allocation
   fails, or - with ERRNO set to EINVAL - if more strings are passed
   than fit into the fixed size table (all slots but the last one,
   which is reserved for the terminating NULL).  */
static char *
do_strconcat (const char *s1, va_list arg_ptr)
{
  const char *parts[48];
  const char *next;
  size_t count = 0;
  size_t total;
  size_t idx;
  char *result, *tail;

  /* First pass: collect the strings and sum up their lengths.  */
  parts[count++] = s1;
  total = strlen (s1);
  for (;;)
    {
      next = va_arg (arg_ptr, const char *);
      parts[count] = next;
      if (!next)
        break;
      total += strlen (next);
      if (count >= DIM (parts) - 1)
        {
          /* No slot left to store the terminating NULL.  */
          errno = EINVAL;
          return NULL;
        }
      count++;
    }

  /* One extra byte for the terminating NUL.  */
  result = xtrymalloc (total + 1);
  if (!result)
    return NULL;

  /* Second pass: copy the strings one after the other.  */
  tail = result;
  for (idx = 0; parts[idx]; idx++)
    tail = stpcpy (tail, parts[idx]);

  return result;
}
1249
1250
/* Concatenate S1 and all following string arguments; the argument
   list must be terminated by a NULL pointer.  The result is returned
   in a newly allocated buffer.  If S1 itself is NULL a copy of the
   empty string is returned.  NULL is returned if the allocation
   fails or if too many arguments are given.  */
char *
strconcat (const char *s1, ...)
{
  va_list arg_ptr;
  char *result;

  if (!s1)
    return xtrystrdup ("");

  va_start (arg_ptr, s1);
  result = do_strconcat (s1, arg_ptr);
  va_end (arg_ptr);

  return result;
}
1270
1271
1272 /****************************************************
1273 ******** locale insensitive ctype functions ********
1274 ****************************************************/
1275 /* FIXME: replace them by a table lookup and macros */
/* Return 1 if C is one of the ASCII letters 'A'..'Z' and 0 otherwise.
   The current locale is not consulted.  */
int
ascii_isupper (int c)
{
  if (c < 'A')
    return 0;
  return c <= 'Z';
}
1281
/* Return 1 if C is one of the ASCII letters 'a'..'z' and 0 otherwise.
   The current locale is not consulted.  */
int
ascii_islower (int c)
{
  if (c < 'a')
    return 0;
  return c <= 'z';
}
1287
/* Compare the first N bytes of A and B, folding both sides with
   ascii_toupper.  Returns 0 if all N bytes match (or if A and B are
   the same pointer); otherwise returns the difference of the folded
   values at the first position where they differ.  */
int
ascii_memcasecmp (const char *a, const char *b, size_t n)
{
  size_t i;
  int ua, ub;

  if (a == b)
    return 0;

  for (i = 0; i < n; i++)
    {
      /* Identical bytes never need folding.  */
      if (a[i] == b[i])
        continue;

      ua = ascii_toupper (a[i]);
      ub = ascii_toupper (b[i]);
      if (ua != ub)
        return ua - ub;
    }

  return 0;
}
1299
/* Lowercase all ASCII characters in S in place and return S.  Only
   the bytes 'A'..'Z' are changed; all other bytes, including those
   with the high bit set, are left untouched.  The current locale is
   not consulted.  */
char *
ascii_strlwr (char *s)
{
  char *p;

  /* The range test alone identifies the ASCII uppercase letters; no
     call to the non-ISO isascii() is needed.  */
  for (p = s; *p; p++)
    {
      if (*p >= 'A' && *p <= 'Z')
        *p |= 0x20;  /* Setting bit 5 maps 'A'..'Z' to 'a'..'z'.  */
    }

  return s;
}
1312
1313
1314 /*********************************************
1315 ********** missing string functions *********
1316 *********************************************/
1317
#ifndef HAVE_STPCPY
/* Replacement used when HAVE_STPCPY is not defined: copy the string B
   including its terminating NUL to A and return a pointer to that
   NUL in A.  No bounds checking is done.  */
char *
stpcpy (char *a, const char *b)
{
  for (; *b; a++, b++)
    *a = *b;
  *a = '\0';

  return a;
}
#endif
1329
#ifndef HAVE_STRLWR
/* Replacement used when HAVE_STRLWR is not defined: convert the
   string S to lowercase in place using tolower and return S.  */
char *
strlwr (char *s)
{
  char *cur = s;

  while (*cur)
    {
      /* tolower needs the byte as an unsigned char value.  */
      *cur = tolower (*(unsigned char *)cur);
      cur++;
    }

  return s;
}
#endif
1340
#ifndef HAVE_STRCASECMP
/* Replacement used when HAVE_STRCASECMP is not defined: compare the
   strings A and B ignoring case.  Both strings are compared as if
   they had been converted to lowercase with tolower.  Returns 0 if
   they are equal, a negative value if A sorts before B and a positive
   value if A sorts after B.  */
int
strcasecmp (const char *a, const char *b)
{
  const unsigned char *pa = (const unsigned char *)a;
  const unsigned char *pb = (const unsigned char *)b;
  int ca, cb;

  do
    {
      ca = tolower (*pa++);
      cb = tolower (*pb++);
    }
  while (ca && ca == cb);

  /* Return the difference of the folded values, not of the raw bytes;
     otherwise "a" would sort after "B".  */
  return ca - cb;
}
#endif
1353
#ifndef HAVE_STRNCASECMP
/* Replacement used when HAVE_STRNCASECMP is not defined: compare at
   most N bytes of the strings A and B ignoring case.  Both strings
   are compared as if they had been converted to lowercase with
   tolower.  Returns 0 if the compared parts are equal (always the
   case for N == 0), a negative value if A sorts before B and a
   positive value if A sorts after B.  */
int
strncasecmp (const char *a, const char *b, size_t n)
{
  const unsigned char *pa = (const unsigned char *)a;
  const unsigned char *pb = (const unsigned char *)b;
  int ca, cb;

  for (; n; n--)
    {
      ca = tolower (*pa++);
      cb = tolower (*pb++);
      /* Return the difference of the folded values, not of the raw
         bytes; otherwise "a" would sort after "B".  */
      if (ca != cb)
        return ca - cb;
      if (!ca)
        break;  /* Both strings end here.  */
    }

  return 0;
}
#endif
1368
1369
#ifdef _WIN32
/* Return a description of the Windows error code W32_ERRNO.  If
   W32_ERRNO is 0 the value of GetLastError, sampled on entry, is
   used instead.  The returned string lives in a static buffer which
   is overwritten by the next call.  If the system cannot provide a
   message for the code, a fixed placeholder string is returned.  */
const char *
w32_strerror (int w32_errno)
{
  static char strerr[256];
  int ec = (int)GetLastError ();

  if (w32_errno == 0)
    w32_errno = ec;

  /* No insert arguments are passed, thus the system must be told not
     to expand insert sequences like "%1" found in the message text.
     A failed lookup must not leave the text of a previous call in
     the static buffer.  */
  if (!FormatMessage (FORMAT_MESSAGE_FROM_SYSTEM
                      | FORMAT_MESSAGE_IGNORE_INSERTS,
                      NULL, w32_errno,
                      MAKELANGID (LANG_NEUTRAL, SUBLANG_DEFAULT),
                      strerr, DIM (strerr)-1, NULL))
    strcpy (strerr, "[unknown error]");

  return strerr;
}
#endif /*_WIN32*/
1385
1386
1387
1388