1 /* strgutil.c -  string utilities
2  * Copyright (C) 1994, 1998, 1999, 2000, 2001,
3  *               2003, 2004, 2005, 2009 Free Software Foundation, Inc.
4  *
5  * This file is part of GnuPG.
6  *
7  * GnuPG is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation; either version 3 of the License, or
10  * (at your option) any later version.
11  *
12  * GnuPG is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with this program; if not, see <http://www.gnu.org/licenses/>.
19  */
20 
21 #include <config.h>
22 #include <stdlib.h>
23 #include <string.h>
24 #include <ctype.h>
25 #include <errno.h>
26 #ifdef HAVE_LANGINFO_CODESET
27 #include <langinfo.h>
28 #endif
29 
30 /* For W32 we use dynamic loading of the iconv dll and don't need any
31  * iconv headers at all. */
32 #ifndef _WIN32
33 # ifndef HAVE_ICONV
34 #  undef USE_GNUPG_ICONV
35 # endif
36 #endif
37 
38 #ifdef USE_GNUPG_ICONV
39 # include <limits.h>
40 # ifndef _WIN32
41 #  include <iconv.h>
42 # endif
43 #endif
44 
45 #include "types.h"
46 #include "util.h"
47 #include "memory.h"
48 #include "i18n.h"
49 #include "dynload.h"
50 #include "estream-printf.h"
51 
52 /* Our xasprintf replacements are expected to work with our memory
53    allocator.  Let's test for this here.  */
54 #if !defined(_ESTREAM_PRINTF_MALLOC) || !defined(_ESTREAM_PRINTF_FREE)
55 #error Please define _ESTREAM_PRINTF_MALLOC and _FREE
56 #endif
57 
58 
59 
60 #ifndef USE_GNUPG_ICONV
/* Unicode code points for the upper half of the KOI8-R character
   set: entry I is the code point for the byte value 0x80 + I.
   Installed as the active table by set_native_charset() for
   "koi8-r".  */
static ushort koi8_unicode[128] = {
    0x2500,0x2502,0x250c,0x2510,0x2514,0x2518,0x251c,0x2524,
    0x252c,0x2534,0x253c,0x2580,0x2584,0x2588,0x258c,0x2590,
    0x2591,0x2592,0x2593,0x2320,0x25a0,0x2219,0x221a,0x2248,
    0x2264,0x2265,0x00a0,0x2321,0x00b0,0x00b2,0x00b7,0x00f7,
    0x2550,0x2551,0x2552,0x0451,0x2553,0x2554,0x2555,0x2556,
    0x2557,0x2558,0x2559,0x255a,0x255b,0x255c,0x255d,0x255e,
    0x255f,0x2560,0x2561,0x0401,0x2562,0x2563,0x2564,0x2565,
    0x2566,0x2567,0x2568,0x2569,0x256a,0x256b,0x256c,0x00a9,
    0x044e,0x0430,0x0431,0x0446,0x0434,0x0435,0x0444,0x0433,
    0x0445,0x0438,0x0439,0x043a,0x043b,0x043c,0x043d,0x043e,
    0x043f,0x044f,0x0440,0x0441,0x0442,0x0443,0x0436,0x0432,
    0x044c,0x044b,0x0437,0x0448,0x044d,0x0449,0x0447,0x044a,
    0x042e,0x0410,0x0411,0x0426,0x0414,0x0415,0x0424,0x0413,
    0x0425,0x0418,0x0419,0x041a,0x041b,0x041c,0x041d,0x041e,
    0x041f,0x042f,0x0420,0x0421,0x0422,0x0423,0x0416,0x0412,
    0x042c,0x042b,0x0417,0x0428,0x042d,0x0429,0x0427,0x042a
};
79 
/* Unicode code points for the upper half of the ISO-8859-2 (Latin-2)
   character set: entry I is the code point for the byte value
   0x80 + I.  Installed as the active table by set_native_charset()
   for "iso-8859-2".  */
static ushort latin2_unicode[128] = {
    0x0080,0x0081,0x0082,0x0083,0x0084,0x0085,0x0086,0x0087,
    0x0088,0x0089,0x008A,0x008B,0x008C,0x008D,0x008E,0x008F,
    0x0090,0x0091,0x0092,0x0093,0x0094,0x0095,0x0096,0x0097,
    0x0098,0x0099,0x009A,0x009B,0x009C,0x009D,0x009E,0x009F,
    0x00A0,0x0104,0x02D8,0x0141,0x00A4,0x013D,0x015A,0x00A7,
    0x00A8,0x0160,0x015E,0x0164,0x0179,0x00AD,0x017D,0x017B,
    0x00B0,0x0105,0x02DB,0x0142,0x00B4,0x013E,0x015B,0x02C7,
    0x00B8,0x0161,0x015F,0x0165,0x017A,0x02DD,0x017E,0x017C,
    0x0154,0x00C1,0x00C2,0x0102,0x00C4,0x0139,0x0106,0x00C7,
    0x010C,0x00C9,0x0118,0x00CB,0x011A,0x00CD,0x00CE,0x010E,
    0x0110,0x0143,0x0147,0x00D3,0x00D4,0x0150,0x00D6,0x00D7,
    0x0158,0x016E,0x00DA,0x0170,0x00DC,0x00DD,0x0162,0x00DF,
    0x0155,0x00E1,0x00E2,0x0103,0x00E4,0x013A,0x0107,0x00E7,
    0x010D,0x00E9,0x0119,0x00EB,0x011B,0x00ED,0x00EE,0x010F,
    0x0111,0x0144,0x0148,0x00F3,0x00F4,0x0151,0x00F6,0x00F7,
    0x0159,0x016F,0x00FA,0x0171,0x00FC,0x00FD,0x0163,0x02D9
};
98 #endif /*!USE_GNUPG_ICONV*/
99 
100 
101 #ifndef MB_LEN_MAX
102 #define MB_LEN_MAX 16
103 #endif
104 
105 
/* State describing the native character set.  It is changed by
   set_native_charset() and reset to the Latin-1 defaults by the
   iconv error fallback.  */
/* Name of the native character set as returned by
   get_native_charset().  */
static const char *active_charset_name = "iso-8859-1";
/* Table mapping the bytes 0x80..0xff to Unicode, or NULL if no table
   is in use.  */
static ushort *active_charset = NULL;
/* Set to 1 if the native character set is utf-8 so that strings are
   passed through unchanged.  */
static int no_translation = 0;
/* Set to 1 if conversions are done using iconv.  */
static int use_iconv = 0;
110 
111 
#ifdef _WIN32
/* On W32 no iconv header is used; the conversion descriptor is
   declared here as an opaque pointer.  */
typedef void* iconv_t;
#ifndef ICONV_CONST
#define ICONV_CONST const
#endif

/* Pointers to the iconv entry points.  They are filled in by
   load_libiconv() and reset to NULL if loading the DLL fails.  */
iconv_t (* __stdcall iconv_open) (const char *tocode, const char *fromcode);
size_t  (* __stdcall iconv) (iconv_t cd,
                             const char **inbuf, size_t *inbytesleft,
                             char **outbuf, size_t *outbytesleft);
int     (* __stdcall iconv_close) (iconv_t cd);

#endif /*_WIN32*/
125 
126 
127 
#ifdef _WIN32
/* Load "iconv.dll" on the first call and resolve the three iconv
   entry points from it.  Later calls only report the outcome of the
   first attempt.  Returns 0 if iconv_open is available and -1 if
   not.  */
static int
load_libiconv (void)
{
  static int done;
  void *handle;

  if (done)
    return iconv_open? 0 : -1;

  /* Set the flag right now because we might get called recursively
     through gettext.  */
  done = 1;

  handle = dlopen ("iconv.dll", RTLD_LAZY);
  if (handle)
    {
      /* Each symbol is only looked up if the previous one was found,
         thus a valid iconv_close implies that all three are valid.  */
      iconv_open = dlsym (handle, "libiconv_open");
      if (iconv_open)
        iconv = dlsym (handle, "libiconv");
      if (iconv)
        iconv_close = dlsym (handle, "libiconv_close");
    }

  if (handle && iconv_close)
    return iconv_open? 0 : -1;

  /* The DLL or one of its symbols is missing: tell the user and make
     sure that no half initialized set of pointers is left over.  */
  log_info (_("error loading `%s': %s\n"),
             "iconv.dll",  dlerror ());
  log_info(_("please see http://www.gnupg.org/download/iconv.html "
             "for more information\n"));
  iconv_open = NULL;
  iconv = NULL;
  iconv_close = NULL;
  if (handle)
    dlclose (handle);

  return -1;
}
#endif /* _WIN32 */
166 
167 
168 
169 
170 void
free_strlist(STRLIST sl)171 free_strlist( STRLIST sl )
172 {
173     STRLIST sl2;
174 
175     for(; sl; sl = sl2 ) {
176 	sl2 = sl->next;
177 	xfree(sl);
178     }
179 }
180 
181 
182 STRLIST
add_to_strlist(STRLIST * list,const char * string)183 add_to_strlist( STRLIST *list, const char *string )
184 {
185     STRLIST sl;
186 
187     sl = xmalloc( sizeof *sl + strlen(string));
188     sl->flags = 0;
189     strcpy(sl->d, string);
190     sl->next = *list;
191     *list = sl;
192     return sl;
193 }
194 
195 /****************
196  * Same as add_to_strlist() but if is_utf8 is *not* set a conversion
197  * to UTF8 is done
198  */
199 STRLIST
add_to_strlist2(STRLIST * list,const char * string,int is_utf8)200 add_to_strlist2( STRLIST *list, const char *string, int is_utf8 )
201 {
202     STRLIST sl;
203 
204     if( is_utf8 )
205 	sl = add_to_strlist( list, string );
206     else {
207 	char *p = native_to_utf8( string );
208 	sl = add_to_strlist( list, p );
209 	xfree( p );
210     }
211     return sl;
212 }
213 
214 STRLIST
append_to_strlist(STRLIST * list,const char * string)215 append_to_strlist( STRLIST *list, const char *string )
216 {
217     STRLIST r, sl;
218 
219     sl = xmalloc( sizeof *sl + strlen(string));
220     sl->flags = 0;
221     strcpy(sl->d, string);
222     sl->next = NULL;
223     if( !*list )
224 	*list = sl;
225     else {
226 	for( r = *list; r->next; r = r->next )
227 	    ;
228 	r->next = sl;
229     }
230     return sl;
231 }
232 
233 STRLIST
append_to_strlist2(STRLIST * list,const char * string,int is_utf8)234 append_to_strlist2( STRLIST *list, const char *string, int is_utf8 )
235 {
236     STRLIST sl;
237 
238     if( is_utf8 )
239 	sl = append_to_strlist( list, string );
240     else {
241 	char *p = native_to_utf8( string );
242 	sl = append_to_strlist( list, p );
243 	xfree( p );
244     }
245     return sl;
246 }
247 
248 
249 STRLIST
strlist_prev(STRLIST head,STRLIST node)250 strlist_prev( STRLIST head, STRLIST node )
251 {
252     STRLIST n;
253 
254     for(n=NULL; head && head != node; head = head->next )
255 	n = head;
256     return n;
257 }
258 
259 STRLIST
strlist_last(STRLIST node)260 strlist_last( STRLIST node )
261 {
262     if( node )
263 	for( ; node->next ; node = node->next )
264 	    ;
265     return node;
266 }
267 
268 char *
pop_strlist(STRLIST * list)269 pop_strlist( STRLIST *list )
270 {
271   char *str=NULL;
272   STRLIST sl=*list;
273 
274   if(sl)
275     {
276       str=xmalloc(strlen(sl->d)+1);
277       strcpy(str,sl->d);
278 
279       *list=sl->next;
280       xfree(sl);
281     }
282 
283   return str;
284 }
285 
/****************
 * Look for the substring SUB in the buffer BUF of length BUFLEN and
 * return a pointer to the first occurrence of that substring in BUF
 * or NULL if not found.  The comparison is case-insensitive (using
 * toupper).  BUF may contain NUL bytes; SUB is a NUL terminated
 * string and its terminator is never part of the comparison.  An
 * empty SUB is never found.
 */
const char *
memistr (const char *buf, size_t buflen, const char *sub)
{
  const unsigned char *hay = (const unsigned char *)buf;
  const unsigned char *needle = (const unsigned char *)sub;
  size_t pos, k;

  if (!*needle)
    return NULL;  /* Nothing to look for.  */

  for (pos = 0; pos < buflen; pos++)
    {
      /* Compare at POS.  The test for the end of the needle comes
         first so that a NUL byte in the buffer can't drag the
         comparison beyond the terminator of SUB.  */
      for (k = 0;
           needle[k] && pos + k < buflen
             && toupper (hay[pos + k]) == toupper (needle[k]);
           k++)
        ;
      if (!needle[k])
        return buf + pos;  /* All of SUB matched.  */
    }

  return NULL;
}
309 
/* Look for the substring SUB in the buffer BUF of length BUFLEN and
 * return a pointer to the first occurrence of that substring in BUF
 * or NULL if not found.  This is the same as memistr() but the
 * characters are compared using ascii_toupper.  BUF may contain NUL
 * bytes; SUB is a NUL terminated string and its terminator is never
 * part of the comparison.  An empty SUB is never found.
 */
const char *
ascii_memistr (const char *buf, size_t buflen, const char *sub)
{
  const unsigned char *hay = (const unsigned char *)buf;
  const unsigned char *needle = (const unsigned char *)sub;
  size_t pos, k;

  if (!*needle)
    return NULL;  /* Nothing to look for.  */

  for (pos = 0; pos < buflen; pos++)
    {
      /* Compare at POS.  The test for the end of the needle comes
         first so that a NUL byte in the buffer can't drag the
         comparison beyond the terminator of SUB.  */
      for (k = 0;
           needle[k] && pos + k < buflen
             && ascii_toupper (hay[pos + k]) == ascii_toupper (needle[k]);
           k++)
        ;
      if (!needle[k])
        return buf + pos;  /* All of SUB matched.  */
    }

  return NULL;
}
328 
329 
/* Copy at most N-1 bytes from SRC to DEST, stopping at the first NUL
 * byte in SRC, and always append a '\0'.  With N given as 0 nothing
 * is copied and DEST is returned as is.  With DEST given as NULL, N
 * bytes are allocated using xmalloc (note that xmalloc is guaranteed
 * to succeed or to abort the process).  Returns the destination
 * buffer.  */
char *
mem2str (char *dest, const void *src, size_t n)
{
  const char *in = src;
  char *out;
  size_t room;

  if (!n)
    return dest;

  if (!dest)
    dest = xmalloc (n);

  out = dest;
  /* One byte is reserved for the terminator.  */
  for (room = n - 1; room && *in; room--)
    *out++ = *in++;
  *out = '\0';

  return dest;
}
352 
353 
/*
 * Remove leading and trailing white spaces from STR.  The string is
 * modified in place and STR is returned.
 */
char *
trim_spaces (char *str)
{
  const char *src = str;
  char *dst = str;
  char *trail = NULL;  /* Start of the current run of white space.  */

  /* Skip the leading white space.  */
  while (*src && isspace ((unsigned char)*src))
    src++;

  /* Move the remaining characters, including the terminating NUL, to
     the front and keep track of where a trailing run of white space
     starts.  */
  while ((*dst = *src))
    {
      if (!isspace ((unsigned char)*src))
        trail = NULL;
      else if (!trail)
        trail = dst;
      dst++;
      src++;
    }

  if (trail)
    *trail = '\0';  /* Remove trailing spaces.  */

  return str;
}
379 
380 
381 
382 unsigned int
trim_trailing_chars(byte * line,unsigned len,const char * trimchars)383 trim_trailing_chars( byte *line, unsigned len, const char *trimchars )
384 {
385     byte *p, *mark;
386     unsigned n;
387 
388     for(mark=NULL, p=line, n=0; n < len; n++, p++ ) {
389 	if( strchr(trimchars, *p ) ) {
390 	    if( !mark )
391 		mark = p;
392 	}
393 	else
394 	    mark = NULL;
395     }
396 
397     if( mark ) {
398 	*mark = 0;
399 	return mark - line;
400     }
401     return len;
402 }
403 
404 /****************
405  * Remove trailing white spaces and return the length of the buffer
406  */
407 unsigned
trim_trailing_ws(byte * line,unsigned len)408 trim_trailing_ws( byte *line, unsigned len )
409 {
410     return trim_trailing_chars( line, len, " \t\r\n" );
411 }
412 
413 
414 unsigned int
check_trailing_chars(const byte * line,unsigned int len,const char * trimchars)415 check_trailing_chars( const byte *line, unsigned int len,
416                       const char *trimchars )
417 {
418     const byte *p, *mark;
419     unsigned int n;
420 
421     for(mark=NULL, p=line, n=0; n < len; n++, p++ ) {
422 	if( strchr(trimchars, *p ) ) {
423 	    if( !mark )
424 		mark = p;
425 	}
426 	else
427 	    mark = NULL;
428     }
429 
430     if( mark ) {
431 	return mark - line;
432     }
433     return len;
434 }
435 
436 
437 /****************
438  * Remove trailing white spaces and return the length of the buffer
439  */
440 unsigned int
check_trailing_ws(const byte * line,unsigned int len)441 check_trailing_ws( const byte *line, unsigned int len )
442 {
443     return check_trailing_chars( line, len, " \t\r\n" );
444 }
445 
446 
447 
/* Return the number of occurrences of the character C in STRING.  */
int
string_count_chr (const char *string, int c)
{
  const char *p;
  int hits = 0;

  for (p = string; *p; p++)
    {
      if (*p == c)
        hits++;
    }
  return hits;
}
457 
458 
/* Return true if the string S has characters which are not valid in
   an RFC-822 address.  Before the first '@' letters, digits, '_',
   '-', '.' and '+' are accepted; after it the '+' is not accepted
   anymore.  The '@' itself is always accepted.

   To cope with OpenPGP we ignore non-ascii characters so that for
   example umlauts are legal in an email address.  An OpenPGP user ID
   must be utf-8 encoded but there is no strict requirement for
   RFC-822.  Thus to avoid IDNA encoding we put the address verbatim
   as utf-8 into the user ID under the assumption that mail programs
   handle IDNA at a lower level and take OpenPGP user IDs as utf-8.
   Note that we can't check the utf-8 encoding here because in
   keygen.c this function is called with the native encoding and the
   native to utf-8 encoding is only done later.  */
int
has_invalid_email_chars (const char *s)
{
  static const char valid_chars[] =
    "01234567890_-.abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ";
  int in_domain = 0;  /* Set as soon as an '@' has been seen.  */
  const char *p;

  for (p = s; *p; p++)
    {
      if ((*p & 0x80))
        continue;  /* We only care about ASCII.  */
      if (*p == '@')
        {
          in_domain = 1;
          continue;
        }
      if (strchr (valid_chars, *p))
        continue;
      if (!in_domain && *p == '+')
        continue;  /* A plus sign is fine in the local part.  */
      return 1;
    }
  return 0;
}
489 
490 
/* Check whether NAME represents a valid mailbox according to RFC822.
   Returns true if so.  A valid mailbox is not empty, passes
   has_invalid_email_chars(), has exactly one '@' which is neither
   the first nor the last character, does not end with a dot and has
   no two dots in a row.  */
int
is_valid_mailbox (const char *name)
{
  size_t len;

  if (!name || !*name)
    return 0;
  if (has_invalid_email_chars (name))
    return 0;
  if (string_count_chr (name, '@') != 1)
    return 0;
  if (*name == '@')
    return 0;

  len = strlen (name);
  if (name[len-1] == '@' || name[len-1] == '.')
    return 0;
  if (strstr (name, ".."))
    return 0;

  return 1;
}
505 
506 
507 
#ifdef USE_GNUPG_ICONV
/* Report a failed iconv_open for the conversion from the character
   set FROM to the character set TO.  The reason is taken from errno,
   thus this needs to be called right after the failure.  Each kind
   of message is printed only once.  With USE_FALLBACK set the native
   character set is reset to Latin-1 without iconv.  */
static void
handle_iconv_error (const char *to, const char *from, int use_fallback)
{
  static int shown_to_utf8, shown_other_target, shown_failure;

  if (errno != EINVAL)
    {
      if (!shown_failure)
        log_info (_("iconv_open failed: %s\n"), strerror (errno));
      shown_failure = 1;
    }
  else
    {
      /* The conversion is not supported.  We keep one flag for the
         conversions to utf-8 and one for all other targets.  The
         flag is set before the message is printed.  */
      int *flag = (to && !strcmp (to, "utf-8"))? &shown_to_utf8
                                                : &shown_other_target;
      int already_shown = *flag;

      *flag = 1;
      if (!already_shown)
        log_info (_("conversion from `%s' to `%s' not available\n"),
                  from, to);
    }

  if (!use_fallback)
    return;

  /* To avoid further error messages we fall back to Latin-1 for the
     native encoding.  This is justified as one can expect that on a
     utf-8 enabled system nl_langinfo() will work and thus we will
     never get to here.  Thus Latin-1 seems to be a reasonable
     default.  */
  active_charset_name = "iso-8859-1";
  no_translation = 0;
  active_charset = NULL;
  use_iconv = 0;
}
#endif /*USE_GNUPG_ICONV*/
555 
556 int
set_native_charset(const char * newset)557 set_native_charset( const char *newset )
558 {
559     const char *full_newset;
560 
561     if (!newset) {
562 #ifdef _WIN32
563         static char codepage[30];
564         unsigned int cpno;
565         const char *aliases;
566 
567         /* We are a console program thus we need to use the
568            GetConsoleOutputCP function and not the the GetACP which
569            would give the codepage for a GUI program.  Note this is
570            not a bulletproof detection because GetConsoleCP might
571            return a different one for console input.  Not sure how to
572            cope with that.  If the console Code page is not known we
573            fall back to the system code page.  */
574         cpno = GetConsoleOutputCP ();
575         if (!cpno)
576           cpno = GetACP ();
577         sprintf (codepage, "CP%u", cpno );
578         /* Resolve alias.  We use a long string string and not the
579            usual array to optimize if the code is taken to a DSO.
580            Taken from libiconv 1.9.2. */
581         newset = codepage;
582         for (aliases = ("CP936"   "\0" "GBK" "\0"
583                         "CP1361"  "\0" "JOHAB" "\0"
584                         "CP20127" "\0" "ASCII" "\0"
585                         "CP20866" "\0" "KOI8-R" "\0"
586                         "CP21866" "\0" "KOI8-RU" "\0"
587                         "CP28591" "\0" "ISO-8859-1" "\0"
588                         "CP28592" "\0" "ISO-8859-2" "\0"
589                         "CP28593" "\0" "ISO-8859-3" "\0"
590                         "CP28594" "\0" "ISO-8859-4" "\0"
591                         "CP28595" "\0" "ISO-8859-5" "\0"
592                         "CP28596" "\0" "ISO-8859-6" "\0"
593                         "CP28597" "\0" "ISO-8859-7" "\0"
594                         "CP28598" "\0" "ISO-8859-8" "\0"
595                         "CP28599" "\0" "ISO-8859-9" "\0"
596                         "CP28605" "\0" "ISO-8859-15" "\0"
597 			"CP65001" "\0" "UTF-8" "\0");
598              *aliases;
599              aliases += strlen (aliases) + 1, aliases += strlen (aliases) + 1)
600           {
601             if (!strcmp (codepage, aliases) ||(*aliases == '*' && !aliases[1]))
602               {
603                 newset = aliases + strlen (aliases) + 1;
604                 break;
605               }
606           }
607 
608 #else
609 #ifdef HAVE_LANGINFO_CODESET
610         newset = nl_langinfo (CODESET);
611 #else /* !HAVE_LANGINFO_CODESET */
612         /* Try to get the used charset from environment variables.  */
613         static char codepage[30];
614         const char *lc, *dot, *mod;
615 
616         strcpy (codepage, "iso-8859-1");
617         lc = getenv ("LC_ALL");
618         if (!lc || !*lc) {
619             lc = getenv ("LC_CTYPE");
620             if (!lc || !*lc)
621                 lc = getenv ("LANG");
622         }
623         if (lc && *lc) {
624             dot = strchr (lc, '.');
625             if (dot) {
626                 mod = strchr (++dot, '@');
627                 if (!mod)
628                     mod = dot + strlen (dot);
629                 if (mod - dot < sizeof codepage && dot != mod) {
630                     memcpy (codepage, dot, mod - dot);
631                     codepage [mod - dot] = 0;
632                 }
633             }
634         }
635         newset = codepage;
636 #endif  /* !HAVE_LANGINFO_CODESET */
637 #endif
638     }
639 
640     full_newset = newset;
641     if (strlen (newset) > 3 && !ascii_memcasecmp (newset, "iso", 3)) {
642         newset += 3;
643         if (*newset == '-' || *newset == '_')
644             newset++;
645     }
646 
647     /* Note that we silently assume that plain ASCII is actually meant
648        as Latin-1.  This makes sense because many Unix system don't
649        have their locale set up properly and thus would get annoying
650        error messages and we have to handle all the "bug"
651        reports. Latin-1 has always been the character set used for 8
652        bit characters on Unix systems. */
653     if( !*newset
654         || !ascii_strcasecmp (newset, "8859-1" )
655         || !ascii_strcasecmp (newset, "646" )
656         || !ascii_strcasecmp (newset, "ASCII" )
657         || !ascii_strcasecmp (newset, "ANSI_X3.4-1968" )
658         ) {
659         active_charset_name = "iso-8859-1";
660         no_translation = 0;
661 	active_charset = NULL;
662         use_iconv = 0;
663     }
664     else if( !ascii_strcasecmp (newset, "utf8" )
665              || !ascii_strcasecmp(newset, "utf-8") ) {
666 	active_charset_name = "utf-8";
667         no_translation = 1;
668 	active_charset = NULL;
669         use_iconv = 0;
670     }
671 #ifdef USE_GNUPG_ICONV
672     else {
673       iconv_t cd;
674 
675 #ifdef _WIN32
676       if (load_libiconv ())
677           return G10ERR_GENERAL;
678 #endif /*_WIN32*/
679 
680       cd = iconv_open (full_newset, "utf-8");
681       if (cd == (iconv_t)-1) {
682           handle_iconv_error (full_newset, "utf-8", 0);
683           return G10ERR_GENERAL;
684       }
685       iconv_close (cd);
686       cd = iconv_open ("utf-8", full_newset);
687       if (cd == (iconv_t)-1) {
688           handle_iconv_error ("utf-8", full_newset, 0);
689           return G10ERR_GENERAL;
690       }
691       iconv_close (cd);
692       active_charset_name = full_newset;
693       no_translation = 0;
694       active_charset = NULL;
695       use_iconv = 1;
696     }
697 #else /*!USE_GNUPG_ICONV*/
698     else if( !ascii_strcasecmp( newset, "8859-2" ) ) {
699 	active_charset_name = "iso-8859-2";
700         no_translation = 0;
701 	active_charset = latin2_unicode;
702         use_iconv = 0;
703     }
704     else if( !ascii_strcasecmp( newset, "koi8-r" ) ) {
705 	active_charset_name = "koi8-r";
706         no_translation = 0;
707 	active_charset = koi8_unicode;
708         use_iconv = 0;
709     }
710     else
711 	return G10ERR_GENERAL;
712 #endif /*!USE_GNUPG_ICONV*/
713     return 0;
714 }
715 
716 const char*
get_native_charset()717 get_native_charset()
718 {
719     return active_charset_name;
720 }
721 
722 /****************
723  * Convert string, which is in native encoding to UTF8 and return the
724  * new allocated UTF8 string.
725  */
726 char *
native_to_utf8(const char * string)727 native_to_utf8( const char *string )
728 {
729   const byte *s;
730   char *buffer;
731   byte *p;
732   size_t length=0;
733 
734   if (no_translation)
735     { /* Already utf-8 encoded. */
736       buffer = xstrdup (string);
737     }
738   else if( !active_charset && !use_iconv) /* Shortcut implementation
739                                              for Latin-1.  */
740     {
741       for(s=string; *s; s++ )
742         {
743           length++;
744           if( *s & 0x80 )
745             length++;
746 	}
747       buffer = xmalloc( length + 1 );
748       for(p=buffer, s=string; *s; s++ )
749         {
750           if( *s & 0x80 )
751             {
752               *p++ = 0xc0 | ((*s >> 6) & 3);
753               *p++ = 0x80 | ( *s & 0x3f );
754             }
755           else
756             *p++ = *s;
757         }
758       *p = 0;
759     }
760   else       /* Need to use a translation table. */
761     {
762 #ifdef USE_GNUPG_ICONV
763       iconv_t cd;
764       const char *inptr;
765       char *outptr;
766       size_t inbytes, outbytes;
767 
768       cd = iconv_open ("utf-8", active_charset_name);
769       if (cd == (iconv_t)-1)
770         {
771           handle_iconv_error ("utf-8", active_charset_name, 1);
772           return native_to_utf8 (string);
773         }
774 
775       for (s=string; *s; s++ )
776         {
777           length++;
778           if ((*s & 0x80))
779             length += 5; /* We may need up to 6 bytes for the utf8 output. */
780         }
781       buffer = xmalloc (length + 1);
782 
783       inptr = string;
784       inbytes = strlen (string);
785       outptr = buffer;
786       outbytes = length;
787       if ( iconv (cd, (ICONV_CONST char **)&inptr, &inbytes,
788                   &outptr, &outbytes) == (size_t)-1)
789         {
790           static int shown;
791 
792           if (!shown)
793             log_info (_("conversion from `%s' to `%s' failed: %s\n"),
794                       active_charset_name, "utf-8", strerror (errno));
795           shown = 1;
796           /* We don't do any conversion at all but use the strings as is. */
797           strcpy (buffer, string);
798         }
799       else /* Success.  */
800         {
801           *outptr = 0;
802           /* We could realloc the buffer now but I doubt that it makes
803              much sense given that it will get freed anyway soon
804              after.  */
805         }
806       iconv_close (cd);
807 
808 #else /*!USE_GNUPG_ICONV*/
809       for(s=string; *s; s++ )
810         {
811           length++;
812           if( *s & 0x80 )
813             length += 2; /* We may need up to 3 bytes. */
814         }
815       buffer = xmalloc( length + 1 );
816       for(p=buffer, s=string; *s; s++ ) {
817         if( *s & 0x80 ) {
818           ushort val = active_charset[ *s & 0x7f ];
819           if( val < 0x0800 ) {
820             *p++ = 0xc0 | ( (val >> 6) & 0x1f );
821             *p++ = 0x80 | (  val & 0x3f );
822           }
823           else {
824             *p++ = 0xe0 | ( (val >> 12) & 0x0f );
825             *p++ = 0x80 | ( (val >>  6) & 0x3f );
826             *p++ = 0x80 | (  val & 0x3f );
827           }
828         }
829         else
830           *p++ = *s;
831       }
832       *p = 0;
833 #endif /*!USE_GNUPG_ICONV*/
834 
835     }
836   return buffer;
837 }
838 
839 
/****************
 * Convert string, which is in UTF8 to native encoding.  Illegal
 * encodings are replaced by "\xnn" escapes and all control characters
 * are quoted.  A character with value DELIM will always be quoted, it
 * must be a vanilla ASCII character.  A DELIM value of -1 is special:
 * it disables all quoting of control characters.
 */
847 char *
utf8_to_native(const char * string,size_t length,int delim)848 utf8_to_native( const char *string, size_t length, int delim )
849 {
850     int nleft;
851     int i;
852     byte encbuf[8];
853     int encidx;
854     const byte *s;
855     size_t n;
856     byte *buffer = NULL, *p = NULL;
857     unsigned long val = 0;
858     size_t slen;
859     int resync = 0;
860 
861     /* 1. pass (p==NULL): count the extended utf-8 characters */
862     /* 2. pass (p!=NULL): create string */
863     for( ;; ) {
864 	for( slen=length, nleft=encidx=0, n=0, s=string; slen; s++, slen-- ) {
865 	    if( resync ) {
866 		if( !(*s < 128 || (*s >= 0xc0 && *s <= 0xfd)) ) {
867 		    /* still invalid */
868 		    if( p ) {
869 			sprintf(p, "\\x%02x", *s );
870 			p += 4;
871 		    }
872 		    n += 4;
873 		    continue;
874 		}
875 		resync = 0;
876 	    }
877 	    if( !nleft ) {
878 		if( !(*s & 0x80) ) { /* plain ascii */
879 		    if( delim != -1
880                         && (*s < 0x20 || *s == 0x7f || *s == delim
881                             || (delim && *s=='\\'))) {
882 			n++;
883 			if( p )
884 			    *p++ = '\\';
885 			switch( *s ) {
886 			  case '\n': n++; if( p ) *p++ = 'n'; break;
887 			  case '\r': n++; if( p ) *p++ = 'r'; break;
888 			  case '\f': n++; if( p ) *p++ = 'f'; break;
889 			  case '\v': n++; if( p ) *p++ = 'v'; break;
890 			  case '\b': n++; if( p ) *p++ = 'b'; break;
891 			  case	 0 : n++; if( p ) *p++ = '0'; break;
892 			  default:
893                             n += 3;
894                             if ( p ) {
895                                 sprintf( p, "x%02x", *s );
896                                 p += 3;
897                             }
898                             break;
899 			}
900 		    }
901 		    else {
902 			if( p ) *p++ = *s;
903 			n++;
904 		    }
905 		}
906 		else if( (*s & 0xe0) == 0xc0 ) { /* 110x xxxx */
907 		    val = *s & 0x1f;
908 		    nleft = 1;
909                     encidx = 0;
910 		    encbuf[encidx++] = *s;
911 		}
912 		else if( (*s & 0xf0) == 0xe0 ) { /* 1110 xxxx */
913 		    val = *s & 0x0f;
914 		    nleft = 2;
915                     encidx = 0;
916 		    encbuf[encidx++] = *s;
917 		}
918 		else if( (*s & 0xf8) == 0xf0 ) { /* 1111 0xxx */
919 		    val = *s & 0x07;
920 		    nleft = 3;
921                     encidx = 0;
922 		    encbuf[encidx++] = *s;
923 		}
924 		else if( (*s & 0xfc) == 0xf8 ) { /* 1111 10xx */
925 		    val = *s & 0x03;
926 		    nleft = 4;
927                     encidx = 0;
928 		    encbuf[encidx++] = *s;
929 		}
930 		else if( (*s & 0xfe) == 0xfc ) { /* 1111 110x */
931 		    val = *s & 0x01;
932 		    nleft = 5;
933                     encidx = 0;
934 		    encbuf[encidx++] = *s;
935 		}
936 		else {	/* invalid encoding: print as \xnn */
937 		    if( p ) {
938 			sprintf(p, "\\x%02x", *s );
939 			p += 4;
940 		    }
941 		    n += 4;
942 		    resync = 1;
943 		}
944 	    }
945 	    else if( *s < 0x80 || *s >= 0xc0 ) { /* invalid */
946 		if( p ) {
947                     for(i=0; i < encidx; i++ ) {
948                         sprintf(p, "\\x%02x", encbuf[i] );
949                         p += 4;
950                     }
951 		    sprintf(p, "\\x%02x", *s );
952 		    p += 4;
953 		}
954 		n += 4 + 4*encidx;
955 		nleft = 0;
956                 encidx = 0;
957 		resync = 1;
958 	    }
959 	    else {
960 		encbuf[encidx++] = *s;
961 		val <<= 6;
962 		val |= *s & 0x3f;
963 		if( !--nleft ) { /* ready */
964                     if (no_translation) {
965                         if( p ) {
966                             for(i=0; i < encidx; i++ )
967                                 *p++ = encbuf[i];
968                         }
969                         n += encidx;
970                         encidx = 0;
971                     }
972 #ifdef USE_GNUPG_ICONV
973                     else if(use_iconv) {
974                         /* Our strategy for using iconv is a bit
975                          * strange but it better keeps compatibility
976                          * with previous versions in regard to how
977                          * invalid encodings are displayed.  What we
978                          * do is to keep the utf-8 as is and have the
979                          * real translation step then at the end.
980                          * Yes, I know that this is ugly.  However we
981                          * are short of the 1.4 release and for this
982                          * branch we should not mee too much around
983                          * with iconv things.  One reason for this is
984                          * that we don't know enough about non-GNU
985                          * iconv implementation and want to minimize
986                          * the risk of breaking the code on too many
987                          * platforms.  */
988                         if( p ) {
989                             for(i=0; i < encidx; i++ )
990                                 *p++ = encbuf[i];
991                         }
992                         n += encidx;
993                         encidx = 0;
994                     }
995 #endif /*USE_GNUPG_ICONV*/
996 		    else if( active_charset ) { /* table lookup */
997 			for(i=0; i < 128; i++ ) {
998 			    if( active_charset[i] == val )
999 				break;
1000 			}
1001 			if( i < 128 ) { /* we can print this one */
1002 			    if( p ) *p++ = i+128;
1003 			    n++;
1004 			}
1005 			else { /* we do not have a translation: print utf8 */
1006 			    if( p ) {
1007 				for(i=0; i < encidx; i++ ) {
1008 				    sprintf(p, "\\x%02x", encbuf[i] );
1009 				    p += 4;
1010 				}
1011 			    }
1012 			    n += encidx*4;
1013                             encidx = 0;
1014 			}
1015 		    }
1016 		    else { /* native set */
1017 			if( val >= 0x80 && val < 256 ) {
1018 			    n++;    /* we can simply print this character */
1019 			    if( p ) *p++ = val;
1020 			}
1021 			else { /* we do not have a translation: print utf8 */
1022 			    if( p ) {
1023 				for(i=0; i < encidx; i++ ) {
1024 				    sprintf(p, "\\x%02x", encbuf[i] );
1025 				    p += 4;
1026 				}
1027 			    }
1028 			    n += encidx*4;
1029                             encidx = 0;
1030 			}
1031 		    }
1032 		}
1033 
1034 	    }
1035 	}
1036 	if( !buffer ) { /* allocate the buffer after the first pass */
1037 	    buffer = p = xmalloc( n + 1 );
1038 	}
1039 #ifdef USE_GNUPG_ICONV
1040         else if(use_iconv) {
1041             /* Note: See above for comments.  */
1042             iconv_t cd;
1043             const char *inptr;
1044             char *outbuf, *outptr;
1045             size_t inbytes, outbytes;
1046 
1047             *p = 0;  /* Terminate the buffer. */
1048 
1049             cd = iconv_open (active_charset_name, "utf-8");
1050             if (cd == (iconv_t)-1)
1051                 {
1052                     handle_iconv_error (active_charset_name, "utf-8", 1);
1053                     xfree (buffer);
1054                     return utf8_to_native (string, length, delim);
1055                 }
1056 
1057             /* Allocate a new buffer large enough to hold all possible
1058              * encodings. */
1059             n = p - buffer + 1;
1060             inbytes = n - 1;;
1061             inptr = buffer;
1062             outbytes = n * MB_LEN_MAX;
1063             if (outbytes / MB_LEN_MAX != n)
1064                 BUG (); /* Actually an overflow. */
1065             outbuf = outptr = xmalloc (outbytes);
1066             if ( iconv (cd, (ICONV_CONST char **)&inptr, &inbytes,
1067                         &outptr, &outbytes) == (size_t)-1) {
1068                 static int shown;
1069 
1070                 if (!shown)
1071                   log_info (_("conversion from `%s' to `%s' failed: %s\n"),
1072                             "utf-8", active_charset_name, strerror (errno));
1073                 shown = 1;
1074                 /* Didn't worked out.  Temporary disable the use of
1075                  * iconv and fall back to our old code. */
1076                 xfree (buffer);
1077                 buffer = NULL;
1078                 xfree (outbuf);
1079                 use_iconv = 0;
1080                 outbuf = utf8_to_native (string, length, delim);
1081                 use_iconv = 1;
1082             }
1083             else { /* Success.  */
1084                 *outptr = 0;
1085                 /* We could realloc the buffer now but I doubt that it makes
1086                    much sense given that it will get freed anyway soon
1087                    after.  */
1088                 xfree (buffer);
1089             }
1090             iconv_close (cd);
1091             return outbuf;
1092         }
1093 #endif /*USE_GNUPG_ICONV*/
1094 	else {
1095 	    *p = 0; /* make a string */
1096 	    return buffer;
1097 	}
1098     }
1099 }
1100 
/* Return a newly allocated copy of STRING that is (almost certainly)
   UTF-8, whatever encoding STRING actually uses.  Returns NULL if
   STRING is NULL.  The heuristic, mostly borrowed from GPA, only
   inspects the first byte that has its high bit set:

     - no such byte (pure ASCII): return a plain copy;
     - the byte looks like a UTF-8 lead byte and is followed by a
       continuation byte: assume valid UTF-8 and return a plain copy;
     - otherwise, if the string contains no 0xC3 byte at all: assume
       Latin-1 and expand every high-bit byte into a two byte UTF-8
       sequence;
     - anything else: return a plain copy, because valid encodings
       might still follow the first suspicious byte.

   Background: old PGP versions copied user IDs verbatim into the
   key, so names with Umlauts often show up in Latin-1.  The same
   repair is applied to X.509 data, where invalid encodings are
   common as well.  */
char *
string_to_utf8 (const char *string)
{
  const unsigned char *scan;
  unsigned int lead;
  int looks_like_lead;
  size_t outlen;
  char *result, *dst;

  if (!string)
    return NULL;

  /* Locate the first byte with the high bit set.  */
  scan = (const unsigned char *)string;
  while (*scan && *scan < 0x80)
    scan++;

  /* Plain ASCII needs no conversion.  */
  if (!*scan)
    return xstrdup (string);

  /* A possible UTF-8 lead byte followed by a continuation byte.
     This might still be Latin-1 but we better assume UTF-8.  */
  lead = *scan;
  looks_like_lead = ((lead & 0xe0) == 0xc0
                     || (lead & 0xf0) == 0xe0
                     || (lead & 0xf8) == 0xf0
                     || (lead & 0xfc) == 0xf8
                     || (lead & 0xfe) == 0xfc);
  if (looks_like_lead && (scan[1] & 0xc0) == 0x80)
    return xstrdup (string);

  /* A 0xC3 byte somewhere in the string: treat it as UTF-8 even
     though the first non-ASCII byte was not a valid sequence.  */
  if (strchr (string, 0xc3))
    return xstrdup (string);

  /* Assume Latin-1.  Every high-bit byte expands to two bytes.  */
  outlen = 0;
  for (scan = (const unsigned char *)string; *scan; scan++)
    outlen += (*scan & 0x80) ? 2 : 1;

  result = xmalloc (outlen + 1);
  dst = result;
  for (scan = (const unsigned char *)string; *scan; scan++)
    {
      if (*scan & 0x80)
        {
          *dst++ = 0xc0 | ((*scan >> 6) & 3);
          *dst++ = 0x80 | (*scan & 0x3f);
        }
      else
        *dst++ = *scan;
    }
  *dst = 0;

  return result;
}
1172 
/* Format FMT and its arguments like asprintf does and return the
   resulting buffer, which is suitable to be released with xfree.
   A failure of estream_vasprintf is reported through log_fatal, so
   callers do not need to check the result.  */
char *
xasprintf (const char *fmt, ...)
{
  va_list args;
  char *result;
  int rc;

  va_start (args, fmt);
  rc = estream_vasprintf (&result, fmt, args);
  if (rc < 0)
    log_fatal ("estream_asprintf failed: %s\n", strerror (errno));
  va_end (args);

  return result;
}
1188 
/* Format FMT and its arguments like asprintf does and return the
   allocated buffer.  Unlike xasprintf this returns NULL instead of
   terminating when estream_vasprintf reports a failure.  */
char *
xtryasprintf (const char *fmt, ...)
{
  va_list args;
  char *result;
  int failed;

  va_start (args, fmt);
  failed = estream_vasprintf (&result, fmt, args) < 0;
  va_end (args);

  return failed ? NULL : result;
}
1204 
1205 
/* va_list variant of xtryasprintf: format FMT with the arguments in
   ARG_PTR and return the allocated buffer, or NULL if
   estream_vasprintf reports a failure.  */
char *
xtryvasprintf (const char *fmt, va_list arg_ptr)
{
  char *result;

  if (estream_vasprintf (&result, fmt, arg_ptr) < 0)
    result = NULL;
  return result;
}
1217 
1218 
/* Helper for strconcat: join S1 and the NULL terminated list of
   strings in ARG_PTR into one buffer obtained from xtrymalloc.
   Returns NULL if the allocation fails, or with ERRNO set to EINVAL
   if more strings are passed than fit into the local table (one
   slot is reserved for the terminating NULL).  S1 must not be
   NULL.  */
static char *
do_strconcat (const char *s1, va_list arg_ptr)
{
  const char *parts[48];
  const char *next;
  size_t nparts = 0;
  size_t total;
  size_t idx;
  char *result, *dst;

  parts[nparts++] = s1;
  total = strlen (s1);

  /* Collect the remaining strings and sum up their lengths.  */
  for (;;)
    {
      next = va_arg (arg_ptr, const char *);
      parts[nparts] = next;
      if (!next)
        break;
      total += strlen (next);
      if (nparts >= DIM (parts)-1)
        {
          errno = EINVAL;
          return NULL;
        }
      nparts++;
    }

  result = xtrymalloc (total + 1);  /* One extra byte for the Nul.  */
  if (!result)
    return NULL;

  dst = result;
  for (idx = 0; idx < nparts; idx++)
    dst = stpcpy (dst, parts[idx]);

  return result;
}
1249 
1250 
/* Concatenate S1 and all following string arguments up to a
   terminating NULL argument.  Returns a newly allocated buffer with
   the joined string, or NULL if the allocation failed or too many
   arguments were given.  A NULL S1 yields an allocated empty
   string.  */
char *
strconcat (const char *s1, ...)
{
  va_list args;
  char *joined;

  if (!s1)
    return xtrystrdup ("");

  va_start (args, s1);
  joined = do_strconcat (s1, args);
  va_end (args);

  return joined;
}
1270 
1271 
1272 /****************************************************
1273  ******** locale insensitive ctype functions ********
1274  ****************************************************/
1275 /* FIXME: replace them by a table lookup and macros */
/* Return 1 if C is one of the ASCII letters 'A'..'Z' and 0
   otherwise; the current locale is not consulted.  */
int
ascii_isupper (int c)
{
  if (c < 'A')
    return 0;
  return c <= 'Z';
}
1281 
/* Return 1 if C is one of the ASCII letters 'a'..'z' and 0
   otherwise; the current locale is not consulted.  */
int
ascii_islower (int c)
{
  if (c < 'a')
    return 0;
  return c <= 'z';
}
1287 
/* Compare the first N bytes of A and B, folding case with
   ascii_toupper (defined elsewhere in this file).  Returns 0 if all
   N bytes match that way (or if A and B are the same pointer);
   otherwise returns the difference of the folded values of the
   first mismatching pair.  Neither buffer needs to be Nul
   terminated.  */
int
ascii_memcasecmp (const char *a, const char *b, size_t n)
{
  size_t i;

  if (a == b)
    return 0;

  for (i = 0; i < n; i++)
    {
      int ua, ub;

      if (a[i] == b[i])
        continue;  /* Identical bytes need no folding.  */

      /* Fold each byte once and reuse the values for the result.  */
      ua = ascii_toupper (a[i]);
      ub = ascii_toupper (b[i]);
      if (ua != ub)
        return ua - ub;
    }

  return 0;
}
1299 
/* Lowercase all ASCII letters in S in place and return S.  Only the
   bytes 'A'..'Z' are changed; all other bytes, including those with
   the high bit set, are left untouched.  The locale is not
   consulted.  */
char *
ascii_strlwr (char *s)
{
  char *p;

  for (p = s; *p; p++)
    {
      /* The range test alone guarantees a 7 bit value, so no extra
         isascii() check (which is not ISO C) is needed.  */
      if (*p >= 'A' && *p <= 'Z')
        *p |= 0x20;
    }

  return s;
}
1312 
1313 
1314 /*********************************************
1315  ********** missing string functions *********
1316  *********************************************/
1317 
1318 #ifndef HAVE_STPCPY
/* Fallback for systems without stpcpy: copy the string B including
   its terminating Nul to A and return a pointer to that Nul in A.  */
char *
stpcpy (char *a, const char *b)
{
  for (;;)
    {
      *a = *b;
      if (!*b)
        break;
      a++;
      b++;
    }

  return a;
}
1328 #endif
1329 
1330 #ifndef HAVE_STRLWR
/* Fallback for systems without strlwr: convert every character of S
   to lower case in place using tolower and return S.  */
char *
strlwr (char *s)
{
  unsigned char *cursor = (unsigned char *)s;

  while (*cursor)
    {
      *cursor = tolower (*cursor);
      cursor++;
    }

  return s;
}
1339 #endif
1340 
1341 #ifndef HAVE_STRCASECMP
/* Fallback for systems without strcasecmp: compare the strings A
   and B ignoring case.  Returns 0 if they are equal, a negative
   value if A sorts before B and a positive value otherwise.

   Both the comparison and the result use the tolower values of the
   bytes (taken as unsigned char), as POSIX specifies.  Returning
   the difference of the raw bytes instead would give the wrong
   sign for mixed case input such as "a" versus "B".  */
int
strcasecmp (const char *a, const char *b)
{
  const unsigned char *ua = (const unsigned char *)a;
  const unsigned char *ub = (const unsigned char *)b;

  for (;; ua++, ub++)
    {
      int ca = tolower (*ua);
      int cb = tolower (*ub);

      if (ca != cb)
        return ca - cb;
      if (!ca)
        return 0;  /* Both strings ended at the same position.  */
    }
}
1352 #endif
1353 
1354 #ifndef HAVE_STRNCASECMP
/* Fallback for systems without strncasecmp: compare at most N bytes
   of the strings A and B ignoring case.  Returns 0 if they are
   equal within that range, a negative value if A sorts before B and
   a positive value otherwise.

   Both the comparison and the result use the tolower values of the
   bytes (taken as unsigned char), as POSIX specifies.  Returning
   the difference of the raw bytes instead would give the wrong
   sign for mixed case input such as "a" versus "B".  */
int
strncasecmp (const char *a, const char *b, size_t n)
{
  const unsigned char *ua = (const unsigned char *)a;
  const unsigned char *ub = (const unsigned char *)b;

  for (; n; ua++, ub++, n--)
    {
      int ca = tolower (*ua);
      int cb = tolower (*ub);

      if (ca != cb)
        return ca - cb;
      if (!ca)
        break;  /* Both strings ended before N bytes were seen.  */
    }

  return 0;
}
1367 #endif
1368 
1369 
1370 #ifdef _WIN32
1371 const char *
w32_strerror(int w32_errno)1372 w32_strerror (int w32_errno)
1373 {
1374   static char strerr[256];
1375   int ec = (int)GetLastError ();
1376 
1377   if (w32_errno == 0)
1378     w32_errno = ec;
1379   FormatMessage (FORMAT_MESSAGE_FROM_SYSTEM, NULL, w32_errno,
1380                  MAKELANGID (LANG_NEUTRAL, SUBLANG_DEFAULT),
1381                  strerr, DIM (strerr)-1, NULL);
1382   return strerr;
1383 }
1384 #endif /*_WIN32*/
1385 
1386 
1387 
1388