1 /***************************************************************************
2  *   copyright           : (C) 2002 by Hendrik Sattler                     *
3  *   mail                : post@hendrik-sattler.de                         *
4  *                                                                         *
5  *   This program is free software; you can redistribute it and/or modify  *
6  *   it under the terms of the GNU General Public License as published by  *
7  *   the Free Software Foundation; either version 2 of the License, or     *
8  *   (at your option) any later version.                                   *
9  *                                                                         *
10  ***************************************************************************/
11 
12 //own headers
13 #include <charsets.h>
14 #include <helper.h>
15 #include <gtincl.h>
16 #include <intincl.h>
17 #include "ucs4.h"
18 
19 //standard headers
20 #include <stdio.h>
21 #include <string.h>
22 #include <errno.h>
23 #include <iconv.h>
24 #include <limits.h>
25 
26 
27 static
replace_char_escape(char ** inbuf,size_t * inbytesleft,char ** outbuf,size_t * outbytesleft)28 size_t replace_char_escape (char **inbuf, size_t *inbytesleft,
29                             char **outbuf, size_t *outbytesleft)
30 {
31   uint16_t temp = (uint16_t)(*((ucs4char_t*)*inbuf) & UINT16_MAX);
32 
33   if (inbuf == NULL || *inbuf == NULL ||
34       outbuf == NULL || *outbuf == NULL ||
35       *inbytesleft <= 0) {
36     return 0;
37   }
38   if (*outbytesleft < 5) {
39     return (size_t)-1;
40   }
41   /* do this endianess independent */
42   sprintf(*outbuf,"\\%02X%02X",(temp>>8)&0xFF,temp&0xFF);
43   *inbuf += sizeof(ucs4char_t);
44   *inbytesleft -= sizeof(ucs4char_t);
45   *outbuf += 5;
46   *outbytesleft -= 5;
47   return 0;
48 }
49 
50 static
replace_char_questionmark(char ** inbuf,size_t * inbytesleft,char ** outbuf,size_t * outbytesleft)51 size_t replace_char_questionmark (char **inbuf, size_t *inbytesleft,
52 				  char **outbuf, size_t *outbytesleft)
53 {
54   if (inbuf == NULL || *inbuf == NULL ||
55       outbuf == NULL || *outbuf == NULL ||
56       *inbytesleft <= 0) {
57     return 0;
58   }
59   if (*outbytesleft < 5) {
60     return (size_t)-1;
61   }
62   sprintf(*outbuf,"?");
63   *inbuf += sizeof(ucs4char_t);
64   *inbytesleft -= sizeof(ucs4char_t);
65   *outbuf += 1;
66   *outbytesleft -= 1;
67   return 0;
68 }
69 
70 static
replace_char(enum repmode replacement_mode,char ** inbuf,size_t * inbytesleft,char ** outbuf,size_t * outbytesleft)71 size_t replace_char(enum repmode replacement_mode,
72 		    char **inbuf, size_t *inbytesleft,
73 		    char **outbuf, size_t *outbytesleft)
74 {
75   size_t retval = 0;
76   switch(replacement_mode) {
77   case REPMODE_IGNORE:
78     fprintf(stderr,_("%s: Unicode character 0x%lx cannot be converted.\n"),_("Error"),
79             (unsigned long)*((ucs4char_t*)*inbuf));
80     break;
81   case REPMODE_ESCAPE_CHARS:
82     retval = replace_char_escape(inbuf,inbytesleft,outbuf,outbytesleft);
83     break;
84   case REPMODE_QUESTIONMARK:
85     retval = replace_char_questionmark(inbuf,inbytesleft,outbuf,outbytesleft);
86     break;
87   }
88   return retval;
89 }
90 
/* Convert a zero-terminated UCS-4 string to the charset to_code.
 *
 * The input is fed to iconv() one character at a time. Whenever iconv()
 * reports anything other than a clean conversion of a character, that
 * attempt is undone and replace_char() is called with replacement_mode
 * instead.
 *
 * Returns NULL if to_code or input is NULL. Otherwise returns a buffer
 * obtained from mem_alloc() that holds the converted bytes.
 * NOTE(review): presumably the caller owns the buffer and
 * mem_alloc(...,1) zero-fills it, which is what terminates the result;
 * nothing here writes a terminator explicitly - confirm.
 *
 * Any failure of iconv_open() or of the conversion/replacement prints a
 * message to stderr and terminates the process with EXIT_FAILURE.
 */
char* convert_from_internal (const char* to_code,
                             ucs4char_t* input,
                             enum repmode replacement_mode)
{
  /* must be at least 5 for replace_char functions */
  const size_t mult = (MB_LEN_MAX>5)?MB_LEN_MAX:5;
  /* the conversion always works on exactly one character */
  const size_t insize = sizeof(ucs4char_t);

  iconv_t cd;
  size_t status;
  int estatus;
  char* retval;

  size_t insize_conv;
  char* inptr;
  char* inptr_conv;

  size_t outsize;
  size_t outsize_conv;
  char* outptr;
  char* outptr_conv;

  /* check the arguments before input is handed to ucs4len() */
  if (to_code == NULL || input == NULL) return NULL;

  inptr = (char*)input;
  inptr_conv = inptr;

  /* this should be enough for every possible locale
   * MB_CUR_MAX can NOT be used!
   */
  outsize = ucs4len(input)*mult;
  outsize_conv = outsize;

  cd = iconv_open(to_code,ucs4_get_iconv_charset());
  if (cd == (iconv_t)-1) {
    fprintf(stderr,_("Error on text conversion from charset \"%s\" to charset \"%s\": %s\n"),
            ucs4_get_iconv_charset(),to_code,strerror(errno));
    exit(EXIT_FAILURE);
  }

  /* one extra slot of mult bytes beyond the space announced to iconv() */
  retval = mem_alloc(outsize+mult,1); //not to be modified later
  outptr = retval;
  outptr_conv = retval;

  while (ucs4len((ucs4char_t*)inptr) > 0) {
    insize_conv = insize;
    status = iconv(cd,
                   (ICONV_CAST)&inptr_conv,&insize_conv,
                   &outptr_conv,&outsize_conv);
    /* save errno right away, later library calls may overwrite it */
    estatus = errno;
    /* the character conversion may have failed
     * because the target charset has no such char
     * (status is unsigned, so this also catches (size_t)-1)
     */
    if (status > (size_t)0) {
      insize_conv = insize;
      //set the vars back to before conversion try
      inptr_conv = inptr;
      outptr_conv = outptr;
      outsize_conv = outsize;
      status = replace_char(replacement_mode,
                            &inptr_conv,&insize_conv,
                            &outptr_conv,&outsize_conv);
      if (status == (size_t)-1) {
        //there is only one implemented
        estatus = E2BIG;
      } else if (inptr_conv == inptr) {
        /* the replacement consumed nothing: skip the character here,
         * otherwise this loop would retry it forever */
        inptr_conv += insize;
      }
    }
    /* the character conversion/replacement may be buggy
     */
    if (status == (size_t)-1) {
      switch (estatus) {
      case E2BIG: //we have to resize outbuf, should never happen
        fprintf(stderr,"%s: %s %s\n",_("Error"),
                _("insufficient memory on unicode decoding."),
                _("Please report as bug."));
        exit(EXIT_FAILURE);
        break;
      case EINVAL:
      case EILSEQ:
        fprintf(stderr,"%s: %s\n",_("Error with internal charset"),strerror(estatus));
        exit(EXIT_FAILURE);
        break;
      default:
        fprintf(stderr,"%s: %s\n",_("Error"),strerror(estatus));
        exit(EXIT_FAILURE);
        break;
      }
    }
    //we update the loop-external values, too
    inptr = inptr_conv;
    outptr = outptr_conv;
    outsize = outsize_conv;
  }
  iconv_close(cd);

  return retval;
}
182