1 /***************************************************************************
2 * copyright : (C) 2002 by Hendrik Sattler *
3 * mail : post@hendrik-sattler.de *
4 * *
5 * This program is free software; you can redistribute it and/or modify *
6 * it under the terms of the GNU General Public License as published by *
7 * the Free Software Foundation; either version 2 of the License, or *
8 * (at your option) any later version. *
9 * *
10 ***************************************************************************/
11
12 //own headers
13 #include <charsets.h>
14 #include <helper.h>
15 #include <gtincl.h>
16 #include <intincl.h>
17 #include "ucs4.h"
18
19 //standard headers
20 #include <stdio.h>
21 #include <string.h>
22 #include <errno.h>
23 #include <iconv.h>
24 #include <limits.h>
25
26
27 static
replace_char_escape(char ** inbuf,size_t * inbytesleft,char ** outbuf,size_t * outbytesleft)28 size_t replace_char_escape (char **inbuf, size_t *inbytesleft,
29 char **outbuf, size_t *outbytesleft)
30 {
31 uint16_t temp = (uint16_t)(*((ucs4char_t*)*inbuf) & UINT16_MAX);
32
33 if (inbuf == NULL || *inbuf == NULL ||
34 outbuf == NULL || *outbuf == NULL ||
35 *inbytesleft <= 0) {
36 return 0;
37 }
38 if (*outbytesleft < 5) {
39 return (size_t)-1;
40 }
41 /* do this endianess independent */
42 sprintf(*outbuf,"\\%02X%02X",(temp>>8)&0xFF,temp&0xFF);
43 *inbuf += sizeof(ucs4char_t);
44 *inbytesleft -= sizeof(ucs4char_t);
45 *outbuf += 5;
46 *outbytesleft -= 5;
47 return 0;
48 }
49
50 static
replace_char_questionmark(char ** inbuf,size_t * inbytesleft,char ** outbuf,size_t * outbytesleft)51 size_t replace_char_questionmark (char **inbuf, size_t *inbytesleft,
52 char **outbuf, size_t *outbytesleft)
53 {
54 if (inbuf == NULL || *inbuf == NULL ||
55 outbuf == NULL || *outbuf == NULL ||
56 *inbytesleft <= 0) {
57 return 0;
58 }
59 if (*outbytesleft < 5) {
60 return (size_t)-1;
61 }
62 sprintf(*outbuf,"?");
63 *inbuf += sizeof(ucs4char_t);
64 *inbytesleft -= sizeof(ucs4char_t);
65 *outbuf += 1;
66 *outbytesleft -= 1;
67 return 0;
68 }
69
70 static
replace_char(enum repmode replacement_mode,char ** inbuf,size_t * inbytesleft,char ** outbuf,size_t * outbytesleft)71 size_t replace_char(enum repmode replacement_mode,
72 char **inbuf, size_t *inbytesleft,
73 char **outbuf, size_t *outbytesleft)
74 {
75 size_t retval = 0;
76 switch(replacement_mode) {
77 case REPMODE_IGNORE:
78 fprintf(stderr,_("%s: Unicode character 0x%lx cannot be converted.\n"),_("Error"),
79 (unsigned long)*((ucs4char_t*)*inbuf));
80 break;
81 case REPMODE_ESCAPE_CHARS:
82 retval = replace_char_escape(inbuf,inbytesleft,outbuf,outbytesleft);
83 break;
84 case REPMODE_QUESTIONMARK:
85 retval = replace_char_questionmark(inbuf,inbytesleft,outbuf,outbytesleft);
86 break;
87 }
88 return retval;
89 }
90
/**
 * Convert a string of internal characters (ucs4char_t, length determined by
 * ucs4len()) to the character set named by to_code, using iconv one
 * character at a time.
 *
 * A character that iconv rejects with EILSEQ, or converts only irreversibly
 * (positive return value), is handed to replace_char() with the given
 * replacement_mode instead. Any other failure terminates the process with
 * an error message on stderr.
 *
 * @param to_code           iconv name of the target character set
 * @param input             string to convert
 * @param replacement_mode  what to do with unconvertible characters
 * @return NULL if to_code or input is NULL; otherwise a buffer obtained
 *         from mem_alloc() holding the converted text.
 *         NOTE(review): nothing here writes a final terminator after iconv
 *         output; presumably mem_alloc(size, 1) returns zeroed memory and
 *         the caller owns the buffer - confirm against mem_alloc().
 */
char* convert_from_internal (const char* to_code,
                             ucs4char_t* input,
                             enum repmode replacement_mode)
{
  /* must be at least 5 for replace_char functions */
  const size_t mult = (MB_LEN_MAX>5)?MB_LEN_MAX:5;
  const size_t insize = sizeof(ucs4char_t);

  iconv_t cd;
  size_t status;
  int estatus;
  char* retval;

  size_t insize_conv;
  char* inptr;
  char* inptr_conv;

  size_t outsize;
  size_t outsize_conv;
  char* outptr;
  char* outptr_conv;

  /* check the arguments before ucs4len() looks at input */
  if (to_code == NULL || input == NULL) return NULL;

  inptr = (char*)input;
  inptr_conv = inptr;

  /* this should be enough for every possible locale
   * MB_CUR_MAX can NOT be used!
   */
  outsize = ucs4len(input)*mult;
  outsize_conv = outsize;

  cd = iconv_open(to_code,ucs4_get_iconv_charset());
  if (cd == (iconv_t)-1) {
    fprintf(stderr,_("Error on text conversion from charset \"%s\" to charset \"%s\": %s\n"),
            ucs4_get_iconv_charset(),to_code,strerror(errno));
    exit(EXIT_FAILURE);
  }

  retval = mem_alloc(outsize+mult,1); //not to be modified later
  outptr = retval;
  outptr_conv = retval;

  while (ucs4len((ucs4char_t*)inptr) > 0) {
    /* feed exactly one character per iconv call */
    insize_conv = insize;
    status = iconv(cd,
                   (ICONV_CAST)&inptr_conv,&insize_conv,
                   &outptr_conv,&outsize_conv);
    /* errno is only meaningful when iconv reported a failure */
    estatus = (status == (size_t)-1) ? errno : 0;

    /* the character conversion may have failed
     * because the target charset has no such char:
     * iconv signals that with EILSEQ or with a positive count of
     * irreversible conversions. Other failures (E2BIG, EINVAL, ...)
     * are real errors and go to the error handling below.
     */
    if (status != (size_t)0 &&
        (status != (size_t)-1 || estatus == EILSEQ)) {
      insize_conv = insize;
      //set the vars back to before conversion try
      inptr_conv = inptr;
      outptr_conv = outptr;
      outsize_conv = outsize;
      status = replace_char(replacement_mode,
                            &inptr_conv,&insize_conv,
                            &outptr_conv,&outsize_conv);
      if (status == (size_t)-1) {
        //there is only one implemented
        estatus = E2BIG;
      } else if (inptr_conv == inptr) {
        /* the replacement did not consume the character: skip it here,
         * otherwise this loop would retry the same character forever
         */
        inptr_conv += insize;
      }
    }

    /* the character conversion/replacement may be buggy
     */
    if (status == (size_t)-1) {
      switch (estatus) {
      case E2BIG: //we have to resize outbuf, should never happen
        fprintf(stderr,"%s: %s %s\n",_("Error"),
                _("insufficient memory on unicode decoding."),
                _("Please report as bug."));
        exit(EXIT_FAILURE);
        break;
      case EINVAL:
      case EILSEQ:
        fprintf(stderr,"%s: %s\n",_("Error with internal charset"),strerror(estatus));
        exit(EXIT_FAILURE);
        break;
      default:
        fprintf(stderr,"%s: %s\n",_("Error"),strerror(estatus));
        exit(EXIT_FAILURE);
        break;
      }
    }

    //we update the loop-external values, too
    inptr = inptr_conv;
    outptr = outptr_conv;
    outsize = outsize_conv;
  }
  iconv_close(cd);

  return retval;
}
182