1 /*@ S-nail - a mail user agent derived from Berkeley Mail.
2 *@ Implementation of iconv.h.
3 *
4 * Copyright (c) 2000-2004 Gunnar Ritter, Freiburg i. Br., Germany.
5 * Copyright (c) 2012 - 2020 Steffen (Daode) Nurpmeso <steffen@sdaoden.eu>.
6 * SPDX-License-Identifier: BSD-3-Clause
7 */
8 /*
9 * Copyright (c) 1980, 1993
10 * The Regents of the University of California. All rights reserved.
11 *
12 * Redistribution and use in source and binary forms, with or without
13 * modification, are permitted provided that the following conditions
14 * are met:
15 * 1. Redistributions of source code must retain the above copyright
16 * notice, this list of conditions and the following disclaimer.
17 * 2. Redistributions in binary form must reproduce the above copyright
18 * notice, this list of conditions and the following disclaimer in the
19 * documentation and/or other materials provided with the distribution.
20 * 3. Neither the name of the University nor the names of its contributors
21 * may be used to endorse or promote products derived from this software
22 * without specific prior written permission.
23 *
24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 * SUCH DAMAGE.
35 */
36 #undef su_FILE
37 #define su_FILE iconv
38 #define mx_SOURCE
39
40 #ifndef mx_HAVE_AMALGAMATION
41 # include "mx/nail.h"
42 #endif
43
44 #include <su/cs.h>
45 #include <su/mem.h>
46 #include <su/utf.h>
47
48 #include "mx/iconv.h"
49 /* TODO fake */
50 #include "su/code-in.h"
51
#ifdef mx_HAVE_ICONV
/* Result of the most recent n_iconv_buf() call: 0 or an su_ERR_* value */
s32 n_iconv_err_no; /* TODO HACK: part of CTX to not get lost */
/* Global conversion descriptor; n_iconv_close() resets it to (iconv_t)-1
 * when it is asked to close exactly this descriptor */
iconv_t iconvd;
#endif
56
57 char *
n_iconv_normalize_name(char const * cset)58 n_iconv_normalize_name(char const *cset){
59 char *cp, c, *tcp, tc;
60 boole any;
61 NYD2_IN;
62
63 /* We need to strip //SUFFIXes off, we want to normalize to all lowercase,
64 * and we perform some slight content testing, too */
65 for(any = FAL0, cp = n_UNCONST(cset); (c = *cp) != '\0'; ++cp){
66 if(!su_cs_is_alnum(c) && !su_cs_is_punct(c)){
67 n_err(_("Invalid character set name %s\n"),
68 n_shexp_quote_cp(cset, FAL0));
69 cset = NULL;
70 goto jleave;
71 }else if(c == '/')
72 break;
73 else if(su_cs_is_upper(c))
74 any = TRU1;
75 }
76
77 if(any || c != '\0'){
78 cp = savestrbuf(cset, P2UZ(cp - cset));
79 for(tcp = cp; (tc = *tcp) != '\0'; ++tcp)
80 *tcp = su_cs_to_lower(tc);
81
82 if(c != '\0' && (n_poption & n_PO_D_V))
83 n_err(_("Stripped off character set suffix: %s -> %s\n"),
84 n_shexp_quote_cp(cset, FAL0), n_shexp_quote_cp(cp, FAL0));
85
86 cset = cp;
87 }
88
89 /* And some names just cannot be used as such */
90 if((!su_cs_cmp_case(cset, "unknown-8bit") ||
91 !su_cs_cmp_case(cset, "binary")) &&
92 (cset = ok_vlook(charset_unknown_8bit)) == NIL)
93 cset = ok_vlook(CHARSET_8BIT_OKEY);
94
95 jleave:
96 NYD2_OU;
97 return n_UNCONST(cset);
98 }
99
100 boole
n_iconv_name_is_ascii(char const * cset)101 n_iconv_name_is_ascii(char const *cset){ /* TODO ctext/su */
102 /* In reversed MIME preference order */
103 static char const * const names[] = {"csASCII", "cp367", "IBM367", "us",
104 "ISO646-US", "ISO_646.irv:1991", "ANSI_X3.4-1986", "iso-ir-6",
105 "ANSI_X3.4-1968", "ASCII", "US-ASCII"};
106 boole rv;
107 char const * const *npp;
108 NYD2_IN;
109
110 npp = &names[NELEM(names)];
111 do if((rv = !su_cs_cmp_case(cset, *--npp)))
112 break;
113 while((rv = (npp != &names[0])));
114 NYD2_OU;
115 return rv;
116 }
117
118 #ifdef mx_HAVE_ICONV
119 iconv_t
n_iconv_open(char const * tocode,char const * fromcode)120 n_iconv_open(char const *tocode, char const *fromcode){
121 iconv_t id;
122 NYD_IN;
123
124 tocode = n_iconv_normalize_name(tocode);
125 fromcode = n_iconv_normalize_name(fromcode);
126
127 id = iconv_open(tocode, fromcode);
128
129 /* If the encoding names are equal at this point, they are just not
130 * understood by iconv(), and we cannot sensibly use it in any way. We do
131 * not perform this as an optimization above since iconv() can otherwise be
132 * used to check the validity of the input even with identical encoding
133 * names */
134 if (id == (iconv_t)-1 && !su_cs_cmp_case(tocode, fromcode))
135 su_err_set_no(su_ERR_NONE);
136 NYD_OU;
137 return id;
138 }
139
140 void
n_iconv_close(iconv_t cd)141 n_iconv_close(iconv_t cd){
142 NYD_IN;
143 iconv_close(cd);
144 if(cd == iconvd)
145 iconvd = (iconv_t)-1;
146 NYD_OU;
147 }
148
149 void
n_iconv_reset(iconv_t cd)150 n_iconv_reset(iconv_t cd){
151 NYD_IN;
152 iconv(cd, NULL, NULL, NULL, NULL);
153 NYD_OU;
154 }
155
/* a_X() casts the input buffer argument of iconv(3) to the type the platform
 * prototype is guessed to expect.
 * (2012-09-24: n_iconv_buf() is exported and used exclusively in order to
 * isolate prototype problems in a single place: *inb* is `char const **'
 * except in POSIX, and GNU libiconv even allows for configuration time
 * const/non-const.)
 * In the end it is an ugly guess, but we cannot do better since make(1) does
 * not support compiler invocations which bail on error, so no -Werror */
# if defined _ICONV_H_ && defined __ICONV_F_HIDE_INVALID && su_OS_DRAGONFLY
   /* Citrus project?  DragonFly 3.2.1 is special
    * TODO newer DragonFly too, but different */
#  define a_X(X) S(char** __restrict__,S(void*,UNCONST(char*,X)))
# elif defined _ICONV_H_ && defined __ICONV_F_HIDE_INVALID
   /* Citrus project? */
#  define a_X(X) S(char const**,S(void*,UNCONST(char*,X)))
# elif su_OS_SUNOS || su_OS_SOLARIS
#  define a_X(X) S(char const** __restrict__,S(void*,UNCONST(char*,X)))
# elif !defined a_X
   /* Anything else: assume the POSIX prototype */
#  define a_X(X) S(char**,S(void*,UNCONST(char*,X)))
# endif
175
176 int
n_iconv_buf(iconv_t cd,enum n_iconv_flags icf,char const ** inb,uz * inbleft,char ** outb,uz * outbleft)177 n_iconv_buf(iconv_t cd, enum n_iconv_flags icf,
178 char const **inb, uz *inbleft, char **outb, uz *outbleft){
179 int err;
180 NYD2_IN;
181
182 if((icf & n_ICONV_UNIREPL) && !(n_psonce & n_PSO_UNICODE))
183 icf &= ~n_ICONV_UNIREPL;
184
185 for(;;){
186 uz i;
187
188 if((i = iconv(cd, a_X(inb), inbleft, outb, outbleft)) == 0)
189 break;
190 if(i != (uz)-1){
191 if(!(icf & n_ICONV_IGN_NOREVERSE)){
192 err = su_ERR_NOENT;
193 goto jleave;
194 }
195 break;
196 }
197
198 if((err = su_err_no()) == su_ERR_2BIG)
199 goto jleave;
200
201 if(!(icf & n_ICONV_IGN_ILSEQ) || err != su_ERR_ILSEQ)
202 goto jleave;
203 if(*inbleft > 0){
204 ++(*inb);
205 --(*inbleft);
206 if(icf & n_ICONV_UNIREPL){
207 if(*outbleft >= sizeof(su_utf8_replacer) -1){
208 su_mem_copy(*outb, su_utf8_replacer,
209 sizeof(su_utf8_replacer) -1);
210 *outb += sizeof(su_utf8_replacer) -1;
211 *outbleft -= sizeof(su_utf8_replacer) -1;
212 continue;
213 }
214 }else if(*outbleft > 0){
215 *(*outb)++ = '?';
216 --*outbleft;
217 continue;
218 }
219 err = su_ERR_2BIG;
220 goto jleave;
221 }else if(*outbleft > 0){
222 **outb = '\0';
223 goto jleave;
224 }
225 }
226 err = 0;
227 jleave:
228 n_iconv_err_no = err;
229 NYD2_OU;
230 return err;
231 }
232 # undef a_X
233
234 int
n_iconv_str(iconv_t cd,enum n_iconv_flags icf,struct str * out,struct str const * in,struct str * in_rest_or_null)235 n_iconv_str(iconv_t cd, enum n_iconv_flags icf,
236 struct str *out, struct str const *in, struct str *in_rest_or_null){
237 struct n_string s_b, *s;
238 char const *ib;
239 int err;
240 uz il;
241 NYD2_IN;
242
243 il = in->l;
244 if(!n_string_get_can_book(il) || !n_string_get_can_book(out->l)){
245 err = su_ERR_INVAL;
246 goto j_leave;
247 }
248 ib = in->s;
249
250 s = n_string_creat(&s_b);
251 s = n_string_take_ownership(s, out->s, out->l, 0);
252
253 for(;;){
254 char *ob_base, *ob;
255 uz ol, nol;
256
257 if((nol = ol = s->s_len) < il)
258 nol = il;
259 ASSERT(sizeof(s->s_len) == sizeof(u32));
260 if(nol < 128)
261 nol += 32;
262 else{
263 u64 xnol;
264
265 xnol = (u64)(nol << 1) - (nol >> 4);
266 if(!n_string_can_book(s, xnol)){
267 xnol = ol + 64;
268 if(!n_string_can_book(s, xnol)){
269 err = su_ERR_INVAL;
270 goto jleave;
271 }
272 }
273 nol = (uz)xnol;
274 }
275 s = n_string_resize(s, nol);
276
277 ob = ob_base = &s->s_dat[ol];
278 nol -= ol;
279 err = n_iconv_buf(cd, icf, &ib, &il, &ob, &nol);
280
281 s = n_string_trunc(s, ol + P2UZ(ob - ob_base));
282 if(err == 0 || err != su_ERR_2BIG)
283 break;
284 }
285
286 if(in_rest_or_null != NULL){
287 in_rest_or_null->s = n_UNCONST(ib);
288 in_rest_or_null->l = il;
289 }
290
291 jleave:
292 out->s = n_string_cp(s);
293 out->l = s->s_len;
294 s = n_string_drop_ownership(s);
295 /* n_string_gut(s)*/
296 j_leave:
297 NYD2_OU;
298 return err;
299 }
300
301 char *
n_iconv_onetime_cp(enum n_iconv_flags icf,char const * tocode,char const * fromcode,char const * input)302 n_iconv_onetime_cp(enum n_iconv_flags icf,
303 char const *tocode, char const *fromcode, char const *input){
304 struct str out, in;
305 iconv_t icd;
306 char *rv;
307 NYD2_IN;
308
309 rv = NULL;
310 if(tocode == NULL)
311 tocode = ok_vlook(ttycharset);
312 if(fromcode == NULL)
313 fromcode = "utf-8";
314
315 if((icd = iconv_open(tocode, fromcode)) == (iconv_t)-1)
316 goto jleave;
317
318 in.l = su_cs_len(in.s = n_UNCONST(input)); /* logical */
319 out.s = NULL, out.l = 0;
320 if(!n_iconv_str(icd, icf, &out, &in, NULL))
321 rv = savestrbuf(out.s, out.l);
322 if(out.s != NULL)
323 n_free(out.s);
324
325 iconv_close(icd);
326 jleave:
327 NYD2_OU;
328 return rv;
329 }
330 #endif /* mx_HAVE_ICONV */
331
332 #include "su/code-ou.h"
333 /* s-it-mode */
334