1 /*@ S-nail - a mail user agent derived from Berkeley Mail.
2  *@ Implementation of iconv.h.
3  *
4  * Copyright (c) 2000-2004 Gunnar Ritter, Freiburg i. Br., Germany.
5  * Copyright (c) 2012 - 2020 Steffen (Daode) Nurpmeso <steffen@sdaoden.eu>.
6  * SPDX-License-Identifier: BSD-3-Clause
7  */
8 /*
9  * Copyright (c) 1980, 1993
10  *      The Regents of the University of California.  All rights reserved.
11  *
12  * Redistribution and use in source and binary forms, with or without
13  * modification, are permitted provided that the following conditions
14  * are met:
15  * 1. Redistributions of source code must retain the above copyright
16  *    notice, this list of conditions and the following disclaimer.
17  * 2. Redistributions in binary form must reproduce the above copyright
18  *    notice, this list of conditions and the following disclaimer in the
19  *    documentation and/or other materials provided with the distribution.
20  * 3. Neither the name of the University nor the names of its contributors
21  *    may be used to endorse or promote products derived from this software
22  *    without specific prior written permission.
23  *
24  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34  * SUCH DAMAGE.
35  */
36 #undef su_FILE
37 #define su_FILE iconv
38 #define mx_SOURCE
39 
40 #ifndef mx_HAVE_AMALGAMATION
41 # include "mx/nail.h"
42 #endif
43 
44 #include <su/cs.h>
45 #include <su/mem.h>
46 #include <su/utf.h>
47 
48 #include "mx/iconv.h"
49 /* TODO fake */
50 #include "su/code-in.h"
51 
52 #ifdef mx_HAVE_ICONV
53 s32 n_iconv_err_no; /* TODO HACK: part of CTX to not get lost; n_iconv_buf() stores the error number it returns in here */
54 iconv_t iconvd; /* Global descriptor; n_iconv_close() resets it to (iconv_t)-1 when the descriptor being closed equals it */
55 #endif
56 
57 char *
n_iconv_normalize_name(char const * cset)58 n_iconv_normalize_name(char const *cset){
59    char *cp, c, *tcp, tc;
60    boole any;
61    NYD2_IN;
62 
63    /* We need to strip //SUFFIXes off, we want to normalize to all lowercase,
64     * and we perform some slight content testing, too */
65    for(any = FAL0, cp = n_UNCONST(cset); (c = *cp) != '\0'; ++cp){
66       if(!su_cs_is_alnum(c) && !su_cs_is_punct(c)){
67          n_err(_("Invalid character set name %s\n"),
68             n_shexp_quote_cp(cset, FAL0));
69          cset = NULL;
70          goto jleave;
71       }else if(c == '/')
72          break;
73       else if(su_cs_is_upper(c))
74          any = TRU1;
75    }
76 
77    if(any || c != '\0'){
78       cp = savestrbuf(cset, P2UZ(cp - cset));
79       for(tcp = cp; (tc = *tcp) != '\0'; ++tcp)
80          *tcp = su_cs_to_lower(tc);
81 
82       if(c != '\0' && (n_poption & n_PO_D_V))
83          n_err(_("Stripped off character set suffix: %s -> %s\n"),
84             n_shexp_quote_cp(cset, FAL0), n_shexp_quote_cp(cp, FAL0));
85 
86       cset = cp;
87    }
88 
89    /* And some names just cannot be used as such */
90    if((!su_cs_cmp_case(cset, "unknown-8bit") ||
91             !su_cs_cmp_case(cset, "binary")) &&
92          (cset = ok_vlook(charset_unknown_8bit)) == NIL)
93       cset = ok_vlook(CHARSET_8BIT_OKEY);
94 
95 jleave:
96    NYD2_OU;
97    return n_UNCONST(cset);
98 }
99 
100 boole
n_iconv_name_is_ascii(char const * cset)101 n_iconv_name_is_ascii(char const *cset){ /* TODO ctext/su */
102    /* In reversed MIME preference order */
103    static char const * const names[] = {"csASCII", "cp367", "IBM367", "us",
104          "ISO646-US", "ISO_646.irv:1991", "ANSI_X3.4-1986", "iso-ir-6",
105          "ANSI_X3.4-1968", "ASCII", "US-ASCII"};
106    boole rv;
107    char const * const *npp;
108    NYD2_IN;
109 
110    npp = &names[NELEM(names)];
111    do if((rv = !su_cs_cmp_case(cset, *--npp)))
112       break;
113    while((rv = (npp != &names[0])));
114    NYD2_OU;
115    return rv;
116 }
117 
118 #ifdef mx_HAVE_ICONV
119 iconv_t
n_iconv_open(char const * tocode,char const * fromcode)120 n_iconv_open(char const *tocode, char const *fromcode){
121    iconv_t id;
122    NYD_IN;
123 
124    tocode = n_iconv_normalize_name(tocode);
125    fromcode = n_iconv_normalize_name(fromcode);
126 
127    id = iconv_open(tocode, fromcode);
128 
129    /* If the encoding names are equal at this point, they are just not
130     * understood by iconv(), and we cannot sensibly use it in any way.  We do
131     * not perform this as an optimization above since iconv() can otherwise be
132     * used to check the validity of the input even with identical encoding
133     * names */
134    if (id == (iconv_t)-1 && !su_cs_cmp_case(tocode, fromcode))
135       su_err_set_no(su_ERR_NONE);
136    NYD_OU;
137    return id;
138 }
139 
140 void
n_iconv_close(iconv_t cd)141 n_iconv_close(iconv_t cd){
142    NYD_IN;
143    iconv_close(cd);
144    if(cd == iconvd)
145       iconvd = (iconv_t)-1;
146    NYD_OU;
147 }
148 
149 void
n_iconv_reset(iconv_t cd)150 n_iconv_reset(iconv_t cd){
151    NYD_IN;
152    iconv(cd, NULL, NULL, NULL, NULL);
153    NYD_OU;
154 }
155 
156 /* (2012-09-24: export and use it exclusively to isolate prototype problems
157  * (*inb* is 'char const **' except in POSIX) in a single place.
158  * GNU libiconv even allows for configuration time const/non-const..
159  * In the end it's an ugly guess, but we can't do better since make(1) doesn't
160  * support compiler invocations which bail on error, so no -Werror */
161 /* Citrus project? */
162 # if defined _ICONV_H_ && defined __ICONV_F_HIDE_INVALID
163   /* DragonFly 3.2.1 is special TODO newer DragonFly too, but different */
164 #  if su_OS_DRAGONFLY
165 #   define a_X(X) S(char** __restrict__,S(void*,UNCONST(char*,X)))
166 #  else
167 #   define a_X(X) S(char const**,S(void*,UNCONST(char*,X)))
168 #  endif
169 # elif su_OS_SUNOS || su_OS_SOLARIS
170 #  define a_X(X) S(char const** __restrict__,S(void*,UNCONST(char*,X)))
171 # endif
172 # ifndef a_X
173 #  define a_X(X)  S(char**,S(void*,UNCONST(char*,X)))
174 # endif
175 
176 int
n_iconv_buf(iconv_t cd,enum n_iconv_flags icf,char const ** inb,uz * inbleft,char ** outb,uz * outbleft)177 n_iconv_buf(iconv_t cd, enum n_iconv_flags icf,
178    char const **inb, uz *inbleft, char **outb, uz *outbleft){
179    int err;
180    NYD2_IN;
181 
182    if((icf & n_ICONV_UNIREPL) && !(n_psonce & n_PSO_UNICODE))
183       icf &= ~n_ICONV_UNIREPL;
184 
185    for(;;){
186       uz i;
187 
188       if((i = iconv(cd, a_X(inb), inbleft, outb, outbleft)) == 0)
189          break;
190       if(i != (uz)-1){
191          if(!(icf & n_ICONV_IGN_NOREVERSE)){
192             err = su_ERR_NOENT;
193             goto jleave;
194          }
195          break;
196       }
197 
198       if((err = su_err_no()) == su_ERR_2BIG)
199          goto jleave;
200 
201       if(!(icf & n_ICONV_IGN_ILSEQ) || err != su_ERR_ILSEQ)
202          goto jleave;
203       if(*inbleft > 0){
204          ++(*inb);
205          --(*inbleft);
206          if(icf & n_ICONV_UNIREPL){
207             if(*outbleft >= sizeof(su_utf8_replacer) -1){
208                su_mem_copy(*outb, su_utf8_replacer,
209                   sizeof(su_utf8_replacer) -1);
210                *outb += sizeof(su_utf8_replacer) -1;
211                *outbleft -= sizeof(su_utf8_replacer) -1;
212                continue;
213             }
214          }else if(*outbleft > 0){
215             *(*outb)++ = '?';
216             --*outbleft;
217             continue;
218          }
219          err = su_ERR_2BIG;
220          goto jleave;
221       }else if(*outbleft > 0){
222          **outb = '\0';
223          goto jleave;
224       }
225    }
226    err = 0;
227 jleave:
228    n_iconv_err_no = err;
229    NYD2_OU;
230    return err;
231 }
232 # undef a_X
233 
234 int
n_iconv_str(iconv_t cd,enum n_iconv_flags icf,struct str * out,struct str const * in,struct str * in_rest_or_null)235 n_iconv_str(iconv_t cd, enum n_iconv_flags icf,
236       struct str *out, struct str const *in, struct str *in_rest_or_null){
237    struct n_string s_b, *s;
238    char const *ib;
239    int err;
240    uz il;
241    NYD2_IN;
242 
243    il = in->l;
244    if(!n_string_get_can_book(il) || !n_string_get_can_book(out->l)){
245       err = su_ERR_INVAL;
246       goto j_leave;
247    }
248    ib = in->s;
249 
250    s = n_string_creat(&s_b);
251    s = n_string_take_ownership(s, out->s, out->l, 0);
252 
253    for(;;){
254       char *ob_base, *ob;
255       uz ol, nol;
256 
257       if((nol = ol = s->s_len) < il)
258          nol = il;
259       ASSERT(sizeof(s->s_len) == sizeof(u32));
260       if(nol < 128)
261          nol += 32;
262       else{
263          u64 xnol;
264 
265          xnol = (u64)(nol << 1) - (nol >> 4);
266          if(!n_string_can_book(s, xnol)){
267             xnol = ol + 64;
268             if(!n_string_can_book(s, xnol)){
269                err = su_ERR_INVAL;
270                goto jleave;
271             }
272          }
273          nol = (uz)xnol;
274       }
275       s = n_string_resize(s, nol);
276 
277       ob = ob_base = &s->s_dat[ol];
278       nol -= ol;
279       err = n_iconv_buf(cd, icf, &ib, &il, &ob, &nol);
280 
281       s = n_string_trunc(s, ol + P2UZ(ob - ob_base));
282       if(err == 0 || err != su_ERR_2BIG)
283          break;
284    }
285 
286    if(in_rest_or_null != NULL){
287       in_rest_or_null->s = n_UNCONST(ib);
288       in_rest_or_null->l = il;
289    }
290 
291 jleave:
292    out->s = n_string_cp(s);
293    out->l = s->s_len;
294    s = n_string_drop_ownership(s);
295    /* n_string_gut(s)*/
296 j_leave:
297    NYD2_OU;
298    return err;
299 }
300 
301 char *
n_iconv_onetime_cp(enum n_iconv_flags icf,char const * tocode,char const * fromcode,char const * input)302 n_iconv_onetime_cp(enum n_iconv_flags icf,
303       char const *tocode, char const *fromcode, char const *input){
304    struct str out, in;
305    iconv_t icd;
306    char *rv;
307    NYD2_IN;
308 
309    rv = NULL;
310    if(tocode == NULL)
311       tocode = ok_vlook(ttycharset);
312    if(fromcode == NULL)
313       fromcode = "utf-8";
314 
315    if((icd = iconv_open(tocode, fromcode)) == (iconv_t)-1)
316       goto jleave;
317 
318    in.l = su_cs_len(in.s = n_UNCONST(input)); /* logical */
319    out.s = NULL, out.l = 0;
320    if(!n_iconv_str(icd, icf, &out, &in, NULL))
321       rv = savestrbuf(out.s, out.l);
322    if(out.s != NULL)
323       n_free(out.s);
324 
325    iconv_close(icd);
326 jleave:
327    NYD2_OU;
328    return rv;
329 }
330 #endif /* mx_HAVE_ICONV */
331 
332 #include "su/code-ou.h"
333 /* s-it-mode */
334