xref: /freebsd/usr.bin/iconv/iconv.c (revision 81ad6265)
1 /* $FreeBSD$ */
2 /* $NetBSD: iconv.c,v 1.16 2009/02/20 15:28:21 yamt Exp $ */
3 
4 /*-
5  * SPDX-License-Identifier: BSD-2-Clause
6  *
7  * Copyright (c)2003 Citrus Project,
8  * All rights reserved.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
20  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
23  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29  * SUCH DAMAGE.
30  */
31 
32 #include <sys/cdefs.h>
33 #include <sys/capsicum.h>
34 
35 #include <capsicum_helpers.h>
36 #include <err.h>
37 #include <errno.h>
38 #include <getopt.h>
39 #include <iconv.h>
40 #include <limits.h>
41 #include <locale.h>
42 #include <stdbool.h>
43 #include <stdio.h>
44 #include <stdlib.h>
45 #include <string.h>
46 #include <unistd.h>
47 
48 static int		do_conv(FILE *, iconv_t, bool, bool);
49 static int		do_list(unsigned int, const char * const *, void *);
50 static void		usage(void) __dead2;
51 
/*
 * Long-option spellings accepted by getopt_long(); each entry maps onto the
 * short option letter given in .val.  The all-zero entry ends the table.
 */
static const struct option long_options[] = {
	{ .name = "from-code",	.has_arg = required_argument,	.flag = NULL,	.val = 'f' },
	{ .name = "list",	.has_arg = no_argument,		.flag = NULL,	.val = 'l' },
	{ .name = "silent",	.has_arg = no_argument,		.flag = NULL,	.val = 's' },
	{ .name = "to-code",	.has_arg = required_argument,	.flag = NULL,	.val = 't' },
	{ .name = NULL,		.has_arg = no_argument,		.flag = NULL,	.val = 0 }
};
59 
/*
 * Print the command synopsis to stderr and terminate the process with
 * exit status 1.  Does not return.
 */
static void
usage(void)
{
	const char *progname;

	progname = getprogname();

	/* Every %1$s refers back to the single program-name argument. */
	(void)fprintf(stderr,
	    "Usage:\t%1$s [-cs] -f <from_code> -t <to_code> [file ...]\n"
	    "\t%1$s -f <from_code> [-cs] [-t <to_code>] [file ...]\n"
	    "\t%1$s -t <to_code> [-cs] [-f <from_code>] [file ...]\n"
	    "\t%1$s -l\n", progname);
	exit(1);
}
70 
71 #define INBUFSIZE 1024
72 #define OUTBUFSIZE (INBUFSIZE * 2)
73 static int
74 do_conv(FILE *fp, iconv_t cd, bool silent, bool hide_invalid)
75 {
76 	char inbuf[INBUFSIZE], outbuf[OUTBUFSIZE], *in, *out;
77 	unsigned long long invalids;
78 	size_t inbytes, outbytes, ret;
79 
80 	/*
81 	 * Don't touch ICONV_SET_DISCARD_ILSEQ if -c wasn't specified.  It may
82 	 * be that the user has specified //IGNORE in the -t specification, and
83 	 * we don't want to clobber that.
84 	 */
85 	if (hide_invalid) {
86 		int arg = (int)hide_invalid;
87 		if (iconvctl(cd, ICONV_SET_DISCARD_ILSEQ, (void *)&arg) == -1)
88 			err(EXIT_FAILURE, "iconvctl(DISCARD_ILSEQ, %d)", arg);
89 	}
90 
91 	invalids = 0;
92 	while ((inbytes = fread(inbuf, 1, INBUFSIZE, fp)) > 0) {
93 		in = inbuf;
94 		while (inbytes > 0) {
95 			size_t inval;
96 
97 			out = outbuf;
98 			outbytes = OUTBUFSIZE;
99 			ret = __iconv(cd, &in, &inbytes, &out, &outbytes,
100 			    0, &inval);
101 			invalids += inval;
102 			if (outbytes < OUTBUFSIZE)
103 				(void)fwrite(outbuf, 1, OUTBUFSIZE - outbytes,
104 				    stdout);
105 			if (ret == (size_t)-1 && errno != E2BIG) {
106 				if (errno != EINVAL || in == inbuf)
107 					err(EXIT_FAILURE, "iconv()");
108 
109 				/* incomplete input character */
110 				(void)memmove(inbuf, in, inbytes);
111 				ret = fread(inbuf + inbytes, 1,
112 				    INBUFSIZE - inbytes, fp);
113 				if (ret == 0) {
114 					fflush(stdout);
115 					if (feof(fp))
116 						errx(EXIT_FAILURE,
117 						    "unexpected end of file; "
118 						    "the last character is "
119 						    "incomplete.");
120 					else
121 						err(EXIT_FAILURE, "fread()");
122 				}
123 				in = inbuf;
124 				inbytes += ret;
125 			}
126 		}
127 	}
128 	/* reset the shift state of the output buffer */
129 	outbytes = OUTBUFSIZE;
130 	out = outbuf;
131 	ret = iconv(cd, NULL, NULL, &out, &outbytes);
132 	if (ret == (size_t)-1)
133 		err(EXIT_FAILURE, "iconv()");
134 	if (outbytes < OUTBUFSIZE)
135 		(void)fwrite(outbuf, 1, OUTBUFSIZE - outbytes, stdout);
136 
137 	if (invalids > 0 && !silent)
138 		warnx("warning: invalid characters: %llu", invalids);
139 
140 	return (invalids > 0);
141 }
142 
/*
 * Callback handed to iconvlist(): print the n names in list on one line of
 * stdout, separated by single spaces and terminated by a newline.  The data
 * argument is not used.  Always returns 1.
 */
static int
do_list(unsigned int n, const char * const *list, void *data __unused)
{
	unsigned int idx;

	for (idx = 0; idx < n; idx++) {
		/* Separator goes before every name except the first. */
		if (idx > 0)
			(void)putchar(' ');
		(void)fputs(list[idx], stdout);
	}
	(void)putchar('\n');

	return (1);
}
157 
158 int
159 main(int argc, char **argv)
160 {
161 	iconv_t cd;
162 	FILE *fp;
163 	const char *opt_f, *opt_t;
164 	int ch, i, res;
165 	bool opt_c = false, opt_s = false;
166 
167 	opt_f = opt_t = "";
168 
169 	setlocale(LC_ALL, "");
170 	setprogname(argv[0]);
171 
172 	while ((ch = getopt_long(argc, argv, "csLlf:t:",
173 	    long_options, NULL)) != -1) {
174 		switch (ch) {
175 		case 'c':
176 			opt_c = true;
177 			break;
178 		case 's':
179 			opt_s = true;
180 			break;
181 		case 'l':
182 			/* list */
183 			if (opt_s || opt_c || strcmp(opt_f, "") != 0 ||
184 			    strcmp(opt_t, "") != 0) {
185 				warnx("-l is not allowed with other flags.");
186 				usage();
187 			}
188 			iconvlist(do_list, NULL);
189 			return (EXIT_SUCCESS);
190 		case 'f':
191 			/* from */
192 			if (optarg != NULL)
193 				opt_f = optarg;
194 			break;
195 		case 't':
196 			/* to */
197 			if (optarg != NULL)
198 				opt_t = optarg;
199 			break;
200 		default:
201 			usage();
202 		}
203 	}
204 	argc -= optind;
205 	argv += optind;
206 	if ((strcmp(opt_f, "") == 0) && (strcmp(opt_t, "") == 0))
207 		usage();
208 
209 	if (caph_limit_stdio() < 0)
210 		err(EXIT_FAILURE, "capsicum");
211 
212 	/*
213 	 * Cache NLS data, for strerror, for err(3), before entering capability
214 	 * mode.
215 	 */
216 	caph_cache_catpages();
217 
218 	/*
219 	 * Cache iconv conversion handle before entering sandbox.
220 	 */
221 	cd = iconv_open(opt_t, opt_f);
222 	if (cd == (iconv_t)-1)
223 		err(EXIT_FAILURE, "iconv_open(%s, %s)", opt_t, opt_f);
224 
225 	if (argc == 0) {
226 		if (caph_enter() < 0)
227 			err(EXIT_FAILURE, "unable to enter capability mode");
228 		res = do_conv(stdin, cd, opt_s, opt_c);
229 	} else {
230 		res = 0;
231 		for (i = 0; i < argc; i++) {
232 			fp = (strcmp(argv[i], "-") != 0) ?
233 			    fopen(argv[i], "r") : stdin;
234 			if (fp == NULL)
235 				err(EXIT_FAILURE, "Cannot open `%s'",
236 				    argv[i]);
237 			/* Enter Capsicum sandbox for final input file. */
238 			if (i + 1 == argc && caph_enter() < 0)
239 				err(EXIT_FAILURE,
240 				    "unable to enter capability mode");
241 			res |= do_conv(fp, cd, opt_s, opt_c);
242 			(void)fclose(fp);
243 
244 			/* Reset iconv descriptor state. */
245 			(void)iconv(cd, NULL, NULL, NULL, NULL);
246 		}
247 	}
248 	iconv_close(cd);
249 	return (res == 0 ? EXIT_SUCCESS : EXIT_FAILURE);
250 }
251