1 /*	$NetBSD: idnconv.c,v 1.4 2014/12/10 04:37:56 christos Exp $	*/
2 
/* RCS revision identifier of the upstream file; compiled out when `lint' is defined. */
#if !defined(lint)
static char *rcsid = "Id: idnconv.c,v 1.1 2003/06/04 00:27:07 marka Exp ";
#endif /* !lint */
6 
7 /*
8  * Copyright (c) 2000,2001,2002 Japan Network Information Center.
9  * All rights reserved.
10  *
11  * By using this file, you agree to the terms and conditions set forth bellow.
12  *
13  * 			LICENSE TERMS AND CONDITIONS
14  *
15  * The following License Terms and Conditions apply, unless a different
16  * license is obtained from Japan Network Information Center ("JPNIC"),
17  * a Japanese association, Kokusai-Kougyou-Kanda Bldg 6F, 2-3-4 Uchi-Kanda,
18  * Chiyoda-ku, Tokyo 101-0047, Japan.
19  *
20  * 1. Use, Modification and Redistribution (including distribution of any
21  *    modified or derived work) in source and/or binary forms is permitted
22  *    under this License Terms and Conditions.
23  *
24  * 2. Redistribution of source code must retain the copyright notices as they
25  *    appear in each source code file, this License Terms and Conditions.
26  *
27  * 3. Redistribution in binary form must reproduce the Copyright Notice,
28  *    this License Terms and Conditions, in the documentation and/or other
29  *    materials provided with the distribution.  For the purposes of binary
30  *    distribution the "Copyright Notice" refers to the following language:
31  *    "Copyright (c) 2000-2002 Japan Network Information Center.  All rights reserved."
32  *
33  * 4. The name of JPNIC may not be used to endorse or promote products
34  *    derived from this Software without specific prior written approval of
35  *    JPNIC.
36  *
37  * 5. Disclaimer/Limitation of Liability: THIS SOFTWARE IS PROVIDED BY JPNIC
38  *    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
39  *    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
40  *    PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL JPNIC BE LIABLE
41  *    FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
42  *    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
43  *    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
44  *    BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
45  *    WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
46  *    OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
47  *    ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
48  */
49 
50 /*
51  * idnconv -- Codeset converter for named.conf and zone files
52  */
53 
54 #include <config.h>
55 
56 #include <stdio.h>
57 #include <stddef.h>
58 #include <stdlib.h>
59 #include <string.h>
60 #include <errno.h>
61 #ifdef HAVE_LOCALE_H
62 #include <locale.h>
63 #endif
64 
65 #include <idn/result.h>
66 #include <idn/converter.h>
67 #include <idn/normalizer.h>
68 #include <idn/utf8.h>
69 #include <idn/resconf.h>
70 #include <idn/res.h>
71 #include <idn/util.h>
72 #include <idn/version.h>
73 
74 #include "util.h"
75 
/*
 * Upper bounds on the number of list-valued settings.  In this file only
 * MAX_DELIMITER and MAX_LOCALMAPPER are referenced (they size the arrays
 * filled by the -delimiter and -localmap options in main()).
 */
#define MAX_DELIMITER		10
#define MAX_LOCALMAPPER		10
#define MAX_MAPPER		10
#define MAX_NORMALIZER		10
#define MAX_CHEKER		10

/*
 * Bits of the `flags' word that is built by main() and handed to
 * encode_file()/decode_file().  Each bit occupies its own position.
 */
#define FLAG_REVERSE		(1 << 0)	/* 0x0001 */
#define FLAG_DELIMMAP		(1 << 1)	/* 0x0002 */
#define FLAG_LOCALMAP		(1 << 2)	/* 0x0004 */
#define FLAG_MAP		(1 << 3)	/* 0x0008 */
#define FLAG_NORMALIZE		(1 << 4)	/* 0x0010 */
#define FLAG_PROHIBITCHECK	(1 << 5)	/* 0x0020 */
#define FLAG_UNASSIGNCHECK	(1 << 6)	/* 0x0040 */
#define FLAG_BIDICHECK		(1 << 7)	/* 0x0080 */
#define FLAG_ASCIICHECK		(1 << 8)	/* 0x0100 */
#define FLAG_LENGTHCHECK	(1 << 9)	/* 0x0200 */
#define FLAG_ROUNDTRIPCHECK	(1 << 10)	/* 0x0400 */
#define FLAG_SELECTIVE		(1 << 11)	/* 0x0800 */

/* The group of steps switched off together by -nonameprep. */
#define FLAG_NAMEPREP \
	(FLAG_MAP | FLAG_NORMALIZE | FLAG_PROHIBITCHECK | \
	 FLAG_UNASSIGNCHECK | FLAG_BIDICHECK)

/* Initial value of `flags' before any command line option is applied. */
#define DEFAULT_FLAGS \
	(FLAG_DELIMMAP | FLAG_LOCALMAP | FLAG_NAMEPREP | FLAG_ASCIICHECK | \
	 FLAG_LENGTHCHECK | FLAG_ROUNDTRIPCHECK | FLAG_SELECTIVE)
102 
103 int		line_number;		/* current input file line number */
104 static int	flush_every_line = 0;	/* pretty obvious */
105 
106 static int		encode_file(idn_resconf_t conf1, idn_resconf_t conf2,
107 				    FILE *fp, int flags);
108 static int		decode_file(idn_resconf_t conf1, idn_resconf_t conf2,
109 				    FILE *fp, int flags);
110 static int		trim_newline(idnconv_strbuf_t *buf);
111 static idn_result_t	convert_line(idnconv_strbuf_t *from,
112 				     idnconv_strbuf_t *to,
113 				     idn_resconf_t conf,
114 				     idn_action_t actions, int flags);
115 static void		print_usage(char *cmd);
116 static void		print_version(void);
117 static unsigned long	get_ucs(const char *p);
118 
119 int
main(int ac,char ** av)120 main(int ac, char **av) {
121 	char *cmd = *av;
122 	char *cname;
123 	unsigned long delimiters[MAX_DELIMITER];
124 	char *localmappers[MAX_LOCALMAPPER];
125 	char *nameprep_version = NULL;
126 	int ndelimiters = 0;
127 	int nlocalmappers = 0;
128 	char *in_code = NULL;
129 	char *out_code = NULL;
130 	char *resconf_file = NULL;
131 	int no_resconf = 0;
132 	char *encoding_alias = NULL;
133 	int flags = DEFAULT_FLAGS;
134 	FILE *fp;
135 	idn_result_t r;
136 	idn_resconf_t resconf1, resconf2;
137 	idn_converter_t conv;
138 	int exit_value;
139 
140 #ifdef HAVE_SETLOCALE
141 	(void)setlocale(LC_ALL, "");
142 #endif
143 
144 	/*
145 	 * If the command name begins with 'r', reverse mode is assumed.
146 	 */
147 	if ((cname = strrchr(cmd, '/')) != NULL)
148 		cname++;
149 	else
150 		cname = cmd;
151 	if (cname[0] == 'r')
152 		flags |= FLAG_REVERSE;
153 
154 	ac--;
155 	av++;
156 	while (ac > 0 && **av == '-') {
157 
158 #define OPT_MATCH(opt) (strcmp(*av, opt) == 0)
159 #define MUST_HAVE_ARG if (ac < 2) print_usage(cmd)
160 #define APPEND_LIST(array, size, item, what) \
161 	if (size >= (sizeof(array) / sizeof(array[0]))) { \
162 		errormsg("too many " what "\n"); \
163 		exit(1); \
164 	} \
165 	array[size++] = item; \
166 	ac--; av++
167 
168 		if (OPT_MATCH("-in") || OPT_MATCH("-i")) {
169 			MUST_HAVE_ARG;
170 			in_code = av[1];
171 			ac--;
172 			av++;
173 		} else if (OPT_MATCH("-out") || OPT_MATCH("-o")) {
174 			MUST_HAVE_ARG;
175 			out_code = av[1];
176 			ac--;
177 			av++;
178 		} else if (OPT_MATCH("-conf") || OPT_MATCH("-c")) {
179 			MUST_HAVE_ARG;
180 			resconf_file = av[1];
181 			ac--;
182 			av++;
183 		} else if (OPT_MATCH("-nameprep") || OPT_MATCH("-n")) {
184 			MUST_HAVE_ARG;
185 			nameprep_version = av[1];
186 			ac--;
187 			av++;
188 		} else if (OPT_MATCH("-noconf") || OPT_MATCH("-C")) {
189 			no_resconf = 1;
190 		} else if (OPT_MATCH("-reverse") || OPT_MATCH("-r")) {
191 			flags |= FLAG_REVERSE;
192 		} else if (OPT_MATCH("-nolocalmap") || OPT_MATCH("-L")) {
193 			flags &= ~FLAG_LOCALMAP;
194 		} else if (OPT_MATCH("-nonameprep") || OPT_MATCH("-N")) {
195 			flags &= ~FLAG_NAMEPREP;
196 		} else if (OPT_MATCH("-unassigncheck") || OPT_MATCH("-u")) {
197 			flags |= FLAG_UNASSIGNCHECK;
198 		} else if (OPT_MATCH("-nounassigncheck") || OPT_MATCH("-U")) {
199 			flags &= ~FLAG_UNASSIGNCHECK;
200 		} else if (OPT_MATCH("-nobidicheck") || OPT_MATCH("-B")) {
201 			flags &= ~FLAG_BIDICHECK;
202 		} else if (OPT_MATCH("-noasciicheck") || OPT_MATCH("-A")) {
203 			flags &= ~FLAG_ASCIICHECK;
204 		} else if (OPT_MATCH("-nolengthcheck")) {
205 			flags &= ~FLAG_LENGTHCHECK;
206 		} else if (OPT_MATCH("-noroundtripcheck")) {
207 			flags &= ~FLAG_ROUNDTRIPCHECK;
208 		} else if (OPT_MATCH("-whole") || OPT_MATCH("-w")) {
209 			flags &= ~FLAG_SELECTIVE;
210 		} else if (OPT_MATCH("-localmap")) {
211 			MUST_HAVE_ARG;
212 			APPEND_LIST(localmappers, nlocalmappers, av[1],
213 				    "local maps");
214 		} else if (OPT_MATCH("-delimiter")) {
215 			unsigned long v;
216 			MUST_HAVE_ARG;
217 			v = get_ucs(av[1]);
218 			APPEND_LIST(delimiters, ndelimiters, v,
219 				    "delimiter maps");
220 		} else if (OPT_MATCH("-alias") || OPT_MATCH("-a")) {
221 			MUST_HAVE_ARG;
222 			encoding_alias = av[1];
223 			ac--;
224 			av++;
225 		} else if (OPT_MATCH("-flush")) {
226 			flush_every_line = 1;
227 		} else if (OPT_MATCH("-version") || OPT_MATCH("-v")) {
228 			print_version();
229 		} else {
230 			print_usage(cmd);
231 		}
232 #undef OPT_MATCH
233 #undef MUST_HAVE_ARG
234 #undef APPEND_LIST
235 
236 		ac--;
237 		av++;
238 	}
239 
240 	if (ac > 1)
241 		print_usage(cmd);
242 
243 	/* Initialize. */
244 	if ((r = idn_resconf_initialize()) != idn_success) {
245 		errormsg("error initializing library\n");
246 		return (1);
247 	}
248 
249 	/*
250 	 * Create resource contexts.
251 	 * `resconf1' and `resconf2' are almost the same but local and
252 	 * IDN encodings are reversed.
253 	 */
254 	resconf1 = NULL;
255 	resconf2 = NULL;
256 	if (idn_resconf_create(&resconf1) != idn_success ||
257 	    idn_resconf_create(&resconf2) != idn_success) {
258 		errormsg("error initializing configuration contexts\n");
259 		return (1);
260 	}
261 
262 	/* Load configuration file. */
263 	if (no_resconf) {
264 		set_defaults(resconf1);
265 		set_defaults(resconf2);
266 	} else {
267 		load_conf_file(resconf1, resconf_file);
268 		load_conf_file(resconf2, resconf_file);
269 	}
270 
271 	/* Set encoding alias file. */
272 	if (encoding_alias != NULL)
273 		set_encoding_alias(encoding_alias);
274 
275 	/* Set input codeset. */
276 	if (flags & FLAG_REVERSE) {
277 		if (in_code == NULL) {
278 			conv = idn_resconf_getidnconverter(resconf1);
279 			if (conv == NULL) {
280 				errormsg("cannot get the IDN encoding.\n"
281 					 "please specify an appropriate one "
282 			 		 "with `-in' option.\n");
283 				exit(1);
284 			}
285 			idn_resconf_setlocalconverter(resconf2, conv);
286 			idn_converter_destroy(conv);
287 		} else {
288 			set_idncode(resconf1, in_code);
289 			set_localcode(resconf2, in_code);
290 		}
291 	} else {
292 		if (in_code == NULL) {
293 			conv = idn_resconf_getlocalconverter(resconf1);
294 			if (conv == NULL) {
295 				errormsg("cannot get the local encoding.\n"
296 					 "please specify an appropriate one "
297 			 		 "with `-in' option.\n");
298 				exit(1);
299 			}
300 			idn_resconf_setidnconverter(resconf2, conv);
301 			idn_converter_destroy(conv);
302 		} else {
303 			set_localcode(resconf1, in_code);
304 			set_idncode(resconf2, in_code);
305 		}
306 	}
307 
308 	/* Set output codeset. */
309 	if (flags & FLAG_REVERSE) {
310 		if (out_code == NULL) {
311 			conv = idn_resconf_getlocalconverter(resconf1);
312 			if (conv == NULL) {
313 				errormsg("cannot get the local encoding.\n"
314 					 "please specify an appropriate one "
315 			 		 "with `-out' option.\n");
316 				exit(1);
317 			}
318 			idn_resconf_setidnconverter(resconf2, conv);
319 			idn_converter_destroy(conv);
320 		} else {
321 			set_localcode(resconf1, out_code);
322 			set_idncode(resconf2, out_code);
323 		}
324 	} else {
325 		if (out_code == NULL) {
326 			conv = idn_resconf_getidnconverter(resconf1);
327 			if (conv == NULL) {
328 				errormsg("cannot get the IDN encoding.\n"
329 					 "please specify an appropriate one "
330 			 		 "with `-out' option.\n");
331 				exit(1);
332 			}
333 			idn_resconf_setlocalconverter(resconf2, conv);
334 			idn_converter_destroy(conv);
335 		} else {
336 			set_idncode(resconf1, out_code);
337 			set_localcode(resconf2, out_code);
338 		}
339 	}
340 
341 	/* Set delimiter map(s). */
342 	if (ndelimiters > 0) {
343 		set_delimitermapper(resconf1, delimiters, ndelimiters);
344 		set_delimitermapper(resconf2, delimiters, ndelimiters);
345 	}
346 
347 	/* Set local map(s). */
348 	if (nlocalmappers > 0) {
349 		set_localmapper(resconf1, localmappers, nlocalmappers);
350 		set_localmapper(resconf2, localmappers, nlocalmappers);
351 	}
352 
353 	/* Set NAMEPREP version. */
354 	if (nameprep_version != NULL) {
355 		set_nameprep(resconf1, nameprep_version);
356 		set_nameprep(resconf2, nameprep_version);
357 	}
358 
359 	idn_res_enable(1);
360 
361 	/* Open input file. */
362 	if (ac > 0) {
363 		if ((fp = fopen(av[0], "r")) == NULL) {
364 			errormsg("cannot open file %s: %s\n",
365 				 av[0], strerror(errno));
366 			return (1);
367 		}
368 	} else {
369 		fp = stdin;
370 	}
371 
372 	/* Do the conversion. */
373 	if (flags & FLAG_REVERSE)
374 		exit_value = decode_file(resconf1, resconf2, fp, flags);
375 	else
376 		exit_value = encode_file(resconf1, resconf2, fp, flags);
377 
378 	idn_resconf_destroy(resconf1);
379 	idn_resconf_destroy(resconf2);
380 
381 	return exit_value;
382 }
383 
384 static int
encode_file(idn_resconf_t conf1,idn_resconf_t conf2,FILE * fp,int flags)385 encode_file(idn_resconf_t conf1, idn_resconf_t conf2, FILE *fp, int flags) {
386 	idn_result_t r;
387 	idnconv_strbuf_t buf1, buf2;
388 	idn_action_t actions1, actions2;
389 	int nl_trimmed;
390 	int local_ace_hack;
391 	idn_converter_t conv;
392 
393 	/*
394 	 * See if the input codeset is an ACE.
395 	 */
396 	conv = idn_resconf_getlocalconverter(conf1);
397 	if (conv != NULL && idn_converter_isasciicompatible(conv) &&
398 	    (flags & FLAG_SELECTIVE))
399 		local_ace_hack = 1;
400 	else
401 		local_ace_hack = 0;
402 	if (conv != NULL)
403 		idn_converter_destroy(conv);
404 
405 	if (local_ace_hack) {
406 		actions1 = IDN_IDNCONV;
407 		if (flags & FLAG_ROUNDTRIPCHECK)
408 			actions1 |= IDN_RTCHECK;
409 	} else {
410 		actions1 = IDN_LOCALCONV;
411 	}
412 
413 	actions2 = IDN_IDNCONV;
414 	if (flags & FLAG_DELIMMAP)
415 		actions2 |= IDN_DELIMMAP;
416 	if (flags & FLAG_LOCALMAP)
417 		actions2 |= IDN_LOCALMAP;
418 	if (flags & FLAG_MAP)
419 		actions2 |= IDN_MAP;
420 	if (flags & FLAG_NORMALIZE)
421 		actions2 |= IDN_NORMALIZE;
422 	if (flags & FLAG_PROHIBITCHECK)
423 		actions2 |= IDN_PROHCHECK;
424 	if (flags & FLAG_UNASSIGNCHECK)
425 		actions2 |= IDN_UNASCHECK;
426 	if (flags & FLAG_BIDICHECK)
427 		actions2 |= IDN_BIDICHECK;
428 	if (flags & FLAG_ASCIICHECK)
429 		actions2 |= IDN_ASCCHECK;
430 	if (flags & FLAG_LENGTHCHECK)
431 		actions2 |= IDN_LENCHECK;
432 
433 	strbuf_init(&buf1);
434 	strbuf_init(&buf2);
435 	line_number = 1;
436 	while (strbuf_getline(&buf1, fp) != NULL) {
437 		/*
438 		 * Trim newline at the end.  This is needed for
439 		 * those ascii-comatible encodings such as UTF-5 or RACE
440 		 * not to try converting newlines, which will result
441 		 * in `invalid encoding' error.
442 		 */
443 		nl_trimmed = trim_newline(&buf1);
444 
445 		/*
446 		 * Convert input line to UTF-8.
447 		 */
448 		if (local_ace_hack)
449 			r = convert_line(&buf1, &buf2, conf2, actions1,
450 					 FLAG_REVERSE|FLAG_SELECTIVE);
451 		else
452 			r = convert_line(&buf1, &buf2, conf1, actions1,
453 					 0);
454 
455 		if (r != idn_success) {
456 			errormsg("conversion failed at line %d: %s\n",
457 				 line_number,
458 				 idn_result_tostring(r));
459 			goto error;
460 		}
461 		if (!idn_utf8_isvalidstring(strbuf_get(&buf2))) {
462 			errormsg("conversion to utf-8 failed at line %d\n",
463 				 line_number);
464 			goto error;
465 		}
466 
467 		/*
468 		 * Perform local mapping and NAMEPREP, and convert to
469 		 * the output codeset.
470 		 */
471 		r = convert_line(&buf2, &buf1, conf1, actions2,
472 				 flags & FLAG_SELECTIVE);
473 
474 		if (r != idn_success) {
475 			errormsg("error in nameprep or output conversion "
476 				 "at line %d: %s\n",
477 				 line_number, idn_result_tostring(r));
478 			goto error;
479 		}
480 
481 		fputs(strbuf_get(&buf1), stdout);
482 		if (nl_trimmed)
483 			putc('\n', stdout);
484 
485 		if (flush_every_line)
486 			fflush(stdout);
487 
488 		line_number++;
489 	}
490 
491 	strbuf_reset(&buf1);
492 	strbuf_reset(&buf2);
493 	return (0);
494 
495  error:
496 	strbuf_reset(&buf1);
497 	strbuf_reset(&buf2);
498 	return (1);
499 }
500 
501 static int
decode_file(idn_resconf_t conf1,idn_resconf_t conf2,FILE * fp,int flags)502 decode_file(idn_resconf_t conf1, idn_resconf_t conf2, FILE *fp, int flags) {
503 	idn_result_t r;
504 	idnconv_strbuf_t buf1, buf2;
505 	idn_action_t actions1, actions2;
506 	int nl_trimmed;
507 	int local_ace_hack, idn_ace_hack;
508 	idn_converter_t conv;
509 
510 	/*
511 	 * See if the input codeset is an ACE.
512 	 */
513 	conv = idn_resconf_getidnconverter(conf1);
514 	if (conv != NULL && idn_converter_isasciicompatible(conv) &&
515 	    (flags & FLAG_SELECTIVE))
516 		idn_ace_hack = 1;
517 	else
518 		idn_ace_hack = 0;
519 	if (conv != NULL)
520 		idn_converter_destroy(conv);
521 
522 	conv = idn_resconf_getlocalconverter(conf1);
523 	if (conv != NULL && idn_converter_isasciicompatible(conv) &&
524 	    (flags & FLAG_SELECTIVE))
525 		local_ace_hack = 1;
526 	else
527 		local_ace_hack = 0;
528 	if (conv != NULL)
529 		idn_converter_destroy(conv);
530 
531 	actions1 = IDN_IDNCONV;
532 
533 	if (local_ace_hack) {
534 		actions2 = IDN_IDNCONV;
535 		if (flags & FLAG_MAP)
536 			actions2 |= IDN_MAP;
537 		if (flags & FLAG_NORMALIZE)
538 			actions2 |= IDN_NORMALIZE;
539 		if (flags & FLAG_PROHIBITCHECK)
540 			actions2 |= IDN_PROHCHECK;
541 		if (flags & FLAG_UNASSIGNCHECK)
542 			actions2 |= IDN_UNASCHECK;
543 		if (flags & FLAG_BIDICHECK)
544 			actions2 |= IDN_BIDICHECK;
545 		if (flags & FLAG_ASCIICHECK)
546 			actions2 |= IDN_ASCCHECK;
547 		if (flags & FLAG_LENGTHCHECK)
548 			actions2 |= IDN_LENCHECK;
549 	} else {
550 		actions2 = IDN_LOCALCONV;
551 	}
552 
553 	if (flags & FLAG_DELIMMAP)
554 		actions1 |= IDN_DELIMMAP;
555 	if (flags & FLAG_MAP)
556 		actions1 |= IDN_MAP;
557 	if (flags & FLAG_NORMALIZE)
558 		actions1 |= IDN_NORMALIZE;
559 	if (flags & FLAG_NORMALIZE)
560 		actions1 |= IDN_NORMALIZE;
561 	if (flags & FLAG_PROHIBITCHECK)
562 		actions1 |= IDN_PROHCHECK;
563 	if (flags & FLAG_UNASSIGNCHECK)
564 		actions1 |= IDN_UNASCHECK;
565 	if (flags & FLAG_BIDICHECK)
566 		actions1 |= IDN_BIDICHECK;
567 	if (flags & FLAG_ASCIICHECK)
568 		actions1 |= IDN_ASCCHECK;
569 	if (flags & FLAG_ROUNDTRIPCHECK)
570 		actions1 |= IDN_RTCHECK;
571 
572 	strbuf_init(&buf1);
573 	strbuf_init(&buf2);
574 	line_number = 1;
575 	while (strbuf_getline(&buf1, fp) != NULL) {
576 		/*
577 		 * Trim newline at the end.  This is needed for
578 		 * those ascii-comatible encodings such as UTF-5 or RACE
579 		 * not to try converting newlines, which will result
580 		 * in `invalid encoding' error.
581 		 */
582 		nl_trimmed = trim_newline(&buf1);
583 
584 		/*
585 		 * Treat input line as the string encoded in local
586 		 * encoding and convert it to UTF-8 encoded string.
587 		 */
588 		if (local_ace_hack) {
589 			if (strbuf_copy(&buf2, strbuf_get(&buf1)) == NULL)
590 				r = idn_nomemory;
591 			else
592 				r = idn_success;
593 		} else {
594 			r = convert_line(&buf1, &buf2, conf1, IDN_LOCALCONV,
595 					 0);
596 		}
597 		if (r != idn_success) {
598 			errormsg("conversion failed at line %d: %s\n",
599 				 line_number, idn_result_tostring(r));
600 			goto error;
601 		}
602 
603 		/*
604 		 * Convert internationalized domain names in the line.
605 		 */
606 		if (idn_ace_hack) {
607 			r = convert_line(&buf2, &buf1, conf1, actions1,
608 					 FLAG_REVERSE|FLAG_SELECTIVE);
609 		} else {
610 			r = convert_line(&buf2, &buf1, conf1, actions1,
611 					 FLAG_REVERSE);
612 		}
613 		if (r != idn_success) {
614 			errormsg("conversion failed at line %d: %s\n",
615 				 line_number,
616 				 idn_result_tostring(r));
617 			goto error;
618 		}
619 		if (!idn_utf8_isvalidstring(strbuf_get(&buf1))) {
620 			errormsg("conversion to utf-8 failed at line %d\n",
621 				 line_number);
622 			goto error;
623 		}
624 
625 		/*
626 		 * Perform round trip check and convert to the output
627 		 * codeset.
628 		 */
629 		if (local_ace_hack) {
630 			r = convert_line(&buf1, &buf2, conf2, actions2,
631 					 FLAG_SELECTIVE);
632 		} else {
633 			r = convert_line(&buf1, &buf2, conf1, actions2,
634 					 FLAG_REVERSE);
635 		}
636 
637 		if (r != idn_success) {
638 			errormsg("error in nameprep or output conversion "
639 				 "at line %d: %s\n",
640 				 line_number, idn_result_tostring(r));
641 			goto error;
642 		}
643 
644 		fputs(strbuf_get(&buf2), stdout);
645 		if (nl_trimmed)
646 			putc('\n', stdout);
647 
648 		if (flush_every_line)
649 			fflush(stdout);
650 
651 		line_number++;
652 	}
653 	strbuf_reset(&buf1);
654 	strbuf_reset(&buf2);
655 	return (0);
656 
657  error:
658 	strbuf_reset(&buf1);
659 	strbuf_reset(&buf2);
660 	return (1);
661 }
662 
663 static int
trim_newline(idnconv_strbuf_t * buf)664 trim_newline(idnconv_strbuf_t *buf) {
665 	/*
666 	 * If the string in BUF ends with a newline, trim it and
667 	 * return 1.  Otherwise, just return 0 without modifying BUF.
668 	 */
669 	char *s = strbuf_get(buf);
670 	size_t len = strlen(s);
671 
672 	if (s[len - 1] == '\n') {
673 		s[len - 1] = '\0';
674 		return (1);
675 	}
676 
677 	return (0);
678 }
679 
680 static idn_result_t
convert_line(idnconv_strbuf_t * from,idnconv_strbuf_t * to,idn_resconf_t conf,idn_action_t actions,int flags)681 convert_line(idnconv_strbuf_t *from, idnconv_strbuf_t *to,
682 	     idn_resconf_t conf, idn_action_t actions, int flags)
683 {
684 	idn_result_t r = idn_success;
685 	char *from_str = strbuf_get(from);
686 
687 	for (;;) {
688 		char *to_str = strbuf_get(to);
689 		size_t to_size = strbuf_size(to);
690 
691 		switch (flags & (FLAG_REVERSE|FLAG_SELECTIVE)) {
692 		case 0:
693 			r = idn_res_encodename(conf, actions, from_str,
694 					       to_str, to_size);
695 			break;
696 		case FLAG_REVERSE:
697 			r = idn_res_decodename(conf, actions, from_str,
698 					       to_str, to_size);
699 			break;
700 		case FLAG_SELECTIVE:
701 			r = selective_encode(conf, actions, from_str,
702 					     to_str, to_size);
703 			break;
704 		case FLAG_REVERSE|FLAG_SELECTIVE:
705 			r = selective_decode(conf, actions, from_str,
706 					     to_str, to_size);
707 			break;
708 		}
709 		if (r == idn_buffer_overflow) {
710 			/*
711 			 * Conversion is not successful because
712 			 * the size of the target buffer is not enough.
713 			 * Double the size and retry.
714 			 */
715 			if (strbuf_double(to) == NULL) {
716 				/* oops. allocation failed. */
717 				return (idn_nomemory);
718 			}
719 		} else {
720 			break;
721 		}
722 	}
723 	return (r);
724 }
725 
/*
 * Help text printed by print_usage(), one output line per entry,
 * terminated by a NULL pointer.  Keep it in sync with the option
 * parser in main().
 */
static char *options[] = {
	"-in INPUT-CODESET   : specifies input codeset name.",
	"-i INPUT-CODESET    : synonym for -in",
	"-out OUTPUT-CODESET : specifies output codeset name.",
	"-o OUTPUT-CODESET   : synonym for -out",
	"-conf CONF-FILE     : specifies idnkit configuration file.",
	"-c CONF-FILE        : synonym for -conf",
	"-noconf             : do not load idnkit configuration file.",
	"-C                  : synonym for -noconf",
	"-reverse            : specifies reverse conversion.",
	"                      (i.e. IDN encoding to local encoding)",
	"-r                  : synonym for -reverse",
	"-nameprep VERSION   : specifies version name of NAMEPREP.",
	"-n VERSION          : synonym for -nameprep",
	"-nonameprep         : do not perform NAMEPREP.",
	"-N                  : synonym for -nonameprep",
	"-localmap MAPPING   : specifies local mapping.",
	"-nolocalmap         : do not perform local mapping.",
	"-L                  : synonym for -nolocalmap",
	"-unassigncheck      : perform unassigned codepoint check.",
	"-u                  : synonym for -unassigncheck",
	"-nounassigncheck    : do not perform unassigned codepoint check.",
	"-U                  : synonym for -nounassigncheck",
	"-nobidicheck        : do not perform bidirectional text check.",
	"-B                  : synonym for -nobidicheck",
	"-nolengthcheck      : do not check label length.",
	"-noasciicheck       : do not check ASCII range characters.",
	"-A                  : synonym for -noasciicheck",
	"-noroundtripcheck   : do not perform round trip check.",
	"-delimiter U+XXXX   : specifies local delimiter code point.",
	"-alias alias-file   : specifies codeset alias file.",
	"-a alias-file       : synonym for -alias",
	"-flush              : line-buffering mode.",
	"-whole              : convert the whole region instead of",
	"                      regions containing non-ascii characters.",
	"-w                  : synonym for -whole",
	"-version            : print version number, then exit.",
	"-v                  : synonym for -version",
	"",
	" The following options can be specified multiple times",
	"   -localmap, -delimiter",
	NULL,
};
767 
768 static void
print_version()769 print_version() {
770 	fprintf(stderr, "idnconv (idnkit) version: %s\n"
771 		"library version: %s\n",
772 		IDNKIT_VERSION,
773 		idn_version_getstring());
774 	exit(0);
775 }
776 
777 static void
print_usage(char * cmd)778 print_usage(char *cmd) {
779 	int i;
780 
781 	fprintf(stderr, "Usage: %s [options..] [file]\n", cmd);
782 
783 	for (i = 0; options[i] != NULL; i++)
784 		fprintf(stderr, "\t%s\n", options[i]);
785 
786 	exit(1);
787 }
788 
/*
 * get_ucs -- parse a code point given as hexadecimal digits with an
 * optional `U+' prefix (e.g. "U+3002" or "3002").
 *
 * Returns the parsed value.  When P holds no digits, has trailing
 * garbage, or does not fit in an unsigned long, a message is written to
 * stderr and the process exits with status 1.
 */
static unsigned long
get_ucs(const char *p) {
	const char *digits = p;
	unsigned long v = 0;
	char *end;
	int valid = 0;

	/* Skip optional 'U+' */
	if (strncmp(digits, "U+", 2) == 0)
		digits += 2;

	/*
	 * Insist on a leading hex digit.  On its own strtoul() accepts an
	 * empty string (yielding 0), leading white space and a sign.
	 */
	if (strspn(digits, "0123456789abcdefABCDEF") > 0) {
		errno = 0;
		v = strtoul(digits, &end, 16);
		if (*end == '\0' && errno != ERANGE)
			valid = 1;
	}

	if (!valid) {
		/* Report the argument exactly as the user typed it. */
		fprintf(stderr, "invalid UCS code point \"%s\"\n", p);
		exit(1);
	}

	return v;
}
806