xref: /freebsd/contrib/libc-vis/vis.c (revision ea2be8ed)
1 /*	$NetBSD: vis.c,v 1.83 2023/08/12 12:48:52 riastradh Exp $	*/
2 
3 /*-
4  * Copyright (c) 1989, 1993
5  *	The Regents of the University of California.  All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  * 3. Neither the name of the University nor the names of its contributors
16  *    may be used to endorse or promote products derived from this software
17  *    without specific prior written permission.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
20  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
23  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29  * SUCH DAMAGE.
30  */
31 
32 /*-
33  * Copyright (c) 1999, 2005 The NetBSD Foundation, Inc.
34  * All rights reserved.
35  *
36  * Redistribution and use in source and binary forms, with or without
37  * modification, are permitted provided that the following conditions
38  * are met:
39  * 1. Redistributions of source code must retain the above copyright
40  *    notice, this list of conditions and the following disclaimer.
41  * 2. Redistributions in binary form must reproduce the above copyright
42  *    notice, this list of conditions and the following disclaimer in the
43  *    documentation and/or other materials provided with the distribution.
44  *
45  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
46  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
47  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
48  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
49  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
50  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
51  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
52  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
53  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
54  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
55  * POSSIBILITY OF SUCH DAMAGE.
56  */
57 
58 #include <sys/cdefs.h>
59 #if defined(LIBC_SCCS) && !defined(lint)
60 __RCSID("$NetBSD: vis.c,v 1.83 2023/08/12 12:48:52 riastradh Exp $");
61 #endif /* LIBC_SCCS and not lint */
62 
63 #include "namespace.h"
64 #include <sys/types.h>
65 #include <sys/param.h>
66 
67 #include <assert.h>
68 #include <vis.h>
69 #include <errno.h>
70 #include <stdint.h>
71 #include <stdlib.h>
72 #include <wchar.h>
73 #include <wctype.h>
74 
75 #ifdef __weak_alias
76 __weak_alias(strvisx,_strvisx)
77 #endif
78 
79 #if !HAVE_VIS || !HAVE_SVIS
80 #include <ctype.h>
81 #include <limits.h>
82 #include <stdio.h>
83 #include <string.h>
84 
85 #define	_DIAGASSERT(x)	assert(x)
86 
87 /*
88  * The reason for going through the trouble to deal with character encodings
89  * in vis(3), is that we use this to safe encode output of commands. This
90  * safe encoding varies depending on the character set. For example if we
91  * display ps output in French, we don't want to display French characters
92  * as M-foo.
93  */
94 
95 static wchar_t *do_svis(wchar_t *, wint_t, int, wint_t, const wchar_t *);
96 
97 #undef BELL
98 #define BELL L'\a'
99 
100 #if defined(LC_C_LOCALE)
101 #define iscgraph(c)      isgraph_l(c, LC_C_LOCALE)
102 #else
103 /* Keep it simple for now, no locale stuff */
104 #define iscgraph(c)	isgraph(c)
105 #ifdef notyet
106 #include <locale.h>
/*
 * iscgraph()
 *	isgraph(3) evaluated with LC_CTYPE temporarily switched to "C".
 *	Only compiled when `notyet' is defined.
 *
 *	c:	character to classify; passed straight to isgraph(3)
 *
 *	Returns the isgraph(3) result.  The LC_CTYPE locale in effect on
 *	entry is restored before returning whenever it could be saved.
 */
static int
iscgraph(int c)
{
	const char *cur;
	char *saved = NULL;
	size_t n;
	int rv;

	/*
	 * setlocale(LC_CTYPE, "C") returns the name of the *new* locale,
	 * so the current one has to be queried first.  The string it
	 * returns may be overwritten by a later setlocale() call, hence
	 * the private copy.
	 */
	cur = setlocale(LC_CTYPE, NULL);
	if (cur != NULL) {
		n = strlen(cur) + 1;
		if ((saved = malloc(n)) != NULL)
			memcpy(saved, cur, n);
	}

	(void)setlocale(LC_CTYPE, "C");
	rv = isgraph(c);

	/* Put the caller's locale back, if we managed to remember it. */
	if (saved != NULL) {
		(void)setlocale(LC_CTYPE, saved);
		free(saved);
	}
	return rv;
}
118 #endif
119 #endif
120 
121 #define ISGRAPH(flags, c) \
122     (((flags) & VIS_NOLOCALE) ? iscgraph(c) : iswgraph(c))
123 
/*
 * Character-class and hex-digit helper macros.  Every use of the
 * argument is parenthesized so that expressions containing operators
 * (e.g. a conditional expression) expand as the caller intended.
 */
/* Nonzero if c, truncated to u_char, is an octal digit '0'..'7'. */
#define iswoctal(c)	(((u_char)(c)) >= L'0' && ((u_char)(c)) <= L'7')
/* Nonzero if c is a space, a tab or a newline. */
#define iswwhite(c)	((c) == L' ' || (c) == L'\t' || (c) == L'\n')
/* Nonzero if c is a backspace, BELL or a carriage return. */
#define iswsafe(c)	((c) == L'\b' || (c) == BELL || (c) == L'\r')
/* Index 0..15 -> lower-case / upper-case hexadecimal digit. */
#define xtoa(c)		(L"0123456789abcdef"[(c)])
#define XTOA(c)		(L"0123456789ABCDEF"[(c)])
129 
130 #define MAXEXTRAS	30
131 
132 static const wchar_t char_shell[] = L"'`\";&<>()|{}]\\$!^~";
133 static const wchar_t char_glob[] = L"*?[#";
134 
135 #if !HAVE_NBTOOL_CONFIG_H
136 #ifndef __NetBSD__
137 /*
 * On NetBSD MB_LEN_MAX is currently 32, which does not fit in any
 * integral type and is probably wrong, since currently the maximum
 * number of bytes a character needs is 6. Until this is fixed, the
 * loops below are using sizeof(uint64_t) - 1 instead of MB_LEN_MAX, and
 * the assertion is commented out.
143  */
144 #ifdef __FreeBSD__
145 /*
146  * On FreeBSD including <sys/systm.h> for CTASSERT only works in kernel
147  * mode.
148  */
149 #ifndef CTASSERT
150 #define CTASSERT(x)             _CTASSERT(x, __LINE__)
151 #define _CTASSERT(x, y)         __CTASSERT(x, y)
152 #define __CTASSERT(x, y)        typedef char __assert ## y[(x) ? 1 : -1]
153 #endif
154 #endif /* __FreeBSD__ */
155 CTASSERT(MB_LEN_MAX <= sizeof(uint64_t));
156 #endif /* !__NetBSD__ */
157 #endif
158 
/*
 * do_hvis: HTTP style (RFC 1808) encoder.
 *
 * Alphanumerics and the characters $-_.+!*'(), are handed on to
 * do_svis(); anything else is written as '%' followed by the two
 * lower-case hex digits of the low byte of c.
 * Returns the advanced destination pointer.
 */
static wchar_t *
do_hvis(wchar_t *dst, wint_t c, int flags, wint_t nextc, const wchar_t *extra)
{
	int passthru;

	switch (c) {
	/* safe */
	case L'$': case L'-': case L'_': case L'.': case L'+':
	/* extra */
	case L'!': case L'*': case L'\'': case L'(': case L')':
	case L',':
		passthru = 1;
		break;
	default:
		passthru = iswalnum(c) != 0;
		break;
	}

	if (passthru)
		return do_svis(dst, c, flags, nextc, extra);

	dst[0] = L'%';
	dst[1] = xtoa(((unsigned int)c >> 4) & 0xf);
	dst[2] = xtoa((unsigned int)c & 0xf);
	return dst + 3;
}
180 
/*
 * do_mvis: Quoted-Printable MIME (RFC 2045) encoder.
 * NB: long lines and CRLF are not handled.
 *
 * A newline is never quoted.  Otherwise c is written as '=' plus two
 * upper-case hex digits of its low byte when it is whitespace directly
 * before CR/LF, a non-space outside 33..126, '=', or one of
 * #$@[\]^`{|}~.  Everything else goes through do_svis().
 * Returns the advanced destination pointer.
 */
static wchar_t *
do_mvis(wchar_t *dst, wint_t c, int flags, wint_t nextc, const wchar_t *extra)
{
	int quote = 0;

	if (c != L'\n') {
		if (iswspace(c)) {
			/* Space at the end of the line */
			quote = (nextc == L'\r' || nextc == L'\n');
		} else {
			/* Out of range; 61 is the '=' escape itself */
			quote = (c < 33 || c == 61 || c > 126);
		}
		/* Specific char to be escaped */
		if (!quote)
			quote = wcschr(L"#$@[\\]^`{|}~", c) != NULL;
	}

	if (!quote)
		return do_svis(dst, c, flags, nextc, extra);

	dst[0] = L'=';
	dst[1] = XTOA(((unsigned int)c >> 4) & 0xf);
	dst[2] = XTOA((unsigned int)c & 0xf);
	return dst + 3;
}
202 
203 /*
204  * Output single byte of multibyte character.
205  */
206 static wchar_t *
do_mbyte(wchar_t * dst,wint_t c,int flags,wint_t nextc,int iswextra)207 do_mbyte(wchar_t *dst, wint_t c, int flags, wint_t nextc, int iswextra)
208 {
209 	if (flags & VIS_CSTYLE) {
210 		switch (c) {
211 		case L'\n':
212 			*dst++ = L'\\'; *dst++ = L'n';
213 			return dst;
214 		case L'\r':
215 			*dst++ = L'\\'; *dst++ = L'r';
216 			return dst;
217 		case L'\b':
218 			*dst++ = L'\\'; *dst++ = L'b';
219 			return dst;
220 		case BELL:
221 			*dst++ = L'\\'; *dst++ = L'a';
222 			return dst;
223 		case L'\v':
224 			*dst++ = L'\\'; *dst++ = L'v';
225 			return dst;
226 		case L'\t':
227 			*dst++ = L'\\'; *dst++ = L't';
228 			return dst;
229 		case L'\f':
230 			*dst++ = L'\\'; *dst++ = L'f';
231 			return dst;
232 		case L' ':
233 			*dst++ = L'\\'; *dst++ = L's';
234 			return dst;
235 		case L'\0':
236 			*dst++ = L'\\'; *dst++ = L'0';
237 			if (iswoctal(nextc)) {
238 				*dst++ = L'0';
239 				*dst++ = L'0';
240 			}
241 			return dst;
242 		/* We cannot encode these characters in VIS_CSTYLE
243 		 * because they special meaning */
244 		case L'n':
245 		case L'r':
246 		case L'b':
247 		case L'a':
248 		case L'v':
249 		case L't':
250 		case L'f':
251 		case L's':
252 		case L'0':
253 		case L'M':
254 		case L'^':
255 		case L'$': /* vis(1) -l */
256 			break;
257 		default:
258 			if (ISGRAPH(flags, c) && !iswoctal(c)) {
259 				*dst++ = L'\\';
260 				*dst++ = c;
261 				return dst;
262 			}
263 		}
264 	}
265 	if (iswextra || ((c & 0177) == L' ') || (flags & VIS_OCTAL)) {
266 		*dst++ = L'\\';
267 		*dst++ = (u_char)(((u_int32_t)(u_char)c >> 6) & 03) + L'0';
268 		*dst++ = (u_char)(((u_int32_t)(u_char)c >> 3) & 07) + L'0';
269 		*dst++ =			     (c	      & 07) + L'0';
270 	} else {
271 		if ((flags & VIS_NOSLASH) == 0)
272 			*dst++ = L'\\';
273 
274 		if (c & 0200) {
275 			c &= 0177;
276 			*dst++ = L'M';
277 		}
278 
279 		if (iswcntrl(c)) {
280 			*dst++ = L'^';
281 			if (c == 0177)
282 				*dst++ = L'?';
283 			else
284 				*dst++ = c + L'@';
285 		} else {
286 			*dst++ = L'-';
287 			*dst++ = c;
288 		}
289 	}
290 
291 	return dst;
292 }
293 
294 /*
295  * This is do_vis, the central code of vis.
296  * dst:	      Pointer to the destination buffer
297  * c:	      Character to encode
298  * flags:     Flags word
299  * nextc:     The character following 'c'
300  * extra:     Pointer to the list of extra characters to be
301  *	      backslash-protected.
302  */
303 static wchar_t *
do_svis(wchar_t * dst,wint_t c,int flags,wint_t nextc,const wchar_t * extra)304 do_svis(wchar_t *dst, wint_t c, int flags, wint_t nextc, const wchar_t *extra)
305 {
306 	int iswextra, i, shft;
307 	uint64_t bmsk, wmsk;
308 
309 	iswextra = wcschr(extra, c) != NULL;
310 	if (!iswextra && (ISGRAPH(flags, c) || iswwhite(c) ||
311 	    ((flags & VIS_SAFE) && iswsafe(c)))) {
312 		*dst++ = c;
313 		return dst;
314 	}
315 
316 	/* See comment in istrsenvisx() output loop, below. */
317 	wmsk = 0;
318 	for (i = sizeof(wmsk) - 1; i >= 0; i--) {
319 		shft = i * NBBY;
320 		bmsk = (uint64_t)0xffLL << shft;
321 		wmsk |= bmsk;
322 		if ((c & wmsk) || i == 0)
323 			dst = do_mbyte(dst, (wint_t)(
324 			    (uint64_t)(c & bmsk) >> shft),
325 			    flags, nextc, iswextra);
326 	}
327 
328 	return dst;
329 }
330 
331 typedef wchar_t *(*visfun_t)(wchar_t *, wint_t, int, wint_t, const wchar_t *);
332 
333 /*
334  * Return the appropriate encoding function depending on the flags given.
335  */
336 static visfun_t
getvisfun(int flags)337 getvisfun(int flags)
338 {
339 	if (flags & VIS_HTTPSTYLE)
340 		return do_hvis;
341 	if (flags & VIS_MIMESTYLE)
342 		return do_mvis;
343 	return do_svis;
344 }
345 
346 /*
347  * Expand list of extra characters to not visually encode.
348  */
349 static wchar_t *
makeextralist(int flags,const char * src)350 makeextralist(int flags, const char *src)
351 {
352 	wchar_t *dst, *d;
353 	size_t len;
354 	const wchar_t *s;
355 	mbstate_t mbstate;
356 
357 	bzero(&mbstate, sizeof(mbstate));
358 	len = strlen(src);
359 	if ((dst = calloc(len + MAXEXTRAS, sizeof(*dst))) == NULL)
360 		return NULL;
361 
362 	memset(&mbstate, 0, sizeof(mbstate));
363 	if ((flags & VIS_NOLOCALE)
364 	    || mbsrtowcs(dst, &src, len, &mbstate) == (size_t)-1) {
365 		size_t i;
366 		for (i = 0; i < len; i++)
367 			dst[i] = (wchar_t)(u_char)src[i];
368 		d = dst + len;
369 	} else
370 		d = dst + wcslen(dst);
371 
372 	if (flags & VIS_GLOB)
373 		for (s = char_glob; *s; *d++ = *s++)
374 			continue;
375 
376 	if (flags & VIS_SHELL)
377 		for (s = char_shell; *s; *d++ = *s++)
378 			continue;
379 
380 	if (flags & VIS_SP) *d++ = L' ';
381 	if (flags & VIS_TAB) *d++ = L'\t';
382 	if (flags & VIS_NL) *d++ = L'\n';
383 	if (flags & VIS_DQ) *d++ = L'"';
384 	if ((flags & VIS_NOSLASH) == 0) *d++ = L'\\';
385 	*d = L'\0';
386 
387 	return dst;
388 }
389 
390 /*
391  * istrsenvisx()
392  * 	The main internal function.
393  *	All user-visible functions call this one.
394  */
395 static int
istrsenvisx(char ** mbdstp,size_t * dlen,const char * mbsrc,size_t mblength,int flags,const char * mbextra,int * cerr_ptr)396 istrsenvisx(char **mbdstp, size_t *dlen, const char *mbsrc, size_t mblength,
397     int flags, const char *mbextra, int *cerr_ptr)
398 {
399 	char mbbuf[MB_LEN_MAX];
400 	wchar_t *dst, *src, *pdst, *psrc, *start, *extra;
401 	size_t len, olen;
402 	uint64_t bmsk, wmsk;
403 	wint_t c;
404 	visfun_t f;
405 	int clen = 0, cerr, error = -1, i, shft;
406 	char *mbdst, *mbwrite, *mdst;
407 	size_t mbslength;
408 	size_t maxolen;
409 	mbstate_t mbstate;
410 
411 	_DIAGASSERT(mbdstp != NULL);
412 	_DIAGASSERT(mbsrc != NULL || mblength == 0);
413 	_DIAGASSERT(mbextra != NULL);
414 
415 	mbslength = mblength;
416 	/*
417 	 * When inputing a single character, must also read in the
418 	 * next character for nextc, the look-ahead character.
419 	 */
420 	if (mbslength == 1)
421 		mbslength++;
422 
423 	/*
424 	 * Input (mbsrc) is a char string considered to be multibyte
425 	 * characters.  The input loop will read this string pulling
426 	 * one character, possibly multiple bytes, from mbsrc and
427 	 * converting each to wchar_t in src.
428 	 *
429 	 * The vis conversion will be done using the wide char
430 	 * wchar_t string.
431 	 *
432 	 * This will then be converted back to a multibyte string to
433 	 * return to the caller.
434 	 */
435 
436 	/*
437 	 * Guarantee the arithmetic on input to calloc won't overflow.
438 	 */
439 	if (mbslength > (SIZE_MAX - 1)/16) {
440 		errno = ENOMEM;
441 		return -1;
442 	}
443 
444 	/* Allocate space for the wide char strings */
445 	psrc = pdst = extra = NULL;
446 	mdst = NULL;
447 	if ((psrc = calloc(mbslength + 1, sizeof(*psrc))) == NULL)
448 		return -1;
449 	if ((pdst = calloc((16 * mbslength) + 1, sizeof(*pdst))) == NULL)
450 		goto out;
451 	if (*mbdstp == NULL) {
452 		if ((mdst = calloc((16 * mbslength) + 1, sizeof(*mdst))) == NULL)
453 			goto out;
454 		*mbdstp = mdst;
455 	}
456 
457 	mbdst = *mbdstp;
458 	dst = pdst;
459 	src = psrc;
460 
461 	if (flags & VIS_NOLOCALE) {
462 		/* Do one byte at a time conversion */
463 		cerr = 1;
464 	} else {
465 		/* Use caller's multibyte conversion error flag. */
466 		cerr = cerr_ptr ? *cerr_ptr : 0;
467 	}
468 
469 	/*
470 	 * Input loop.
471 	 * Handle up to mblength characters (not bytes).  We do not
472 	 * stop at NULs because we may be processing a block of data
473 	 * that includes NULs.
474 	 */
475 	memset(&mbstate, 0, sizeof(mbstate));
476 	while (mbslength > 0) {
477 		/* Convert one multibyte character to wchar_t. */
478 		if (!cerr) {
479 			clen = mbrtowc(src, mbsrc,
480 			    (mbslength < MB_LEN_MAX
481 				? mbslength
482 				: MB_LEN_MAX),
483 			    &mbstate);
484 			assert(clen < 0 || (size_t)clen <= mbslength);
485 			assert(clen <= MB_LEN_MAX);
486 		}
487 		if (cerr || clen < 0) {
488 			/* Conversion error, process as a byte instead. */
489 			*src = (wint_t)(u_char)*mbsrc;
490 			clen = 1;
491 			cerr = 1;
492 		}
493 		if (clen == 0) {
494 			/*
495 			 * NUL in input gives 0 return value. process
496 			 * as single NUL byte and keep going.
497 			 */
498 			clen = 1;
499 		}
500 		/*
501 		 * Let n := MIN(mbslength, MB_LEN_MAX).  We have:
502 		 *
503 		 *	mbslength >= 1
504 		 *	mbrtowc(..., n, &mbstate) <= n,
505 		 *		by the contract of mbrtowc
506 		 *
507 		 *  clen is either
508 		 *  (a) mbrtowc(..., n, &mbstate), in which case
509 		 *      clen <= n <= mbslength; or
510 		 *  (b) 1, in which case clen = 1 <= mbslength.
511 		 */
512 		assert(clen > 0);
513 		assert((size_t)clen <= mbslength);
514 		/* Advance buffer character pointer. */
515 		src++;
516 		/* Advance input pointer by number of bytes read. */
517 		mbsrc += clen;
518 		/* Decrement input byte count. */
519 		mbslength -= clen;
520 	}
521 	len = src - psrc;
522 	src = psrc;
523 
524 	/*
525 	 * In the single character input case, we will have actually
526 	 * processed two characters, c and nextc.  Reset len back to
527 	 * just a single character.
528 	 */
529 	if (mblength < len)
530 		len = mblength;
531 
532 	/* Convert extra argument to list of characters for this mode. */
533 	extra = makeextralist(flags, mbextra);
534 	if (!extra) {
535 		if (dlen && *dlen == 0) {
536 			errno = ENOSPC;
537 			goto out;
538 		}
539 		*mbdst = '\0';	/* can't create extra, return "" */
540 		error = 0;
541 		goto out;
542 	}
543 
544 	/* Look up which processing function to call. */
545 	f = getvisfun(flags);
546 
547 	/*
548 	 * Main processing loop.
549 	 * Call do_Xvis processing function one character at a time
550 	 * with next character available for look-ahead.
551 	 */
552 	for (start = dst; len > 0; len--) {
553 		c = *src++;
554 		dst = (*f)(dst, c, flags, len >= 1 ? *src : L'\0', extra);
555 		if (dst == NULL) {
556 			errno = ENOSPC;
557 			goto out;
558 		}
559 	}
560 
561 	/* Terminate the string in the buffer. */
562 	*dst = L'\0';
563 
564 	/*
565 	 * Output loop.
566 	 * Convert wchar_t string back to multibyte output string.
567 	 * If we have hit a multi-byte conversion error on input,
568 	 * output byte-by-byte here.  Else use wctomb().
569 	 */
570 	len = wcslen(start);
571 	if (dlen) {
572 		maxolen = *dlen;
573 		if (maxolen == 0) {
574 			errno = ENOSPC;
575 			goto out;
576 		}
577 	} else {
578 		if (len > (SIZE_MAX - 1)/MB_LEN_MAX) {
579 			errno = ENOSPC;
580 			goto out;
581 		}
582 		maxolen = len*MB_LEN_MAX + 1;
583 	}
584 	olen = 0;
585 	memset(&mbstate, 0, sizeof(mbstate));
586 	for (dst = start; len > 0; len--) {
587 		if (!cerr) {
588 			/*
589 			 * If we have at least MB_CUR_MAX bytes in the buffer,
590 			 * we'll just do the conversion in-place into mbdst.  We
591 			 * need to be a little more conservative when we get to
592 			 * the end of the buffer, as we may not have MB_CUR_MAX
593 			 * bytes but we may not need it.
594 			 */
595 			if (maxolen - olen > MB_CUR_MAX)
596 				mbwrite = mbdst;
597 			else
598 				mbwrite = mbbuf;
599 			clen = wcrtomb(mbwrite, *dst, &mbstate);
600 			if (clen > 0 && mbwrite != mbdst) {
601 				/*
602 				 * Don't break past our output limit, noting
603 				 * that maxolen includes the nul terminator so
604 				 * we can't write past maxolen - 1 here.
605 				 */
606 				if (olen + clen >= maxolen) {
607 					errno = ENOSPC;
608 					goto out;
609 				}
610 
611 				memcpy(mbdst, mbwrite, clen);
612 			}
613 		}
614 		if (cerr || clen < 0) {
615 			/*
616 			 * Conversion error, process as a byte(s) instead.
617 			 * Examine each byte and higher-order bytes for
618 			 * data.  E.g.,
619 			 *	0x000000000000a264 -> a2 64
620 			 *	0x000000001f00a264 -> 1f 00 a2 64
621 			 */
622 			clen = 0;
623 			wmsk = 0;
624 			for (i = sizeof(wmsk) - 1; i >= 0; i--) {
625 				shft = i * NBBY;
626 				bmsk = (uint64_t)0xffLL << shft;
627 				wmsk |= bmsk;
628 				if ((*dst & wmsk) || i == 0) {
629 					if (olen + clen + 1 >= maxolen) {
630 						errno = ENOSPC;
631 						goto out;
632 					}
633 
634 					mbdst[clen++] = (char)(
635 					    (uint64_t)(*dst & bmsk) >>
636 					    shft);
637 				}
638 			}
639 			cerr = 1;
640 		}
641 
642 		/*
643 		 * We'll be dereferencing mbdst[clen] after this to write the
644 		 * nul terminator; the above paths should have checked for a
645 		 * possible overflow already.
646 		 */
647 		assert(olen + clen < maxolen);
648 
649 		/* Advance output pointer by number of bytes written. */
650 		mbdst += clen;
651 		/* Advance buffer character pointer. */
652 		dst++;
653 		/* Incrment output character count. */
654 		olen += clen;
655 	}
656 
657 	/* Terminate the output string. */
658 	assert(olen < maxolen);
659 	*mbdst = '\0';
660 
661 	if (flags & VIS_NOLOCALE) {
662 		/* Pass conversion error flag out. */
663 		if (cerr_ptr)
664 			*cerr_ptr = cerr;
665 	}
666 
667 	free(extra);
668 	free(pdst);
669 	free(psrc);
670 
671 	return (int)olen;
672 out:
673 	free(extra);
674 	free(pdst);
675 	free(psrc);
676 	free(mdst);
677 	return error;
678 }
679 
/*
 * istrsenvisxl: istrsenvisx() for NUL-terminated input.  The input
 * length is strlen(mbsrc), or 0 when mbsrc is NULL.
 */
static int
istrsenvisxl(char **mbdstp, size_t *dlen, const char *mbsrc,
    int flags, const char *mbextra, int *cerr_ptr)
{
	size_t srclen = 0;

	if (mbsrc != NULL)
		srclen = strlen(mbsrc);

	return istrsenvisx(mbdstp, dlen, mbsrc, srclen, flags, mbextra,
	    cerr_ptr);
}
687 
688 #endif
689 
690 #if !HAVE_SVIS
691 /*
692  *	The "svis" variants all take an "extra" arg that is a pointer
693  *	to a NUL-terminated list of characters to be encoded, too.
694  *	These functions are useful e. g. to encode strings in such a
695  *	way so that they are not interpreted by a shell.
696  */
697 
/*
 * svis: encode the single character c (with look-ahead nextc) into
 * mbdst, also encoding the characters listed in mbextra.
 * Returns a pointer to the terminating NUL written, or NULL on error.
 */
char *
svis(char *mbdst, int c, int flags, int nextc, const char *mbextra)
{
	char pair[2];
	int n;

	pair[0] = c;
	pair[1] = nextc;

	n = istrsenvisx(&mbdst, NULL, pair, 1, flags, mbextra, NULL);
	return (n < 0) ? NULL : mbdst + n;
}
712 
/*
 * snvis: like svis(), but the output buffer size dlen is passed along
 * so that the encoder can bound its writes.
 * Returns a pointer to the terminating NUL written, or NULL on error.
 */
char *
snvis(char *mbdst, size_t dlen, int c, int flags, int nextc, const char *mbextra)
{
	char pair[2];
	int n;

	pair[0] = c;
	pair[1] = nextc;

	n = istrsenvisx(&mbdst, &dlen, pair, 1, flags, mbextra, NULL);
	return (n < 0) ? NULL : mbdst + n;
}
727 
/*
 * strsvis: encode the NUL-terminated string mbsrc into mbdst, also
 * encoding the characters in mbextra.  No output size limit is given.
 * Returns the result of istrsenvisxl().
 */
int
strsvis(char *mbdst, const char *mbsrc, int flags, const char *mbextra)
{
	char *out = mbdst;

	return istrsenvisxl(&out, NULL, mbsrc, flags, mbextra, NULL);
}
733 
/*
 * strsnvis: strsvis() with the output buffer size dlen passed along.
 * Returns the result of istrsenvisxl().
 */
int
strsnvis(char *mbdst, size_t dlen, const char *mbsrc, int flags, const char *mbextra)
{
	char *out = mbdst;
	size_t outsize = dlen;

	return istrsenvisxl(&out, &outsize, mbsrc, flags, mbextra, NULL);
}
739 
/*
 * strsvisx: encode exactly len bytes of mbsrc into mbdst, also
 * encoding the characters in mbextra.  No output size limit is given.
 * Returns the result of istrsenvisx().
 */
int
strsvisx(char *mbdst, const char *mbsrc, size_t len, int flags, const char *mbextra)
{
	char *out = mbdst;

	return istrsenvisx(&out, NULL, mbsrc, len, flags, mbextra, NULL);
}
745 
/*
 * strsnvisx: strsvisx() with the output buffer size dlen passed along.
 * Returns the result of istrsenvisx().
 */
int
strsnvisx(char *mbdst, size_t dlen, const char *mbsrc, size_t len, int flags,
    const char *mbextra)
{
	char *out = mbdst;
	size_t outsize = dlen;

	return istrsenvisx(&out, &outsize, mbsrc, len, flags, mbextra, NULL);
}
752 
/*
 * strsenvisx: strsnvisx() that additionally forwards the caller's
 * multibyte conversion error flag cerr_ptr.
 * Returns the result of istrsenvisx().
 */
int
strsenvisx(char *mbdst, size_t dlen, const char *mbsrc, size_t len, int flags,
    const char *mbextra, int *cerr_ptr)
{
	char *out = mbdst;
	size_t outsize = dlen;

	return istrsenvisx(&out, &outsize, mbsrc, len, flags, mbextra,
	    cerr_ptr);
}
759 #endif
760 
761 #if !HAVE_VIS
/*
 * vis - visually encode a single character
 *
 *	Encodes c (with look-ahead nextc) into mbdst using an empty
 *	extra list.  Returns a pointer to the terminating NUL written,
 *	or NULL on error.
 */
char *
vis(char *mbdst, int c, int flags, int nextc)
{
	char pair[2];
	int n;

	pair[0] = c;
	pair[1] = nextc;

	n = istrsenvisx(&mbdst, NULL, pair, 1, flags, "", NULL);
	return (n < 0) ? NULL : mbdst + n;
}
779 
/*
 * nvis: like vis(), but the output buffer size dlen is passed along.
 * Returns a pointer to the terminating NUL written, or NULL on error.
 */
char *
nvis(char *mbdst, size_t dlen, int c, int flags, int nextc)
{
	char pair[2];
	int n;

	pair[0] = c;
	pair[1] = nextc;

	n = istrsenvisx(&mbdst, &dlen, pair, 1, flags, "", NULL);
	return (n < 0) ? NULL : mbdst + n;
}
794 
/*
 * strvis - visually encode characters from src into dst
 *
 *	Dst must be 4 times the size of src to account for possible
 *	expansion.  The length of dst, not including the trailing NUL,
 *	is returned.
 */
int
strvis(char *mbdst, const char *mbsrc, int flags)
{
	char *out = mbdst;

	return istrsenvisxl(&out, NULL, mbsrc, flags, "", NULL);
}
808 
/*
 * strnvis: strvis() with the output buffer size dlen passed along.
 * Returns the result of istrsenvisxl().
 */
int
strnvis(char *mbdst, size_t dlen, const char *mbsrc, int flags)
{
	char *out = mbdst;
	size_t outsize = dlen;

	return istrsenvisxl(&out, &outsize, mbsrc, flags, "", NULL);
}
814 
/*
 * stravis: strvis() into a buffer allocated by the encoder.  *mbdstp is
 * cleared first so that istrsenvisx() allocates the output buffer and
 * stores it there.  Returns the result of istrsenvisxl().
 */
int
stravis(char **mbdstp, const char *mbsrc, int flags)
{
	int n;

	*mbdstp = NULL;
	n = istrsenvisxl(mbdstp, NULL, mbsrc, flags, "", NULL);
	return n;
}
821 
/*
 * strvisx - visually encode characters from src into dst
 *
 *	Dst must be 4 times the size of src to account for possible
 *	expansion.  The length of dst, not including the trailing NUL,
 *	is returned.
 *
 *	Strvisx encodes exactly len characters from src into dst.
 *	This is useful for encoding a block of data.
 */
int
strvisx(char *mbdst, const char *mbsrc, size_t len, int flags)
{
	char *out = mbdst;

	return istrsenvisx(&out, NULL, mbsrc, len, flags, "", NULL);
}
838 
/*
 * strnvisx: strvisx() with the output buffer size dlen passed along.
 * Returns the result of istrsenvisx().
 */
int
strnvisx(char *mbdst, size_t dlen, const char *mbsrc, size_t len, int flags)
{
	char *out = mbdst;
	size_t outsize = dlen;

	return istrsenvisx(&out, &outsize, mbsrc, len, flags, "", NULL);
}
844 
/*
 * strenvisx: strnvisx() that additionally forwards the caller's
 * multibyte conversion error flag cerr_ptr.
 * Returns the result of istrsenvisx().
 */
int
strenvisx(char *mbdst, size_t dlen, const char *mbsrc, size_t len, int flags,
    int *cerr_ptr)
{
	char *out = mbdst;
	size_t outsize = dlen;

	return istrsenvisx(&out, &outsize, mbsrc, len, flags, "", cerr_ptr);
}
851 #endif
852