xref: /netbsd/lib/libc/gen/vis.c (revision 6550d01e)
1 /*	$NetBSD: vis.c,v 1.41 2009/11/23 10:08:47 plunky Exp $	*/
2 
3 /*-
4  * Copyright (c) 1989, 1993
5  *	The Regents of the University of California.  All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  * 3. Neither the name of the University nor the names of its contributors
16  *    may be used to endorse or promote products derived from this software
17  *    without specific prior written permission.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
20  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
23  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29  * SUCH DAMAGE.
30  */
31 
32 /*-
33  * Copyright (c) 1999, 2005 The NetBSD Foundation, Inc.
34  * All rights reserved.
35  *
36  * Redistribution and use in source and binary forms, with or without
37  * modification, are permitted provided that the following conditions
38  * are met:
39  * 1. Redistributions of source code must retain the above copyright
40  *    notice, this list of conditions and the following disclaimer.
41  * 2. Redistributions in binary form must reproduce the above copyright
42  *    notice, this list of conditions and the following disclaimer in the
43  *    documentation and/or other materials provided with the distribution.
44  *
45  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
46  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
47  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
48  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
49  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
50  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
51  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
52  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
53  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
54  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
55  * POSSIBILITY OF SUCH DAMAGE.
56  */
57 
58 #include <sys/cdefs.h>
59 #if defined(LIBC_SCCS) && !defined(lint)
60 __RCSID("$NetBSD: vis.c,v 1.41 2009/11/23 10:08:47 plunky Exp $");
61 #endif /* LIBC_SCCS and not lint */
62 
63 #include "namespace.h"
64 #include <sys/types.h>
65 
66 #include <assert.h>
67 #include <vis.h>
68 #include <stdlib.h>
69 
70 #ifdef __weak_alias
71 __weak_alias(strsvis,_strsvis)
72 __weak_alias(strsvisx,_strsvisx)
73 __weak_alias(strvis,_strvis)
74 __weak_alias(strvisx,_strvisx)
75 __weak_alias(svis,_svis)
76 __weak_alias(vis,_vis)
77 #endif
78 
79 #if !HAVE_VIS || !HAVE_SVIS
80 #include <ctype.h>
81 #include <limits.h>
82 #include <stdio.h>
83 #include <string.h>
84 
85 static char *do_svis(char *, int, int, int, const char *);
86 
/* Force BELL to the C alert character, whatever an earlier header defined. */
#undef BELL
#define BELL '\a'

/*
 * Small character helpers.
 *
 * isoctal(c)	true if c, taken as an unsigned char, is an octal digit
 * iswhite(c)	true for space, tab and newline
 * issafe(c)	true for backspace, BELL and carriage return
 * xtoa(c)	lower-case hex digit for a value in the range 0..15
 * XTOA(c)	upper-case hex digit for a value in the range 0..15
 *
 * Every parameter is parenthesized so that an expression argument (for
 * example a ?: expression) is treated as a single operand.  The argument
 * may be evaluated more than once, so it must not have side effects.
 */
#define isoctal(c)	(((unsigned char)(c)) >= '0' && ((unsigned char)(c)) <= '7')
#define iswhite(c)	((c) == ' ' || (c) == '\t' || (c) == '\n')
#define issafe(c)	((c) == '\b' || (c) == BELL || (c) == '\r')
#define xtoa(c)		("0123456789abcdef"[(c)])
#define XTOA(c)		("0123456789ABCDEF"[(c)])

/* Slack added to the buffer that MAKEEXTRALIST allocates for the list. */
#define MAXEXTRAS	5
97 
/*
 * MAKEEXTRALIST(flag, extra, orig_str)
 *
 * Build the list of characters that must always be escaped.  A buffer is
 * obtained from malloc() and assigned to `extra'; it receives a copy of
 * the NUL-terminated string `orig_str' followed by
 *	' '	if VIS_SP is set in `flag',
 *	'\t'	if VIS_TAB is set,
 *	'\n'	if VIS_NL is set,
 *	'\\'	unless VIS_NOSLASH is set,
 * and a terminating NUL.
 *
 * If malloc() fails `extra' is NULL afterwards and nothing else is done;
 * callers must check for that, and must free() the buffer when finished.
 *
 * `flag' and `extra' are evaluated several times, so they must be free of
 * side effects.  The locals carry a mel_ prefix to keep them from
 * shadowing identifiers that appear in the macro arguments.
 */
#define MAKEEXTRALIST(flag, extra, orig_str)				      \
do {									      \
	const char *mel_src = (orig_str);				      \
	const size_t mel_len = strlen(mel_src);				      \
	char *mel_end;							      \
	(extra) = malloc(mel_len + 1 + MAXEXTRAS);			      \
	if ((extra) == NULL)						      \
		break;							      \
	memcpy((extra), mel_src, mel_len);				      \
	mel_end = (extra) + mel_len;					      \
	if ((flag) & VIS_SP)						      \
		*mel_end++ = ' ';					      \
	if ((flag) & VIS_TAB)						      \
		*mel_end++ = '\t';					      \
	if ((flag) & VIS_NL)						      \
		*mel_end++ = '\n';					      \
	if (((flag) & VIS_NOSLASH) == 0)				      \
		*mel_end++ = '\\';					      \
	*mel_end = '\0';						      \
} while (/*CONSTCOND*/0)
116 
/*
 * do_hvis - encoder for HTTP style (RFC 1808).
 *
 * ASCII alphanumerics and the punctuation listed in the switch below are
 * handed on to do_svis(); every other value is written as '%' followed by
 * two lower-case hex digits taken from the low eight bits of c.
 * Returns a pointer just past the last byte written.
 */
static char *
do_hvis(char *dst, int c, int flag, int nextc, const char *extra)
{
	int passthru;

	switch (c) {
	/* safe */
	case '$': case '-': case '_': case '.': case '+':
	/* extra */
	case '!': case '*': case '\'': case '(': case ')': case ',':
		passthru = 1;
		break;
	default:
		passthru = isascii(c) && isalnum(c);
		break;
	}

	if (!passthru) {
		const unsigned int bits = (unsigned int)c;

		*dst++ = '%';
		*dst++ = xtoa((bits >> 4) & 0xf);
		*dst++ = xtoa(bits & 0xf);
		return dst;
	}

	return do_svis(dst, c, flag, nextc, extra);
}
139 
/*
 * do_mvis - encoder for Quoted-Printable MIME (RFC 2045).
 * NB: No handling of long lines or CRLF.
 *
 * A character other than newline is written as '=' followed by two
 * upper-case hex digits when any of the following holds:
 *   - it is white space and the next character is CR or LF,
 *   - it is not white space and is below 33, equal to 61 or above 126,
 *   - it appears in the fixed list of specially escaped characters.
 * Anything else is handed on to do_svis().
 * Returns a pointer just past the last byte written.
 */
static char *
do_mvis(char *dst, int c, int flag, int nextc, const char *extra)
{
	int encode = 0;

	if (c != '\n') {
		if (isspace(c)) {
			/* Space at the end of the line */
			encode = (nextc == '\r' || nextc == '\n');
		} else {
			/* Out of range */
			encode = (c < 33 || c == 61 || c > 126);
		}
		/* Specific char to be escaped */
		if (!encode)
			encode = (strchr("#$@[\\]^`{|}~", c) != NULL);
	}

	if (!encode)
		return do_svis(dst, c, flag, nextc, extra);

	*dst++ = '=';
	*dst++ = XTOA(((unsigned int)c >> 4) & 0xf);
	*dst++ = XTOA((unsigned int)c & 0xf);
	return dst;
}
162 
163 /*
164  * This is do_vis, the central code of vis.
165  * dst:	      Pointer to the destination buffer
166  * c:	      Character to encode
167  * flag:      Flag word
168  * nextc:     The character following 'c'
169  * extra:     Pointer to the list of extra characters to be
170  *	      backslash-protected.
171  */
172 static char *
173 do_svis(char *dst, int c, int flag, int nextc, const char *extra)
174 {
175 	int isextra;
176 	isextra = strchr(extra, c) != NULL;
177 	if (!isextra && isascii(c) && (isgraph(c) || iswhite(c) ||
178 	    ((flag & VIS_SAFE) && issafe(c)))) {
179 		*dst++ = c;
180 		return dst;
181 	}
182 	if (flag & VIS_CSTYLE) {
183 		switch (c) {
184 		case '\n':
185 			*dst++ = '\\'; *dst++ = 'n';
186 			return dst;
187 		case '\r':
188 			*dst++ = '\\'; *dst++ = 'r';
189 			return dst;
190 		case '\b':
191 			*dst++ = '\\'; *dst++ = 'b';
192 			return dst;
193 		case BELL:
194 			*dst++ = '\\'; *dst++ = 'a';
195 			return dst;
196 		case '\v':
197 			*dst++ = '\\'; *dst++ = 'v';
198 			return dst;
199 		case '\t':
200 			*dst++ = '\\'; *dst++ = 't';
201 			return dst;
202 		case '\f':
203 			*dst++ = '\\'; *dst++ = 'f';
204 			return dst;
205 		case ' ':
206 			*dst++ = '\\'; *dst++ = 's';
207 			return dst;
208 		case '\0':
209 			*dst++ = '\\'; *dst++ = '0';
210 			if (isoctal(nextc)) {
211 				*dst++ = '0';
212 				*dst++ = '0';
213 			}
214 			return dst;
215 		default:
216 			if (isgraph(c)) {
217 				*dst++ = '\\'; *dst++ = c;
218 				return dst;
219 			}
220 		}
221 	}
222 	if (isextra || ((c & 0177) == ' ') || (flag & VIS_OCTAL)) {
223 		*dst++ = '\\';
224 		*dst++ = (u_char)(((u_int32_t)(u_char)c >> 6) & 03) + '0';
225 		*dst++ = (u_char)(((u_int32_t)(u_char)c >> 3) & 07) + '0';
226 		*dst++ =			     (c	      & 07) + '0';
227 	} else {
228 		if ((flag & VIS_NOSLASH) == 0) *dst++ = '\\';
229 		if (c & 0200) {
230 			c &= 0177; *dst++ = 'M';
231 		}
232 		if (iscntrl(c)) {
233 			*dst++ = '^';
234 			if (c == 0177)
235 				*dst++ = '?';
236 			else
237 				*dst++ = c + '@';
238 		} else {
239 			*dst++ = '-'; *dst++ = c;
240 		}
241 	}
242 	return dst;
243 }
244 
245 typedef char *(*visfun_t)(char *, int, int, int, const char *);
246 
247 /*
248  * Return the appropriate encoding function depending on the flags given.
249  */
250 static visfun_t
251 getvisfun(int flag)
252 {
253 	if (flag & VIS_HTTPSTYLE)
254 		return do_hvis;
255 	if (flag & VIS_MIMESTYLE)
256 		return do_mvis;
257 	return do_svis;
258 }
259 
260 /*
261  * svis - visually encode characters, also encoding the characters
262  *	  pointed to by `extra'
263  */
264 char *
265 svis(char *dst, int c, int flag, int nextc, const char *extra)
266 {
267 	char *nextra = NULL;
268 	visfun_t f;
269 
270 	_DIAGASSERT(dst != NULL);
271 	_DIAGASSERT(extra != NULL);
272 	MAKEEXTRALIST(flag, nextra, extra);
273 	if (!nextra) {
274 		*dst = '\0';		/* can't create nextra, return "" */
275 		return dst;
276 	}
277 	f = getvisfun(flag);
278 	dst = (*f)(dst, c, flag, nextc, nextra);
279 	free(nextra);
280 	*dst = '\0';
281 	return dst;
282 }
283 
284 
285 /*
286  * strsvis, strsvisx - visually encode characters from src into dst
287  *
288  *	Extra is a pointer to a \0-terminated list of characters to
289  *	be encoded, too. These functions are useful e. g. to
290  *	encode strings in such a way so that they are not interpreted
291  *	by a shell.
292  *
293  *	Dst must be 4 times the size of src to account for possible
294  *	expansion.  The length of dst, not including the trailing NULL,
295  *	is returned.
296  *
297  *	Strsvisx encodes exactly len bytes from src into dst.
298  *	This is useful for encoding a block of data.
299  */
300 int
301 strsvis(char *dst, const char *csrc, int flag, const char *extra)
302 {
303 	int c;
304 	char *start;
305 	char *nextra = NULL;
306 	const unsigned char *src = (const unsigned char *)csrc;
307 	visfun_t f;
308 
309 	_DIAGASSERT(dst != NULL);
310 	_DIAGASSERT(src != NULL);
311 	_DIAGASSERT(extra != NULL);
312 	MAKEEXTRALIST(flag, nextra, extra);
313 	if (!nextra) {
314 		*dst = '\0';		/* can't create nextra, return "" */
315 		return 0;
316 	}
317 	f = getvisfun(flag);
318 	for (start = dst; (c = *src++) != '\0'; /* empty */)
319 		dst = (*f)(dst, c, flag, *src, nextra);
320 	free(nextra);
321 	*dst = '\0';
322 	return (int)(dst - start);
323 }
324 
325 
326 int
327 strsvisx(char *dst, const char *csrc, size_t len, int flag, const char *extra)
328 {
329 	unsigned char c;
330 	char *start;
331 	char *nextra = NULL;
332 	const unsigned char *src = (const unsigned char *)csrc;
333 	visfun_t f;
334 
335 	_DIAGASSERT(dst != NULL);
336 	_DIAGASSERT(src != NULL);
337 	_DIAGASSERT(extra != NULL);
338 	MAKEEXTRALIST(flag, nextra, extra);
339 	if (! nextra) {
340 		*dst = '\0';		/* can't create nextra, return "" */
341 		return 0;
342 	}
343 
344 	f = getvisfun(flag);
345 	for (start = dst; len > 0; len--) {
346 		c = *src++;
347 		dst = (*f)(dst, c, flag, len > 1 ? *src : '\0', nextra);
348 	}
349 	free(nextra);
350 	*dst = '\0';
351 	return (int)(dst - start);
352 }
353 #endif
354 
355 #if !HAVE_VIS
356 /*
357  * vis - visually encode characters
358  */
359 char *
360 vis(char *dst, int c, int flag, int nextc)
361 {
362 	char *extra = NULL;
363 	unsigned char uc = (unsigned char)c;
364 	visfun_t f;
365 
366 	_DIAGASSERT(dst != NULL);
367 
368 	MAKEEXTRALIST(flag, extra, "");
369 	if (! extra) {
370 		*dst = '\0';		/* can't create extra, return "" */
371 		return dst;
372 	}
373 	f = getvisfun(flag);
374 	dst = (*f)(dst, uc, flag, nextc, extra);
375 	free(extra);
376 	*dst = '\0';
377 	return dst;
378 }
379 
380 
/*
 * strvis, strvisx - visually encode characters from src into dst
 *
 *	Dst must be 4 times the size of src to account for possible
 *	expansion.  The length of dst, not including the trailing NUL,
 *	is returned.
 *
 *	Strvisx encodes exactly len bytes from src into dst.
 *	This is useful for encoding a block of data.
 */
int
strvis(char *dst, const char *src, int flag)
{
	/*
	 * strsvis() itself appends the flag-dependent characters (space,
	 * tab, newline, backslash) to the list it is given, so an empty
	 * list is all that is needed here.  Building the same list first
	 * would only add a second allocation that can fail.
	 */
	return strsvis(dst, src, flag, "");
}
406 
407 
/*
 * strvisx - visually encode exactly len bytes from src into dst.
 * Returns the length of dst, not including the trailing NUL.
 */
int
strvisx(char *dst, const char *src, size_t len, int flag)
{
	/*
	 * strsvisx() itself appends the flag-dependent characters (space,
	 * tab, newline, backslash) to the list it is given, so an empty
	 * list is all that is needed here.  Building the same list first
	 * would only add a second allocation that can fail.
	 */
	return strsvisx(dst, src, len, flag, "");
}
423 #endif
424