1 /*	$NetBSD: printable.c,v 1.3 2020/03/18 19:05:22 christos Exp $	*/
2 
3 /*++
4 /* NAME
5 /*	printable 3
6 /* SUMMARY
7 /*	mask non-printable characters
8 /* SYNOPSIS
9 /*	#include <stringops.h>
10 /*
11 /*	int	util_utf8_enable;
12 /*
13 /*	char	*printable(buffer, replacement)
14 /*	char	*buffer;
15 /*	int	replacement;
16 /*
17 /*	char	*printable_except(buffer, replacement, except)
18 /*	char	*buffer;
19 /*	int	replacement;
20 /*	const char *except;
21 /* DESCRIPTION
22 /*	printable() replaces non-printable characters
23 /*	in its input with the given replacement.
24 /*
25 /*	util_utf8_enable controls whether UTF8 is considered printable.
26 /*	With util_utf8_enable equal to zero, non-ASCII text is replaced.
27 /*
28 /*	Arguments:
29 /* .IP buffer
30 /*	The null-terminated input string.
31 /* .IP replacement
32 /*	Replacement value for characters in \fIbuffer\fR that do not
33 /*	pass the ASCII isprint(3) test or that are not valid UTF8.
34 /* .IP except
35 /*	Null-terminated sequence of non-replaced ASCII characters.
36 /* LICENSE
37 /* .ad
38 /* .fi
39 /*	The Secure Mailer license must be distributed with this software.
40 /* AUTHOR(S)
41 /*	Wietse Venema
42 /*	IBM T.J. Watson Research
43 /*	P.O. Box 704
44 /*	Yorktown Heights, NY 10598, USA
45 /*
46 /*	Wietse Venema
47 /*	Google, Inc.
48 /*	111 8th Avenue
49 /*	New York, NY 10011, USA
50 /*--*/
51 
52 /* System library. */
53 
54 #include "sys_defs.h"
55 #include <ctype.h>
56 #include <string.h>
57 
58 /* Utility library. */
59 
60 #include "stringops.h"
61 
/*
 * Run-time switch consulted by printable_except(): when non-zero, UTF-8
 * multi-byte sequences are passed through; when zero (the default), every
 * non-ASCII byte is replaced.
 */
int util_utf8_enable = 0;
63 
/*
 * printable - binary-compatibility entry point
 *
 * Masks non-printable characters in place, with no exempted characters. This
 * is simply printable_except() called with a null exception list.
 *
 * NOTE(review): the #undef below suggests that stringops.h provides a
 * printable() macro and that this real function is kept for already-compiled
 * callers - confirm against the header.
 */

#undef printable

char   *printable(char *, int);

char   *printable(char *string, int replacement)
{
    const char *no_exceptions = 0;

    /* The result is the caller's own buffer, modified in place. */
    return (printable_except(string, replacement, no_exceptions));
}
74 
75 /* printable_except -  pass through printable or other preserved characters */
76 
printable_except(char * string,int replacement,const char * except)77 char   *printable_except(char *string, int replacement, const char *except)
78 {
79     unsigned char *cp;
80     int     ch;
81 
82     /*
83      * XXX Replace invalid UTF8 sequences (too short, over-long encodings,
84      * out-of-range code points, etc). See valid_utf8_string.c.
85      */
86     cp = (unsigned char *) string;
87     while ((ch = *cp) != 0) {
88 	if (ISASCII(ch) && (ISPRINT(ch) || (except && strchr(except, ch)))) {
89 	    /* ok */
90 	} else if (util_utf8_enable && ch >= 194 && ch <= 254
91 		   && cp[1] >= 128 && cp[1] < 192) {
92 	    /* UTF8; skip the rest of the bytes in the character. */
93 	    while (cp[1] >= 128 && cp[1] < 192)
94 		cp++;
95 	} else {
96 	    /* Not ASCII and not UTF8. */
97 	    *cp = replacement;
98 	}
99 	cp++;
100     }
101     return (string);
102 }
103