1 /*++
2 /* NAME
3 /*	printable 3
4 /* SUMMARY
5 /*	mask non-printable characters
6 /* SYNOPSIS
7 /*	#include <stringops.h>
8 /*
9 /*	int	util_utf8_enable;
10 /*
11 /*	char	*printable(buffer, replacement)
12 /*	char	*buffer;
13 /*	int	replacement;
14 /*
15 /*	char	*printable_except(buffer, replacement, except)
16 /*	char	*buffer;
17 /*	int	replacement;
18 /*	const char *except;
19 /* DESCRIPTION
20 /*	printable() replaces non-printable characters
21 /*	in its input with the given replacement.
22 /*
23 /*	util_utf8_enable controls whether UTF8 is considered printable.
24 /*	With util_utf8_enable equal to zero, non-ASCII text is replaced.
25 /*
26 /*	Arguments:
27 /* .IP buffer
28 /*	The null-terminated input string.
29 /* .IP replacement
30 /*	Replacement value for characters in \fIbuffer\fR that do not
31 /*	pass the ASCII isprint(3) test or that are not valid UTF8.
32 /* .IP except
33 /*	Null-terminated sequence of non-replaced ASCII characters.
34 /* LICENSE
35 /* .ad
36 /* .fi
37 /*	The Secure Mailer license must be distributed with this software.
38 /* AUTHOR(S)
39 /*	Wietse Venema
40 /*	IBM T.J. Watson Research
41 /*	P.O. Box 704
42 /*	Yorktown Heights, NY 10598, USA
43 /*
44 /*	Wietse Venema
45 /*	Google, Inc.
46 /*	111 8th Avenue
47 /*	New York, NY 10011, USA
48 /*--*/
49 
50 /* System library. */
51 
52 #include "sys_defs.h"
53 #include <ctype.h>
54 #include <string.h>
55 
56 /* Utility library. */
57 
58 #include "stringops.h"
59 
/*
 * Run-time switch consulted by printable_except(): when non-zero, UTF8
 * multi-byte text is considered printable; when zero (the initial value),
 * all non-ASCII bytes are replaced.
 */
int util_utf8_enable = 0;
61 
/* printable - binary compatibility entry point, no exempted characters */

/* Remove any macro with this name, so that a real function is defined. */
#undef printable

char   *printable(char *string, int replacement);

char   *printable(char *string, int replacement)
{
    const char *no_exceptions = 0;

    /* Same job as printable_except(), with an empty exception list. */
    return (printable_except(string, replacement, no_exceptions));
}
72 
73 /* printable_except -  pass through printable or other preserved characters */
74 
printable_except(char * string,int replacement,const char * except)75 char   *printable_except(char *string, int replacement, const char *except)
76 {
77     unsigned char *cp;
78     int     ch;
79 
80     /*
81      * XXX Replace invalid UTF8 sequences (too short, over-long encodings,
82      * out-of-range code points, etc). See valid_utf8_string.c.
83      */
84     cp = (unsigned char *) string;
85     while ((ch = *cp) != 0) {
86 	if (ISASCII(ch) && (ISPRINT(ch) || (except && strchr(except, ch)))) {
87 	    /* ok */
88 	} else if (util_utf8_enable && ch >= 194 && ch <= 254
89 		   && cp[1] >= 128 && cp[1] < 192) {
90 	    /* UTF8; skip the rest of the bytes in the character. */
91 	    while (cp[1] >= 128 && cp[1] < 192)
92 		cp++;
93 	} else {
94 	    /* Not ASCII and not UTF8. */
95 	    *cp = replacement;
96 	}
97 	cp++;
98     }
99     return (string);
100 }
101