xref: /dragonfly/lib/libc/gen/vis.3 (revision ef2b2b9d)
1.\"	$NetBSD: vis.3,v 1.49 2017/08/05 20:22:29 wiz Exp $
2.\"
3.\" Copyright (c) 1989, 1991, 1993
4.\"	The Regents of the University of California.  All rights reserved.
5.\"
6.\" Redistribution and use in source and binary forms, with or without
7.\" modification, are permitted provided that the following conditions
8.\" are met:
9.\" 1. Redistributions of source code must retain the above copyright
10.\"    notice, this list of conditions and the following disclaimer.
11.\" 2. Redistributions in binary form must reproduce the above copyright
12.\"    notice, this list of conditions and the following disclaimer in the
13.\"    documentation and/or other materials provided with the distribution.
14.\" 3. Neither the name of the University nor the names of its contributors
15.\"    may be used to endorse or promote products derived from this software
16.\"    without specific prior written permission.
17.\"
18.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
19.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21.\" ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
22.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28.\" SUCH DAMAGE.
29.\"
30.\"     @(#)vis.3	8.1 (Berkeley) 6/9/93
31.\"
32.Dd June 10, 2018
33.Dt VIS 3
34.Os
35.Sh NAME
36.Nm vis ,
37.Nm nvis ,
38.Nm strvis ,
39.Nm stravis ,
40.Nm strnvis ,
41.Nm strvisx ,
42.Nm strnvisx ,
43.Nm strenvisx ,
44.Nm svis ,
45.Nm snvis ,
46.Nm strsvis ,
47.Nm strsnvis ,
48.Nm strsvisx ,
49.Nm strsnvisx ,
50.Nm strsenvisx
51.Nd visually encode characters
52.Sh LIBRARY
53.Lb libc
54.Sh SYNOPSIS
55.In vis.h
56.Ft char *
57.Fn vis "char *dst" "int c" "int flag" "int nextc"
58.Ft char *
59.Fn nvis "char *dst" "size_t dlen" "int c" "int flag" "int nextc"
60.Ft int
61.Fn strvis "char *dst" "const char *src" "int flag"
62.Ft int
63.Fn stravis "char **dst" "const char *src" "int flag"
64.Ft int
65.Fn strnvis "char *dst" "size_t dlen" "const char *src" "int flag"
66.Ft int
67.Fn strvisx "char *dst" "const char *src" "size_t len" "int flag"
68.Ft int
69.Fn strnvisx "char *dst" "size_t dlen" "const char *src" "size_t len" "int flag"
70.Ft int
71.Fn strenvisx "char *dst" "size_t dlen" "const char *src" "size_t len" "int flag" "int *cerr_ptr"
72.Ft char *
73.Fn svis "char *dst" "int c" "int flag" "int nextc" "const char *extra"
74.Ft char *
75.Fn snvis "char *dst" "size_t dlen" "int c" "int flag" "int nextc" "const char *extra"
76.Ft int
77.Fn strsvis "char *dst" "const char *src" "int flag" "const char *extra"
78.Ft int
79.Fn strsnvis "char *dst" "size_t dlen" "const char *src" "int flag" "const char *extra"
80.Ft int
81.Fn strsvisx "char *dst" "const char *src" "size_t len" "int flag" "const char *extra"
82.Ft int
83.Fn strsnvisx "char *dst" "size_t dlen" "const char *src" "size_t len" "int flag" "const char *extra"
84.Ft int
85.Fn strsenvisx "char *dst" "size_t dlen" "const char *src" "size_t len" "int flag" "const char *extra" "int *cerr_ptr"
86.Sh DESCRIPTION
87The
88.Fn vis
89function
90copies into
91.Fa dst
92a string which represents the character
93.Fa c .
94If
95.Fa c
96needs no encoding, it is copied in unaltered.
97The string is null terminated, and a pointer to the end of the string is
98returned.
99The maximum length of any encoding is four
100bytes (not including the trailing
101.Dv NUL ) ;
102thus, when
103encoding a set of characters into a buffer, the size of the buffer should
104be four times the number of bytes encoded, plus one for the trailing
105.Dv NUL .
106The
107.Fa flag
108parameter is used for altering the default range of
109characters considered for encoding and for altering the visual
110representation.
111The additional character,
112.Fa nextc ,
113is only used when selecting the
114.Dv VIS_CSTYLE
115encoding format (explained below).
116.Pp
117The
118.Fn strvis ,
119.Fn stravis ,
120.Fn strnvis ,
121.Fn strvisx ,
122and
123.Fn strnvisx
124functions copy into
125.Fa dst
126a visual representation of
127the string
128.Fa src .
129The
130.Fn strvis
131and
132.Fn strnvis
133functions encode characters from
134.Fa src
135up to the
136first
137.Dv NUL .
138The
139.Fn strvisx
140and
141.Fn strnvisx
142functions encode exactly
143.Fa len
144characters from
145.Fa src
146(this
147is useful for encoding a block of data that may contain
148.Dv NUL Ns 's ) .
149Both forms
150.Dv NUL
151terminate
152.Fa dst .
153The size of
154.Fa dst
155must be four times the number
156of bytes encoded from
157.Fa src
158(plus one for the
159.Dv NUL ) .
160Both
161forms return the number of characters in
162.Fa dst
163(not including the trailing
164.Dv NUL ) .
165The
166.Fn stravis
167function allocates space dynamically to hold the string.
168The
169.Dq Nm n
170versions of the functions also take an additional argument
171.Fa dlen
172that indicates the length of the
173.Fa dst
174buffer.
175If
176.Fa dlen
177is not large enough to fit the converted string then the
178.Fn strnvis
179and
180.Fn strnvisx
181functions return \-1 and set
182.Va errno
183to
184.Er ENOSPC .
185The
186.Fn strenvisx
187function takes an additional argument,
188.Fa cerr_ptr ,
189that is used to pass in and out a multibyte conversion error flag.
190This is useful when processing single characters at a time when
191it is possible that the locale may be set to something other
192than the locale of the characters in the input data.
193.Pp
194The functions
195.Fn svis ,
196.Fn snvis ,
197.Fn strsvis ,
198.Fn strsnvis ,
199.Fn strsvisx ,
200.Fn strsnvisx ,
201and
202.Fn strsenvisx
203correspond to
204.Fn vis ,
205.Fn nvis ,
206.Fn strvis ,
207.Fn strnvis ,
208.Fn strvisx ,
209.Fn strnvisx ,
210and
211.Fn strenvisx
212but have an additional argument
213.Fa extra ,
214pointing to a
215.Dv NUL
216terminated list of characters.
217These characters will be copied encoded or backslash-escaped into
218.Fa dst .
219These functions are useful e.g. to remove the special meaning
220of certain characters to shells.
221.Pp
222The encoding is a unique, invertible representation composed entirely of
223graphic characters; it can be decoded back into the original form using
224the
225.Xr unvis 3 ,
226.Xr strunvis 3
227or
228.Xr strnunvis 3
229functions.
230.Pp
231There are two parameters that can be controlled: the range of
232characters that are encoded (applies only to
233.Fn vis ,
234.Fn nvis ,
235.Fn strvis ,
236.Fn strnvis ,
237.Fn strvisx ,
238and
239.Fn strnvisx ) ,
240and the type of representation used.
241By default, all non-graphic characters,
242except space, tab, and newline are encoded (see
243.Xr isgraph 3 ) .
244The following flags
245alter this:
246.Bl -tag -width ".Dv VIS_HTTPSTYLE"
247.It Dv VIS_ALL
248Encode all characters, whether visible or not.
249.It Dv VIS_DQ
250Also encode double quotes.
251.It Dv VIS_GLOB
252Also encode the magic characters
253.Ql ( * ,
254.Ql \&? ,
255.Ql \&[ ,
256and
257.Ql # )
258recognized by
259.Xr glob 3 .
260.It Dv VIS_SHELL
261Also encode the meta characters used by shells (in addition to the glob
262characters):
263.Ql ( ' ,
264.Ql ` ,
265.Ql \&" ,
266.Ql \&; ,
267.Ql & ,
268.Ql < ,
269.Ql > ,
270.Ql \&( ,
271.Ql \&) ,
272.Ql \&| ,
273.Ql \&] ,
274.Ql \e ,
275.Ql $ ,
276.Ql \&! ,
277.Ql \&^ ,
278and
279.Ql ~ ) .
280.It Dv VIS_SP
281Also encode space.
282.It Dv VIS_TAB
283Also encode tab.
284.It Dv VIS_NL
285Also encode newline.
286.It Dv VIS_WHITE
287Synonym for
288.Dv VIS_SP | VIS_TAB | VIS_NL .
289.It Dv VIS_META
290Synonym for
291.Dv VIS_WHITE | VIS_GLOB | VIS_SHELL .
292.It Dv VIS_SAFE
293Only encode
294.Dq unsafe
295characters.
296Unsafe means control characters which may cause common terminals to perform
297unexpected functions.
298Currently this form allows space, tab, newline, backspace, bell, and
299return \(em in addition to all graphic characters \(em unencoded.
300.El
301.Pp
302(The above flags have no effect for
303.Fn svis ,
304.Fn snvis ,
305.Fn strsvis ,
306.Fn strsnvis ,
307.Fn strsvisx ,
308and
309.Fn strsnvisx .
310When using these functions, place all graphic characters to be
311encoded in an array pointed to by
312.Fa extra .
313In general, the backslash character should be included in this array, see the
314warning on the use of the
315.Dv VIS_NOSLASH
316flag below).
317.Pp
318There are six forms of encoding.
319All forms use the backslash character
320.Ql \e
321to introduce a special
322sequence; two backslashes are used to represent a real backslash,
323except
324.Dv VIS_HTTPSTYLE
325that uses
326.Ql % ,
327or
328.Dv VIS_MIMESTYLE
329that uses
330.Ql = .
331These are the visual formats:
332.Bl -tag -width ".Dv VIS_HTTPSTYLE"
333.It (default)
334Use an
335.Ql M
336to represent meta characters (characters with the 8th
337bit set), and use caret
338.Ql ^
339to represent control characters (see
340.Xr iscntrl 3 ) .
341The following formats are used:
342.Bl -tag -width xxxxx
343.It Dv \e^C
344Represents the control character
345.Ql C .
346Spans characters
347.Ql \e000
348through
349.Ql \e037 ,
350and
351.Ql \e177
352(as
353.Ql \e^? ) .
354.It Dv \eM-C
355Represents character
356.Ql C
357with the 8th bit set.
358Spans characters
359.Ql \e241
360through
361.Ql \e376 .
362.It Dv \eM^C
363Represents control character
364.Ql C
365with the 8th bit set.
366Spans characters
367.Ql \e200
368through
369.Ql \e237 ,
370and
371.Ql \e377
372(as
373.Ql \eM^? ) .
374.It Dv \e040
375Represents
376.Tn ASCII
377space.
378.It Dv \e240
379Represents Meta-space.
380.El
381.It Dv VIS_CSTYLE
382Use C-style backslash sequences to represent standard non-printable
383characters.
384The following sequences are used to represent the indicated characters:
385.Pp
386.Bl -tag -width ".Li \e0" -offset indent -compact
387.It Li \ea
388.Dv BEL No (007)
389.It Li \eb
390.Dv BS No (010)
391.It Li \ef
392.Dv NP No (014)
393.It Li \en
394.Dv NL No (012)
395.It Li \er
396.Dv CR No (015)
397.It Li \et
398.Dv HT No (011)
399.It Li \ev
400.Dv VT No (013)
401.It Li \e0
402.Dv NUL No (000)
403.El
404.Pp
405When using this format, the
406.Fa nextc
407parameter is looked at to determine if a
408.Dv NUL
409character can be encoded as
410.Ql \e0
411instead of
412.Ql \e000 .
413If
414.Fa nextc
415is an octal digit, the latter representation is used to
416avoid ambiguity.
417.Pp
418Non-printable characters without C-style
419backslash sequences use the default representation.
420.It Dv VIS_OCTAL
421Use a three digit octal sequence.
422The form is
423.Ql \eddd
424where
425.Em d
426represents an octal digit.
427.It Dv VIS_CSTYLE \&| Dv VIS_OCTAL
428Same as
429.Dv VIS_CSTYLE
430except that non-printable characters without C-style
431backslash sequences use a three digit octal sequence.
432.It Dv VIS_HTTPSTYLE
433Use URI encoding as described in RFC 1738.
434The form is
435.Ql %xx
436where
437.Em x
438represents a lower case hexadecimal digit.
439.It Dv VIS_MIMESTYLE
440Use MIME Quoted-Printable encoding as described in RFC 2045, except that
441lines are not broken and CRLF is not handled.
442The form is
443.Ql =XX
444where
445.Em X
446represents an upper case hexadecimal digit.
447.El
448.Pp
449There is one additional flag,
450.Dv VIS_NOSLASH ,
451which inhibits the
452doubling of backslashes and the backslash before the default
453format (that is, control characters are represented by
454.Ql ^C
455and
456meta characters as
457.Ql M-C ) .
458With this flag set, the encoding is
459ambiguous and non-invertible.
460.Sh MULTIBYTE CHARACTER SUPPORT
461These functions support multibyte character input.
462The encoding conversion is influenced by the setting of the
463.Ev LC_CTYPE
464environment variable which defines the set of characters
465that can be copied without encoding.
466.Pp
467If
468.Dv VIS_NOLOCALE
469is set, processing is done assuming the C locale and overriding
470any other environment settings.
471.Pp
472When 8-bit data is present in the input,
473.Ev LC_CTYPE
474must be set to the correct locale or to the C locale.
475If the locales of the data and the conversion are mismatched,
476multibyte character recognition may fail and encoding will be performed
477byte-by-byte instead.
478.Pp
479As noted above,
480.Fa dst
481must be four times the number of bytes processed from
482.Fa src .
483But note that each multibyte character can be up to
484.Dv MB_LEN_MAX
485bytes
486.\" (see
487.\" .Xr multibyte 3 )
488so in terms of multibyte characters,
489.Fa dst
490must be four times
491.Dv MB_LEN_MAX
492times the number of characters processed from
493.Fa src .
494.Sh ENVIRONMENT
495.Bl -tag -width ".Ev LC_CTYPE"
496.It Ev LC_CTYPE
497Specify the locale of the input data.
498Set to C if the input data locale is unknown.
499.El
500.Sh ERRORS
501The functions
502.Fn nvis
503and
504.Fn snvis
505will return
506.Dv NULL
507and the functions
508.Fn strnvis ,
509.Fn strnvisx ,
510.Fn strsnvis ,
511and
512.Fn strsnvisx
513will return \-1 when the
514.Fa dlen
515destination buffer size is not enough to perform the conversion while
516setting
517.Va errno
518to:
519.Bl -tag -width ".Bq Er ENOSPC"
520.It Bq Er ENOSPC
521The destination buffer size is not large enough to perform the conversion.
522.El
523.Sh SEE ALSO
524.Xr unvis 1 ,
525.Xr vis 1 ,
526.Xr glob 3 ,
527.\" .Xr multibyte 3 ,
528.Xr unvis 3
529.Rs
530.%A T. Berners-Lee
531.%T Uniform Resource Locators (URL)
532.%O "RFC 1738"
533.Re
534.Rs
535.%T "Multipurpose Internet Mail Extensions (MIME) Part One: Format of Internet Message Bodies"
536.%O "RFC 2045"
537.Re
538.Sh HISTORY
539The
540.Fn vis ,
541.Fn strvis ,
542and
543.Fn strvisx
544functions first appeared in
545.Bx 4.4 .
546The
547.Fn svis ,
548.Fn strsvis ,
549and
550.Fn strsvisx
551functions appeared in
552.Nx 1.5 .
553The buffer size limited versions of the functions
554.Po Fn nvis ,
555.Fn strnvis ,
556.Fn strnvisx ,
557.Fn snvis ,
558.Fn strsnvis ,
559and
560.Fn strsnvisx Pc
561appeared in
562.Nx 6.0
563and
564.Fx 9.2 .
565Multibyte character support was added in
566.Nx 7.0
567and
568.Fx 9.2 .
569The
570.Dv VIS_ALL
571flag first appeared in
572.Ox 4.9 .
573