xref: /freebsd/contrib/libc-vis/unvis.3 (revision 1d386b48)
1.\"	$NetBSD: unvis.3,v 1.30 2019/05/08 15:37:41 bad Exp $
2.\"
3.\" Copyright (c) 1989, 1991, 1993
4.\"	The Regents of the University of California.  All rights reserved.
5.\"
6.\" Redistribution and use in source and binary forms, with or without
7.\" modification, are permitted provided that the following conditions
8.\" are met:
9.\" 1. Redistributions of source code must retain the above copyright
10.\"    notice, this list of conditions and the following disclaimer.
11.\" 2. Redistributions in binary form must reproduce the above copyright
12.\"    notice, this list of conditions and the following disclaimer in the
13.\"    documentation and/or other materials provided with the distribution.
14.\" 3. Neither the name of the University nor the names of its contributors
15.\"    may be used to endorse or promote products derived from this software
16.\"    without specific prior written permission.
17.\"
18.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
19.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21.\" ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
22.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28.\" SUCH DAMAGE.
29.\"
30.\"     @(#)unvis.3	8.2 (Berkeley) 12/11/93
31.\"
32.Dd May 8, 2019
33.Dt UNVIS 3
34.Os
35.Sh NAME
36.Nm unvis ,
37.Nm strunvis ,
38.Nm strnunvis ,
39.Nm strunvisx ,
40.Nm strnunvisx
41.Nd decode a visual representation of characters
42.Sh LIBRARY
43.Lb libc
44.Sh SYNOPSIS
45.In vis.h
46.Ft int
47.Fn unvis "char *cp" "int c" "int *astate" "int flag"
48.Ft int
49.Fn strunvis "char *dst" "const char *src"
50.Ft int
51.Fn strnunvis "char *dst" "size_t dlen" "const char *src"
52.Ft int
53.Fn strunvisx "char *dst" "const char *src" "int flag"
54.Ft int
55.Fn strnunvisx "char *dst" "size_t dlen" "const char *src" "int flag"
56.Sh DESCRIPTION
57The
58.Fn unvis ,
59.Fn strunvis
60and
61.Fn strunvisx
62functions
63are used to decode a visual representation of characters, as produced
64by the
65.Xr vis 3
66function, back into
67the original form.
68.Pp
69The
70.Fn unvis
71function is called with successive characters in
72.Fa c
73until a valid sequence is recognized, at which time the decoded
74character is available at the character pointed to by
75.Fa cp .
76.Pp
77The
78.Fn strunvis
79function decodes the characters pointed to by
80.Fa src
81into the buffer pointed to by
82.Fa dst .
83The
84.Fn strunvis
85function simply copies
86.Fa src
87to
88.Fa dst ,
89decoding any escape sequences along the way,
90and returns the number of characters placed into
91.Fa dst ,
92or \-1 if an
93invalid escape sequence was detected.
94The size of
95.Fa dst
96should be equal to the size of
97.Fa src
98(that is, no expansion takes place during decoding).
99.Pp
100The
101.Fn strunvisx
102and
103.Fn strnunvisx
104functions do the same as the
105.Fn strunvis
106and
107.Fn strnunvis
108functions,
109but take a flag that specifies the style the string
110.Fa src
111is encoded with.
112The meaning of the flag is the same as explained below for
113.Fn unvis .
114.Pp
115The
116.Fn unvis
117function implements a state machine that can be used to decode an
118arbitrary stream of bytes.
119All state associated with the bytes being decoded is stored outside the
120.Fn unvis
121function (that is, a pointer to the state is passed in), so
122calls decoding different streams can be freely intermixed.
123To start decoding a stream of bytes, first initialize an integer to zero.
124Call
125.Fn unvis
126with each successive byte, along with a pointer
127to this integer, and a pointer to a destination character.
128The
129.Fn unvis
130function has several return codes that must be handled properly.
131They are:
132.Bl -tag -width UNVIS_VALIDPUSH
133.It Li \&0 No (zero)
134Another character is necessary; nothing has been recognized yet.
135.It Dv UNVIS_VALID
136A valid character has been recognized and is available at the location
137pointed to by
138.Fa cp .
139.It Dv UNVIS_VALIDPUSH
140A valid character has been recognized and is available at the location
141pointed to by
142.Fa cp ;
143however, the character currently passed in should be passed in again.
144.It Dv UNVIS_NOCHAR
145A valid sequence was detected, but no character was produced.
146This return code is necessary to indicate a logical break between characters.
147.It Dv UNVIS_SYNBAD
148An invalid escape sequence was detected, or the decoder is in an unknown state.
149The decoder is placed into the starting state.
150.El
151.Pp
152When all bytes in the stream have been processed, call
153.Fn unvis
154one more time with flag set to
155.Dv UNVIS_END
156to extract any remaining character (the character passed in is ignored).
157.Pp
158The
159.Fa flag
160argument is also used to specify the encoding style of the source.
161If set to
162.Dv VIS_NOESCAPE ,
163.Fn unvis
164will not decode backslash escapes.
165If set to
166.Dv VIS_HTTPSTYLE
167or
168.Dv VIS_HTTP1808 ,
169.Fn unvis
170will decode URI strings as specified in RFC 1808.
171If set to
172.Dv VIS_HTTP1866 ,
173.Fn unvis
174will decode entity references and numeric character references
175as specified in RFC 1866.
176If set to
177.Dv VIS_MIMESTYLE ,
178.Fn unvis
179will decode MIME Quoted-Printable strings as specified in RFC 2045.
180If set to
181.Dv VIS_NOESCAPE ,
182.Fn unvis
183will not decode
184.Ql \e
185quoted characters.
186.Pp
187The following code fragment illustrates a proper use of
188.Fn unvis .
189.Bd -literal -offset indent
190int ch, state = 0;
191char out;
192
193while ((ch = getchar()) != EOF) {
194again:
195	switch(unvis(&out, ch, &state, 0)) {
196	case 0:
197	case UNVIS_NOCHAR:
198		break;
199	case UNVIS_VALID:
200		(void)putchar(out);
201		break;
202	case UNVIS_VALIDPUSH:
203		(void)putchar(out);
204		goto again;
205	case UNVIS_SYNBAD:
206		errx(EXIT_FAILURE, "Bad character sequence!");
207	}
208}
209if (unvis(&out, '\e0', &state, UNVIS_END) == UNVIS_VALID)
210	(void)putchar(out);
211.Ed
212.Sh ERRORS
213The functions
214.Fn strunvis ,
215.Fn strnunvis ,
216.Fn strunvisx ,
217and
218.Fn strnunvisx
219will return \-1 on error and set
220.Va errno
221to:
222.Bl -tag -width Er
223.It Bq Er EINVAL
224An invalid escape sequence was detected, or the decoder is in an unknown state.
225.El
226.Pp
227In addition the functions
228.Fn strnunvis
229and
230.Fn strnunvisx
231can also set
232.Va errno
233on error to:
234.Bl -tag -width Er
235.It Bq Er ENOSPC
236Not enough space to perform the conversion.
237.El
238.Sh SEE ALSO
239.Xr unvis 1 ,
240.Xr vis 1 ,
241.Xr vis 3
242.Rs
243.%A R. Fielding
244.%T Relative Uniform Resource Locators
245.%O RFC1808
246.Re
247.Sh HISTORY
248The
249.Fn unvis
250function
251first appeared in
252.Bx 4.4 .
253The
254.Fn strnunvis
255and
256.Fn strnunvisx
257functions appeared in
258.Nx 6.0
259and
260.Fx 9.2 .
261.Sh BUGS
262The names
263.Dv VIS_HTTP1808
264and
265.Dv VIS_HTTP1866
266are wrong.
267Percent-encoding was defined in RFC 1738, the original RFC for URL.
268RFC 1866 defines HTML 2.0, an application of SGML, from which it
269inherits concepts of numeric character references and entity
270references.
271