1.\" $NetBSD: unvis.3,v 1.29 2017/10/24 19:14:55 abhinav Exp $ 2.\" 3.\" Copyright (c) 1989, 1991, 1993 4.\" The Regents of the University of California. All rights reserved. 5.\" 6.\" Redistribution and use in source and binary forms, with or without 7.\" modification, are permitted provided that the following conditions 8.\" are met: 9.\" 1. Redistributions of source code must retain the above copyright 10.\" notice, this list of conditions and the following disclaimer. 11.\" 2. Redistributions in binary form must reproduce the above copyright 12.\" notice, this list of conditions and the following disclaimer in the 13.\" documentation and/or other materials provided with the distribution. 14.\" 3. Neither the name of the University nor the names of its contributors 15.\" may be used to endorse or promote products derived from this software 16.\" without specific prior written permission. 17.\" 18.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 19.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 21.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 22.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 24.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 25.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 26.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 27.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 28.\" SUCH DAMAGE. 
29.\" 30.\" @(#)unvis.3 8.2 (Berkeley) 12/11/93 31.\" 32.Dd April 9, 2018 33.Dt UNVIS 3 34.Os 35.Sh NAME 36.Nm unvis , 37.Nm strunvis , 38.Nm strnunvis , 39.Nm strunvisx , 40.Nm strnunvisx 41.Nd decode a visual representation of characters 42.Sh LIBRARY 43.Lb libc 44.Sh SYNOPSIS 45.In vis.h 46.Ft int 47.Fn unvis "char *cp" "int c" "int *astate" "int flag" 48.Ft int 49.Fn strunvis "char *dst" "const char *src" 50.Ft int 51.Fn strnunvis "char *dst" "size_t dlen" "const char *src" 52.Ft int 53.Fn strunvisx "char *dst" "const char *src" "int flag" 54.Ft int 55.Fn strnunvisx "char *dst" "size_t dlen" "const char *src" "int flag" 56.Sh DESCRIPTION 57The 58.Fn unvis , 59.Fn strunvis 60and 61.Fn strunvisx 62functions 63are used to decode a visual representation of characters, as produced 64by the 65.Xr vis 3 66function, back into 67the original form. 68.Pp 69The 70.Fn unvis 71function is called with successive characters in 72.Ar c 73until a valid sequence is recognized, at which time the decoded 74character is available at the character pointed to by 75.Ar cp . 76.Pp 77The 78.Fn strunvis 79function decodes the characters pointed to by 80.Ar src 81into the buffer pointed to by 82.Ar dst . 83The 84.Fn strunvis 85function simply copies 86.Ar src 87to 88.Ar dst , 89decoding any escape sequences along the way, 90and returns the number of characters placed into 91.Ar dst , 92or \-1 if an 93invalid escape sequence was detected. 94The size of 95.Ar dst 96should be equal to the size of 97.Ar src 98(that is, no expansion takes place during decoding). 99.Pp 100The 101.Fn strunvisx 102function does the same as the 103.Fn strunvis 104function, 105but it allows you to add a flag that specifies the style the string 106.Ar src 107is encoded with. 108Currently, the supported flags are: 109.Dv VIS_HTTPSTYLE 110and 111.Dv VIS_MIMESTYLE . 112.Pp 113The 114.Fn unvis 115function implements a state machine that can be used to decode an 116arbitrary stream of bytes. 
117All state associated with the bytes being decoded is stored outside the 118.Fn unvis 119function (that is, a pointer to the state is passed in), so 120calls decoding different streams can be freely intermixed. 121To start decoding a stream of bytes, first initialize an integer to zero. 122Call 123.Fn unvis 124with each successive byte, along with a pointer 125to this integer, and a pointer to a destination character. 126The 127.Fn unvis 128function has several return codes that must be handled properly. 129They are: 130.Bl -tag -width ".Dv UNVIS_VALIDPUSH" 131.It Li \&0 No (zero) 132Another character is necessary; nothing has been recognized yet. 133.It Dv UNVIS_VALID 134A valid character has been recognized and is available at the location 135pointed to by 136.Fa cp . 137.It Dv UNVIS_VALIDPUSH 138A valid character has been recognized and is available at the location 139pointed to by 140.Fa cp ; 141however, the character currently passed in should be passed in again. 142.It Dv UNVIS_NOCHAR 143A valid sequence was detected, but no character was produced. 144This return code is necessary to indicate a logical break between characters. 145.It Dv UNVIS_SYNBAD 146An invalid escape sequence was detected, or the decoder is in an unknown state. 147The decoder is placed into the starting state. 148.El 149.Pp 150When all bytes in the stream have been processed, call 151.Fn unvis 152one more time with flag set to 153.Dv UNVIS_END 154to extract any remaining character (the character passed in is ignored). 155.Pp 156The 157.Fa flag 158argument is also used to specify the encoding style of the source. 159If set to 160.Dv VIS_HTTPSTYLE 161or 162.Dv VIS_HTTP1808 , 163.Fn unvis 164will decode URI strings as specified in RFC 1808. 165If set to 166.Dv VIS_HTTP1866 , 167.Fn unvis 168will decode entity references and numeric character references 169as specified in RFC 1866. 
170If set to 171.Dv VIS_MIMESTYLE , 172.Fn unvis 173will decode MIME Quoted-Printable strings as specified in RFC 2045. 174If set to 175.Dv VIS_NOESCAPE , 176.Fn unvis 177will not decode 178.Ql \e 179quoted characters. 180.Pp 181The following code fragment illustrates a proper use of 182.Fn unvis . 183.Bd -literal -offset indent 184int state = 0; 185char out; 186 187while ((ch = getchar()) != EOF) { 188again: 189 switch(unvis(&out, ch, &state, 0)) { 190 case 0: 191 case UNVIS_NOCHAR: 192 break; 193 case UNVIS_VALID: 194 (void)putchar(out); 195 break; 196 case UNVIS_VALIDPUSH: 197 (void)putchar(out); 198 goto again; 199 case UNVIS_SYNBAD: 200 errx(EXIT_FAILURE, "Bad character sequence!"); 201 } 202} 203if (unvis(&out, '\e0', &state, UNVIS_END) == UNVIS_VALID) 204 (void)putchar(out); 205.Ed 206.Sh ERRORS 207The functions 208.Fn strunvis , 209.Fn strnunvis , 210.Fn strunvisx , 211and 212.Fn strnunvisx 213will return \-1 on error and set 214.Va errno 215to: 216.Bl -tag -width Er 217.It Bq Er EINVAL 218An invalid escape sequence was detected, or the decoder is in an unknown state. 219.El 220.Pp 221In addition, the functions 222.Fn strnunvis 223and 224.Fn strnunvisx 225can also set 226.Va errno 227on error to: 228.Bl -tag -width Er 229.It Bq Er ENOSPC 230Not enough space to perform the conversion. 231.El 232.Sh SEE ALSO 233.Xr unvis 1 , 234.Xr vis 1 , 235.Xr vis 3 236.Rs 237.%A R. Fielding 238.%T Relative Uniform Resource Locators 239.%O RFC1808 240.Re 241.Sh HISTORY 242The 243.Fn unvis 244function 245first appeared in 246.Bx 4.4 . 247The 248.Fn strnunvis 249and 250.Fn strnunvisx 251functions appeared in 252.Nx 6.0 . 253.Sh BUGS 254The names 255.Dv VIS_HTTP1808 256and 257.Dv VIS_HTTP1866 258are wrong. 259Percent-encoding was defined in RFC 1738, the original RFC for URLs. 260RFC 1866 defines HTML 2.0, an application of SGML, from which it 261inherits concepts of numeric character references and entity 262references. 263