1 /*
2  * $LynxId: HTParse.h,v 1.26 2021/07/05 20:56:50 tom Exp $
3  *				HTParse:  URL parsing in the WWW Library
4  *				HTPARSE
5  *
6  *  This module of the WWW library contains code to parse URLs and various
7  *  related things.
8  *  Implemented by HTParse.c .
9  */
10 #ifndef HTPARSE_H
11 #define HTPARSE_H
12 
13 #ifndef HTUTILS_H
14 #include <HTUtils.h>
15 #endif
16 
17 #ifdef __cplusplus
18 extern "C" {
19 #endif
20 
/*
 *  Character-class tests for the RFC 3986 "unreserved", "gen-delims" and
 *  "sub-delims" sets.  Each macro yields nonzero when c is in the set.
 *
 *  strchr() also matches the terminator of the string it searches, so each
 *  macro rejects c == 0 explicitly; without that test a NUL would be
 *  reported as a member of the set.
 *
 *  The argument is evaluated more than once: do not pass an expression
 *  that has side effects.
 */
#define RFC_3986_UNRESERVED(c) ((c) != 0 && (isalnum(UCH(c)) || strchr("-._~", UCH(c)) != 0))
#define RFC_3986_GEN_DELIMS(c) ((c) != 0 && strchr(":/?#[]@", UCH(c)) != 0)
#define RFC_3986_SUB_DELIMS(c) ((c) != 0 && strchr("!$&'()*+,;=", UCH(c)) != 0)
24 
/*
 *  Bit flags selecting parts of a name.  OR them together to build the
 *  'wanted' argument of HTParse().
 */
#define PARSE_ACCESS            0x10
#define PARSE_HOST              0x08
#define PARSE_PATH              0x04
#define PARSE_ANCHOR            0x02
#define PARSE_PUNCTUATION       0x01
/* All five of the flags above. */
#define PARSE_ALL \
	(PARSE_ACCESS | PARSE_HOST | PARSE_PATH | PARSE_ANCHOR | PARSE_PUNCTUATION)
/* Everything in PARSE_ALL except the anchor bit. */
#define PARSE_ALL_WITHOUT_ANCHOR  (PARSE_ALL & ~PARSE_ANCHOR)
/*
 *  Extra flag bits giving finer detail on components that the basic
 *  PARSE_* flags already cover.  PARSE_PATH does not strictly mean the
 *  path component in the sense of the URI specs: it means the path
 *  combined with a possible query component. - kw
 */
#define PARSE_STRICTPATH        0x20
#define PARSE_QUERY             0x40
/*
 *  Valid mask values.  Each name is the corresponding BNF term from the
 *  URL document.
 */
#define URL_XALPHAS     UCH(0x01)
#define URL_XPALPHAS    UCH(0x02)
#define URL_PATH        UCH(0x04)
52 
#ifdef USE_IDN2
    /*
     * IDNA modes; declared only when USE_IDN2 is defined.
     * NOTE(review): the names suggest IDNA 2003, IDNA 2008, TR46 and a
     * compatibility mode -- confirm against the code that uses them.
     */
    typedef enum {
	LYidna2003 = 1,
	LYidna2008 = 2,
	LYidnaTR46 = 3,
	LYidnaCompat = 4
    } HTIdnaModes;

    /* NOTE(review): presumably holds one of the HTIdnaModes values -- confirm. */
    extern int LYidnaMode;
#endif
63 
/*	Trim white space from a string.				HTStrip()
 *	-------------------------------
 *
 * On entry,
 *	s		The string to trim; it is modified in place.
 *
 * On exit,
 *	returns		A pointer to the first non-white character, or to 0
 *			if there is none.
 *	Every trailing white-space character has been OVERWRITTEN with zero.
 */
    extern char *HTStrip(char *s);
72 
/*	Parse a port number.					HTParsePort()
 *	--------------------
 *
 * On entry,
 *	host		Points to a hostname which may be followed by a port.
 *	portp		Where the port number is to be stored.
 *
 * On exit,
 *	returns		A pointer to the ":" that precedes the port.
 *	*portp		Is set to the port number.
 *
 * NOTE(review): what is returned when host carries no port is not
 * described here; check HTParse.c before relying on it.
 */
    extern char *HTParsePort(char *host, int *portp);
85 
/*	Parse a name relative to another name.			HTParse()
 *	--------------------------------------
 *
 *	Returns the parts of a name that are both given and requested,
 *	substituting parts of the related name where necessary.
 *
 * On entry,
 *	aName		The name to parse (a filename given).
 *	relatedName	The name relative to which aName is to be parsed.
 *	wanted		A mask of the parts wanted: PARSE_* flags ORed together.
 *
 * On exit,
 *	returns		A pointer to a malloc'd string, which the caller
 *			MUST FREE.
 */
    extern char *HTParse(const char *aName, const char *relatedName, int wanted);
103 
/*	Fast specialization of HTParse().			HTParseAnchor()
 *	---------------------------------
 *
 * On entry,
 *	aName		The input string to examine.
 *
 * On exit,
 *	returns		A pointer within the input string itself (probably
 *			to its terminating '\0').
 */
    extern const char *HTParseAnchor(const char *aName);
111 
112 /*	Simplify a filename.				HTSimplify()
113  *	--------------------
114  *
115  *  A unix-style file is allowed to contain the sequence xxx/../ which may
116  *  be replaced by "" , and the sequence "/./" which may be replaced by "/".
117  *  Simplification helps us recognize duplicate filenames.
118  */
119     extern void HTSimplify(char *filename, BOOL absolute);
120 
/*	Make a relative name.					HTRelative()
 *	---------------------
 *
 *  Builds and returns a string that expresses one address in relation to
 *  another.  When the two are unrelated, an absolute address is returned.
 *
 * On entry,
 *	aName		An absolute, fully qualified node name (no anchor bits).
 *	relatedName	An absolute, fully qualified node name (no anchor bits).
 *
 * On exit,
 *	returns		A newly allocated name which, if parsed by HTParse
 *			relative to relatedName, will yield aName.  The
 *			caller is responsible for freeing it later.
 */
    extern char *HTRelative(const char *aName, const char *relatedName);
140 
/*	Escape undesirable characters using %.			HTEscape()
 *	--------------------------------------
 *
 *  Takes a string in which some characters may be unacceptable when left
 *  unescaped, and returns a string in which each of those characters is
 *  represented by a '%' character followed by two hex digits.
 *
 * On entry,
 *	str		The string to escape.
 *	mask		NOTE(review): presumably one of the URL_* mask values
 *			declared in this header -- confirm in HTParse.c.
 *
 * On exit,
 *	returns		A malloc'd string (unlike HTUnEscape(), which
 *			converts its argument in place).
 */
    extern char *HTEscape(const char *str, unsigned mask);
153 
/*	Escape unsafe characters using %.		HTEscapeUnsafe()
 *	---------------------------------
 *
 *  Takes a string in which some characters may be unsafe when left
 *  unescaped, and returns a string in which each of those characters is
 *  represented by a '%' character followed by two hex digits.
 *
 * On entry,
 *	str		The string to escape.
 *
 * On exit,
 *	returns		A malloc'd string (unlike HTUnEscape(), which
 *			converts its argument in place).
 */
    extern char *HTEscapeUnsafe(const char *str);
165 
/*	Escape undesirable characters using % but space to +.	HTEscapeSP()
 *	-----------------------------------------------------
 *
 *	This function takes a pointer to a string in which some
 *	characters may be unacceptable when left unescaped.
 *	It returns a string which has these characters
 *	represented by a '%' character followed by two hex digits,
 *	except that spaces are converted to '+' instead of %20.
 *
 * On entry,
 *	str		The string to escape.
 *	mask		NOTE(review): presumably one of the URL_* mask values
 *			declared in this header -- confirm in HTParse.c.
 *
 * On exit,
 *	returns		A malloc'd string (unlike HTUnEscape(), which
 *			converts its argument in place).
 */
    extern char *HTEscapeSP(const char *str,
			    unsigned mask);
179 
/*	Decode %xx escaped characters.				HTUnEscape()
 *	------------------------------
 *
 *  Takes a string in which some characters may have been encoded in %xy
 *  form, where xy is the ASCII hex code for character 16x+y.
 *
 * On entry,
 *	str		The string to decode.  It is converted in place,
 *			as it will never grow.
 *
 * NOTE(review): the meaning of the returned pointer is not described
 * here; check HTParse.c before relying on it.
 */
    extern char *HTUnEscape(char *str);
189 
/*	Decode some %xx escaped characters.		      HTUnEscapeSome()
 *	-----------------------------------			Klaus Weide
 *							    (kweide@tezcat.com)
 *  Takes a string in which some characters may have been encoded in %xy
 *  form, where xy is the ASCII hex code for character 16x+y, together
 *  with a second string naming the characters to decode.
 *
 * On entry,
 *	str		The string to decode.  It is converted in place,
 *			as it will never grow.
 *	do_trans	One or more characters which should be unescaped
 *			if they appear escaped in str.
 *
 * NOTE(review): the meaning of the returned pointer is not described
 * here; check HTParse.c before relying on it.
 */
    extern char *HTUnEscapeSome(char *str, const char *do_trans);
202 
/*	Make an RFC 822 word.				HTMake822Word()
 *	---------------------
 *
 *  Turns a string which is not an RFC 822 token into a quoted-string. - KW
 *
 * On entry,
 *	str		Address of the string pointer.  NOTE(review): the
 *			char ** type suggests *str may be replaced --
 *			confirm in HTParse.c.
 *	quoted		NOTE(review): not described in this header; see
 *			HTParse.c for its meaning.
 */
    extern void HTMake822Word(char **str, int quoted);
208 
209 #ifdef __cplusplus
210 }
211 #endif
212 #endif				/* HTPARSE_H */
213