1 /*
2 
3 
4   					W3C Sample Code Library libwww URI Management
5 
6 
7 !
8   URI Management
9 !
10 */
11 
12 /*
13 **	(c) COPYRIGHT MIT 1995.
14 **	Please first read the full copyright statement in the file COPYRIGH.
15 */
16 
17 /*
18 
19 This module contains code to parse URIs and various related things such as:
20 
21 	   o
22 	     Parse a URI for tokens
23   o
24 	     Canonicalization of URIs
25   o
26 	     Search a URI for illegal characters in order to prevent
27     security holes
28 
29 
30 This module is implemented by HTParse.c, and it is
31 a part of the  W3C Sample Code
32 Library.
33 */
34 
35 #ifndef HTPARSE_H
36 #define HTPARSE_H
37 
38 #include "HTEscape.h"
39 
40 #ifdef __cplusplus
41 extern "C" {
42 #endif
43 
44 /*
45 .
46   Parsing URIs
47 .
48 
49 These functions can be used to get information in a URI.
50 (
51   Parse a URI relative to another URI
52 )
53 
54 This returns those parts of a name which are given (and requested) substituting
55 bits from the related name where necessary. The aName argument
56 is the (possibly relative) URI to be parsed, the relatedName
57 is the URI which the aName is to be parsed relative to. Passing
58 an empty string means that the aName is an absolute URI. The
59 following are flag bits which may be OR'ed together to form a number to give
60 the 'wanted' argument to HTParse. As an example we have the URL:
61 "http://www.w3.org/pub/WWW/TheProject.html#news"
62 */
63 
64 #define PARSE_ACCESS		16	/* Access scheme, e.g. "HTTP" */
65 #define PARSE_HOST		 8	/* Host name, e.g. "www.w3.org" */
66 #define PARSE_PATH		 4	/* URL Path, e.g. "pub/WWW/TheProject.html" */
67 
68 #define PARSE_VIEW               2      /* Fragment identifier, e.g. "news" */
69 #define PARSE_FRAGMENT           PARSE_VIEW	/* Alias for PARSE_VIEW */
70 #define PARSE_ANCHOR		 PARSE_VIEW	/* Alias for PARSE_VIEW */
71 
72 #define PARSE_PUNCTUATION	 1	/* Include delimiters, e.g. "/" and ":" */
73 #define PARSE_ALL		31	/* All of the flag bits above OR'ed together (16|8|4|2|1) */
74 
75 /*
76 
77 where the format of a URI is as follows: "ACCESS :// HOST / PATH #
78 ANCHOR"
79 
80 PUNCTUATION means any delimiter like '/', ':', '#' between the
81 tokens above. The string returned by the function must be freed by the caller.
82 */
83 
84 extern char * HTParse  (const char * aName, const char * relatedName,
85 			int wanted);	/* wanted: PARSE_* bits OR'ed together; the caller must free the returned string */
86 
87 /*
88 (
89   Create a Relative (Partial) URI
90 )
91 
92 This function creates and returns a string which gives an expression of one
93 address as related to another. Where there is no relation, an absolute address
94 is returned.
95 
96 
97     On entry,
98 
99     Both names must be absolute, fully qualified names of nodes (no anchor bits)
100 
101     On exit,
102 
103     The return result points to a newly allocated name which, if parsed by HTParse
104     relative to relatedName, will yield aName. The caller is responsible for
105     freeing the resulting name later.
106 
107 */
108 
109 extern char * HTRelative (const char * aName, const char *relatedName);	/* returns a newly allocated string; the caller must free it */
110 
111 /*
112 .
113   Is a URL Relative or Absolute?
114 .
115 
116 Search the URL and determine whether it is a relative or absolute URL. We
117 check to see if there is a ":" before any "/", "?", and "#". If this is the
118 case then we say it is absolute. Otherwise we say it is relative.
119 */
120 
121 extern BOOL HTURL_isAbsolute (const char * url);	/* absolute means a ":" occurs before any "/", "?" or "#" in url */
122 
123 /*
124 .
125   URL Canonicalization
126 .
127 
128 Canonicalization of URIs is a difficult job, but it saves a lot of downloads
129 and double entries in the cache if we do a good job. A URI is allowed to
130 contain the sequence "xxx/../" which may be replaced by "", and the sequence
131 "/./" which may be replaced by "/". Simplification helps us recognize duplicate
132 URIs. Thus, the following transformations are done:
133 
134 	   o
135 	     /etc/junk/../fred becomes /etc/fred
136 	   o
137 	     /etc/junk/./fred becomes /etc/junk/fred
138 
139 
140 but we should NOT change
141 
142 	   o
143 	     http://fred.xxx.edu/../.. or
144 	   o
145 	     ../../albert.html
146 
147 
148 In the same manner, the following prefixes are preserved:
149 
150 	   o
151 	     ./<etc>
152 	   o
153 	     //<etc>
154 
155 
156 In order to avoid empty URIs the following URIs become:
157 
158 	   o
159 	     /fred/.. becomes /fred/..
160 	   o
161 	     /fred/././.. becomes /fred/..
162 	   o
163 	     /fred/.././junk/.././ becomes /fred/..
164 
165 
166 If more than one set of `://' is found (several proxies in cascade) then
167 only the part after the last `://' is simplified.
168 */
169 
170 extern char *HTSimplify (char **filename);	/* NOTE(review): the return value and whether *filename is rewritten in place are not stated in this header -- confirm against HTParse.c */
171 
172 /*
173 .
174   Prevent Security Holes
175 .
176 
177 In many telnet-like protocols, it can be very dangerous to allow a full ASCII
178 character set to be in a URI. Therefore we have to strip the dangerous
179 characters out. HTCleanTelnetString() makes sure that the given string doesn't
180 contain characters that could cause security holes, such as newlines in ftp,
181 gopher, news or telnet URLs; more specifically, it allows everything between
182 hexadecimal ASCII 20-7E, and also A0-FE, inclusive.
183 
184 
185     str
186 
187     the string that is *modified* if necessary. The string will be truncated
188     at the first illegal character that is encountered.
189 
190     returns
191 
192     YES, if the string was modified. NO, otherwise.
193 
194 */
195 
196 extern BOOL HTCleanTelnetString (char * str);	/* truncates str at the first illegal character; YES if str was modified, NO otherwise */
197 
198 /*
199 */
200 
201 #ifdef __cplusplus
202 }
203 #endif
204 
205 #endif	/* HTPARSE_H */
206 
207 /*
208 
209 
210 
211   @(#) $Id$
212 
213 */
214