1 /* 2 3 4 W3C Sample Code Library libwww URI Management 5 6 7 ! 8 URI Management 9 ! 10 */ 11 12 /* 13 ** (c) COPYRIGHT MIT 1995. 14 ** Please first read the full copyright statement in the file COPYRIGH. 15 */ 16 17 /* 18 19 This module contains code to parse URIs and various related things such as: 20 21 o 22 Parse a URI for tokens 23 o 24 Canonicalization of URIs 25 o 26 Search a URI for illegal characters in order to prevent 27 security holes 28 29 30 This module is implemented by HTParse.c, and it is 31 a part of the W3C Sample Code 32 Library. 33 */ 34 35 #ifndef HTPARSE_H 36 #define HTPARSE_H 37 38 #include "HTEscape.h" 39 40 #ifdef __cplusplus 41 extern "C" { 42 #endif 43 44 /* 45 . 46 Parsing URIs 47 . 48 49 These functions can be used to get information in a URI. 50 ( 51 Parse a URI relative to another URI 52 ) 53 54 This returns those parts of a name which are given (and requested) substituting 55 bits from the related name where necessary. The aName argument 56 is the (possibly relative) URI to be parsed, the relatedName 57 is the URI which the aName is to be parsed relative to. Passing 58 an empty string means that the aName is an absolute URI. The 59 following are flag bits which may be OR'ed together to form a number to give 60 the 'wanted' argument to HTParse. As an example we have the URL: 61 "/TheProject.html#news" 62 */ 63 64 #define PARSE_ACCESS 16 /* Access scheme, e.g. "HTTP" */ 65 #define PARSE_HOST 8 /* Host name, e.g. "www.w3.org" */ 66 #define PARSE_PATH 4 /* URL Path, e.g. "pub/WWW/TheProject.html" */ 67 68 #define PARSE_VIEW 2 /* Fragment identifier, e.g. "news" */ 69 #define PARSE_FRAGMENT PARSE_VIEW 70 #define PARSE_ANCHOR PARSE_VIEW 71 72 #define PARSE_PUNCTUATION 1 /* Include delimiters, e.g, "/" and ":" */ 73 #define PARSE_ALL 31 74 75 /* 76 77 where the format of a URI is as follows: "ACCESS :// HOST / PATH # 78 ANCHOR" 79 80 PUNCTUATION means any delimiter like '/', ':', '#' between the 81 tokens above. The string returned by the function must be freed by the caller. 82 */ 83 84 extern char * HTParse (const char * aName, const char * relatedName, 85 int wanted); 86 87 /* 88 ( 89 Create a Relative (Partial) URI 90 ) 91 92 This function creates and returns a string which gives an expression of one 93 address as related to another. Where there is no relation, an absolute address 94 is retured. 95 96 97 On entry, 98 99 Both names must be absolute, fully qualified names of nodes (no anchor bits) 100 101 On exit, 102 103 The return result points to a newly allocated name which, if parsed by HTParse 104 relative to relatedName, will yield aName. The caller is responsible for 105 freeing the resulting name later. 106 107 */ 108 109 extern char * HTRelative (const char * aName, const char *relatedName); 110 111 /* 112 . 113 Is a URL Relative or Absolute? 114 . 115 116 Search the URL and determine whether it is a relative or absolute URL. We 117 check to see if there is a ":" before any "/", "?", and "#". If this is the 118 case then we say it is absolute. Otherwise we say it is relative. 119 */ 120 121 extern BOOL HTURL_isAbsolute (const char * url); 122 123 /* 124 . 125 URL Canonicalization 126 . 127 128 Canonicalization of URIs is a difficult job, but it saves a lot of down loads 129 and double entries in the cache if we do a good job. A URI is allowed to 130 contain the seqeunce xxx/../ which may be replaced by "" , and the seqeunce 131 "/./" which may be replaced by "/". Simplification helps us recognize duplicate 132 URIs. Thus, the following transformations are done: 133 134 o 135 /etc/junk/../fred becomes /etc/fred 136 o 137 /etc/junk/./fred becomes /etc/junk/fred 138 139 140 but we should NOT change 141 142 o 143 http://fred.xxx.edu/../.. or 144 o 145 ../../albert.html 146 147 148 In the same manner, the following prefixed are preserved: 149 150 o 151 ./<etc> 152 o 153 //<etc> 154 155 156 In order to avoid empty URIs the following URIs become: 157 158 o 159 /fred/.. becomes /fred/.. 160 o 161 /fred/././.. becomes /fred/.. 162 o 163 /fred/.././junk/.././ becomes /fred/.. 164 165 166 If more than one set of `://' is found (several proxies in cascade) then 167 only the part after the last `://' is simplified. 168 */ 169 170 extern char *HTSimplify (char **filename); 171 172 /* 173 . 174 Prevent Security Holes 175 . 176 177 In many telnet like protocols, it can be very dangerous to allow a full ASCII 178 character set to be in a URI. Therefore we have to strip them out. 179 HTCleanTelnetString() makes sure that the given string doesn't 180 contain characters that could cause security holes, such as newlines in ftp, 181 gopher, news or telnet URLs; more specifically: allows everything between 182 hexadesimal ASCII 20-7E, and also A0-FE, inclusive. 183 184 185 str 186 187 the string that is *modified* if necessary. The string will be truncated 188 at the first illegal character that is encountered. 189 190 returns 191 192 YES, if the string was modified. NO, otherwise. 193 194 */ 195 196 extern BOOL HTCleanTelnetString (char * str); 197 198 /* 199 */ 200 201 #ifdef __cplusplus 202 } 203 #endif 204 205 #endif /* HTPARSE_H */ 206 207 /* 208 209 210 211 @(#) $Id$ 212 213 */ 214