1 #ifndef URI_UTIL_H 2 #define URI_UTIL_H 3 4 #include "net.h" 5 6 /* 7 * Generic URI parsing. 8 */ 9 10 enum uri_parse_flags { 11 /* Scheme part 'scheme:' is already parsed externally. */ 12 URI_PARSE_SCHEME_EXTERNAL = BIT(0), 13 /* Allow '#fragment' part in URI */ 14 URI_PARSE_ALLOW_FRAGMENT_PART = BIT(1), 15 }; 16 17 struct uri_host { 18 const char *name; 19 struct ip_addr ip; 20 }; 21 22 struct uri_authority { 23 /* encoded userinfo part; e.g. "user:pass" */ 24 const char *enc_userinfo; 25 26 struct uri_host host; 27 in_port_t port; /* 0 means no port specified */ 28 }; 29 30 struct uri_parser { 31 pool_t pool; 32 const char *error; 33 34 const unsigned char *begin, *cur, *end; 35 36 string_t *tmpbuf; 37 38 bool allow_pct_nul:1; 39 }; 40 41 /* parse one instance of percent encoding. Returns 1 for success, 42 0 if none is preset at the current parser position, and -1 in 43 case of error. The decoded character is returned in ch_r upon 44 success */ 45 int uri_parse_pct_encoded(struct uri_parser *parser, 46 unsigned char *ch_r); 47 48 /* parse characters as long as these comply with the the 'unreserved' 49 syntax. Returns 1 if characters were found, 0 if none were found, 50 and -1 if there was an error */ 51 int uri_parse_unreserved(struct uri_parser *parser, string_t *part); 52 /* the same as uri_parse_unreserved(), but the allowed characters are 53 extended to 'unreserved / pct-encoded', meaning that percent encoding 54 is allowed */ 55 int uri_parse_unreserved_pct(struct uri_parser *parser, string_t *part); 56 57 /* decode percent-encoded data from the 'data' parameter, up until the 58 'until' parameter. If the latter is NULL, data is decoded up until the 59 '\0' character. The decoded data is allocated on the parser pool and 60 returned in decoded_r. Any errors are written to the parser object. */ 61 bool uri_data_decode(struct uri_parser *parser, const char *data, 62 const char *until, const char **decoded_r) ATTR_NULL(3); 63 64 /* cut the 'scheme ":"' part from the URI. The uri_p pointer is updated to 65 point just past the ":". Returns 0 on success and -1 on error. The 66 result is returned in the scheme_r parameter. This can be NULL to use 67 this function for merely checking the presence of a valid scheme. */ 68 int uri_cut_scheme(const char **uri_p, const char **scheme_r) 69 ATTR_NULL(2); 70 71 /* parse the URI 'scheme ":"' part. Returns 1 if successful, 0 if the first 72 character is not valid for a scheme, and -1 in case of error. The 73 result parameter scheme_r can be NULL to use this function for merely 74 checking the presence of a valid scheme. */ 75 int uri_parse_scheme(struct uri_parser *parser, const char **scheme_r) 76 ATTR_NULL(2); 77 78 /* parse the URI 'reg-name' syntax. Returns 1 if successful, 0 if the first 79 character is not valid for a host name, and -1 in case of error. The 80 result parameter reg_name_r can be NULL to use this function for merely 81 checking the presence of a valid host name. The result is allocated from 82 the data stack. 83 */ 84 int uri_parse_reg_name(struct uri_parser *parser, 85 const char **reg_name_r) ATTR_NULL(2); 86 /* parse the URI 'reg-name' part as an Internet host name, which is a 87 sequence of domain name labels separated by '.', as defined in 88 Section 3.5 of RFC 1034 and Section 2.1 of RFC 1123. Returns 1 if 89 successful, 0 if the first character is not valid for a host name, 90 and -1 in case of error. The result parameter host_name_r can be NULL 91 to use this function for merely checking the presence of a valid host 92 name. The result is allocated from the data stack. 93 */ 94 int uri_parse_host_name(struct uri_parser *parser, 95 const char **host_name_r) ATTR_NULL(2); 96 /* parse the URI 'host' syntax, which is either an IP address literal or 97 a an Internet host name, as defined in Section 3.5 of RFC 1034 and 98 Section 2.1 of RFC 1123. An IP address literal is always allowed. 99 Returns 1 if successful, 0 if the first character is not valid for a 100 host name, and -1 in case of error. The provided host struct is filled 101 in with the parsed data, all allocated from the parser pool. The host 102 parameter can be NULL to use this function for merely checking for 103 valid 'host' syntax. 104 */ 105 int uri_parse_host(struct uri_parser *parser, 106 struct uri_host *host) ATTR_NULL(2); 107 108 /* parse the URI 'authority' syntax. Returns 1 if successful, 0 if the 109 first character is not valid for the 'authority' syntax and -1 in case 110 of error. The provided uri_authority struct is filled in with the parsed 111 data, all allocated from the parser pool. The auth parameter can be 112 NULL to use this function for merely checking for valid 'authority' 113 syntax. 114 */ 115 int uri_parse_authority(struct uri_parser *parser, 116 struct uri_authority *auth) ATTR_NULL(2); 117 /* identical to uri_parse_authority(), except that this function parses 118 '"//" authority', rather than 'authority'. 119 */ 120 int uri_parse_slashslash_authority(struct uri_parser *parser, 121 struct uri_authority *auth) ATTR_NULL(2); 122 /* identical to uri_parse_authority(), except that this function parses 123 the registered name ('reg-name' syntax) as an Internet host name, as 124 defined in Section 3.5 of RFC 1034 and Section 2.1 of RFC 1123. 125 */ 126 int uri_parse_host_authority(struct uri_parser *parser, 127 struct uri_authority *auth) ATTR_NULL(2); 128 /* identical to uri_parse_slashslash_authority(), except that this 129 function parses the registered name ('reg-name' syntax) as an Internet 130 host name, as defined in Section 3.5 of RFC 1034 and Section 2.1 of 131 RFC 1123. 132 */ 133 int uri_parse_slashslash_host_authority(struct uri_parser *parser, 134 struct uri_authority *auth) ATTR_NULL(2); 135 136 /* parse the URI 'segment' syntax. Returns 1 if successful, 0 if the first 137 character is not valid for the 'segment' syntax and -1 in case of 138 error. The result is allocated from the parser pool. Percent encoding is 139 not decoded in the result. The result parameter can be NULL to use this 140 function for merely checking for valid 'segment' syntax. 141 */ 142 int uri_parse_path_segment(struct uri_parser *parser, 143 const char **segment_r) ATTR_NULL(2); 144 /* parse the URI 'path' syntax. This also resolves '..' and '.' segments in 145 the path. If the path is relative, the relative_r parameter indicates 146 how many segments the base path must be moved towards root (as caused by 147 leading '..' segments). Returns 1 if successful, 0 if the first character 148 is not valid for the 'segment' syntax and -1 in case of error. The result 149 is a NULL-terminated string list allocated from the parser pool. Percent 150 encoding is not decoded in the result. The result parameter can be NULL 151 to use this function for merely checking for valid 'path' syntax. 152 */ 153 int uri_parse_path(struct uri_parser *parser, int *relative_r, 154 const char *const **path_r) ATTR_NULL(2,3); 155 156 /* parse the URI 'query' syntax. Returns 1 if successful, 0 if the first 157 character is not valid for the 'query' syntax and -1 in case of 158 error. The result is allocated from the parser pool. Percent encoding is 159 not decoded in the result. The result parameter can be NULL to use this 160 function for merely checking for valid 'query' syntax. 161 */ 162 int uri_parse_query(struct uri_parser *parser, 163 const char **query_r) ATTR_NULL(2); 164 /* parse the URI 'fragment' syntax. Returns 1 if successful, 0 if the first 165 character is not valid for the 'fragment' syntax and -1 in case of 166 error. The result is allocated from the parser pool. Percent encoding is 167 not decoded in the result. The result parameter can be NULL to use this 168 function for merely checking for valid 'fragment' syntax. 169 */ 170 int uri_parse_fragment(struct uri_parser *parser, 171 const char **fragment_r) ATTR_NULL(2); 172 173 /* initialize the URI parser with the provided data */ 174 void uri_parser_init_data(struct uri_parser *parser, 175 pool_t pool, const unsigned char *data, size_t size); 176 /* initialize the URI parser with the provided '\0'-terminated string */ 177 void uri_parser_init(struct uri_parser *parser, 178 pool_t pool, const char *uri); 179 180 /* returns the temporary buffer associated with this parser. Can be used 181 for higher-level parsing activities. */ 182 string_t *uri_parser_get_tmpbuf(struct uri_parser *parser, 183 size_t size); 184 185 /* Parse a generic (RFC3986) absolute URI for validity. 186 Returns 0 if valid and -1 otherwise. Note that some URI formats like 187 "sip", "aix" and "aaa" violate RFC3986 and will currently fail with 188 this function. 189 */ 190 int uri_parse_absolute_generic(struct uri_parser *parser, 191 enum uri_parse_flags flags); 192 193 /* 194 * Generic URI manipulation 195 */ 196 197 /* copy uri_host struct from src to dest and allocate it on pool */ 198 void uri_host_copy(pool_t pool, struct uri_host *dest, 199 const struct uri_host *src); 200 201 /* 202 * Generic URI validation 203 */ 204 205 /* Check whether the provided data is a valid absolute RFC3986 URI. 206 Returns 0 if valid and -1 otherwise. */ 207 int uri_check_data(const unsigned char *data, size_t size, 208 enum uri_parse_flags flags, const char **error_r); 209 /* Check whether the provided string is a valid absolute RFC3986 URI. 210 Returns 0 if valid and -1 otherwise. */ 211 int uri_check(const char *uri, enum uri_parse_flags, 212 const char **error_r); 213 214 /* 215 * Generic URI construction 216 */ 217 218 /* encodes the '\0'-terminated data using the percent encoding. The 219 esc_table is a 256 byte lookup table. If none of the esc_mask bits are 220 set at the character's position in the esc_table, a character needs 221 to be encoded. Also, when esc_extra contains a character, it needs to 222 be encoded. All other characters are copied verbatim to the out buffer. 223 */ 224 void uri_data_encode(string_t *out, 225 const unsigned char esc_table[256], 226 unsigned char esc_mask, const char *esc_extra, 227 const char *data) ATTR_NULL(4); 228 229 /* append the provided scheme to the out buffer */ 230 void uri_append_scheme(string_t *out, const char *scheme); 231 232 /* append partial user data (i.e. some part of what comes before '@') to 233 the out buffer. No '@' is produced. Characters are percent-encoded when 234 necessary. Characters in esc are always percent-encoded, even when these 235 are valid 'userinfo' characters. */ 236 void uri_append_user_data(string_t *out, 237 const char *esc, const char *data) ATTR_NULL(2); 238 /* append userinfo and '@' to the out buffer. Characters in userinfo are 239 percent-encoded when necessary.*/ 240 void uri_append_userinfo(string_t *out, const char *userinfo); 241 242 /* append the host name to the out buffer. Characters are percent-encoded 243 when necessary.*/ 244 void uri_append_host_name(string_t *out, const char *name); 245 /* append the host IP address to the out buffer. */ 246 void uri_append_host_ip(string_t *out, const struct ip_addr *host_ip); 247 /* encode the URI host struct to the out buffer. */ 248 void uri_append_host(string_t *out, const struct uri_host *host); 249 /* append the port to the out buffer. */ 250 void uri_append_port(string_t *out, in_port_t port); 251 252 /* append partial path segment data to the out buffer. No '/' is produced. 253 Characters are percent-encoded when necessary. Characters in esc are 254 always percent-encoded, even when these are valid 'segment' characters. 255 */ 256 void uri_append_path_segment_data(string_t *out, 257 const char *esc, const char *data) ATTR_NULL(2); 258 /* append a full path segment to the out buffer. A leading '/' is 259 produced. Characters are percent-encoded when necessary. */ 260 void uri_append_path_segment(string_t *out, const char *segment); 261 /* append partial path data to the out buffer. The data may include '/', 262 which is not encoded. Characters are percent-encoded when necessary. 263 Characters in esc are always percent-encoded, even when these are 264 valid 'path' characters.*/ 265 void uri_append_path_data(string_t *out, 266 const char *esc, const char *data) ATTR_NULL(2); 267 /* append a full path to the out buffer. A leading '/' is produced. The 268 data may include more '/', which is not encoded. Characters are 269 percent-encoded when necessary. 270 */ 271 void uri_append_path(string_t *out, const char *path); 272 273 /* append partial query data to the out buffer. No leading '?' is 274 produced. Characters are percent-encoded when necessary. Characters 275 in esc are always percent-encoded, even when these are valid 'query' 276 characters.*/ 277 void uri_append_query_data(string_t *out, 278 const char *esc, const char *data) ATTR_NULL(2); 279 /* append a full URI query part to the out buffer. A leading '?' is 280 produced. Characters are percent-encoded when necessary. */ 281 void uri_append_query(string_t *out, const char *query); 282 283 /* append partial fragment data to the out buffer. No leading '#' is 284 produced. Characters are percent-encoded when necessary. Characters 285 in esc are always percent-encoded, even when these are valid 286 'fragment' characters.*/ 287 void uri_append_fragment_data(string_t *out, 288 const char *esc, const char *data) ATTR_NULL(2); 289 /* append a full URI fragment part to the out buffer. A leading '#' is 290 produced. Characters are percent-encoded when necessary. */ 291 void uri_append_fragment(string_t *out, const char *fragment); 292 293 /* append data to the out buffer and escape any reserved character */ 294 void uri_append_unreserved(string_t *out, const char *data); 295 /* append data to the out buffer and escape any reserved character except '/' */ 296 void uri_append_unreserved_path(string_t *out, const char *data); 297 298 #endif 299