1 #ifndef URI_UTIL_H
2 #define URI_UTIL_H
3 
4 #include "net.h"
5 
6 /*
7  * Generic URI parsing.
8  */
9 
/* Flags that adjust URI parsing. They are accepted as the 'flags' argument
   of uri_parse_absolute_generic() and uri_check_data(), and by uri_check().
   Each value is defined through BIT(n) with a distinct n, so the flags are
   presumably meant to be OR-ed together (confirm against the BIT() macro). */
10 enum uri_parse_flags {
11 	/* Scheme part 'scheme:' is already parsed externally. */
12 	URI_PARSE_SCHEME_EXTERNAL = BIT(0),
13 	/* Allow '#fragment' part in URI */
14 	URI_PARSE_ALLOW_FRAGMENT_PART = BIT(1),
15 };
16 
/* Parsed 'host' part of a URI, which is either an IP address literal or a
   host name. Filled in by uri_parse_host() and copied with uri_host_copy(). */
17 struct uri_host {
	/* host as a string; NOTE(review): whether this is also set when the
	   host was an IP address literal is not visible here - confirm */
18 	const char *name;
	/* parsed IP address; presumably only meaningful when the host was an
	   IP address literal - confirm against the implementation */
19 	struct ip_addr ip;
20 };
21 
/* Parsed 'authority' part of a URI. Filled in by uri_parse_authority() and
   its slashslash/host variants. */
22 struct uri_authority {
23 	/* encoded userinfo part; e.g. "user:pass" */
24 	const char *enc_userinfo;
25 
	/* host part of the authority; see struct uri_host */
26 	struct uri_host host;
27 	in_port_t port; /* 0 means no port specified */
28 };
29 
/* State shared by the uri_parse_*() functions. Set up with
   uri_parser_init() or uri_parser_init_data(). */
30 struct uri_parser {
	/* the "parser pool" from which parsed results are allocated */
31 	pool_t pool;
	/* errors are written here by the parsing functions; presumably NULL
	   as long as no error occurred - confirm */
32 	const char *error;
33 
	/* presumably the start of the input, the current parser position and
	   the end of the input, in that order - confirm */
34 	const unsigned char *begin, *cur, *end;
35 
	/* temporary buffer associated with this parser; see
	   uri_parser_get_tmpbuf() */
36 	string_t *tmpbuf;
37 
	/* NOTE(review): judging by the name, this permits a percent-encoded
	   NUL character ("%00") when set - confirm */
38 	bool allow_pct_nul:1;
39 };
40 
41 /* parse one instance of percent encoding. Returns 1 for success,
42    0 if none is present at the current parser position, and -1 in
43    case of error. The decoded character is returned in ch_r upon
44    success */
45 int uri_parse_pct_encoded(struct uri_parser *parser,
46 		      unsigned char *ch_r);
47 
48 /* parse characters as long as these comply with the 'unreserved'
49    syntax. Returns 1 if characters were found, 0 if none were found,
50    and -1 if there was an error */
51 int uri_parse_unreserved(struct uri_parser *parser, string_t *part);
52 /* the same as uri_parse_unreserved(), but the allowed characters are
53    extended to 'unreserved / pct-encoded', meaning that percent encoding
54    is allowed */
55 int uri_parse_unreserved_pct(struct uri_parser *parser, string_t *part);
56 
57 /* decode percent-encoded data from the 'data' parameter, up until the
58    'until' parameter. If the latter is NULL, data is decoded up until the
59    '\0' character. The decoded data is allocated on the parser pool and
60    returned in decoded_r. Any errors are written to the parser object. */
61 bool uri_data_decode(struct uri_parser *parser, const char *data,
62 		     const char *until, const char **decoded_r) ATTR_NULL(3);
63 
64 /* cut the 'scheme ":"' part from the URI. The uri_p pointer is updated to
65    point just past the ":". Returns 0 on success and -1 on error. The
66    result is returned in the scheme_r parameter. This can be NULL to use
67    this function for merely checking the presence of a valid scheme. */
68 int uri_cut_scheme(const char **uri_p, const char **scheme_r)
69 	ATTR_NULL(2);
70 
71 /* parse the URI 'scheme ":"' part. Returns 1 if successful, 0 if the first
72    character is not valid for a scheme, and -1 in case of error. The
73    result parameter scheme_r can be NULL to use this function for merely
74    checking the presence of a valid scheme. */
75 int uri_parse_scheme(struct uri_parser *parser, const char **scheme_r)
76 	ATTR_NULL(2);
77 
78 /* parse the URI 'reg-name' syntax. Returns 1 if successful, 0 if the first
79    character is not valid for a host name, and -1 in case of error. The
80    result parameter reg_name_r can be NULL to use this function for merely
81    checking the presence of a valid host name. The result is allocated from
82    the data stack.
83  */
84 int uri_parse_reg_name(struct uri_parser *parser,
85 	const char **reg_name_r) ATTR_NULL(2);
86 /* parse the URI 'reg-name' part as an Internet host name, which is a
87    sequence of domain name labels separated by '.', as defined in
88    Section 3.5 of RFC 1034 and Section 2.1 of RFC 1123. Returns 1 if
89    successful, 0 if the first character is not valid for a host name,
90    and -1 in case of error. The result parameter host_name_r can be NULL
91    to use this function for merely checking the presence of a valid host
92    name. The result is allocated from the data stack.
93  */
94 int uri_parse_host_name(struct uri_parser *parser,
95 	const char **host_name_r) ATTR_NULL(2);
96 /* parse the URI 'host' syntax, which is either an IP address literal or
97    an Internet host name, as defined in Section 3.5 of RFC 1034 and
98    Section 2.1 of RFC 1123. An IP address literal is always allowed.
99    Returns 1 if successful, 0 if the first character is not valid for a
100    host name, and -1 in case of error. The provided host struct is filled
101    in with the parsed data, all allocated from the parser pool. The host
102    parameter can be NULL to use this function for merely checking for
103    valid 'host' syntax.
104  */
105 int uri_parse_host(struct uri_parser *parser,
106 	struct uri_host *host) ATTR_NULL(2);
107 
108 /* parse the URI 'authority' syntax. Returns 1 if successful, 0 if the
109    first character is not valid for the 'authority' syntax and -1 in case
110    of error. The provided uri_authority struct is filled in with the parsed
111    data, all allocated from the parser pool. The auth parameter can be
112    NULL to use this function for merely checking for valid 'authority'
113    syntax.
114  */
115 int uri_parse_authority(struct uri_parser *parser,
116 	struct uri_authority *auth) ATTR_NULL(2);
117 /* identical to uri_parse_authority(), except that this function parses
118    '"//" authority', rather than 'authority'.
119  */
120 int uri_parse_slashslash_authority(struct uri_parser *parser,
121 	struct uri_authority *auth) ATTR_NULL(2);
122 /* identical to uri_parse_authority(), except that this function parses
123    the registered name ('reg-name' syntax) as an Internet host name, as
124    defined in Section 3.5 of RFC 1034 and Section 2.1 of RFC 1123.
125  */
126 int uri_parse_host_authority(struct uri_parser *parser,
127 	struct uri_authority *auth) ATTR_NULL(2);
128 /* identical to uri_parse_slashslash_authority(), except that this
129    function parses the registered name ('reg-name' syntax) as an Internet
130    host name, as defined in Section 3.5 of RFC 1034 and Section 2.1 of
131    RFC 1123.
132  */
133 int uri_parse_slashslash_host_authority(struct uri_parser *parser,
134 	struct uri_authority *auth) ATTR_NULL(2);
135 
136 /* parse the URI 'segment' syntax. Returns 1 if successful, 0 if the first
137    character is not valid for the 'segment' syntax and -1 in case of
138    error. The result is allocated from the parser pool. Percent encoding is
139    not decoded in the result. The result parameter can be NULL to use this
140    function for merely checking for valid 'segment' syntax.
141  */
142 int uri_parse_path_segment(struct uri_parser *parser,
143 	const char **segment_r) ATTR_NULL(2);
144 /* parse the URI 'path' syntax. This also resolves '..' and '.' segments in
145    the path. If the path is relative, the relative_r parameter indicates
146    how many segments the base path must be moved towards root (as caused by
147    leading '..' segments). Returns 1 if successful, 0 if the first character
148    is not valid for the 'segment' syntax and -1 in case of error. The result
149    is a NULL-terminated string list allocated from the parser pool. Percent
150    encoding is not decoded in the result. The result parameter can be NULL
151    to use this function for merely checking for valid 'path' syntax.
152  */
153 int uri_parse_path(struct uri_parser *parser, int *relative_r,
154 		   const char *const **path_r) ATTR_NULL(2,3);
155 
156 /* parse the URI 'query' syntax. Returns 1 if successful, 0 if the first
157    character is not valid for the 'query' syntax and -1 in case of
158    error. The result is allocated from the parser pool. Percent encoding is
159    not decoded in the result. The result parameter can be NULL to use this
160    function for merely checking for valid 'query' syntax.
161  */
162 int uri_parse_query(struct uri_parser *parser,
163 	const char **query_r) ATTR_NULL(2);
164 /* parse the URI 'fragment' syntax. Returns 1 if successful, 0 if the first
165    character is not valid for the 'fragment' syntax and -1 in case of
166    error. The result is allocated from the parser pool. Percent encoding is
167    not decoded in the result. The result parameter can be NULL to use this
168    function for merely checking for valid 'fragment' syntax.
169  */
170 int uri_parse_fragment(struct uri_parser *parser,
171 	const char **fragment_r) ATTR_NULL(2);
172 
173 /* initialize the URI parser with the provided data */
174 void uri_parser_init_data(struct uri_parser *parser,
175 	pool_t pool, const unsigned char *data, size_t size);
176 /* initialize the URI parser with the provided '\0'-terminated string */
177 void uri_parser_init(struct uri_parser *parser,
178 	pool_t pool, const char *uri);
179 
180 /* returns the temporary buffer associated with this parser. Can be used
181    for higher-level parsing activities. */
182 string_t *uri_parser_get_tmpbuf(struct uri_parser *parser,
183 	size_t size);
184 
185 /* Parse a generic (RFC3986) absolute URI for validity.
186    Returns 0 if valid and -1 otherwise. Note that some URI formats like
187    "sip", "aix" and "aaa" violate RFC3986 and will currently fail with
188    this function.
189  */
190 int uri_parse_absolute_generic(struct uri_parser *parser,
191 	enum uri_parse_flags flags);
192 
193 /*
194  * Generic URI manipulation
195  */
196 
197 /* copy uri_host struct from src to dest and allocate it on pool */
198 void uri_host_copy(pool_t pool, struct uri_host *dest,
199 	const struct uri_host *src);
200 
201 /*
202  * Generic URI validation
203  */
204 
205 /* Check whether the provided data is a valid absolute RFC3986 URI.
206    Returns 0 if valid and -1 otherwise.
   The flags parameter takes enum uri_parse_flags values.
   NOTE(review): 'size' is presumably the length of 'data' in bytes, and
   error_r presumably receives a description of the problem when -1 is
   returned - confirm both against the implementation. */
207 int uri_check_data(const unsigned char *data, size_t size,
208 	enum uri_parse_flags flags, const char **error_r);
209 /* Check whether the provided string is a valid absolute RFC3986 URI.
210    Returns 0 if valid and -1 otherwise.
   The flags parameter takes enum uri_parse_flags values.
   NOTE(review): error_r presumably receives a description of the problem
   when -1 is returned - confirm against the implementation. */
211 int uri_check(const char *uri, enum uri_parse_flags flags,
212 	const char **error_r);
213 
214 /*
215  * Generic URI construction
216  */
217 
218 /* encodes the '\0'-terminated data using the percent encoding. The
219    esc_table is a 256 byte lookup table. If none of the esc_mask bits are
220    set at the character's position in the esc_table, a character needs
221    to be encoded. Also, when esc_extra contains a character, it needs to
222    be encoded. All other characters are copied verbatim to the out buffer.
223  */
224 void uri_data_encode(string_t *out,
225 	const unsigned char esc_table[256],
226 	unsigned char esc_mask, const char *esc_extra,
227 	const char *data) ATTR_NULL(4);
228 
229 /* append the provided scheme to the out buffer */
230 void uri_append_scheme(string_t *out, const char *scheme);
231 
232 /* append partial user data (i.e. some part of what comes before '@') to
233    the out buffer. No '@' is produced. Characters are percent-encoded when
234    necessary. Characters in esc are always percent-encoded, even when these
235    are valid 'userinfo' characters. */
236 void uri_append_user_data(string_t *out,
237 	const char *esc, const char *data) ATTR_NULL(2);
238 /* append userinfo and '@' to the out buffer. Characters in userinfo are
239    percent-encoded when necessary.*/
240 void uri_append_userinfo(string_t *out, const char *userinfo);
241 
242 /* append the host name to the out buffer. Characters are percent-encoded
243    when necessary.*/
244 void uri_append_host_name(string_t *out, const char *name);
245 /* append the host IP address to the out buffer. */
246 void uri_append_host_ip(string_t *out, const struct ip_addr *host_ip);
247 /* encode the URI host struct to the out buffer. */
248 void uri_append_host(string_t *out, const struct uri_host *host);
249 /* append the port to the out buffer. */
250 void uri_append_port(string_t *out, in_port_t port);
251 
252 /* append partial path segment data to the out buffer. No '/' is produced.
253    Characters are percent-encoded when necessary. Characters in esc are
254    always percent-encoded, even when these are valid 'segment' characters.
255  */
256 void uri_append_path_segment_data(string_t *out,
257 	const char *esc, const char *data) ATTR_NULL(2);
258 /* append a full path segment to the out buffer. A leading '/' is
259    produced. Characters are percent-encoded when necessary. */
260 void uri_append_path_segment(string_t *out, const char *segment);
261 /* append partial path data to the out buffer. The data may include '/',
262    which is not encoded. Characters are percent-encoded when necessary.
263    Characters in esc are always percent-encoded, even when these are
264    valid 'path' characters.*/
265 void uri_append_path_data(string_t *out,
266 	const char *esc, const char *data) ATTR_NULL(2);
267 /* append a full path to the out buffer. A leading '/' is produced. The
268    data may include more '/', which is not encoded. Characters are
269    percent-encoded when necessary.
270  */
271 void uri_append_path(string_t *out, const char *path);
272 
273 /* append partial query data to the out buffer. No leading '?' is
274    produced. Characters are percent-encoded when necessary. Characters
275    in esc are always percent-encoded, even when these are valid 'query'
276    characters.*/
277 void uri_append_query_data(string_t *out,
278 	const char *esc, const char *data) ATTR_NULL(2);
279 /* append a full URI query part to the out buffer. A leading '?' is
280    produced. Characters are percent-encoded when necessary. */
281 void uri_append_query(string_t *out, const char *query);
282 
283 /* append partial fragment data to the out buffer. No leading '#' is
284    produced. Characters are percent-encoded when necessary. Characters
285    in esc are always percent-encoded, even when these are valid
286   'fragment' characters.*/
287 void uri_append_fragment_data(string_t *out,
288 	const char *esc, const char *data) ATTR_NULL(2);
289 /* append a full URI fragment part to the out buffer. A leading '#' is
290    produced. Characters are percent-encoded when necessary. */
291 void uri_append_fragment(string_t *out, const char *fragment);
292 
293 /* append data to the out buffer and escape any reserved character */
294 void uri_append_unreserved(string_t *out, const char *data);
295 /* append data to the out buffer and escape any reserved character except '/' */
296 void uri_append_unreserved_path(string_t *out, const char *data);
297 
298 #endif
299