/*
   HTTP URI handling
   Copyright (C) 1999-2003, Joe Orton <joe@manyfish.co.uk>

   This library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Library General Public
   License as published by the Free Software Foundation; either
   version 2 of the License, or (at your option) any later version.

   This library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Library General Public License for more details.

   You should have received a copy of the GNU Library General Public
   License along with this library; if not, write to the Free
   Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
   MA 02111-1307, USA

*/
21 
22 #include "config.h"
23 
24 #ifdef HAVE_STRING_H
25 #include <string.h>
26 #endif
27 #ifdef HAVE_STRINGS_H
28 #include <strings.h>
29 #endif
30 #ifdef HAVE_UNISTD_H
31 #include <unistd.h>
32 #endif
33 #ifdef HAVE_STDLIB_H
34 #include <stdlib.h>
35 #endif
36 
37 #include <ctype.h>
38 
39 #include "ne_utils.h" /* for 'min' */
40 #include "ne_string.h" /* for ne_buffer */
41 #include "ne_alloc.h"
42 #include "ne_uri.h"
43 
/* Return the parent of the path 'uri' as a new string allocated by
 * ne_strndup, including its trailing slash: the parent of both
 * "/foo/bar" and "/foo/bar/" is "/foo/", and the parent of "/foo/"
 * is "/".
 *
 * Returns NULL when no parent can be derived: for the empty string,
 * for "/", and for any path with no slash in front of its final
 * segment (e.g. "foo" or "foo/"). */
char *ne_path_parent(const char *uri)
{
    /* 'end' is the length of the prefix still under consideration.
     * Working with a length rather than a pointer means nothing is
     * ever positioned before the start of 'uri', even for "" or "/". */
    size_t end = strlen(uri);

    /* skip trailing slash (parent of "/foo/" is "/") */
    if (end > 0 && uri[end - 1] == '/')
        end--;

    /* drop the final segment, stopping just after the previous slash */
    while (end > 0 && uri[end - 1] != '/')
        end--;

    /* ran off the front without meeting a slash: no parent */
    if (end == 0)
        return NULL;

    return ne_strndup(uri, end);
}
58 
/* Return 1 if the last character of 'uri' is a slash, else 0.
 * The empty string has no trailing slash. */
int ne_path_has_trailing_slash(const char *uri)
{
    const size_t length = strlen(uri);

    if (length == 0)
        return 0;

    return uri[length - 1] == '/';
}
65 
/* Return the default port for 'scheme': 80 for "http", 443 for
 * "https", and 0 for any other scheme. */
unsigned int ne_uri_defaultport(const char *scheme)
{
    static const struct {
        const char *name;
        unsigned int port;
    } known[] = {
        { "http", 80 },
        { "https", 443 }
    };
    size_t i;

    /* RFC2616/3.2.3 says use case-insensitive comparisons here. */
    for (i = 0; i < sizeof known / sizeof known[0]; i++) {
        if (strcasecmp(scheme, known[i].name) == 0)
            return known[i].port;
    }

    return 0;
}
76 
77 /* TODO: Also, maybe stop malloc'ing here, take a "char *" uri, modify
78  * it in-place, and have fields point inside passed uri.  More work
79  * for the caller then though. */
80 /* TODO: not a proper URI parser */
ne_uri_parse(const char * uri,ne_uri * parsed)81 int ne_uri_parse(const char *uri, ne_uri *parsed)
82 {
83     const char *pnt, *slash, *colon, *atsign, *openbk;
84 
85     parsed->port = 0;
86     parsed->host = NULL;
87     parsed->path = NULL;
88     parsed->scheme = NULL;
89     parsed->authinfo = NULL;
90 
91     if (uri[0] == '\0') {
92 	return -1;
93     }
94 
95     pnt = strstr(uri, "://");
96     if (pnt) {
97 	parsed->scheme = ne_strndup(uri, pnt - uri);
98 	pnt += 3; /* start of hostport segment */
99     } else {
100 	pnt = uri;
101     }
102 
103     atsign = strchr(pnt, '@');
104     slash = strchr(pnt, '/');
105     openbk = strchr(pnt, '[');
106 
107     /* Check for an authinfo segment in the hostport segment. */
108     if (atsign != NULL && (slash == NULL || atsign < slash)) {
109 	parsed->authinfo = ne_strndup(pnt, atsign - pnt);
110 	pnt = atsign + 1;
111     }
112 
113     if (openbk && (!slash || openbk < slash)) {
114 	const char *closebk = strchr(openbk, ']');
115 	if (closebk == NULL)
116 	    return -1;
117 	colon = strchr(closebk + 1, ':');
118     } else {
119 	colon = strchr(pnt, ':');
120     }
121 
122     if (slash == NULL) {
123 	parsed->path = ne_strdup("/");
124 	if (colon == NULL) {
125 	    parsed->host = ne_strdup(pnt);
126 	} else {
127 	    parsed->port = atoi(colon+1);
128 	    parsed->host = ne_strndup(pnt, colon - pnt);
129 	}
130     } else {
131 	if (colon == NULL || colon > slash) {
132 	    /* No port segment */
133 	    if (slash != uri) {
134 		parsed->host = ne_strndup(pnt, slash - pnt);
135 	    } else {
136 		/* No hostname segment. */
137 	    }
138 	} else {
139 	    /* Port segment */
140 	    parsed->port = atoi(colon + 1);
141 	    parsed->host = ne_strndup(pnt, colon - pnt);
142 	}
143 	parsed->path = ne_strdup(slash);
144     }
145 
146     return 0;
147 }
148 
/* Release the host, path, scheme and authinfo strings held by *u
 * (those that are set), then zero the whole structure.  The structure
 * itself is not freed. */
void ne_uri_free(ne_uri *u)
{
    if (u->host != NULL) {
        ne_free(u->host);
    }
    if (u->path != NULL) {
        ne_free(u->path);
    }
    if (u->scheme != NULL) {
        ne_free(u->scheme);
    }
    if (u->authinfo != NULL) {
        ne_free(u->authinfo);
    }

    memset(u, 0, sizeof(*u));
}
157 
/* Return a copy of 'uri', allocated with ne_malloc, in which every
 * "%XX" escape (two hexadecimal digits) is replaced by the single
 * character with that value; all other characters are copied as-is.
 *
 * Returns NULL if a '%' is not followed by two hexadecimal digits. */
char *ne_path_unescape(const char *uri)
{
    char hex[3];
    char *decoded = ne_malloc(strlen(uri) + 1);
    char *out = decoded;
    const char *in = uri;

    hex[2] = '\0';

    while (*in != '\0') {
        if (*in != '%') {
            *out++ = *in++;
            continue;
        }

        /* The second digit is only inspected when the first one is
         * valid, so the terminating NUL is never read past. */
        if (!isxdigit((unsigned char) in[1]) ||
            !isxdigit((unsigned char) in[2])) {
            /* Invalid URI */
            ne_free(decoded);
            return NULL;
        }

        hex[0] = in[1];
        hex[1] = in[2];
        *out++ = (char) strtol(hex, NULL, 16);
        in += 3; /* the '%' and both digits */
    }

    *out = '\0';
    return decoded;
}
180 
/* RFC2396 spake:
 * "Data must be escaped if it does not have a representation
 * using an unreserved character".
 */

/* Lookup table: character classes from 2396. (This is overkill)
 *
 * One entry per US-ASCII code 0..127.  An entry of zero marks a
 * character which ESCAPE() reports as needing escaping; the non-zero
 * classes (mark, alphanum, reserved) are passed through unchanged. */

/* Classes that get escaped: */
#define SP 0   /* space:    code 0x20                                 */
#define CO 0   /* control:  codes 0x00-0x1F and 0x7F                  */
#define DE 0   /* delims:   angle brackets, '#', '%', double quote    */
#define UW 0   /* unwise:   braces, '|', backslash, '^', square       */
               /*           brackets, backquote                       */
/* Classes that are left alone: */
#define MA 1   /* mark:     - _ . ! ~ * ' ( )                         */
#define AN 2   /* alphanum: letters and digits                        */
#define RE 2   /* reserved: ; / ? : @ & = + $ ,                       */

static const char uri_chars[128] = {
    /* 0x00 */ CO, CO, CO, CO, CO, CO, CO, CO,
    /* 0x08 */ CO, CO, CO, CO, CO, CO, CO, CO,
    /* 0x10 */ CO, CO, CO, CO, CO, CO, CO, CO,
    /* 0x18 */ CO, CO, CO, CO, CO, CO, CO, CO,
    /* 0x20 */ SP, MA, DE, DE, RE, DE, RE, MA,
    /* 0x28 */ MA, MA, MA, RE, RE, MA, MA, RE,
    /* 0x30 */ AN, AN, AN, AN, AN, AN, AN, AN,
    /* 0x38 */ AN, AN, RE, RE, DE, RE, DE, RE,
    /* 0x40 */ RE, AN, AN, AN, AN, AN, AN, AN,
    /* 0x48 */ AN, AN, AN, AN, AN, AN, AN, AN,
    /* 0x50 */ AN, AN, AN, AN, AN, AN, AN, AN,
    /* 0x58 */ AN, AN, AN, UW, UW, UW, UW, MA,
    /* 0x60 */ UW, AN, AN, AN, AN, AN, AN, AN,
    /* 0x68 */ AN, AN, AN, AN, AN, AN, AN, AN,
    /* 0x70 */ AN, AN, AN, AN, AN, AN, AN, AN,
    /* 0x78 */ AN, AN, AN, UW, UW, UW, MA, CO
};

/* Non-zero if the character 'ch' must be escaped: either it lies
 * outside the 7-bit range covered by the table, or its class is zero.
 * Note that 'ch' is evaluated twice. */
#define ESCAPE(ch) (((unsigned char)(ch)) > 127 || \
                    uri_chars[(unsigned char)(ch)] == 0)

#undef SP
#undef CO
#undef DE
#undef UW
#undef MA
#undef AN
#undef RE
218 
/* Return a newly allocated copy of 'abs_path' in which every
 * character selected by ESCAPE() is replaced by "%xx", where xx is
 * the character's value as two lower-case hexadecimal digits.  All
 * other characters are copied unchanged.
 *
 * The result comes from ne_strdup when nothing needs escaping and
 * from ne_malloc otherwise. */
char *ne_path_escape(const char *abs_path)
{
    static const char hexdigits[] = "0123456789abcdef";
    const char *src;
    char *escaped, *dst;
    /* size_t rather than int: these feed the allocation size below and
     * must not overflow for long inputs. */
    size_t length = 0, extra = 0;

    /* One pass to measure the input and count what needs escaping. */
    for (src = abs_path; *src != '\0'; src++) {
        if (ESCAPE(*src)) {
            extra++;
        }
        length++;
    }

    if (extra == 0) {
        return ne_strdup(abs_path);
    }

    /* An escaped character is "%xx", i.e., two MORE
     * characters than the original string */
    dst = escaped = ne_malloc(length + 2 * extra + 1);

    for (src = abs_path; *src != '\0'; src++) {
        if (ESCAPE(*src)) {
            /* Escape it - %<hex><hex>, written directly rather than
             * through a printf-style formatter. */
            const unsigned char byte = (unsigned char) *src;
            *dst++ = '%';
            *dst++ = hexdigits[(byte >> 4) & 0x0f];
            *dst++ = hexdigits[byte & 0x0f];
        } else {
            *dst++ = *src;
        }
    }

    *dst = '\0';
    return escaped;
}

#undef ESCAPE
250 
251 #define CASECMP(field) do { \
252 n = strcasecmp(u1->field, u2->field); if (n) return n; } while(0)
253 
254 #define CMP(field) do { \
255 n = strcmp(u1->field, u2->field); if (n) return n; } while(0)
256 
257 /* As specified by RFC 2616, section 3.2.3. */
ne_uri_cmp(const ne_uri * u1,const ne_uri * u2)258 int ne_uri_cmp(const ne_uri *u1, const ne_uri *u2)
259 {
260     int n;
261 
262     if (u1->path[0] == '\0' && strcmp(u2->path, "/") == 0)
263 	return 0;
264     if (u2->path[0] == '\0' && strcmp(u1->path, "/") == 0)
265 	return 0;
266 
267     CMP(path);
268     CASECMP(host);
269     CASECMP(scheme);
270     if (u1->port > u2->port)
271 	return 1;
272     else if (u1->port < u2->port)
273 	return -1;
274     return 0;
275 }
276 
277 #undef CMP
278 #undef CASECMP
279 
/* TODO: implement properly */

/* Compare the paths 'a' and 'b' without regard to case, treating a
 * path and the same path with one extra trailing slash as equal
 * ("/foo" matches "/foo/").
 *
 * Returns zero when the paths match; otherwise returns the non-zero
 * result of strcasecmp(a, b). */
int ne_path_compare(const char *a, const char *b)
{
    const char *longer, *shorter;
    size_t lena, lenb, longlen, shortlen;
    int ret = strcasecmp(a, b);

    if (ret == 0)
        return 0;

    lena = strlen(a);
    lenb = strlen(b);
    if (lena > lenb) {
        longer = a;
        longlen = lena;
        shorter = b;
        shortlen = lenb;
    } else {
        longer = b;
        longlen = lenb;
        shorter = a;
        shortlen = lena;
    }

    /* This logic says: "If the lengths of the two URIs differ by
     * exactly one, and the LONGER of the two URIs has a trailing
     * slash and the SHORTER one DOESN'T, then..."  (longlen is at
     * least 1 once the first test has passed, so indexing the last
     * character of 'longer' is safe.) */
    if (longlen - shortlen == 1 &&
        longer[longlen - 1] == '/' &&
        (shortlen == 0 || shorter[shortlen - 1] != '/')) {
        /* Compare them, ignoring the trailing slash on the longer
         * URI */
        if (strncasecmp(a, b, shortlen) == 0)
            ret = 0;
    }

    return ret;
}
301 
ne_uri_unparse(const ne_uri * uri)302 char *ne_uri_unparse(const ne_uri *uri)
303 {
304     ne_buffer *buf = ne_buffer_create();
305 
306     ne_buffer_concat(buf, uri->scheme, "://", uri->host, NULL);
307 
308     if (uri->port > 0 && ne_uri_defaultport(uri->scheme) != uri->port) {
309 	char str[20];
310 	ne_snprintf(str, 20, ":%d", uri->port);
311 	ne_buffer_zappend(buf, str);
312     }
313 
314     ne_buffer_zappend(buf, uri->path);
315 
316     return ne_buffer_finish(buf);
317 }
318 
/* Give it a path segment, it returns non-zero if child is
 * a child of parent.
 *
 * 'child' counts as a child when it is strictly longer than 'parent'
 * and its first strlen(parent) characters match 'parent' according to
 * ne_path_compare.  The test is a plain prefix comparison and does not
 * look at segment boundaries. */
int ne_path_childof(const char *parent, const char *child)
{
    const size_t parent_len = strlen(parent);
    char *root;
    int ret;

    /* Decide the easy case before allocating anything. */
    if (parent_len >= strlen(child))
        return 0;

    /* root is the first of child, equal to length of parent */
    root = ne_strndup(child, parent_len);
    ret = (ne_path_compare(parent, root) == 0);
    ne_free(root);

    return ret;
}
335