1 #include "links.h"
2 
/*
 * Table of the URL schemes this file understands.  The list ends with an
 * entry whose prot is NULL; check_protocol() and get_prot_info() scan it
 * from the top and stop at the first match.
 */
struct {
	/* Scheme name as it appears before the ':' in a URL. */
	unsigned char *prot;
	/* Port handed out by get_prot_info(); get_port() uses it when the URL carries no port. */
	int port;
	/* Handler taking a struct connection, or NULL (returned by get_protocol_handle()). */
	void (*func)(struct connection *);
	/* Handler taking a session and a string, or NULL (returned by get_external_protocol_function()). */
	void (*nc_func)(struct session *, unsigned char *);
	/* Non-zero: parse_url() does not split the URL; everything after the scheme prefix is data. */
	int free_syntax;
	/* Non-zero: parse_url() fails unless the ':' is followed by "//". */
	int need_slashes;
	/* Non-zero: parse_url() fails when the string ends right after the host (or port) part. */
	int need_slash_after_host;
	/* Zero: get_protocol_handle() refuses URLs that contain POST_CHAR. */
	int allow_post;
} protocols[]= {
		{"file", 0, file_func, NULL, 1, 1, 0, 0},
		{"https", 443, https_func, NULL, 0, 1, 1, 1},
		{"http", 80, http_func, NULL, 0, 1, 1, 1},
		{"proxy", 3128, proxy_func, NULL, 0, 1, 1, 1},
		{"ftp", 21, ftp_func, NULL, 0, 1, 1, 0},
		{"finger", 79, finger_func, NULL, 0, 1, 1, 0},
#ifndef DISABLE_SMB
		{"smb", 139, smb_func, NULL, 0, 1, 1, 0},
#endif
		{"mailto", 0, NULL, mailto_func, 0, 0, 0, 0},
		{"telnet", 0, NULL, telnet_func, 0, 0, 0, 0},
		{"tn3270", 0, NULL, tn3270_func, 0, 0, 0, 0},
		{"mms", 0, NULL, mms_func, 1, 0, 1, 0},
		/* Terminator: prot == NULL ends every scan of this table. */
		{NULL, 0, NULL, NULL, 0, 0, 0, 0}
};
28 
check_protocol(unsigned char * p,int l)29 int check_protocol(unsigned char *p, int l)
30 {
31 	int i;
32 	for (i = 0; protocols[i].prot; i++)
33 		if (!casecmp(protocols[i].prot, p, l) && (int)strlen(protocols[i].prot) == l) {
34 			return i;
35 		}
36 	return -1;
37 }
38 
get_prot_info(unsigned char * prot,int * port,void (** func)(struct connection *),void (** nc_func)(struct session * ses,unsigned char *),int * allow_post)39 int get_prot_info(unsigned char *prot, int *port, void (**func)(struct connection *), void (**nc_func)(struct session *ses, unsigned char *), int *allow_post)
40 {
41 	int i;
42 	for (i = 0; protocols[i].prot; i++)
43 		if (!strcasecmp(protocols[i].prot, prot)) {
44 			if (port) *port = protocols[i].port;
45 			if (func) *func = protocols[i].func;
46 			if (nc_func) *nc_func = protocols[i].nc_func;
47 			if (allow_post) *allow_post = protocols[i].allow_post;
48 			return 0;
49 		}
50 	return -1;
51 }
52 
parse_url(unsigned char * url,int * prlen,unsigned char ** user,int * uslen,unsigned char ** pass,int * palen,unsigned char ** host,int * holen,unsigned char ** port,int * polen,unsigned char ** data,int * dalen,unsigned char ** post)53 int parse_url(unsigned char *url, int *prlen, unsigned char **user, int *uslen, unsigned char **pass, int *palen, unsigned char **host, int *holen, unsigned char **port, int *polen, unsigned char **data, int *dalen, unsigned char **post)
54 {
55 	unsigned char *p, *q;
56 	unsigned char p_c[2];
57 	int a;
58 	if (prlen) *prlen = 0;
59 	if (user) *user = NULL;
60 	if (uslen) *uslen = 0;
61 	if (pass) *pass = NULL;
62 	if (palen) *palen = 0;
63 	if (host) *host = NULL;
64 	if (holen) *holen = 0;
65 	if (port) *port = NULL;
66 	if (polen) *polen = 0;
67 	if (data) *data = NULL;
68 	if (dalen) *dalen = 0;
69 	if (post) *post = NULL;
70 	if (!url || !(p = strchr(url, ':'))) return -1;
71 	if (prlen) *prlen = p - url;
72 	if ((a = check_protocol(url, p - url)) == -1) return -1;
73 	if (p[1] != '/' || p[2] != '/') {
74 		if (protocols[a].need_slashes) return -1;
75 		p -= 2;
76 	}
77 	if (protocols[a].free_syntax) {
78 		if (data) *data = p + 3;
79 		if (dalen) *dalen = strlen(p + 3);
80 		return 0;
81 	}
82 	p += 3;
83 	q = p + strcspn(p, "@/?");
84 	if (!*q && protocols[a].need_slash_after_host) return -1;
85 	if (*q == '@') {
86 		unsigned char *pp;
87 		while (strcspn(q + 1, "@") < strcspn(q + 1, "/?"))
88 			q = q + 1 + strcspn(q + 1, "@");
89 		pp = strchr(p, ':');
90 		if (!pp || pp > q) {
91 			if (user) *user = p;
92 			if (uslen) *uslen = q - p;
93 		} else {
94 			if (user) *user = p;
95 			if (uslen) *uslen = pp - p;
96 			if (pass) *pass = pp + 1;
97 			if (palen) *palen = q - pp - 1;
98 		}
99 		p = q + 1;
100 	}
101 	q = p + strcspn(p, ":/?");
102 	if (!*q && protocols[a].need_slash_after_host) return -1;
103 	if (host) *host = p;
104 	if (holen) *holen = q - p;
105 	if (*q == ':') {
106 		unsigned char *pp = q + strcspn(q, "/");
107 		int cc;
108 		if (*pp != '/' && protocols[a].need_slash_after_host) return -1;
109 		if (port) *port = q + 1;
110 		if (polen) *polen = pp - q - 1;
111 		for (cc = 0; cc < pp - q - 1; cc++) if (q[cc+1] < '0' || q[cc+1] > '9') return -1;
112 		q = pp;
113 	}
114 	if (*q && *q != '?') q++;
115 	p = q;
116 	p_c[0] = POST_CHAR;
117 	p_c[1] = 0;
118 	q = p + strcspn(p, p_c);
119 	if (data) *data = p;
120 	if (dalen) *dalen = q - p;
121 	if (post) *post = *q ? q + 1 : NULL;
122 	return 0;
123 }
124 
/*
 * Return a copy (made by memacpy()) of the scheme part of url, i.e. the
 * text before the first ':'.  Returns NULL when parse_url() rejects url.
 * Callers in this file release the result with mem_free().
 */
unsigned char *get_protocol_name(unsigned char *url)
{
	int scheme_len;

	if (parse_url(url, &scheme_len, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL))
		return NULL;

	return memacpy(url, scheme_len);
}
131 
/*
 * Return a copy (made by memacpy()) of the span of url that starts at the
 * user name, or at the host when there is no user name, and ends after the
 * port, or after the host when there is no port.  Any password lies inside
 * that span and is therefore included.
 * Returns NULL when parse_url() rejects url.
 */
unsigned char *get_host_and_pass(unsigned char *url)
{
	unsigned char *user, *host, *port;
	unsigned char *first, *last;
	int host_len, port_len;

	if (parse_url(url, NULL, &user, NULL, NULL, NULL, &host, &host_len, &port, &port_len, NULL, NULL, NULL))
		return NULL;

	if (user)
		first = user;
	else
		first = host;

	if (port)
		last = port + port_len;
	else
		last = host + host_len;

	return memacpy(first, last - first);
}
141 
/*
 * Return a copy (made by memacpy()) of the host part of url.
 * Unlike the sibling getters this never returns NULL on a parse failure:
 * it returns stracpy("") instead.
 */
unsigned char *get_host_name(unsigned char *url)
{
	unsigned char *host;
	int host_len;

	if (parse_url(url, NULL, NULL, NULL, NULL, NULL, &host, &host_len, NULL, NULL, NULL, NULL, NULL))
		return stracpy("");

	return memacpy(host, host_len);
}
149 
/*
 * Return a copy (made by memacpy()) of the user name in url.
 * Returns NULL when parse_url() rejects url.  When url parses but has no
 * user name, memacpy() is called with a NULL pointer and length 0.
 */
unsigned char *get_user_name(unsigned char *url)
{
	unsigned char *name;
	int name_len;

	if (parse_url(url, NULL, &name, &name_len, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL))
		return NULL;

	return memacpy(name, name_len);
}
157 
/*
 * Return a copy (made by memacpy()) of the password in url.
 * Returns NULL when parse_url() rejects url.  When url parses but has no
 * password, memacpy() is called with a NULL pointer and length 0.
 */
unsigned char *get_pass(unsigned char *url)
{
	unsigned char *secret;
	int secret_len;

	if (parse_url(url, NULL, NULL, NULL, &secret, &secret_len, NULL, NULL, NULL, NULL, NULL, NULL, NULL))
		return NULL;

	return memacpy(secret, secret_len);
}
165 
/*
 * Return a copy (made by memacpy()) of the port text in url.
 * Returns NULL when parse_url() rejects url, and also when the port is
 * absent or empty (length 0).
 */
unsigned char *get_port_str(unsigned char *url)
{
	unsigned char *port_text;
	int port_len;

	if (parse_url(url, NULL, NULL, NULL, NULL, NULL, NULL, NULL, &port_text, &port_len, NULL, NULL, NULL))
		return NULL;
	if (!port_len)
		return NULL;

	return memacpy(port_text, port_len);
}
173 
/*
 * Return the port number url refers to.
 *
 * When url carries an explicit port, that text is converted with strtol()
 * and returned if it lies in 1..65535; any other value (including an empty
 * port, which converts to 0) yields -1.  When url has no port, the default
 * port of its scheme is looked up with get_prot_info().
 *
 * Returns -1 when parse_url() rejects url or no port can be determined.
 */
int get_port(unsigned char *url)
{
	unsigned char *port_text;
	unsigned char *scheme;
	int port_len;
	int default_port = -1;	/* stays -1 if get_prot_info() finds nothing */

	if (parse_url(url, NULL, NULL, NULL, NULL, NULL, NULL, NULL, &port_text, &port_len, NULL, NULL, NULL))
		return -1;

	if (port_text) {
		long value = strtol(port_text, NULL, 10);

		if (value > 0 && value < 65536)
			return value;
		return -1;
	}

	scheme = get_protocol_name(url);
	if (!scheme)
		return -1;
	get_prot_info(scheme, &default_port, NULL, NULL, NULL);
	mem_free(scheme);
	return default_port;
}
193 
get_protocol_handle(unsigned char * url)194 void (*get_protocol_handle(unsigned char *url))(struct connection *)
195 {
196 	unsigned char *p;
197 	void (*f)(struct connection *) = NULL;
198 	int post = 0;
199 	if (!(p = get_protocol_name(url))) return NULL;
200 	get_prot_info(p, NULL, &f, NULL, &post);
201 	mem_free(p);
202 	if (!post && strchr(url, POST_CHAR)) return NULL;
203 	return f;
204 }
205 
get_external_protocol_function(unsigned char * url)206 void (*get_external_protocol_function(unsigned char *url))(struct session *, unsigned char *)
207 {
208 	unsigned char *p;
209 	void (*f)(struct session *, unsigned char *) = NULL;
210 	int post = 0;
211 	if (!(p = get_protocol_name(url))) return NULL;
212 	get_prot_info(p, NULL, NULL, &f, &post);
213 	mem_free(p);
214 	if (!post && strchr(url, POST_CHAR)) return NULL;
215 	return f;
216 }
217 
/*
 * Return a pointer to the data part of url as located by parse_url().
 * The result points into url itself; nothing is allocated.
 * Returns NULL when parse_url() rejects url.
 */
unsigned char *get_url_data(unsigned char *url)
{
	unsigned char *data_part;

	if (parse_url(url, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, &data_part, NULL, NULL) != 0)
		return NULL;

	return data_part;
}
224 
/*
 * Directory-separator test.  Expands to the enclosing function's local
 * variable `lo`: when lo is non-zero dir_sep() decides, otherwise only '/'
 * counts.  Every function using dsep() must therefore define `lo`.
 */
#define dsep(x) (lo ? dir_sep(x) : (x) == '/')

/*
 * Collapse "." and ".." path segments of url in place.
 *
 * Works on the data part reported by get_url_data(), starting at the
 * separator in front of it.  Does nothing when url cannot be parsed or the
 * data part starts at url itself.  Processing stops at the first character
 * for which end_of_dir() is true; the rest of the string is moved down
 * unchanged.
 */
void translate_directories(unsigned char *url)
{
	unsigned char *dd = get_url_data(url);
	unsigned char *src, *dst;
	int lo = !casecmp(url, "file://", 7);

	if (!dd || dd == url)
		return;
	/* Start on the separator preceding the data part when data does not begin with one. */
	if (!dsep(*dd))
		dd--;
	src = dd;
	dst = dd;

	for (;;) {
		if (end_of_dir(src[0])) {
			/* Past the path: shift the tail (including its NUL) and finish. */
			memmove(dst, src, strlen(src) + 1);
			return;
		}

		if (dsep(src[0]) && src[1] == '.' && dsep(src[2])) {
			/* "/./": drop it, unless the whole path is exactly that. */
			if (src != dd || src[3]) {
				src += 2;
				continue;
			}
		} else if (dsep(src[0]) && src[1] == '.' && src[2] == '.' && (dsep(src[3]) || !src[3])) {
			/* "/..": rewind the output to the previous separator. */
			while (dst > dd) {
				dst--;
				if (dsep(*dst))
					break;
			}
			/* A trailing ".." keeps its leading separator. */
			if (!src[3])
				*dst++ = *src;
			src += 3;
			continue;
		}

		/* Ordinary character: copy it; copying the NUL ends the job. */
		if (!(*dst++ = *src++))
			return;
	}
}
260 
/*
 * Turn a "file://" URL with a relative path into one with cwd prepended.
 *
 * *up is left untouched when it is NULL, when cwd is NULL or empty, when
 * the URL does not start with "file://", or when the path after the prefix
 * already starts with a directory separator (plus the platform-specific
 * checks under DOS_FS and SPAD).  Otherwise a new string
 * "file://" + cwd + [ "/" ] + path is allocated with mem_alloc(), the old
 * string is released with mem_free() and *up is pointed at the new one.
 */
void insert_wd(unsigned char **up, unsigned char *cwd)
{
	unsigned char *url = *up;
	unsigned char *joined;

	if (!url || !cwd || !*cwd)
		return;
	if (casecmp(url, "file://", 7))
		return;
	if (dir_sep(url[7]))
		return;
#ifdef DOS_FS
	if (upcase(url[7]) >= 'A' && upcase(url[7]) <= 'Z' && url[8] == ':' && dir_sep(url[9]))
		return;
#endif
#ifdef SPAD
	if (_is_absolute(url + 7) != _ABS_NO)
		return;
#endif

	/* Room for both strings, an optional '/' and the terminating NUL. */
	joined = mem_alloc(strlen(url) + strlen(cwd) + 2);
	memcpy(joined, url, 7);
	strcpy(joined + 7, cwd);
	if (!dir_sep(cwd[strlen(cwd) - 1]))
		strcat(joined, "/");
	strcat(joined, url + 7);

	mem_free(url);
	*up = joined;
}
281 
/*
 * Resolve rel against base and return the result as a new string, run
 * through translate_directories().
 *
 * The cases are tried in this order:
 *   1. rel is empty or starts with '#': base cut at its first POST_CHAR or
 *      '#', followed by rel.
 *   2. rel starts with '?' or '&': base cut at the first occurrence of that
 *      character (or POST_CHAR) inside its data part, followed by rel.
 *   3. rel starts with "//": the scheme prefix of base followed by rel, if
 *      parse_url() accepts that (with or without an extra trailing "/").
 *   4. rel is itself accepted by parse_url() (as is, or with trailing
 *      characters <= ' ' removed and "/" appended): a copy of it.  rel
 *      beginning with "proxy://" skips this case.
 *   5. Otherwise rel is treated as a path relative to base's data part.
 *
 * Returns NULL, after calling internal(), when base is unusable.
 * NOTE(review): stracpy()/memacpy()/add_to_strn() are presumably the
 * project's allocating string helpers, making the result caller-owned;
 * confirm against their definitions.
 */
unsigned char *join_urls(unsigned char *base, unsigned char *rel)
{
	unsigned char *p, *n, *pp;
	int l;
	/* `lo` is consumed by the dsep() macro below. */
	int lo = !casecmp(base, "file://", 7);
	if (rel[0] == '#' || !rel[0]) {
		n = stracpy(base);
		/* Cut the copy of base at its first POST_CHAR or '#'. */
		for (p = n; *p && *p != POST_CHAR && *p != '#'; p++) ;
		*p = 0;
		add_to_strn(&n, rel);
		translate_directories(n);
		return n;
	}
	if (rel[0] == '?' || rel[0] == '&') {
		unsigned char rj[3];
		unsigned char *d = get_url_data(base);
		if (!d) goto bad_base;
		/* Stop set: the character rel starts with, or POST_CHAR. */
		rj[0] = rel[0];
		rj[1] = POST_CHAR;
		rj[2] = 0;
		d += strcspn(d, rj);
		n = memacpy(base, d - base);
		add_to_strn(&n, rel);
		translate_directories(n);
		return n;
	}
	if (rel[0] == '/' && rel[1] == '/') {
		/* This n shadows the outer one for the duration of the block. */
		unsigned char *s, *n;
		/* Keep base up to its first "//", or through its first ':' if it has none. */
		if (!(s = strstr(base, "//"))) {
			if (!(s = strchr(base, ':'))) {
				/* Shared failure exit; also reached by goto from outside this block. */
				bad_base:
				internal("bad base url: %s", base);
				return NULL;
			}
			s++;
		}
		n = memacpy(base, s - base);
		add_to_strn(&n, rel);
		if (!parse_url(n, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL)) {
			translate_directories(n);
			return n;
		}
		/* Retry with a trailing slash before giving up on this case. */
		add_to_strn(&n, cast_uchar "/");
		if (!parse_url(n, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL)) {
			translate_directories(n);
			return n;
		}
		mem_free(n);
	}
	/* A "proxy://" rel is never taken as an absolute URL. */
	if (!casecmp("proxy://", rel, 8)) goto prx;
	if (!parse_url(rel, &l, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL)) {
		n = stracpy(rel);
		translate_directories(n);
		return n;
	}
	n = stracpy(rel);
	/* Strip trailing characters <= ' ', then see whether rel + "/" parses. */
	while (n[0] && n[strlen(n) - 1] <= ' ') n[strlen(n) - 1] = 0;
	add_to_strn(&n, "/");
	if (!parse_url(n, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL)) {
		translate_directories(n);
		return n;
	}
	mem_free(n);
	prx:
	/* Relative reference: p becomes the point in base where rel is attached. */
	if (parse_url(base, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, &p, NULL, NULL) || !p) {
		goto bad_base;
	}
	/* Start on the separator in front of the data part if data does not begin with one. */
	if (!dsep(*p)) p--;
	/* rel starts with an end_of_dir() character: keep base up to its first such character. */
	if (end_of_dir(rel[0])) for (; *p; p++) {
		if (end_of_dir(*p)) break;
	/* rel does not start with a separator: keep base through its last separator before end_of_dir(). */
	} else if (!dsep(rel[0])) for (pp = p; *pp; pp++) {
		if (end_of_dir(*pp)) break;
		if (dsep(*pp)) p = pp + 1;
	}
	/* rel starting with a separator leaves p where it is, replacing the whole path. */
	n = mem_alloc(p - base + strlen(rel) + 1);
	memcpy(n, base, p - base);
	strcpy(n + (p - base), rel);
	translate_directories(n);
	return n;
}
362 
/*
 * Turn what may be an incomplete URL into a complete one.
 *
 * Leading spaces are skipped.  A string that parse_url() already accepts is
 * copied as is.  Otherwise the function tries, in order: appending "/" when
 * the string contains "://"; guessing a "file://", "http://" or "ftp://"
 * prefix from the shape of the string; and inserting "//" after a leading
 * "scheme:".  Every accepted candidate is passed through insert_wd() with
 * cwd and through translate_directories().
 *
 * Returns the new string, or NULL when the string starts with "proxy://",
 * when it does not parse and contains POST_CHAR, or when no candidate
 * parses.
 * NOTE(review): stracpy()/memacpy()/add_to_strn() are presumably the
 * project's allocating string helpers, making the result caller-owned;
 * confirm against their definitions.
 */
unsigned char *translate_url(unsigned char *url, unsigned char *cwd)
{
	unsigned char *ch;
	/* da only receives parse_url()'s data pointer; it is not read afterwards. */
	unsigned char *nu, *da;
	unsigned char *prefix;
	int sl;
	while (*url == ' ') url++;
	if (!casecmp("proxy://", url, 8)) return NULL;
	if (!parse_url(url, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, &da, NULL, NULL)) {
		nu = stracpy(url);
		insert_wd(&nu, cwd);
		translate_directories(nu);
		return nu;
	}
	if (strchr(url, POST_CHAR)) return NULL;
	/* Has "://" but did not parse: it may only lack the slash after the host. */
	if (strstr(url, "://")) {
		nu = stracpy(url);
		add_to_strn(&nu, "/");
		if (!parse_url(nu, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL)) {
			insert_wd(&nu, cwd);
			translate_directories(nu);
			return nu;
		}
		mem_free(nu);
	}
	/* ch: first '.', ':', '/' or '@' in the string (or its end). */
	ch = url + strcspn(url, ".:/@");
	prefix = "file://";
	/* sl: whether a trailing "/" is appended when the string has no '/' at all. */
	sl = 0;
	/* No leading "word:" form, or an '@' comes before the first '/': guess a prefix. */
	if (*ch != ':' || *(url + strcspn(url, "/@")) == '@') {
		if (*url != '.' && *ch == '.') {
			unsigned char *f, *e;
			int i;
			/* Find [e, f): the last dot-separated label before ':' , '/' or the end. */
			for (e = ch + 1; *(f = e + strcspn(e, ".:/")) == '.'; e = f + 1) ;
			/* A digit in that label selects http. */
			for (i = 0; i < f - e; i++) if (e[i] >= '0' && e[i] <= '9') goto http;
			/* So does a two-character label ... */
			if (f - e == 2) {
				http:
				prefix = "http://", sl = 1;
			} else {
				/* ... or a label matching one of these names. */
				unsigned char *tld[] = { "com", "edu", "net", "org", "gov", "mil", "int", "arpa", "aero", "biz", "coop", "info", "museum", "name", "pro", "cat", "jobs", "mobi", "travel", "tel", NULL };
				for (i = 0; tld[i]; i++) if ((size_t)(f - e) == strlen(tld[i]) && !casecmp(tld[i], e, f - e)) goto http;
			}
		}
		/* This check runs last and so overrides an http guess made above. */
		/* NOTE(review): !cmpbeg(url, "ftp.") presumably means url begins with "ftp." - confirm. */
		if (*ch == '@' || *ch == ':' || !cmpbeg(url, "ftp.")) prefix = "ftp://", sl = 1;
		goto set_prefix;
		/* Also entered by goto from the DOS_FS / SPAD checks below, with prefix "file://" and sl 0. */
		set_prefix:
		nu = stracpy(prefix);
		add_to_strn(&nu, url);
		if (sl && !strchr(url, '/')) add_to_strn(&nu, "/");
		if (parse_url(nu, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL)) mem_free(nu), nu = NULL;
		else {
			insert_wd(&nu, cwd);
			translate_directories(nu);
		}
		return nu;
	}
#ifdef DOS_FS
	/* A ':' right after the first character is handled as a local file name. */
	if (ch == url + 1) goto set_prefix;
#endif
#ifdef SPAD
	if (_is_local(url)) goto set_prefix;
#endif
	/* "word:rest" form: rebuild it as "word://rest". */
	nu = memacpy(url, ch - url + 1);
	add_to_strn(&nu, "//");
	add_to_strn(&nu, ch + 1);
	if (!parse_url(nu, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL)) {
		insert_wd(&nu, cwd);
		translate_directories(nu);
		return nu;
	}
	/* Last attempt: the same with a trailing slash. */
	add_to_strn(&nu, "/");
	if (!parse_url(nu, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL)) {
		insert_wd(&nu, cwd);
		translate_directories(nu);
		return nu;
	}
	mem_free(nu);
	return NULL;
}
441 
extract_position(unsigned char * url)442 unsigned char *extract_position(unsigned char *url)
443 {
444 	unsigned char *u, *uu, *r;
445 	if ((u = get_url_data(url))) url = u;
446 	if (!(u = strchr(url, POST_CHAR))) u = url + strlen(url);
447 	if (!(uu = memchr(url, '#', u - url))) return NULL;
448 	r = mem_alloc(u - uu);
449 	memcpy(r, uu + 1, u - uu - 1);
450 	r[u - uu - 1] = 0;
451 	memmove(uu, u, strlen(u) + 1);
452 	return r;
453 }
454 
/*
 * Locate the last path component of url.
 *
 * Scans the data part of url (the whole string when get_url_data() fails)
 * up to the first character for which end_of_dir() is true.  On return *s
 * points just past the last directory separator seen (or at the start of
 * the scanned text if there was none) and *l is the number of characters
 * from there to where the scan stopped.  *s points into url; nothing is
 * copied.
 */
void get_filename_from_url(unsigned char *url, unsigned char **s, int *l)
{
	/* `lo` is consumed by the dsep() macro. */
	int lo = !casecmp(url, "file://", 7);
	unsigned char *data_part = get_url_data(url);

	if (data_part)
		url = data_part;

	for (*s = url; *url && !end_of_dir(*url); url++) {
		if (dsep(*url))
			*s = url + 1;
	}
	*l = url - *s;
}
467 
url_not_saveable(unsigned char * url)468 int url_not_saveable(unsigned char *url)
469 {
470 	int p, palen;
471 	unsigned char *u = translate_url(url, cast_uchar "/");
472 	if (!u)
473 		return 1;
474 	p = parse_url(u, NULL, NULL, NULL, NULL, &palen, NULL, NULL, NULL, NULL, NULL, NULL, NULL);
475 	mem_free(u);
476 	return p || palen;
477 }
478 
479 #define accept_char(x)	((x) != 10 && (x) != 13 && (x) != '"' && (x) != '\'' && (x) != '&' && (x) != '<' && (x) != '>')
480 #define special_char(x)	((x) < ' ' || (x) == '%' || (x) == '#' || (x) >= 127)
481 
482 /*
483  * -2 percent to raw
484  * -1 percent to html
485  *  0 raw to html
486  *  1 raw to percent
487  */
488 
add_conv_str(unsigned char ** s,int * l,unsigned char * b,int ll,int encode_special)489 void add_conv_str(unsigned char **s, int *l, unsigned char *b, int ll, int encode_special)
490 {
491 	for (; ll > 0; ll--, b++) {
492 		unsigned char chr = *b;
493 		if (!chr) continue;
494 		if (special_char(chr) && encode_special == 1) {
495 			unsigned char h[4];
496 			sprintf(cast_char h, "%%%02X", (unsigned)chr & 0xff);
497 			add_to_str(s, l, h);
498 			continue;
499 		}
500 		if (chr == '%' && encode_special <= -1 && ll > 2 &&
501 		    ((b[1] >= '0' && b[1] <= '9') || (b[1] >= 'A' && b[1] <= 'F') || (b[1] >= 'a' && b[1] <= 'f')) &&
502 		    ((b[2] >= '0' && b[2] <= '9') || (b[2] >= 'A' && b[2] <= 'F') || (b[2] >= 'a' && b[2] <= 'f'))) {
503 			int i;
504 			chr = 0;
505 			for (i = 1; i < 3; i++) {
506 				if (b[i] >= '0' && b[i] <= '9') chr = chr * 16 + b[i] - '0';
507 				if (b[i] >= 'A' && b[i] <= 'F') chr = chr * 16 + b[i] - 'A' + 10;
508 				if (b[i] >= 'a' && b[i] <= 'f') chr = chr * 16 + b[i] - 'a' + 10;
509 			}
510 			ll -= 2;
511 			b += 2;
512 			if (!chr)
513 				continue;
514 		}
515 		if (chr == ' ' && (!encode_special || encode_special == -1)) {
516 			add_to_str(s, l, cast_uchar "&nbsp;");
517 		} else if (accept_char(chr) || encode_special == -2) {
518 			add_chr_to_str(s, l, chr);
519 		} else if (chr == 10 || chr == 13) {
520 		} else {
521 			add_to_str(s, l, cast_uchar "&#");
522 			add_num_to_str(s, l, (int)chr);
523 			add_chr_to_str(s, l, ';');
524 		}
525 	}
526 }
527