1 /* url.c
2 * (c) 2002 Mikulas Patocka
3 * This file is a part of the Links program, released under GPL.
4 */
5
6 #include "links.h"
7
/*
 * Table of the URL schemes known to this file.  It is terminated by an
 * entry whose prot is NULL; check_protocol() and get_prot_info() scan it
 * linearly.
 */
static_const struct {
	char *prot;	/* scheme name; looked up case-insensitively */
	int port;	/* port reported by get_port() when the URL carries none */
	void (*func)(struct connection *);	/* handler returned by get_protocol_handle() */
	void (*nc_func)(struct session *, unsigned char *);	/* handler returned by get_external_protocol_function() */
	int free_syntax;	/* parse_url() hands back everything after the scheme as data */
	int need_slashes;	/* parse_url() fails unless "//" follows the colon */
	int need_slash_after_host;	/* parse_url() fails if the string ends right after host[:port] */
	int allow_post;	/* when zero, URLs containing POST_CHAR get no handler */
	int bypasses_socks;	/* value reported by url_bypasses_socks() */
} protocols[]= {
	{"data", 0, data_func, NULL, 1, 0, 0, 0, 0},
	{"file", 0, file_func, NULL, 1, 1, 0, 0, 1},
	{"https", 443, https_func, NULL, 0, 1, 1, 1, 0},
	{"http", 80, http_func, NULL, 0, 1, 1, 1, 0},
	{"proxy", 3128, proxy_func, NULL, 0, 1, 1, 1, 0},
	{"ftp", 21, ftp_func, NULL, 0, 1, 1, 0, 0},
	{"finger", 79, finger_func, NULL, 0, 1, 1, 0, 0},
#ifndef DISABLE_SMB
	{"smb", 139, smb_func, NULL, 0, 1, 1, 0, 1},
#endif
	{"mailto", 0, NULL, mailto_func, 0, 0, 0, 0, 0},
	{"telnet", 0, NULL, telnet_func, 0, 0, 0, 0, 1},
	{"tn3270", 0, NULL, tn3270_func, 0, 0, 0, 0, 1},
	{"mms", 0, NULL, mms_func, 1, 0, 1, 0, 1},
	{"magnet", 0, NULL, magnet_func, 1, 0, 0, 0, 1},
#ifdef JS
	{"javascript", 0, NULL, javascript_func,1, 0, 0, 0, 0},
#endif
	{NULL, 0, NULL, NULL, 0, 0, 0, 0, 0}
};
39
40
41
check_protocol(unsigned char * p,int l)42 static int check_protocol(unsigned char *p, int l)
43 {
44 int i;
45 for (i = 0; protocols[i].prot; i++)
46 if (!casecmp(cast_uchar protocols[i].prot, p, l) && strlen(cast_const_char protocols[i].prot) == (size_t)l) {
47 return i;
48 }
49 return -1;
50 }
51
get_prot_info(unsigned char * prot,int * port,void (** func)(struct connection *),void (** nc_func)(struct session * ses,unsigned char *),int * allow_post,int * bypasses_socks)52 static int get_prot_info(unsigned char *prot, int *port, void (**func)(struct connection *), void (**nc_func)(struct session *ses, unsigned char *), int *allow_post, int *bypasses_socks)
53 {
54 int i;
55 for (i = 0; protocols[i].prot; i++)
56 if (!casestrcmp(cast_uchar protocols[i].prot, prot)) {
57 if (port) *port = protocols[i].port;
58 if (func) *func = protocols[i].func;
59 if (nc_func) *nc_func = protocols[i].nc_func;
60 if (allow_post) *allow_post = protocols[i].allow_post;
61 if (bypasses_socks) *bypasses_socks = protocols[i].bypasses_socks;
62 return 0;
63 }
64 return -1;
65 }
66
/*
 * Split url into its components without copying anything: each output
 * pointer refers into url itself and is paired with a length.  Every
 * output argument may be NULL when the caller does not need it.
 *
 *   prlen        length of the scheme (set as soon as a ':' is found, even
 *                if the scheme later turns out to be unknown)
 *   user/uslen   user name in front of '@'
 *   pass/palen   password between the first ':' and the '@'
 *   host/holen   host name; a bracketed "[...]" host is kept with brackets
 *   port/polen   digits after the host's ':'
 *   data/dalen   everything after the authority, up to POST_CHAR
 *   post         text after POST_CHAR, or NULL if there is none
 *
 * For schemes marked free_syntax only prlen, data and dalen are filled in.
 * Returns 0 on success and -1 if url is NULL, contains no ':', names an
 * unknown scheme, or violates the scheme's flags in protocols[].
 */
int parse_url(unsigned char *url, int *prlen, unsigned char **user, int *uslen, unsigned char **pass, int *palen, unsigned char **host, int *holen, unsigned char **port, int *polen, unsigned char **data, int *dalen, unsigned char **post)
{
	unsigned char *p, *q;
	unsigned char p_c[2];
	int a;
	/* reset all outputs so that callers see defined values on failure too */
	if (prlen) *prlen = 0;
	if (user) *user = NULL;
	if (uslen) *uslen = 0;
	if (pass) *pass = NULL;
	if (palen) *palen = 0;
	if (host) *host = NULL;
	if (holen) *holen = 0;
	if (port) *port = NULL;
	if (polen) *polen = 0;
	if (data) *data = NULL;
	if (dalen) *dalen = 0;
	if (post) *post = NULL;
	if (!url || !(p = cast_uchar strchr(cast_const_char url, ':'))) return -1;
	if (prlen) *prlen = (int)(p - url);
	if ((a = check_protocol(url, (int)(p - url))) == -1) return -1;
	if (p[1] != '/' || p[2] != '/') {
		if (protocols[a].need_slashes) return -1;
		/* no "//": step back so that the "p + 3" below lands right after ':' */
		p -= 2;
	}
	if (protocols[a].free_syntax) {
		if (data) *data = p + 3;
		if (dalen) *dalen = (int)strlen(cast_const_char(p + 3));
		return 0;
	}
	p += 3;
	q = p + strcspn(cast_const_char p, "@/?");
	if (!*q && protocols[a].need_slash_after_host) return -1;
	if (*q == '@') {
		unsigned char *pp;
		/* advance q to the last '@' that precedes the first '/' or '?' */
		while (strcspn(cast_const_char(q + 1), "@") < strcspn(cast_const_char(q + 1), "/?"))
			q = q + 1 + strcspn(cast_const_char(q + 1), "@");
		pp = cast_uchar strchr(cast_const_char p, ':');
		if (!pp || pp > q) {
			/* no ':' inside the userinfo: user name only */
			if (user) *user = p;
			if (uslen) *uslen = (int)(q - p);
		} else {
			if (user) *user = p;
			if (uslen) *uslen = (int)(pp - p);
			if (pass) *pass = pp + 1;
			if (palen) *palen = (int)(q - pp - 1);
		}
		p = q + 1;
	}
	if (p[0] == '[') {
		/* bracketed host: it ends after the closing ']' */
		q = cast_uchar strchr(cast_const_char p, ']');
		if (q) {
			q++;
			goto have_host;
		}
	}
	q = p + strcspn(cast_const_char p, ":/?");
	have_host:
	if (!*q && protocols[a].need_slash_after_host) return -1;
	if (host) *host = p;
	if (holen) *holen = (int)(q - p);
	if (*q == ':') {
		/* the port runs up to the next '/' (or the end of the string) */
		unsigned char *pp = q + strcspn(cast_const_char q, "/");
		int cc;
		if (*pp != '/' && protocols[a].need_slash_after_host) return -1;
		if (port) *port = q + 1;
		if (polen) *polen = (int)(pp - q - 1);
		/* only decimal digits are accepted in the port */
		for (cc = 0; cc < pp - q - 1; cc++) if (q[cc+1] < '0' || q[cc+1] > '9') return -1;
		q = pp;
	}
	/* skip the '/' after the authority; a '?' stays part of the data */
	if (*q && *q != '?') q++;
	p = q;
	p_c[0] = POST_CHAR;
	p_c[1] = 0;
	q = p + strcspn(cast_const_char p, cast_const_char p_c);
	if (data) *data = p;
	if (dalen) *dalen = (int)(q - p);
	if (post) *post = *q ? q + 1 : NULL;
	return 0;
}
146
/*
 * Return a copy (made with memacpy) of the scheme part of url,
 * or NULL if parse_url() rejects the URL.
 */
unsigned char *get_protocol_name(unsigned char *url)
{
	int scheme_len;
	int failed = parse_url(url, &scheme_len, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL);
	if (failed)
		return NULL;
	return memacpy(url, scheme_len);
}
153
get_keepalive_id(unsigned char * url)154 unsigned char *get_keepalive_id(unsigned char *url)
155 {
156 unsigned char *h, *p, *k, *d;
157 int hl, pl;
158 if (parse_url(url, NULL, NULL, NULL, NULL, NULL, &h, &hl, &p, &pl, &d, NULL, NULL)) return NULL;
159 if (is_proxy_url(url) && !casecmp(d, cast_uchar "https://", 8)) {
160 if (parse_url(d, NULL, NULL, NULL, NULL, NULL, &h, &hl, &p, &pl, NULL, NULL, NULL)) return NULL;
161 }
162 k = p ? p + pl : h ? h + hl : NULL;
163 if (!k) return stracpy(cast_uchar "");
164 return memacpy(url, k - url);
165 }
166
get_host_name(unsigned char * url)167 unsigned char *get_host_name(unsigned char *url)
168 {
169 unsigned char *h;
170 int hl;
171 if (parse_url(url, NULL, NULL, NULL, NULL, NULL, &h, &hl, NULL, NULL, NULL, NULL, NULL)) return stracpy(cast_uchar "");
172 return memacpy(h, hl);
173 }
174
/*
 * Return a copy of the user-name part of url, or NULL if parse_url()
 * rejects the URL.
 */
unsigned char *get_user_name(unsigned char *url)
{
	unsigned char *user_ptr;
	int user_len;
	int failed = parse_url(url, NULL, &user_ptr, &user_len, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL);
	if (failed)
		return NULL;
	return memacpy(user_ptr, user_len);
}
182
/*
 * Return a copy of the password part of url, or NULL if parse_url()
 * rejects the URL.
 */
unsigned char *get_pass(unsigned char *url)
{
	unsigned char *pass_ptr;
	int pass_len;
	int failed = parse_url(url, NULL, NULL, NULL, &pass_ptr, &pass_len, NULL, NULL, NULL, NULL, NULL, NULL, NULL);
	if (failed)
		return NULL;
	return memacpy(pass_ptr, pass_len);
}
190
/*
 * Return a copy of the port digits of url.  The result is NULL when
 * parse_url() fails and also when the port is absent or empty.
 */
unsigned char *get_port_str(unsigned char *url)
{
	unsigned char *port_ptr;
	int port_len;
	if (parse_url(url, NULL, NULL, NULL, NULL, NULL, NULL, NULL, &port_ptr, &port_len, NULL, NULL, NULL))
		return NULL;
	if (!port_len)
		return NULL;
	return memacpy(port_ptr, port_len);
}
198
get_port(unsigned char * url)199 int get_port(unsigned char *url)
200 {
201 unsigned char *h;
202 int hl;
203 long n = -1;
204 if (parse_url(url, NULL, NULL, NULL, NULL, NULL, NULL, NULL, &h, &hl, NULL, NULL, NULL)) return -1;
205 if (h) {
206 n = strtol(cast_const_char h, NULL, 10);
207 if (n > 0 && n < 65536) return (int)n;
208 return -1;
209 }
210 if ((h = get_protocol_name(url))) {
211 int nn = -1; /* against warning */
212 get_prot_info(h, &nn, NULL, NULL, NULL, NULL);
213 mem_free(h);
214 n = nn;
215 }
216 return (int)n;
217 }
218
get_protocol_handle(unsigned char * url)219 void (*get_protocol_handle(unsigned char *url))(struct connection *)
220 {
221 unsigned char *p;
222 void (*f)(struct connection *) = NULL;
223 int post = 0;
224 if (!(p = get_protocol_name(url))) return NULL;
225 get_prot_info(p, NULL, &f, NULL, &post, NULL);
226 mem_free(p);
227 if (!post && strchr(cast_const_char url, POST_CHAR)) return NULL;
228 return f;
229 }
230
get_external_protocol_function(unsigned char * url)231 void (*get_external_protocol_function(unsigned char *url))(struct session *, unsigned char *)
232 {
233 unsigned char *p;
234 void (*f)(struct session *, unsigned char *) = NULL;
235 int post = 0;
236 if (!(p = get_protocol_name(url))) return NULL;
237 get_prot_info(p, NULL, NULL, &f, &post, NULL);
238 mem_free(p);
239 if (!post && strchr(cast_const_char url, POST_CHAR)) return NULL;
240 return f;
241 }
242
/*
 * Return the bypasses_socks flag of the scheme of url.  URLs that cannot
 * be parsed yield 1.
 */
int url_bypasses_socks(unsigned char *url)
{
	int bypass = 0; /* against warning */
	unsigned char *scheme = get_protocol_name(url);
	if (!scheme)
		return 1;
	get_prot_info(scheme, NULL, NULL, NULL, NULL, &bypass);
	mem_free(scheme);
	return bypass;
}
252
/*
 * Return a pointer into url at the start of its data part (as located by
 * parse_url()), or NULL if the URL cannot be parsed.  Nothing is copied.
 */
unsigned char *get_url_data(unsigned char *url)
{
	unsigned char *data_ptr;
	int failed = parse_url(url, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, &data_ptr, NULL, NULL);
	return failed ? NULL : data_ptr;
}
259
/*
 * Directory-separator test.  It reads a variable named "lo" from the
 * calling function: when lo is nonzero the dir_sep() helper decides,
 * otherwise only '/' counts as a separator.
 */
#define dsep(x) (lo ? dir_sep(x) : (x) == '/')

/*
 * Remove "." and ".." path segments from the data part of url, editing the
 * string in place.  "javascript:" and "magnet:" URLs and URLs without a
 * data part are left alone.  Processing stops at the first character for
 * which end_of_dir() is true (presumably the start of the query/POST part;
 * TODO confirm against end_of_dir()); the rest is copied through unchanged.
 *
 * The function may insert one '/' in front of the data part, so the buffer
 * must have room for one extra byte.  rewrite_url() calls extend_str(&n, 1)
 * before calling this function, presumably for that purpose.
 */
static void translate_directories(unsigned char *url)
{
	unsigned char *dd = get_url_data(url);
	unsigned char *s, *d;
	int lo = !casecmp(url, cast_uchar "file://", 7);	/* used by dsep() */
	if (!casecmp(url, cast_uchar "javascript:", 11)) return;
	if (!casecmp(url, cast_uchar "magnet:", 7)) return;
	if (!dd || dd == url /*|| *--dd != '/'*/) return;
	if (!dsep(*dd)) {
		/* start at the separator in front of the data, inserting one if it is missing */
		dd--;
		if (!dsep(*dd)) {
			dd++;
			memmove(dd + 1, dd, strlen(cast_const_char dd) + 1);
			*dd = '/';
		}
	}
	/* s reads the original path, d writes the normalized one; d never passes s */
	s = dd;
	d = dd;
	r:
	if (end_of_dir(url, s[0])) {
		/* end of the path: move the remainder down and finish */
		memmove(d, s, strlen(cast_const_char s) + 1);
		return;
	}
	if (dsep(s[0]) && s[1] == '.' && (dsep(s[2]) || !s[2] || end_of_dir(url, s[2]))) {
		/* "/." segment: drop it, keeping the separator if it was the last segment */
		if (!dsep(s[2])) *d++ = *s;
		s += 2;
		goto r;
	}
	if (dsep(s[0]) && s[1] == '.' && s[2] == '.' && (dsep(s[3]) || !s[3] || end_of_dir(url, s[3]))) {
		/* "/.." segment: back the output up to the previous separator (not below dd) */
		while (d > dd) {
			d--;
			if (dsep(*d)) goto b;
		}
		b:
		if (!dsep(s[3])) *d++ = *s;
		s += 3;
		goto r;
	}
	/* ordinary character: copy it; copying the terminating NUL ends the loop */
	if ((*d++ = *s++)) goto r;
}
302
translate_hashbang(unsigned char * up)303 static unsigned char *translate_hashbang(unsigned char *up)
304 {
305 unsigned char *u, *p, *dp, *data, *post_seq;
306 int q;
307 unsigned char *r;
308 int rl;
309 if (!strstr(cast_const_char up, "#!") && !strstr(cast_const_char up, "#%21")) return up;
310 u = stracpy(up);
311 p = extract_position(u);
312 if (!p) {
313 free_u_ret_up:
314 mem_free(u);
315 return up;
316 }
317 if (p[0] == '!') dp = p + 1;
318 else if (!casecmp(p, cast_uchar "%21", 3)) dp = p + 3;
319 else {
320 mem_free(p);
321 goto free_u_ret_up;
322 }
323 if (!(post_seq = cast_uchar strchr(cast_const_char u, POST_CHAR))) post_seq = cast_uchar strchr(cast_const_char u, 0);
324 data = get_url_data(u);
325 if (!data) data = u;
326 r = init_str();
327 rl = 0;
328 add_bytes_to_str(&r, &rl, u, post_seq - u);
329 q = (int)strlen(cast_const_char data);
330 if (q && (data[q - 1] == '&' || data[q - 1] == '?'))
331 ;
332 else if (strchr(cast_const_char data, '?')) add_chr_to_str(&r, &rl, '&');
333 else add_chr_to_str(&r, &rl, '?');
334 add_to_str(&r, &rl, cast_uchar "_escaped_fragment_=");
335 for (; *dp; dp++) {
336 unsigned char c = *dp;
337 if (c <= 0x20 || c == 0x23 || c == 0x25 || c == 0x26 || c == 0x2b || c >= 0x7f) {
338 unsigned char h[4];
339 sprintf(cast_char h, "%%%02X", c);
340 add_to_str(&r, &rl, h);
341 } else {
342 add_chr_to_str(&r, &rl, c);
343 }
344 }
345 add_to_str(&r, &rl, post_seq);
346 mem_free(u);
347 mem_free(p);
348 mem_free(up);
349 return r;
350 }
351
rewrite_url_google_docs(unsigned char * n)352 static unsigned char *rewrite_url_google_docs(unsigned char *n)
353 {
354 int i;
355 unsigned char *id, *id_end, *url_end;
356 unsigned char *res;
357 int l;
358 struct {
359 const char *beginning;
360 const char *result1;
361 const char *result2;
362 } const patterns[] = {
363 { "https://docs.google.com/document/d/", "https://docs.google.com/document/d/", "/export?format=pdf" },
364 { "https://docs.google.com/document/u/", "https://docs.google.com/document/u/", "/export?format=pdf" },
365 { "https://docs.google.com/spreadsheets/d/", "https://docs.google.com/spreadsheets/d/", "/export?format=pdf" },
366 { "https://docs.google.com/spreadsheets/u/", "https://docs.google.com/spreadsheets/u/", "/export?format=pdf" },
367 { "https://docs.google.com/presentation/d/", "https://docs.google.com/presentation/d/", "/export/pdf" },
368 { "https://docs.google.com/presentation/u/", "https://docs.google.com/presentation/u/", "/export/pdf" },
369 { "https://drive.google.com/file/d/", "https://drive.google.com/uc?export=download&id=", "" },
370 { "https://drive.google.com/file/u/", "https://drive.google.com/uc?export=download&id=", "" }
371 };
372 for (i = 0; i < (int)array_elements(patterns); i++) {
373 if (!cmpbeg(n, cast_uchar patterns[i].beginning))
374 goto match;
375 }
376 return n;
377 match:
378 id = n + strlen(cast_const_char patterns[i].beginning);
379 url_end = id + strcspn(cast_const_char id, "#" POST_CHAR_STRING);
380 id_end = memrchr(id, '/', url_end - id);
381 if (!id_end)
382 return n;
383 if (!cmpbeg(id_end, cast_uchar "/export"))
384 return n;
385 if (!patterns[i].result2[0]) {
386 id = id_end;
387 while (id[-1] != '/')
388 id--;
389 }
390 res = init_str();
391 l = 0;
392 add_to_str(&res, &l, cast_uchar patterns[i].result1);
393 add_bytes_to_str(&res, &l, id, id_end - id);
394 add_to_str(&res, &l, cast_uchar patterns[i].result2);
395 mem_free(n);
396 return res;
397 }
398
rewrite_url_mediawiki_svg(unsigned char * n)399 static unsigned char *rewrite_url_mediawiki_svg(unsigned char *n)
400 {
401 #ifndef HAVE_SVG
402 const unsigned char u1[] = "/media/math/render/svg/";
403 const unsigned char u2[] = "/media/math/render/png/";
404 unsigned char *d, *s;
405 d = get_url_data(n);
406 if (!d)
407 return n;
408 s = cast_uchar strstr(cast_const_char d, cast_const_char u1);
409 if (!s)
410 return n;
411 memcpy(s, u2, strlen(cast_const_char u2));
412 #endif
413 return n;
414 }
415
/*
 * Run all URL rewriting steps on the allocated string n and return the
 * resulting string, which may be a different allocation.  The string is
 * first extended by one byte via extend_str(), then the steps run in this
 * order: directory normalization, hash-bang translation, Google Docs
 * rewriting, MediaWiki SVG rewriting.
 */
static unsigned char *rewrite_url(unsigned char *n)
{
	extend_str(&n, 1);
	translate_directories(n);
	return rewrite_url_mediawiki_svg(rewrite_url_google_docs(translate_hashbang(n)));
}
425
test_qualified_name(unsigned char * host,unsigned char * hostname)426 static int test_qualified_name(unsigned char *host, unsigned char *hostname)
427 {
428 unsigned char *c;
429 if (!casestrcmp(host, hostname))
430 return 1;
431 c = cast_uchar strchr(cast_const_char hostname, '.');
432 if (c) {
433 *c = 0;
434 if (!casestrcmp(host, hostname))
435 return 1;
436 }
437 return 0;
438 }
439
is_local_host(unsigned char * host)440 static int is_local_host(unsigned char *host)
441 {
442 if (!*host)
443 return 1;
444 if (!casestrcmp(host, cast_uchar "localhost"))
445 return 1;
446 #if defined(HAVE_GETHOSTNAME)
447 {
448 int rs;
449 unsigned char n[4096];
450 n[0] = 0;
451 EINTRLOOP(rs, gethostname(cast_char n, sizeof n));
452 n[sizeof n - 1] = 0;
453 if (!rs && strlen(cast_const_char n) < sizeof n - 1) {
454 if (test_qualified_name(host, n))
455 return 1;
456 }
457 }
458 #elif defined(HAVE_SYS_UTSNAME_H) && defined(HAVE_UNAME)
459 {
460 int rs;
461 struct utsname name;
462 memset(&name, 0, sizeof name);
463 EINTRLOOP(rs, uname(&name));
464 if (rs >= 0) {
465 if (test_qualified_name(host, cast_uchar name.nodename))
466 return 1;
467 }
468 }
469 #endif
470 return 0;
471 }
472
insert_wd(unsigned char ** up,unsigned char * cwd)473 static void insert_wd(unsigned char **up, unsigned char *cwd)
474 {
475 unsigned char *u = *up;
476 unsigned char *cw;
477 unsigned char *url;
478 unsigned char *host;
479 int url_l;
480 int i;
481 if (!u || !cwd || !*cwd) return;
482 if (casecmp(u, cast_uchar "file://", 7)) return;
483 for (i = 7; u[i] && !dir_sep(u[i]); i++) ;
484 host = memacpy(u + 7, i - 7);
485 if (is_local_host(host)) {
486 mem_free(host);
487 memmove(u + 7, u + i, strlen(cast_const_char (u + i)) + 1);
488 return;
489 }
490 mem_free(host);
491 #ifdef DOS_FS
492 if (upcase(u[7]) >= 'A' && upcase(u[7]) <= 'Z' && u[8] == ':' && dir_sep(u[9])) return;
493 #endif
494 #ifdef SPAD
495 if (_is_absolute(cast_const_char(u + 7)) != _ABS_NO) return;
496 #endif
497 url = init_str();
498 url_l = 0;
499 add_bytes_to_str(&url, &url_l, u, 7);
500 for (cw = cwd; *cw; cw++) {
501 unsigned char c = *cw;
502 if (c < ' ' || c == '%' || c >= 127) {
503 unsigned char h[4];
504 sprintf(cast_char h, "%%%02X", (unsigned)c & 0xff);
505 add_to_str(&url, &url_l, h);
506 } else {
507 add_chr_to_str(&url, &url_l, c);
508 }
509 }
510 if (!dir_sep(cwd[strlen(cast_const_char cwd) - 1])) add_chr_to_str(&url, &url_l, '/');
511 add_to_str(&url, &url_l, u + 7);
512 mem_free(u);
513 *up = url;
514 }
515
/*
 * Return 1 if the NUL-terminated string url contains at least one byte
 * with a value of 128 or more, 0 otherwise.
 */
int url_non_ascii(unsigned char *url)
{
	unsigned char *cursor = url;
	while (*cursor) {
		if (*cursor >= 128)
			return 1;
		cursor++;
	}
	return 0;
}
524
/*
 * Replace the allocated URL nu by its IDN-encoded form if it contains
 * non-ASCII bytes.
 *
 * Pure-ASCII input is returned as is.  When idn_encode_url() succeeds, nu
 * is freed and the encoded string returned.  When it fails, the outcome
 * depends on canfail: nonzero frees nu and returns NULL, zero returns nu
 * unchanged.
 */
static unsigned char *translate_idn(unsigned char *nu, int canfail)
{
	unsigned char *encoded;
	if (!url_non_ascii(nu))
		return nu;
	encoded = idn_encode_url(nu, 0);
	if (!encoded && !canfail)
		return nu;
	mem_free(nu);
	return encoded;
}
540
/*
 * For join_urls the first URL must be absolute (one that passes parse_url
 * without an error --- if it is not absolute, this ends in an internal
 * error) and the second URL is either a path relative to it or an absolute
 * URL as well.  If the second URL is absolute, it is returned; if it is
 * relative, the first and the second URL are joined.
 *
 * The result is a newly allocated string that has been passed through
 * translate_idn() and rewrite_url(); NULL is returned after a "bad base
 * url" internal error.
 */
unsigned char *join_urls(unsigned char *base, unsigned char *rel)
{
	unsigned char *p, *n, *pp, *ch;
	int l;
	int lo = !casecmp(base, cast_uchar "file://", 7);	/* used by dsep() */
	int data = !casecmp(base, cast_uchar "data:", 5);
	if (rel[0] == '#' || !rel[0]) {
		/* empty or fragment-only reference: base without fragment/POST part, plus rel */
		n = stracpy(base);
		for (p = n; *p && *p != POST_CHAR && *p != '#'; p++)
			;
		*p = 0;
		add_to_strn(&n, rel);
		goto return_n;
	}
	if (rel[0] == '?' || rel[0] == '&') {
		/* query reference: cut base's data at the same character (or POST_CHAR) and append rel */
		unsigned char rj[3];
		unsigned char *d = get_url_data(base);
		if (!d) goto bad_base;
		rj[0] = rel[0];
		rj[1] = POST_CHAR;
		rj[2] = 0;
		d += strcspn(cast_const_char d, cast_const_char rj);
		n = memacpy(base, d - base);
		add_to_strn(&n, rel);
		goto return_n;
	}
	if (rel[0] == '/' && rel[1] == '/' && !data) {
		/* "//host/..." reference: reuse only the scheme of base */
		unsigned char *s;
		if (!(s = cast_uchar strstr(cast_const_char base, "//"))) {
			if (!(s = cast_uchar strchr(cast_const_char base, ':'))) {
				bad_base:
				internal_error("bad base url: %s", base);
				return NULL;
			}
			s++;
		}
		n = memacpy(base, s - base);
		add_to_strn(&n, rel);
		if (!parse_url(n, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL)) goto return_n;
		/* retry with a trailing '/' in case only the slash after the host is missing */
		add_to_strn(&n, cast_uchar "/");
		if (!parse_url(n, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL)) goto return_n;
		mem_free(n);
	}
	/* proxy URLs are never accepted as absolute; they are joined as relative text */
	if (is_proxy_url(rel)) goto prx;
	if (!parse_url(rel, &l, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL)) {
		/* rel is a valid absolute URL on its own */
		n = stracpy(rel);
		goto return_n;
	}
	/*
	 * Second attempt at treating rel as absolute: strip trailing bytes
	 * <= ' ' and insert a '/' in front of the fragment (or at the end).
	 */
	n = stracpy(rel);
	while (n[0] && n[strlen(cast_const_char n) - 1] <= ' ') n[strlen(cast_const_char n) - 1] = 0;
	extend_str(&n, 1);
	ch = cast_uchar strrchr(cast_const_char n, '#');
	if (!ch || strchr(cast_const_char ch, '/')) ch = n + strlen(cast_const_char n);
	memmove(ch + 1, ch, strlen(cast_const_char ch) + 1);
	*ch = '/';
	if (!parse_url(n, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL)) goto return_n;
	mem_free(n);
	prx:
	/* rel is a relative path: decide how much of base to keep in front of it */
	if (parse_url(base, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, &p, NULL, NULL) || !p) {
		goto bad_base;
	}
	if (!dsep(*p)) p--;
	if (!data) {
		if (end_of_dir(base, rel[0])) for (; *p; p++) {
			/* rel starts with an end-of-directory character: keep base's whole path */
			if (end_of_dir(base, *p)) break;
		} else if (!dsep(rel[0])) for (pp = p; *pp; pp++) {
			/* rel is a plain relative path: keep base up to and including its last separator */
			if (end_of_dir(base, *pp)) break;
			if (dsep(*pp)) p = pp + 1;
		}
	}
	n = memacpy(base, p - base);
	add_to_strn(&n, rel);
	goto return_n;

	return_n:
	n = translate_idn(n, 0);
	n = rewrite_url(n);
	return n;
}
627
/*
 * Turn text typed by the user into an absolute URL.
 *
 * Leading and trailing spaces are ignored.  A string that already parses
 * as a URL is used as is; otherwise the function tries, in this order:
 * adding a missing '/' after the host of a "scheme://host" string,
 * guessing a "http://", "ftp://" or "file://" prefix, and inserting "//"
 * after the scheme's colon.  The accepted candidate is passed through
 * translate_idn(), insert_wd() (with cwd) and rewrite_url().
 *
 * Returns a newly allocated string, or NULL if url is a proxy URL,
 * contains POST_CHAR without being a valid URL, or cannot be made into a
 * valid URL.
 */
unsigned char *translate_url(unsigned char *url, unsigned char *cwd)
{
	unsigned char *ch;
	unsigned char *nu, *da;
	unsigned char *prefix;
	int sl;
	while (*url == ' ') url++;
	if (*url && url[strlen(cast_const_char url) - 1] == ' ') {
		/* trailing spaces: strip them on a copy and start over */
		nu = stracpy(url);
		while (*nu && nu[strlen(cast_const_char nu) - 1] == ' ') nu[strlen(cast_const_char nu) - 1] = 0;
		ch = translate_url(nu, cwd);
		mem_free(nu);
		return ch;
	}
	if (is_proxy_url(url)) return NULL;
	if (!parse_url(url, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, &da, NULL, NULL)) {
		/* already a valid URL */
		nu = stracpy(url);
		goto return_nu;
	}
	if (strchr(cast_const_char url, POST_CHAR)) return NULL;
	if (strstr(cast_const_char url, "://")) {
		/* try again with a '/' inserted in front of the fragment (or appended) */
		nu = stracpy(url);
		extend_str(&nu, 1);
		ch = cast_uchar strrchr(cast_const_char nu, '#');
		if (!ch || strchr(cast_const_char ch, '/')) ch = nu + strlen(cast_const_char nu);
		memmove(ch + 1, ch, strlen(cast_const_char ch) + 1);
		*ch = '/';
		if (!parse_url(nu, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL)) goto return_nu;
		mem_free(nu);
	}
	/* from here on a scheme prefix is guessed; the default is a local file */
	prefix = cast_uchar "file://";
	if (url[0] == '[' && strchr(cast_const_char url, ']')) {
		/* bracketed host at the start: jump straight to the http prefix */
		ch = url;
		goto http;
	}
	ch = url + strcspn(cast_const_char url, ".:/@");
	sl = 0;	/* nonzero: append '/' when url contains none */
#ifdef SPAD
	if (strchr(cast_const_char url, ':') && _is_local(cast_const_char url)) goto set_prefix;
#endif
	if (*ch != ':' || *(url + strcspn(cast_const_char url, "/@")) == '@') {
		if (*url != '.' && *ch == '.') {
			/* dotted name: use http if the last label before ':' or '/' passes is_tld() */
			unsigned char *e, *f, *g;
			int tl;
			for (e = ch + 1; *(f = e + strcspn(cast_const_char e, ".:/")) == '.'; e = f + 1)
				;
			g = memacpy(e, f - e);
			tl = is_tld(g);
			mem_free(g);
			if (tl)
				http: prefix = cast_uchar "http://", sl = 1;
		}
		/* user@host, host:port or a name starting with "ftp." selects ftp */
		if (*ch == '@' || *ch == ':' || !cmpbeg(url, cast_uchar "ftp.")) prefix = cast_uchar "ftp://", sl = 1;
		goto set_prefix;
		set_prefix:
		nu = stracpy(prefix);
		add_to_strn(&nu, url);
		if (sl && !strchr(cast_const_char url, '/')) add_to_strn(&nu, cast_uchar "/");
		if (parse_url(nu, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL)) {
			mem_free(nu);
			return NULL;
		}
		goto return_nu;
	}
#ifdef DOS_FS
	/* a single character before ':' is handled through the prefix path (file:// by default) */
	if (ch == url + 1) goto set_prefix;
#endif
	/* "scheme:rest" without slashes: try "scheme://rest", then with a trailing '/' */
	nu = memacpy(url, ch - url + 1);
	add_to_strn(&nu, cast_uchar "//");
	add_to_strn(&nu, ch + 1);
	if (!parse_url(nu, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL)) goto return_nu;
	add_to_strn(&nu, cast_uchar "/");
	if (!parse_url(nu, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL)) goto return_nu;
	mem_free(nu);
	return NULL;

	return_nu:
	nu = translate_idn(nu, 1);
	if (!nu)
		return NULL;
	insert_wd(&nu, cwd);
	nu = rewrite_url(nu);
	return nu;
}
712
extract_position(unsigned char * url)713 unsigned char *extract_position(unsigned char *url)
714 {
715 unsigned char *u, *uu, *r;
716 if ((u = get_url_data(url))) url = u;
717 if (!(u = cast_uchar strchr(cast_const_char url, POST_CHAR))) u = cast_uchar strchr(cast_const_char url, 0);
718 if (!(uu = memchr(url, '#', u - url))) return NULL;
719 r = memacpy(uu + 1, u - uu - 1);
720 memmove(uu, u, strlen(cast_const_char u) + 1);
721 return r;
722 }
723
url_not_saveable(unsigned char * url)724 int url_not_saveable(unsigned char *url)
725 {
726 int p, palen;
727 unsigned char *u = translate_url(url, cast_uchar "/");
728 if (!u)
729 return 1;
730 p = parse_url(u, NULL, NULL, NULL, NULL, &palen, NULL, NULL, NULL, NULL, NULL, NULL, NULL);
731 mem_free(u);
732 return p || palen;
733 }
734
735 #define accept_char(x) ((x) != 10 && (x) != 13 && (x) != '"' && (x) != '\'' && (x) != '&' && (x) != '<' && (x) != '>')
736 #define special_char(x) ((x) < ' ' || (x) == '%' || (x) == '#' || (x) >= 127)
737
738 /*
739 * -2 percent to raw
740 * -1 percent to html
741 * 0 raw to html
742 * 1 raw to percent
743 */
744
add_conv_str(unsigned char ** s,int * l,unsigned char * b,int ll,int encode_special)745 void add_conv_str(unsigned char **s, int *l, unsigned char *b, int ll, int encode_special)
746 {
747 for (; ll > 0; ll--, b++) {
748 unsigned char chr = *b;
749 if (!chr) continue;
750 if (special_char(chr) && encode_special == 1) {
751 unsigned char h[4];
752 sprintf(cast_char h, "%%%02X", (unsigned)chr & 0xff);
753 add_to_str(s, l, h);
754 continue;
755 }
756 if (chr == '%' && encode_special <= -1 && ll > 2 &&
757 ((b[1] >= '0' && b[1] <= '9') || (b[1] >= 'A' && b[1] <= 'F') || (b[1] >= 'a' && b[1] <= 'f')) &&
758 ((b[2] >= '0' && b[2] <= '9') || (b[2] >= 'A' && b[2] <= 'F') || (b[2] >= 'a' && b[2] <= 'f'))) {
759 int i;
760 chr = 0;
761 for (i = 1; i < 3; i++) {
762 if (b[i] >= '0' && b[i] <= '9') chr = chr * 16 + b[i] - '0';
763 if (b[i] >= 'A' && b[i] <= 'F') chr = chr * 16 + b[i] - 'A' + 10;
764 if (b[i] >= 'a' && b[i] <= 'f') chr = chr * 16 + b[i] - 'a' + 10;
765 }
766 ll -= 2;
767 b += 2;
768 if (!chr)
769 continue;
770 }
771 if (chr == ' ' && (!encode_special || encode_special == -1)) {
772 add_to_str(s, l, cast_uchar " ");
773 } else if (accept_char(chr) || encode_special == -2) {
774 add_chr_to_str(s, l, chr);
775 } else if (chr == 10 || chr == 13) {
776 } else {
777 add_to_str(s, l, cast_uchar "&#");
778 add_num_to_str(s, l, (int)chr);
779 add_chr_to_str(s, l, ';');
780 }
781 }
782 }
783
/*
 * Cygwin only; on other platforms this function does nothing.
 *
 * Re-encode the tail of the string *s that starts at offset start_l: the
 * tail is copied aside, *s is truncated to start_l, and the bytes are
 * appended again one by one.  Bytes >= 128 are replaced by "&#N;" numeric
 * character references, where N comes from cp2u() for the charset returned
 * by windows_charset(), or from decoding a UTF-8 sequence when that
 * charset is utf8_table.  A 0x18 byte is followed by a UTF-8 sequence that
 * is decoded regardless of the charset.  Bytes that cannot be converted
 * are copied unchanged.  *l is updated to the new length.
 */
void convert_file_charset(unsigned char **s, int *l, int start_l)
{
#ifdef __CYGWIN__
	int win_charset = windows_charset();
	unsigned char *cpy = stracpy(*s + start_l);
	unsigned char *ptr, *end;
	/* truncate the output string; the tail lives on in cpy */
	(*s)[*l = start_l] = 0;
	end = cast_uchar strchr(cast_const_char cpy, 0);
	for (ptr = cpy; ptr < end; ptr++) {
		unsigned char chr = *ptr;
		unsigned u;
		unsigned char *p;
		if (chr == 0x18) {
			/* escape byte: the UTF-8 sequence starts at the next byte */
			p = ptr + 1;
			goto try_get_utf;
		}
		if (chr >= 128) {
			if (win_charset != utf8_table) {
				u = (unsigned)cp2u(chr, win_charset);
				if (u != -1U)
					goto put_u;
			} else {
				p = ptr;
				try_get_utf:
				GET_UTF_8(p, u);
				if (u) {
					/* the loop's ptr++ will step past the decoded sequence */
					ptr = p - 1;
					put_u:
					add_to_str(s, l, cast_uchar "&#");
					add_num_to_str(s, l, (int)u);
					add_chr_to_str(s, l, ';');
					continue;
				}
			}
		}
		add_chr_to_str(s, l, chr);
	}
	mem_free(cpy);
#endif
}
824
/* prefix that marks a punycode-encoded host label, and its length */
static_const unsigned char xn[] = "xn--";
static_const unsigned xn_l = sizeof(xn) - 1;

/*
 * Limit applied to labels by puny_encode()/puny_decode(), followed by the
 * numeric parameters used by puny_threshold() and puny_adapt().  The
 * values agree with the Punycode parameters listed in RFC 3492.
 */
#define puny_max_length 63
#define puny_base 36
#define puny_tmin 1
#define puny_tmax 26
#define puny_skew 38
#define puny_damp 700
#define puny_init_bias 72
835
/*
 * Return 1 if c is an ASCII letter, an ASCII digit or '-', 0 otherwise.
 */
static int ascii_allowed(unsigned c)
{
	if (c == '-')
		return 1;
	if (c >= '0' && c <= '9')
		return 1;
	if (c >= 'A' && c <= 'Z')
		return 1;
	if (c >= 'a' && c <= 'z')
		return 1;
	return 0;
}
843
/*
 * Map a punycode digit value to its character: 0..25 give 'a'..'z',
 * 26..35 give '0'..'9'.
 */
static unsigned char puny_chrenc(unsigned n)
{
	if (n < 26)
		return (unsigned char)(n + 'a');
	return (unsigned char)(n + ('0' - 26));
}
848
/*
 * Map a punycode character to its digit value: '0'..'9' give 26..35,
 * letters of either case give 0..25.  No validation is done here; the
 * caller is expected to pass only letters and digits.
 */
static unsigned puny_chrdec(unsigned char c)
{
	int origin;
	if (c <= '9')
		origin = '0' - 26;
	else if (c <= 'Z')
		origin = 'A';
	else
		origin = 'a';
	return c - origin;
}
857
/* State shared by the punycode encoder and decoder for bias adaptation. */
struct puny_state {
	unsigned ascii_numpoints;	/* number of basic (ASCII) code points, fixed by puny_init() */
	unsigned numpoints;	/* code points handled so far; incremented by puny_adapt() */
	unsigned bias;	/* current bias, recomputed by puny_adapt() */
	unsigned k;	/* digit position counter, advanced by puny_threshold() */
};
864
puny_init(struct puny_state * st,unsigned numpoints)865 static void puny_init(struct puny_state *st, unsigned numpoints)
866 {
867 st->ascii_numpoints = numpoints;
868 st->numpoints = numpoints;
869 st->bias = puny_init_bias;
870 st->k = puny_base;
871 }
872
puny_threshold(struct puny_state * st)873 static unsigned puny_threshold(struct puny_state *st)
874 {
875 unsigned k = st->k;
876 st->k += puny_base;
877 if (k <= st->bias)
878 return puny_tmin;
879 if (k >= st->bias + puny_tmax)
880 return puny_tmax;
881 return k - st->bias;
882 }
883
puny_adapt(struct puny_state * st,unsigned val)884 static void puny_adapt(struct puny_state *st, unsigned val)
885 {
886 unsigned k;
887 val = st->ascii_numpoints == st->numpoints ? val / puny_damp : val / 2;
888 st->numpoints++;
889 val += val / st->numpoints;
890 k = 0;
891 while (val > ((puny_base - puny_tmin) * puny_tmax) / 2) {
892 val /= puny_base - puny_tmin;
893 k += puny_base;
894 }
895 st->bias = k + (((puny_base - puny_tmin + 1) * val) / (val + puny_skew));
896 st->k = puny_base;
897 }
898
puny_encode(unsigned char * s,int len)899 static unsigned char *puny_encode(unsigned char *s, int len)
900 {
901 unsigned char *p;
902 unsigned *uni;
903 unsigned uni_l;
904 unsigned char *res;
905 int res_l;
906 unsigned i;
907 unsigned ni, cchar, skip;
908 struct puny_state st;
909
910 if (len > 7 * puny_max_length)
911 goto err;
912 uni = mem_alloc(len * sizeof(unsigned));
913 uni_l = 0;
914 for (p = s; p < s + len; ) {
915 unsigned c;
916 GET_UTF_8(p, c);
917 c = uni_locase(c);
918 if (c < 128 && !ascii_allowed(c))
919 goto err_free_uni;
920 if (c > 0x10FFFF)
921 goto err_free_uni;
922 uni[uni_l++] = c;
923 }
924 if (uni_l > puny_max_length)
925 goto err_free_uni;
926
927 res = init_str();
928 res_l = 0;
929 add_to_str(&res, &res_l, cast_uchar xn);
930
931 ni = 0;
932 for (i = 0; i < uni_l; i++) {
933 if (uni[i] < 128) {
934 add_chr_to_str(&res, &res_l, uni[i]);
935 ni++;
936 }
937 }
938
939 if (ni == uni_l) {
940 memmove(res, res + xn_l, res_l - xn_l + 1);
941 res_l -= 4;
942 goto ret_free_uni;
943 }
944
945 if (res_l != (int)xn_l)
946 add_chr_to_str(&res, &res_l, '-');
947
948 puny_init(&st, ni);
949
950 cchar = 128;
951 skip = 0;
952
953 while (1) {
954 unsigned dlen = 0;
955 unsigned lchar = -1U;
956 for (i = 0; i < uni_l; i++) {
957 unsigned c = uni[i];
958 if (c < cchar)
959 dlen++;
960 else if (c < lchar)
961 lchar = c;
962 }
963 if (lchar == -1U)
964 break;
965 skip += (lchar - cchar) * (dlen + 1);
966 for (i = 0; i < uni_l; i++) {
967 unsigned c = uni[i];
968 if (c < lchar)
969 skip++;
970 if (c == lchar) {
971 unsigned n;
972 /*fprintf(stderr, "%d\n", skip);*/
973 n = skip;
974 while (1) {
975 unsigned t = puny_threshold(&st);
976 if (n < t) {
977 add_chr_to_str(&res, &res_l, puny_chrenc(n));
978 break;
979 } else {
980 unsigned d = (n - t) % (puny_base - t);
981 n = (n - t) / (puny_base - t);
982 add_chr_to_str(&res, &res_l, puny_chrenc(d + t));
983 }
984 }
985 puny_adapt(&st, skip);
986 skip = 0;
987 }
988 }
989 skip++;
990 cchar = lchar + 1;
991 }
992
993 ret_free_uni:
994 mem_free(uni);
995
996 if (res_l > puny_max_length)
997 goto err;
998
999 return res;
1000
1001 err_free_uni:
1002 mem_free(uni);
1003 err:
1004 return NULL;
1005 }
1006
/*
 * Decode one punycode host label, given as len bytes at s, into UTF-8.
 *
 * The label must start with "xn--" (case-insensitive), consist only of
 * characters accepted by ascii_allowed() and, without the prefix, be no
 * longer than puny_max_length bytes.  Returns a newly allocated string, or
 * NULL if the label does not have the prefix or is malformed.
 */
static unsigned char *puny_decode(unsigned char *s, int len)
{
	unsigned char *p, *last_dash;
	unsigned *uni;
	unsigned uni_l;
	unsigned char *res;
	int res_l;
	unsigned i;
	unsigned cchar, pos;
	struct puny_state st;

	if (!(len >= 4 && !casecmp(s, xn, xn_l)))
		return NULL;
	s += xn_l;
	len -= xn_l;

	/* validate the characters and remember the last '-' */
	last_dash = NULL;
	for (p = s; p < s + len; p++) {
		unsigned char c = *p;
		if (!ascii_allowed(c))
			goto err;
		if (c == '-')
			last_dash = p;
	}

	if (len > puny_max_length)
		goto err;

	/* every input byte yields at most one code point, so len entries suffice */
	uni = mem_alloc(len * sizeof(unsigned));
	uni_l = 0;

	if (last_dash) {
		/* everything before the last '-' is literal basic code points */
		for (p = s; p < last_dash; p++)
			uni[uni_l++] = *p;
		p = last_dash + 1;
	} else {
		p = s;
	}

	puny_init(&st, uni_l);

	cchar = 128;	/* code point that will be inserted next */
	pos = 0;	/* index right after the previous insertion */

	while (p < s + len) {
		unsigned w = 1;
		unsigned val = 0;
		/* read one variable-length integer, checking for overflow */
		while (1) {
			unsigned n, t, nv, nw;
			if (p >= s + len)
				goto err_free_uni;
			n = puny_chrdec(*p++);
			nw = n * w;
			if (nw / w != n)
				goto err_free_uni;
			nv = val + nw;
			if (nv < val)
				goto err_free_uni;
			val = nv;
			t = puny_threshold(&st);
			if (n < t)
				break;
			nw = w * (puny_base - t);
			if (nw / w != puny_base - t)
				goto err_free_uni;
			w = nw;
		}
		puny_adapt(&st, val);

		if (val > uni_l - pos) {
			/* the delta runs past the end of the string: wrap and raise the code point */
			unsigned cp;
			val -= uni_l - pos + 1;
			pos = 0;
			cp = val / (uni_l + 1) + 1;
			val %= uni_l + 1;
			if (cchar + cp < cchar)
				goto err_free_uni;
			cchar += cp;
			if (cchar > 0x10FFFF)
				goto err_free_uni;
		}
		/* insert cchar at the computed position */
		pos += val;
		memmove(uni + pos + 1, uni + pos, (uni_l - pos) * sizeof(unsigned));
		uni[pos++] = cchar;
		uni_l++;
	}

	res = init_str();
	res_l = 0;

	for (i = 0; i < uni_l; i++) {
		unsigned char *us = encode_utf_8(uni[i]);
		add_to_str(&res, &res_l, us);
	}

	mem_free(uni);

	return res;

err_free_uni:
	mem_free(uni);
err:
	return NULL;
}
1111
/*
 * Convert a host name (or a list of host names) element by element.
 *
 * host, len:  bytes to process (length given explicitly).
 * separator:  NUL-terminated set of bytes that terminate an element;
 *             the separator byte itself is copied to the output unchanged.
 * decode:     zero     - an element containing any byte >= 0x80 is passed
 *                        to puny_encode(); other elements are copied as is.
 *             non-zero - every element is offered to puny_decode(); when
 *                        that returns NULL the element is copied as is.
 *
 * Returns a string the caller frees with mem_free().  Returns NULL only
 * when puny_encode() fails (i.e. only in the decode == 0 case).
 */
unsigned char *idn_encode_host(unsigned char *host, int len, unsigned char *separator, int decode)
{
	unsigned char *out, *sep;
	int out_l, elem_l, k;

	out = init_str();
	out_l = 0;

	for (;;) {
		unsigned char *converted = NULL;

		/* The element ends at the nearest occurrence of any separator. */
		elem_l = len;
		for (sep = separator; *sep; sep++) {
			unsigned char *hit = memchr(host, *sep, elem_l);
			if (hit)
				elem_l = (int)(hit - host);
		}

		if (decode) {
			converted = puny_decode(host, elem_l);
		} else {
			/* Encoding is needed only if a non-ASCII byte is present. */
			for (k = 0; k < elem_l; k++) {
				if (host[k] >= 0x80)
					break;
			}
			if (k < elem_l) {
				converted = puny_encode(host, elem_l);
				if (!converted) {
					mem_free(out);
					return NULL;
				}
			}
		}

		if (converted) {
			add_to_str(&out, &out_l, converted);
			mem_free(converted);
		} else {
			add_bytes_to_str(&out, &out_l, host, elem_l);
		}

		/* No separator was found: this was the final element. */
		if (elem_l == len)
			return out;

		add_chr_to_str(&out, &out_l, host[elem_l]);
		host += elem_l + 1;
		len -= elem_l + 1;
	}
}
1161
idn_encode_url(unsigned char * url,int decode)1162 unsigned char *idn_encode_url(unsigned char *url, int decode)
1163 {
1164 unsigned char *host, *p, *h;
1165 int holen, pl;
1166 if (parse_url(url, NULL, NULL, NULL, NULL, NULL, &host, &holen, NULL, NULL, NULL, NULL, NULL) || !host) {
1167 host = url;
1168 holen = 0;
1169 }
1170
1171 h = idn_encode_host(host, holen, cast_uchar ".", decode);
1172 if (!h)
1173 return NULL;
1174
1175 p = init_str();
1176 pl = 0;
1177 add_bytes_to_str(&p, &pl, url, host - url);
1178 add_to_str(&p, &pl, h);
1179 add_to_str(&p, &pl, host + holen);
1180 mem_free(h);
1181 return p;
1182 }
1183
display_url_or_host(struct terminal * term,unsigned char * url,int warn_idn,int just_host,unsigned char * separator)1184 static unsigned char *display_url_or_host(struct terminal *term, unsigned char *url, int warn_idn, int just_host, unsigned char *separator)
1185 {
1186 unsigned char *uu, *url_dec, *url_conv, *url_conv2, *url_enc, *ret;
1187 int is_idn;
1188
1189 if (!url)
1190 return stracpy(cast_uchar "");
1191
1192 url = stracpy(url);
1193 if (!just_host) {
1194 if ((uu = cast_uchar strchr(cast_const_char url, POST_CHAR))) *uu = 0;
1195 }
1196
1197 if (!url_non_ascii(url) && !strstr(cast_const_char url, cast_const_char xn))
1198 return url;
1199
1200 if (!just_host)
1201 url_dec = idn_encode_url(url, 1);
1202 else
1203 url_dec = idn_encode_host(url, (int)strlen(cast_const_char url), separator, 1);
1204 is_idn = strcmp(cast_const_char url_dec, cast_const_char url);
1205 url_conv = convert(utf8_table, term_charset(term), url_dec, NULL);
1206 mem_free(url_dec);
1207 url_conv2 = convert(term_charset(term), utf8_table, url_conv, NULL);
1208 if (!just_host)
1209 url_enc = idn_encode_url(url_conv2, 0);
1210 else
1211 url_enc = idn_encode_host(url_conv2, (int)strlen(cast_const_char url_conv2), separator, 0);
1212 if (!url_enc)
1213 url_enc = stracpy(url_conv2), is_idn = 1;
1214 mem_free(url_conv2);
1215 if (!strcmp(cast_const_char url_enc, cast_const_char url)) {
1216 if (is_idn && warn_idn) {
1217 ret = stracpy(cast_uchar "(IDN) ");
1218 add_to_strn(&ret, url_conv);
1219 } else {
1220 ret = url_conv;
1221 url_conv = DUMMY;
1222 }
1223 } else {
1224 ret = convert(utf8_table, term_charset(term), url, NULL);
1225 }
1226 mem_free(url);
1227 mem_free(url_conv);
1228 mem_free(url_enc);
1229 return ret;
1230 }
1231
display_url(struct terminal * term,unsigned char * url,int warn_idn)1232 unsigned char *display_url(struct terminal *term, unsigned char *url, int warn_idn)
1233 {
1234 return display_url_or_host(term, url, warn_idn, 0, cast_uchar ".");
1235 }
1236
display_host(struct terminal * term,unsigned char * host)1237 unsigned char *display_host(struct terminal *term, unsigned char *host)
1238 {
1239 return display_url_or_host(term, host, 1, 1, cast_uchar ".");
1240 }
1241
display_host_list(struct terminal * term,unsigned char * host)1242 unsigned char *display_host_list(struct terminal *term, unsigned char *host)
1243 {
1244 return display_url_or_host(term, host, 0, 1, cast_uchar ".,");
1245 }
1246