1 /* http.c
2  * HTTP protocol client implementation
3  * This file is part of the edbrowse project, released under GPL.
4  */
5 
6 #include "eb.h"
7 
8 #ifdef _MSC_VER
9 #include <fcntl.h>
10 #else
11 #include <signal.h>
12 #endif
13 #include <time.h>
14 
/* File-scope state for the HTTP client.
 * Most of these are non-static and therefore visible to other translation
 * units; their users are largely outside this part of the file. */
char *serverData;		// NOTE(review): not referenced in the visible code; presumably the last response body
int serverDataLen;		// NOTE(review): presumably the byte length of serverData - confirm
CURL *global_http_handle;	// NOTE(review): presumably a shared curl easy handle - confirm against its users
CURLSH *global_share_handle;	// NOTE(review): presumably the curl share object - confirm against its users
bool pluginsOn = true;
bool down_bg;			// download in background
bool down_jsbg = true;		// download js in background
/* Progress style read by eb_curl_callback():
 * 'd' prints one dot per CHUNKSIZE bytes, 'c' prints "chunks/total" lines
 * when the content length is known, 'q' prints nothing. */
char showProgress = 'd';	// dots
/* When non-NULL, httpConnect() appends this string verbatim
 * to the outgoing request headers. */
static char *httpLanguage;	/* outgoing */
24 
/* One entry in the list of download jobs.
 * NOTE(review): the code that creates and walks these jobs is outside the
 * visible part of the file; field notes below repeat the original comments. */
struct BG_JOB {
	struct BG_JOB *next, *prev;	// list links
	int state;
	size_t fsize;		// file size
	int file2;		// offset into filename
	char file[4];		// NOTE(review): presumably over-allocated to hold the full name - confirm
};
/* List head for the download jobs; both links point back at the head itself,
 * i.e. the list starts out empty. */
static struct listHead down_jobs = {
	&down_jobs, &down_jobs
};
35 
/* Forward declarations of file-local helpers that are used
 * before their definitions appear. */
static void setup_download(struct i_get *g);
static CURL *http_curl_init(struct i_get *g);
static size_t curl_header_callback(char *header_line, size_t size, size_t nmemb,
				   struct i_get *g);
static bool ftpConnect(struct i_get *g, char *creds_buf);
static bool gopherConnect(struct i_get *g);
static bool read_credentials(char *buffer);
static const char *message_for_response_code(int code);
44 
45 /* string is allocated. Quotes are removed. No other processing is done.
46  * You may need to decode %xx bytes or such. */
find_http_header(struct i_get * g,const char * name)47 static char *find_http_header(struct i_get *g, const char *name)
48 {
49 	char *s, *t, *u, *v;
50 	int namelen = strlen(name);
51 	char *h = g->headers;
52 	if (!h)
53 		return NULL;
54 	for (s = h; *s; s = v) {
55 /* find start of next line */
56 		v = strchr(s, '\n');
57 		if (!v)
58 			break;
59 		++v;
60 
61 /* name: value */
62 		t = strchr(s, ':');
63 		if (!t || t >= v)
64 			continue;
65 		u = t;
66 		while (u > s && isspace(u[-1]))
67 			--u;
68 		if (u - s != namelen)
69 			continue;
70 		if (!memEqualCI(s, name, namelen))
71 			continue;
72 
73 /* This is a match */
74 		++t;
75 		while (t < v && isspace(*t))
76 			++t;
77 		u = v;
78 		while (u > t && isspace(u[-1]))
79 			--u;
80 /* remove quotes */
81 		if (u - t >= 2 && *t == u[-1] && (*t == '"' || *t == '\''))
82 			++t, --u;
83 		if (u == t)
84 			return NULL;
85 		return pullString(t, u - t);
86 	}
87 
88 	return NULL;
89 }				/* find_http_header */
90 
scan_http_headers(struct i_get * g,bool fromCallback)91 static void scan_http_headers(struct i_get *g, bool fromCallback)
92 {
93 	char *v;
94 
95 	if (!g->content[0] && (v = find_http_header(g, "content-type"))) {
96 		strncpy(g->content, v, sizeof(g->content) - 1);
97 		caseShift(g->content, 'l');
98 		nzFree(v);
99 		debugPrint(3, "content %s", g->content);
100 		g->charset = strchr(g->content, ';');
101 		if (g->charset)
102 			*(g->charset)++ = 0;
103 		if (stringEqual(g->content, "text/html"))
104 			g->csp = true;
105 		else if (g->pg_ok && !cf->mt)
106 			cf->mt = findMimeByContent(g->content);
107 	}
108 
109 	if (!g->cdfn && (v = find_http_header(g, "content-disposition"))) {
110 		char *s = strstrCI(v, "filename=");
111 		if (s && !strncmp(v, "attachment", 10)) {
112 			s += 9;
113 			if (*s == '"') {
114 				char *t;
115 				++s;
116 				t = strchr(s, '"');
117 				if (t)
118 					*t = 0;
119 			}
120 			g->cdfn = cloneString(s);
121 			debugPrint(4, "disposition filename %s", g->cdfn);
122 // I'm not ready to do this part yet.
123 #if 0
124 			if (g->pg_ok && !cf->mt)
125 				cf->mt = findMimeByFile(g->cdfn);
126 #endif
127 		}
128 		nzFree(v);
129 	}
130 
131 	if (!g->hcl && (v = find_http_header(g, "content-length"))) {
132 		sscanf(v, "%lld", &g->hcl);
133 		nzFree(v);
134 		if (g->hcl)
135 			debugPrint(4, "content length %lld", g->hcl);
136 	}
137 
138 	if (!g->etag && (v = find_http_header(g, "etag"))) {
139 		g->etag = v;
140 		debugPrint(4, "etag %s", g->etag);
141 	}
142 
143 	if (g->cacheable && (v = find_http_header(g, "cache-control"))) {
144 		caseShift(v, 'l');
145 		if (strstr(v, "no-cache")) {
146 			g->cacheable = false;
147 			debugPrint(4, "no cache");
148 		}
149 		nzFree(v);
150 	}
151 
152 	if (g->cacheable && (v = find_http_header(g, "pragma"))) {
153 		caseShift(v, 'l');
154 		if (strstr(v, "no-cache")) {
155 			g->cacheable = false;
156 			debugPrint(4, "no cache");
157 		}
158 		nzFree(v);
159 	}
160 
161 	if (!g->modtime && (v = find_http_header(g, "last-modified"))) {
162 		g->modtime = parseHeaderDate(v);
163 		if (g->modtime)
164 			debugPrint(4, "mod date %s", v);
165 		nzFree(v);
166 	}
167 	if (!g->auth_realm[0] && (v = find_http_header(g, "WWW-Authenticate"))) {
168 		char *realm, *end;
169 		if ((realm = strstrCI(v, "realm="))) {
170 			realm += 6;
171 			if (realm[0] == '"' || realm[0] == '\'') {
172 				end = strchr(realm + 1, realm[0]);
173 				realm++;
174 			} else {
175 				/* look for space if unquoted */
176 				end = strchr(realm, ' ');
177 			}
178 			if (end) {
179 				int sz = end - realm;
180 				if (sz > sizeof(g->auth_realm) - 1)
181 					sz = sizeof(g->auth_realm) - 1;
182 				memcpy(g->auth_realm, realm, sz);
183 				g->auth_realm[sz] = 0;
184 			} else {
185 				strncpy(g->auth_realm, realm,
186 					sizeof(g->auth_realm) - 1);
187 			}
188 			debugPrint(4, "auth realm %s", g->auth_realm);
189 		}
190 		nzFree(v);
191 	}
192 
193 	if (fromCallback)
194 		return;
195 
196 	if (!g->newloc && (v = find_http_header(g, "location"))) {
197 // as though a user had typed it in
198 		unpercentURL(v);
199 		g->newloc = v;
200 	}
201 
202 	if (!g->newloc && (v = find_http_header(g, "refresh"))) {
203 		int delay;
204 		if (parseRefresh(v, &delay)) {
205 			unpercentURL(v);
206 			g->newloc = v;
207 			g->newloc_d = delay;
208 			v = NULL;
209 		}
210 		nzFree(v);
211 	}
212 }				/* scan_http_headers */
213 
i_get_free(struct i_get * g,bool nodata)214 static void i_get_free(struct i_get *g, bool nodata)
215 {
216 	if (nodata) {
217 		nzFree(g->buffer);
218 		g->buffer = 0;
219 		g->length = 0;
220 	}
221 	nzFree(g->headers);
222 	nzFree(g->urlcopy);
223 	nzFree(g->cdfn);
224 	nzFree(g->etag);
225 	nzFree(g->newloc);
226 	cnzFree(g->down_file);
227 // should not be necessary, but just to be safe:
228 	g->headers = g->urlcopy = g->cdfn = g->etag = g->newloc = 0;
229 	g->down_file = 0;
230 	if (g->down_fd > 0) {
231 		close(g->down_fd);
232 		g->down_fd = 0;
233 	}
234 }
235 
236 /* actually run the curl request, http or ftp or whatever */
fetch_internet(struct i_get * g)237 static CURLcode fetch_internet(struct i_get *g)
238 {
239 	CURLcode curlret;
240 	g->buffer = initString(&g->length);
241 	g->headers = initString(&g->headers_len);
242 	curlret = curl_easy_perform(g->h);
243 	if (g->is_http)
244 		scan_http_headers(g, false);
245 	return curlret;
246 }				/* fetch_internet */
247 
/* Callback used by libcurl. Captures data from http, ftp, pop3, gopher.
 * incoming is the block of data, size * nitems bytes long.
 * g is the request being serviced.
 * Returns the number of bytes consumed, which is always the whole block,
 * or -1 to make the transfer stop.
 * download states:
 * -1 user aborted the download
 * 0 standard in-memory download
 * 1 download but stop and ask user if he wants to download to disk
 * 2 disk download in foreground
 * 3 disk download parent thread
 * 4 disk download child thread
 * 5 disk download before the thread is spawned
 * 6 mime type says this should be a stream */
size_t
eb_curl_callback(char *incoming, size_t size, size_t nitems, struct i_get * g)
{
	size_t num_bytes = nitems * size;
	int dots1, dots2, rc;

	if (g->down_state == 1 && g->is_http) {
/* don't do a download unless the code is 200. */
		curl_easy_getinfo(g->h, CURLINFO_RESPONSE_CODE, &(g->code));
		if (g->code != 200)
			g->down_state = 0;
	}

	if (g->down_state == 1) {
		if (g->hcl == 0) {
// http should always set http content length, this is just for ftp.
// And ftp downloading a file always has state = 1 on the first data block.
			double d_size = 0.0;	// download size, if we can get it
			curl_easy_getinfo(g->h,
					  CURLINFO_CONTENT_LENGTH_DOWNLOAD,
					  &d_size);
			g->hcl = d_size;
// a negative size is treated as unknown
			if (g->hcl < 0)
				g->hcl = 0;
		}

/* state 1, first data block, ask the user */
		setup_download(g);
// setup_download changes down_state according to the user's answer
		if (g->down_state == 0)
			goto showdots;
// Aborted, or going to the background: stop this transfer.
// -1 does not match the block size, so the transfer ends.
		if (g->down_state == -1 || g->down_state == 5)
			return -1;
	}

	if (g->down_state == 2 || g->down_state == 4) {	/* to disk */
		rc = write(g->down_fd, incoming, num_bytes);
// anything short of a complete write is treated as failure
		if (rc == num_bytes) {
			if (g->down_state == 4) {
#if 0
// Deliberately delay background download, to get several running in parallel
// for testing purposes.
				if (g->down_length == 0)
					sleep(12);
				g->down_length += rc;
#endif
// background thread: no byte counting and no progress display
				return rc;
			}
			goto showdots;
		}
		if (g->down_state == 2) {
// has to be the foreground http thread, so ok to call setError,
// which is not threadsafe.
			setError(MSG_NoWrite2, g->down_file);
		} else {
			i_printf(MSG_NoWrite2, g->down_file);
			printf(", ");
			i_puts(MSG_DownAbort);
		}
		return -1;
	}

showdots:
// dots1 and dots2 are the chunk counts before and after this block
	dots1 = g->length / CHUNKSIZE;
// in memory: append the data; otherwise just keep count of the bytes
	if (g->down_state == 0)
		stringAndBytes(&g->buffer, &g->length, incoming, num_bytes);
	else
		g->length += num_bytes;
	dots2 = g->length / CHUNKSIZE;
// showing dots in parallel background download threads
// gets jumbled and doesn't mean anything.
	if (showProgress != 'q' && dots1 < dots2 && !g->down_force) {
		if (showProgress == 'd') {
			for (; dots1 < dots2; ++dots1)
				putchar('.');
			fflush(stdout);
		}
// count mode needs the total size, rounded up to whole chunks
		if (showProgress == 'c' && g->hcl)
			printf("%d/%d\n", dots2,
			       (int)((g->hcl + CHUNKSIZE - 1) / CHUNKSIZE));
	}
	return num_bytes;
}
340 
341 /* We want to be able to abort transfers when SIGINT is received.
342  * During data transfers, libcurl ignores EINTR.  So there's no obvious way
343  * to abort a transfer on SIGINT.
344  * However, libcurl does call a function periodically, to indicate the
345  * progress of the transfer.  If the progress function returns a non-zero
346  * value, then libcurl aborts the transfer.  The nice thing about libcurl
347  * is that it uses timeouts when reading and writing.  It won't block
348  * forever in some system call.
349  * We can be certain that libcurl will, in fact, call the progress function
350  * periodically.
351  * Note: libcurl doesn't start calling the progress function until after the
352  * connection is made.  So it can block indefinitely during connect().
353  * All of the progress arguments to the function are unused. */
354 
355 static int
curl_progress(void * data_p,double dl_total,double dl_now,double ul_total,double ul_now)356 curl_progress(void *data_p, double dl_total, double dl_now,
357 	      double ul_total, double ul_now)
358 {
359 	struct i_get *g = data_p;
360 	int ret = 0;
361 // ^c will interrupt an http or ftp download but not a background download
362 	if (intFlag && g->down_force != 1) {
363 		if (g->down_force == 0)
364 			i_puts(MSG_Interrupted);
365 		ret = 1;
366 	}
367 	return ret;
368 }				/* curl_progress */
369 
/* Convert base64 encoded sections of a multipart post body to raw bytes.
 * post is the body, boundary is the string that separates its sections.
 * If post contains no base64 section, *postb is set to 0 and *postb_l to 0.
 * Otherwise *postb receives an allocated, modified copy of post,
 * and *postb_l its length; the caller frees *postb.
 * NOTE(review): none of the strstr() results are checked for NULL;
 * this relies on post being well formed - confirm against the code that
 * builds the post data. */
static void
unpackUploadedFile(const char *post, const char *boundary,
		   char **postb, int *postb_l)
{
	static const char message64[] = "Content-Transfer-Encoding: base64";
	const int boundlen = strlen(boundary);
	const int m64len = strlen(message64);
	char *post2;
	char *b1, *b2, *b3, *b4;	/* boundary points */
	int unpack_ret;

	*postb = 0;
	*postb_l = 0;
	if (!strstr(post, message64))
		return;

/* work on a copy, since sections are rewritten in place */
	post2 = cloneString(post);
	b2 = strstr(post2, boundary);
	while (true) {
/* b2 is at a boundary; step past it */
		b1 = b2 + boundlen;
/* stop unless the boundary is followed by \r;
 * presumably the closing boundary is followed by something else */
		if (*b1 != '\r')
			break;
		b1 += 2;
/* b1 = encoding header of the next section, b2 = the boundary after it */
		b1 = strstr(b1, "Content-Transfer");
		b2 = strstr(b1, boundary);
/* leave it alone if the encoding is not base64 */
		if (memcmp(b1, message64, m64len))
			continue;
/* back up over the 6 letters of base64, and write 8bit and a blank line */
		b1 += m64len - 6;
		strcpy(b1, "8bit\r\n\r\n");
/* Now at the null byte that strcpy wrote;
 * replace it and the next byte with spaces. */
		b1 += 8;
		b1[0] = b1[1] = ' ';
/* presumably 4 bytes, crlf and two dashes, precede the boundary - confirm */
		b3 = b2 - 4;

		b4 = b3;
/* decode in place; b4 appears to come back as the end of the decoded data */
		unpack_ret = base64Decode(b1, &b4);
		if (unpack_ret != GOOD_BASE64_DECODE)
			mail64Error(unpack_ret);
		/* Should we *really* keep going at this point? */
/* the decoded data is shorter; pull the rest of the body down to meet it */
		strmove(b4, b3);
/* the boundary has moved with it */
		b2 = b4 + 4;
	}

/* the length runs to the end of the string that b1 is in */
	b1 += strlen(b1);
	*postb = post2;
	*postb_l = b1 - post2;
}				/* unpackUploadedFile */
416 
417 // Date format is:    Mon, 03 Jan 2000 21:29:33 GMT|[+-]nnnn
418 			// Or perhaps:     Sun Nov  6 08:49:37 1994
419 // or perhaps: 1994-11-06 08:49:37.nnnnZ
420 // or perhaps 06-Jun-2018 21:47:09 +nnnn
parseHeaderDate(const char * date)421 time_t parseHeaderDate(const char *date)
422 {
423 	static const char *const months[12] = {
424 		"Jan", "Feb", "Mar", "Apr", "May", "Jun",
425 		"Jul", "Aug", "Sep", "Oct", "Nov", "Dec"
426 	};
427 	time_t t = 0;
428 	int zone = 0;
429 	time_t now = 0;
430 	int y;			// the type of format, 0 through 3
431 	int m;			// month
432 	struct tm *temptm = NULL;
433 	struct tm tm;
434 	long utcoffset = 0;
435 	const char *date0 = date;	// remember for debugging
436 	memset(&tm, 0, sizeof(struct tm));
437 	tm.tm_isdst = -1;
438 
439 	now = time(NULL);
440 	temptm = localtime(&now);
441 	if (temptm == NULL)
442 		goto fail;
443 #ifndef _MSC_VER
444 	utcoffset = temptm->tm_gmtoff;
445 #endif
446 
447 	if (isdigitByte(date[0]) && isdigitByte(date[1]) &&
448 	    date[2] == '-' && isalphaByte(date[3])) {
449 		y = 3;
450 		tm.tm_mday = atoi(date);
451 		date += 3;
452 		for (m = 0; m < 12; m++)
453 			if (memEqualCI(date, months[m], 3))
454 				goto f5;
455 		goto fail;
456 f5:
457 		tm.tm_mon = m;
458 		date += 3;
459 		if (*date != '-' || !isdigitByte(date[1]))
460 			goto fail;
461 		tm.tm_year = atoi(date + 1) - 1900;
462 		date += 5;
463 		while (*date == ' ')
464 			++date;
465 		goto f3;
466 	}
467 
468 	if (isdigitByte(date[0]) && isdigitByte(date[1]) &&
469 	    isdigitByte(date[2]) && isdigitByte(date[3]) && date[4] == '-') {
470 		y = 2;
471 		tm.tm_year = atoi(date + 0) - 1900;
472 		tm.tm_mon = atoi(date + 5) - 1;
473 		tm.tm_mday = atoi(date + 8);
474 		date += 11;
475 		goto f3;
476 	}
477 
478 /* skip past day of the week */
479 	date = strchr(date, ' ');
480 	if (!date)
481 		goto fail;
482 	date++;
483 
484 	if (isdigitByte(*date)) {	/* first format */
485 		y = 0;
486 		if (isdigitByte(date[1])) {
487 			tm.tm_mday = (date[0] - '0') * 10 + date[1] - '0';
488 			date += 2;
489 		} else {
490 			tm.tm_mday = *date - '0';
491 			++date;
492 		}
493 		if (*date != ' ' && *date != '-')
494 			goto fail;
495 		++date;
496 		for (m = 0; m < 12; m++)
497 			if (memEqualCI(date, months[m], 3))
498 				goto f1;
499 		goto fail;
500 f1:
501 		tm.tm_mon = m;
502 		date += 3;
503 		if (*date == ' ') {
504 			date++;
505 			if (!isdigitByte(date[0]) || !isdigitByte(date[1]) ||
506 			    !isdigitByte(date[2]) || !isdigitByte(date[3]))
507 				goto fail;
508 			tm.tm_year =
509 			    (date[0] - '0') * 1000 + (date[1] - '0') * 100 +
510 			    (date[2] - '0') * 10 + date[3] - '0' - 1900;
511 			date += 4;
512 		} else if (*date == '-') {
513 			/* Sunday, 06-Nov-94 08:49:37 GMT */
514 			date++;
515 			if (!isdigitByte(date[0]) || !isdigitByte(date[1]))
516 				goto fail;
517 			if (!isdigitByte(date[2])) {
518 				tm.tm_year =
519 				    (date[0] >=
520 				     '7' ? 1900 : 2000) + (date[0] - '0') * 10 +
521 				    date[1] - '0' - 1900;
522 				date += 2;
523 			} else {
524 				tm.tm_year = atoi(date) - 1900;
525 				date += 4;
526 			}
527 		} else
528 			goto fail;
529 		if (*date != ' ')
530 			goto fail;
531 		date++;
532 	} else {
533 /* second format */
534 		y = 1;
535 		for (m = 0; m < 12; m++)
536 			if (memEqualCI(date, months[m], 3))
537 				goto f2;
538 		goto fail;
539 f2:
540 		tm.tm_mon = m;
541 		date += 3;
542 		while (*date == ' ')
543 			date++;
544 		if (!isdigitByte(date[0]))
545 			goto fail;
546 		tm.tm_mday = date[0] - '0';
547 		date++;
548 		if (*date != ' ') {
549 			if (!isdigitByte(date[0]))
550 				goto fail;
551 			tm.tm_mday = tm.tm_mday * 10 + date[0] - '0';
552 			date++;
553 		}
554 		if (*date != ' ')
555 			goto fail;
556 		date++;
557 	}
558 
559 f3:
560 /* ready to crack time */
561 	if (!isdigitByte(date[0]) || !isdigitByte(date[1]))
562 		goto fail;
563 	tm.tm_hour = (date[0] - '0') * 10 + date[1] - '0';
564 	date += 2;
565 	if (*date != ':')
566 		goto fail;
567 	date++;
568 	if (!isdigitByte(date[0]) || !isdigitByte(date[1]))
569 		goto fail;
570 	tm.tm_min = (date[0] - '0') * 10 + date[1] - '0';
571 	date += 2;
572 	if (*date != ':')
573 		goto fail;
574 	date++;
575 	if (!isdigitByte(date[0]) || !isdigitByte(date[1]))
576 		goto fail;
577 	tm.tm_sec = (date[0] - '0') * 10 + date[1] - '0';
578 	date += 2;
579 	if (y == 2)
580 		goto f4;
581 
582 	if (y == 1) {
583 /* year is at the end */
584 		if (*date != ' ')
585 			goto fail;
586 		date++;
587 		if (!isdigitByte(date[0]) || !isdigitByte(date[1]) ||
588 		    !isdigitByte(date[2]) || !isdigitByte(date[3]))
589 			goto fail;
590 		tm.tm_year =
591 		    (date[0] - '0') * 1000 + (date[1] - '0') * 100 + (date[2] -
592 								      '0') *
593 		    10 + date[3] - '0' - 1900;
594 		date += 4;
595 	}
596 
597 	if (*date != ' ' && *date)
598 		goto fail;
599 
600 	while (*date == ' ')
601 		++date;
602 	if ((*date == '+' || *date == '-') &&
603 	    isdigit(date[1]) && isdigit(date[2]) &&
604 	    isdigit(date[3]) && isdigit(date[4])) {
605 		zone = 10 * (date[1] - '0') + date[2] - '0';
606 		zone *= 60;
607 		zone += 10 * (date[3] - '0') + date[4] - '0';
608 		zone *= 60;
609 /* adjust to gmt */
610 		if (*date == '+')
611 			zone = -zone;
612 	}
613 
614 f4:
615 	t = mktime(&tm);
616 	if (t != (time_t) - 1)
617 		return t + zone + utcoffset;
618 
619 fail:
620 	debugPrint(3, "parseHeaderDate fails on %s", date0);
621 	return 0;
622 }				/* parseHeaderDate */
623 
parseRefresh(char * ref,int * delay_p)624 bool parseRefresh(char *ref, int *delay_p)
625 {
626 	int delay = 0;
627 	char *u = ref;
628 	if (isdigitByte(*u))
629 		delay = atoi(u);
630 	while (isdigitByte(*u) || *u == '.')
631 		++u;
632 	if (*u == ';')
633 		++u;
634 	while (*u == ' ')
635 		++u;
636 	if (memEqualCI(u, "url=", 4)) {
637 		char qc;
638 		u += 4;
639 		while (isspace(*u))
640 			++u;
641 		qc = *u;
642 		if (qc == '"' || qc == '\'')
643 			++u;
644 		else
645 			qc = 0;
646 		strmove(ref, u);
647 		u = ref + strlen(ref);
648 		if (u > ref && u[-1] == qc)
649 			u[-1] = 0;
650 		debugPrint(3, "delay %d %s", delay, ref);
651 /* avoid the obvious infinite loop */
652 		if (sameURL(ref, cf->fileName)) {
653 			*delay_p = 0;
654 			return false;
655 		}
656 		*delay_p = delay;
657 		return true;
658 	}
659 	i_printf(MSG_GarbledRefresh, ref);
660 	*delay_p = 0;
661 	return false;
662 }				/* parseRefresh */
663 
shortRefreshDelay(const char * r,int d)664 bool shortRefreshDelay(const char *r, int d)
665 {
666 /* the value 10 seconds is somewhat arbitrary */
667 	if (d < 10)
668 		return true;
669 	i_printf(MSG_RedirectDelayed, r, d);
670 	return false;
671 }				/* shortRefreshDelay */
672 
673 // encode the url, if it was supplied by the user.
674 // Otherwise just make a copy.
675 // Either way there is room for one more char at the end.
urlSanitize(struct i_get * g,const char * post)676 static void urlSanitize(struct i_get *g, const char *post)
677 {
678 	const char *portloc;
679 	const char *url = g->url;
680 
681 	if (g->uriEncoded && !looksPercented(url, post)) {
682 		debugPrint(2, "Warning, url %s doesn't look encoded", url);
683 		g->uriEncoded = false;
684 	}
685 
686 	if (!g->uriEncoded) {
687 		g->urlcopy = percentURL(url, post);
688 		g->urlcopy_l = strlen(g->urlcopy);
689 	} else {
690 		char *frag;
691 		if (post)
692 			g->urlcopy_l = post - url;
693 		else
694 			g->urlcopy_l = strlen(url);
695 		g->urlcopy = allocMem(g->urlcopy_l + 2);
696 		strncpy(g->urlcopy, url, g->urlcopy_l);
697 		g->urlcopy[g->urlcopy_l] = 0;
698 // percentURL strips off the hash, so we need to here.
699 		frag = findHash(g->urlcopy);
700 		if (frag)
701 			*frag = 0;
702 	}
703 
704 // get rid of : in http://this.that.com:/path, curl can't handle it.
705 	getPortLocURL(g->urlcopy, &portloc, 0);
706 	if (portloc && !isdigit(portloc[1])) {
707 		const char *s = portloc + strcspn(portloc, "/?#\1");
708 		strmove((char *)portloc, s);
709 		g->urlcopy_l = strlen(g->urlcopy);
710 	}
711 }				/* urlSanitize */
712 
httpConnect(struct i_get * g)713 bool httpConnect(struct i_get *g)
714 {
715 	const char *url = g->url;
716 	char *cacheData = NULL;
717 	int cacheDataLen = 0;
718 	CURL *h;		// the curl http handle
719 	char *referrer = NULL;
720 	CURLcode curlret = CURLE_OK;
721 	struct curl_slist *custom_headers = NULL;
722 	struct curl_slist *tmp_headers = NULL;
723 	const struct MIMETYPE *mt;
724 	char creds_buf[MAXUSERPASS * 2 + 2];	/* creds abr. for credentials */
725 	bool still_fetching = true;
726 	char prot[MAXPROTLEN], host[MAXHOSTLEN];
727 	const char *post, *s;
728 	char *postb = NULL;
729 	int postb_l = 0;
730 	bool transfer_status = false;
731 	bool proceed_unauthenticated = false;
732 	int redirect_count = 0;
733 	bool post_request = false;
734 	bool head_request = false;
735 	uchar sxfirst = 0;
736 	int n;
737 
738 	if (!getProtHostURL(url, prot, host)) {
739 // only the foreground http thread uses setError,
740 // the traditional /bin/ed error system.
741 		if (g->foreground)
742 			setError(MSG_DomainEmpty);
743 		return false;
744 	}
745 // plugins can only be ok from one thread, the interactive thread
746 // that calls up web pages at the user's behest.
747 // None of this machinery need be threadsafe.
748 	if (g->pg_ok && (cf->mt = mt = findMimeByURL(url, &sxfirst)) &&
749 	    !(mt->from_file | mt->down_url) && !(mt->outtype && g->playonly)) {
750 		char *f;
751 		urlSanitize(g, 0);
752 mimestream:
753 // don't have to fetch the data, the program can handle it.
754 		nzFree(g->buffer);
755 		g->buffer = 0;
756 		g->code = 200;
757 		f = g->urlcopy;
758 		if (mt->outtype) {
759 			runPluginCommand(mt, f, 0, 0, 0, &g->buffer,
760 					 &g->length);
761 			cf->render1 = true;
762 			if (sxfirst)
763 				cf->render2 = true;
764 			i_get_free(g, false);
765 		} else {
766 			runPluginCommand(mt, f, 0, 0, 0, 0, 0);
767 			i_get_free(g, true);
768 		}
769 		return true;
770 	}
771 
772 /* Pull user password out of the url */
773 	n = getCredsURL(url, creds_buf);
774 	if (n == 1) {
775 		if (g->foreground)
776 			setError(MSG_UserNameLong, MAXUSERPASS);
777 		return false;
778 	}
779 	if (n == 2) {
780 		if (g->foreground)
781 			setError(MSG_PasswordLong, MAXUSERPASS);
782 		return false;
783 	}
784 	unpercentString(creds_buf);
785 
786 	if (!curlActive) {
787 		eb_curl_global_init();
788 		cookiesFromJar();
789 		setupEdbrowseCache();
790 	}
791 
792 	if (stringEqualCI(prot, "http") || stringEqualCI(prot, "https")) {
793 		;		/* ok for now */
794 	} else if (stringEqualCI(prot, "gopher")) {
795 		return gopherConnect(g);
796 	} else if (stringEqualCI(prot, "ftp") ||
797 		   stringEqualCI(prot, "ftps") ||
798 		   stringEqualCI(prot, "scp") ||
799 		   stringEqualCI(prot, "tftp") || stringEqualCI(prot, "sftp")) {
800 		return ftpConnect(g, creds_buf);
801 	} else {
802 		if (g->foreground)
803 			setError(MSG_WebProtBad, prot);
804 		else if (debugLevel >= 3) {
805 			i_printf(MSG_WebProtBad, prot);
806 			nl();
807 		}
808 		return false;
809 	}
810 
811 	h = http_curl_init(g);
812 	if (!h) {		// should never happen
813 		i_get_free(g, false);
814 		return false;
815 	}
816 
817 /* "Expect:" header causes some servers to lose.  Disable it. */
818 	tmp_headers = curl_slist_append(custom_headers, "Expect:");
819 	if (tmp_headers == NULL)
820 		i_printfExit(MSG_NoMem);
821 	custom_headers = tmp_headers;
822 	if (httpLanguage) {
823 		custom_headers =
824 		    curl_slist_append(custom_headers, httpLanguage);
825 		if (custom_headers == NULL)
826 			i_printfExit(MSG_NoMem);
827 	}
828 
829 	post = strchr(url, '\1');
830 	postb = 0;
831 	urlSanitize(g, post);
832 
833 	if (post) {
834 		post_request = true;
835 		post++;
836 
837 		if (strncmp(post, "`mfd~", 5) == 0) {
838 			int multipart_header_len = 0;
839 			char *multipart_header =
840 			    initString(&multipart_header_len);
841 			char thisbound[24];
842 			post += 5;
843 			stringAndString(&multipart_header,
844 					&multipart_header_len,
845 					"Content-Type: multipart/form-data; boundary=");
846 			s = strchr(post, '\r');
847 			stringAndBytes(&multipart_header, &multipart_header_len,
848 				       post, s - post);
849 			tmp_headers =
850 			    curl_slist_append(custom_headers, multipart_header);
851 			if (tmp_headers == NULL)
852 				i_printfExit(MSG_NoMem);
853 			custom_headers = tmp_headers;
854 			/* curl_slist_append made a copy of multipart_header. */
855 			nzFree(multipart_header);
856 			memcpy(thisbound, post, s - post);
857 			thisbound[s - post] = 0;
858 			post = s + 2;
859 			unpackUploadedFile(post, thisbound, &postb, &postb_l);
860 		}
861 		curlret = curl_easy_setopt(h, CURLOPT_POSTFIELDS,
862 					   (postb_l ? postb : post));
863 		if (curlret != CURLE_OK)
864 			goto curl_fail;
865 		curlret =
866 		    curl_easy_setopt(h, CURLOPT_POSTFIELDSIZE,
867 				     postb_l ? postb_l : strlen(post));
868 		if (curlret != CURLE_OK)
869 			goto curl_fail;
870 	} else {
871 		curlret = curl_easy_setopt(h, CURLOPT_HTTPGET, 1);
872 		if (curlret != CURLE_OK)
873 			goto curl_fail;
874 	}
875 
876 	if (sendReferrer && isURL(g->thisfile) &&
877 	    (memEqualCI(g->thisfile, "http:", 5)
878 	     || memEqualCI(g->thisfile, "https:", 6))) {
879 		char *p, *p2, *p3;
880 		referrer = cloneString(g->thisfile);
881 // lop off post data
882 		p = strchr(referrer, '\1');
883 		if (p)
884 			*p = 0;
885 // lop off .browse
886 		p = referrer + strlen(referrer);
887 		if (p - referrer > 7 && !memcmp(p - 7, ".browse", 7))
888 			p[-7] = 0;
889 // excise login:password
890 		p = strchr(referrer, ':');
891 		++p;
892 		if (*p == '/')
893 			++p;
894 		if (*p == '/')
895 			++p;
896 		p2 = strchr(p, '@');
897 		p3 = strchr(p, '/');
898 		if (p2 && (!p3 || p2 < p3))
899 			strmove(p, p2 + 1);
900 // The current protocol should be http or https, we cleared out everything else.
901 // But https to http is not allowed.   RFC 2616, section 15.1.3
902 		p = strchr(referrer, ':');
903 		if (strlen(prot) == 4 && p - referrer == 5) {
904 			nzFree(referrer);
905 			referrer = NULL;
906 		}
907 	}
908 // We keep the same referrer even after redirections, which I think is right.
909 // That's why it's here instead of inside the loop.
910 	curlret = curl_easy_setopt(h, CURLOPT_REFERER, referrer);
911 	if (curlret != CURLE_OK)
912 		goto curl_fail;
913 
914 // look for custom headers from the calling function
915 	if (g->custom_h) {
916 		const char *u, *v;
917 		u = g->custom_h;
918 		while (*u) {
919 			int d;
920 			char *w;
921 			v = strchr(u, '\n');
922 			if (!v)
923 				break;
924 			d = v - u;
925 			w = allocMem(d + 1);
926 			memcpy(w, u, d);
927 			w[d] = 0;
928 			tmp_headers = curl_slist_append(custom_headers, w);
929 			if (tmp_headers == NULL)
930 				i_printfExit(MSG_NoMem);
931 			custom_headers = tmp_headers;
932 			debugPrint(4, "custom %s", w);
933 			nzFree(w);
934 			u = v + 1;
935 		}
936 	}
937 
938 	curlret = curl_easy_setopt(h, CURLOPT_HTTPHEADER, custom_headers);
939 	if (curlret != CURLE_OK)
940 		goto curl_fail;
941 	curlret = setCurlURL(h, g->urlcopy);
942 	if (curlret != CURLE_OK)
943 		goto curl_fail;
944 
945 	/* If we have a username and password, then tell libcurl about it.
946 	 * libcurl won't send it to the server unless server gave a 401 response.
947 	 * Libcurl selects the most secure form of auth provided by server. */
948 
949 	if (stringEqual(creds_buf, ":"))
950 		getUserPass(g->urlcopy, creds_buf, false);
951 // If the URL didn't have user and password, and getUserPass failed,
952 // then creds_buf = ":".
953 	curlret = curl_easy_setopt(h, CURLOPT_USERPWD, creds_buf);
954 	if (curlret != CURLE_OK)
955 		goto curl_fail;
956 
957 /* We are ready to make a transfer.  Here is where it gets complicated.
958  * At the top of the loop, we perform the HTTP request.  It may fail entirely
959  * (I.E., libcurl returns an indicator other than CURLE_OK).
960  * We may be redirected.  Edbrowse needs finer control over the redirection
961  * process than libcurl gives us.
962  * Decide whether to accept the redirection, using the following criteria.
963  * Does user permit redirects?  Will we exceed maximum allowable redirects?
964  * We may be asked for authentication.  In that case, grab username and
965  * password from the user.  If the server accepts the username and password,
966  * then add it to the list of authentication records.  */
967 
968 	still_fetching = true;
969 
970 	if (!post_request && presentInCache(g->urlcopy)) {
971 		head_request = true;
972 		curl_easy_setopt(h, CURLOPT_NOBODY, 1l);
973 	}
974 
975 	while (still_fetching == true) {
976 		char *redir = NULL;
977 
978 // recheck the url after a redirect
979 		if (redirect_count && g->pg_ok &&
980 		    (cf->mt = mt = findMimeByURL(g->urlcopy, &sxfirst)) &&
981 		    !(mt->from_file | mt->down_url) &&
982 		    !(mt->outtype && g->playonly)) {
983 			curl_easy_cleanup(h);
984 			goto mimestream;
985 		}
986 
987 		if (head_request && g->down_force == 1) {
988 			curl_easy_setopt(h, CURLOPT_NOBODY, 0l);
989 			head_request = false;
990 		}
991 
992 		if (g->down_force == 1)
993 			truncate0(g->down_file, g->down_fd);
994 
995 perform:
996 		g->is_http = g->cacheable = true;
997 		curlret = fetch_internet(g);
998 
999 /*********************************************************************
1000 This is a one line workaround for an apparent bug in curl.
1001 The return CURLE_WRITE_ERROR means the data fetched from the internet
1002 could not be written to disk. And how does curl know?
1003 Because the callback function returns a lesser number of bytes.
1004 This is like write(), if it returns a lesser number
1005 of bytes then it was unable to write the entire block to disk.
1006 Ok, but I never return fewer bytes than was passed to me.
1007 I return the expected number of bytes, or -1 in the rare case
1008 that I want to abort the download.
1009 So you see, curl should never return this WRITE error.
1010 Yet it does, in version 7.58.0-2, on debian.
1011 And only on one page we have found so far:
1012 https://www.literotica.com/stories/new_submissions.php
1013 The entire page is downloaded, down to the very last byte,
1014 then the WRITE error is passed back.
1015 Well if it happens once it will happen elsewhere.
1016 Users will not be able to fetch pages from the internet, and not know why.
1017 The error message, can't write to disk, is not helpful at all.
1018 So this is a simple workaround.
1019 *********************************************************************/
1020 
1021 		if (curlret == CURLE_WRITE_ERROR)
1022 			curlret = CURLE_OK;
1023 
1024 		if (g->down_state == 6) {
1025 // Header has indicated a plugin by content type or protocol or suffix.
1026 			curl_easy_cleanup(h);
1027 			goto mimestream;
1028 		}
1029 
1030 /*********************************************************************
1031 If the desired file is in cache for some reason, and we issued the head request,
1032 and it is application, or some such that triggers a download, then state = 1,
1033 but no data is forthcoming, and the user was never asked if he wants
1034 to download, so state is still 1.
1035 So ask, and then look at state.
1036 If state is nonzero, sorry, I'm not taking the file from cache,
1037 not yet, just because it's a whole bunch of new code.
1038 We don't generally store our downloaded files in cache anyways,
1039 they go where they go, so this doesn't come up very often.
1040 *********************************************************************/
1041 
1042 		if (head_request) {
1043 			if (g->down_state == 1) {
1044 				setup_download(g);
1045 /* now we have our answer */
1046 			}
1047 
1048 			if (g->down_state != 0) {
1049 				curl_easy_setopt(h, CURLOPT_NOBODY, 0l);
1050 				head_request = false;
1051 				debugPrint(3, "switch to get for download %d",
1052 					   g->down_state);
1053 			}
1054 
1055 			if (g->down_state == 2) {
1056 				curl_easy_getinfo(h, CURLINFO_RESPONSE_CODE,
1057 						  &g->code);
1058 				if (g->code == 200)
1059 					goto perform;
1060 				g->down_state = 0;
1061 			}
1062 		}
1063 
1064 		if (g->down_state == 5) {
1065 /* user has directed a download of this file in the background. */
1066 /* We spawn a thread to do this, then return, but g could go away */
1067 /* before the child thread has a chance to read its contents. */
1068 			struct i_get g0;
1069 			pthread_t tid;
1070 			nzFree(g->buffer);
1071 			g->buffer = NULL;
1072 			g->length = 0;
1073 			g0 = *g;	// structure copy
1074 			if (custom_headers)
1075 				curl_slist_free_all(custom_headers);
1076 			curl_easy_cleanup(h);
1077 			nzFree(postb);
1078 			nzFree(referrer);
1079 			pthread_create(&tid, NULL, httpConnectBack1,
1080 				       (void *)&g0);
1081 // I will assume the thread was created.
1082 // Don't call i_get_free(g); the child thread is using those strings.
1083 			return true;
1084 		}
1085 
1086 		if (g->down_state == 3 || g->down_state == -1) {
1087 			i_get_free(g, true);
1088 			curl_easy_cleanup(h);
1089 			nzFree(referrer);
1090 			return false;
1091 		}
1092 
1093 		if (g->down_state == 4) {
1094 			bool r = true;
1095 			if (curlret != CURLE_OK) {
1096 				r = false;
1097 				ebcurl_setError(curlret, g->urlcopy, 1,
1098 						g->error);
1099 			} else {
1100 				curl_easy_getinfo(h, CURLINFO_RESPONSE_CODE,
1101 						  &(g->code));
1102 				if (g->code != 200) {
1103 					r = false;
1104 				} else {
1105 					i_printf(MSG_DownSuccess);
1106 					printf(": %s\n", g->down_file2);
1107 				}
1108 			}
1109 			if (custom_headers)
1110 				curl_slist_free_all(custom_headers);
1111 			curl_easy_cleanup(h);
1112 			nzFree(postb);
1113 			nzFree(referrer);
1114 			i_get_free(g, true);
1115 			return r;
1116 		}
1117 
1118 		if (g->length >= CHUNKSIZE && showProgress == 'd'
1119 		    && !g->down_force)
1120 			nl();	/* We printed dots, so terminate them with newline */
1121 
1122 		if (g->down_state == 2) {
1123 			close(g->down_fd);
1124 			i_get_free(g, true);
1125 			setError(MSG_DownSuccess);
1126 			curl_easy_cleanup(h);
1127 			nzFree(referrer);
1128 			return false;
1129 		}
1130 
1131 		if (curlret != CURLE_OK) {
1132 			if (!head_request)
1133 				goto curl_fail;
1134 			ebcurl_setError(curlret, g->urlcopy, 1, g->error);
1135 			debugPrint(3, "switch from head to get");
1136 			curl_easy_setopt(h, CURLOPT_NOBODY, 0l);
1137 			head_request = false;
1138 			goto perform;
1139 		}
1140 // get http code
1141 		curl_easy_getinfo(h, CURLINFO_RESPONSE_CODE, &g->code);
1142 		if (curlret != CURLE_OK)
1143 			goto curl_fail;
1144 
1145 		if (g->tsn)
1146 			debugPrint(3, "thread %d http code %ld", g->tsn,
1147 				   g->code);
1148 		else
1149 			debugPrint(3, "http code %ld", g->code);
1150 
1151 /* refresh header is an alternate form of redirection */
1152 		if (g->newloc && g->newloc_d >= 0) {
1153 			if (shortRefreshDelay(g->newloc, g->newloc_d)) {
1154 				g->code = 302;
1155 			} else {
1156 				nzFree(g->newloc);
1157 				g->newloc = 0;
1158 			}
1159 		}
1160 
1161 		redir = g->newloc;
1162 		g->newloc = 0;
1163 
1164 		if (allowRedirection &&
1165 		    ((g->code >= 301 && g->code <= 303) ||
1166 		     (g->code >= 307 && g->code <= 308))) {
1167 			if (redir)
1168 				redir = resolveURL(g->urlcopy, redir);
1169 			still_fetching = false;
1170 			if (redir == NULL) {
1171 				/* Redirected, but we don't know where to go. */
1172 				i_printf(MSG_RedirectNoURL, g->code);
1173 				transfer_status = true;
1174 			} else if (redirect_count >= 10) {
1175 				i_puts(MSG_RedirectMany);
1176 				transfer_status = true;
1177 				nzFree(redir);
1178 			} else {	/* redirection looks good. */
1179 				strcpy(creds_buf, ":");	/* Flush stale data. */
1180 				nzFree(g->urlcopy);
1181 				g->urlcopy = redir;
1182 				g->urlcopy_l = strlen(g->urlcopy);
1183 				redir = NULL;
1184 
1185 /* Convert POST request to GET request after redirection. */
1186 /* This should only be done for 301 through 303 */
1187 				if (g->code < 307) {
1188 					curl_easy_setopt(h, CURLOPT_HTTPGET, 1);
1189 					post_request = false;
1190 				}
1191 /* I think there is more work to do for 307 308,
1192  * pasting the prior post string onto the new URL. Not sure about this. */
1193 
1194 				getUserPass(g->urlcopy, creds_buf, false);
1195 				curlret =
1196 				    curl_easy_setopt(h, CURLOPT_USERPWD,
1197 						     creds_buf);
1198 				if (curlret != CURLE_OK)
1199 					goto curl_fail;
1200 
1201 				curlret = setCurlURL(h, g->urlcopy);
1202 				if (curlret != CURLE_OK)
1203 					goto curl_fail;
1204 
1205 				if (!post_request && presentInCache(g->urlcopy)) {
1206 					head_request = true;
1207 					curl_easy_setopt(h, CURLOPT_NOBODY, 1l);
1208 				}
1209 // This is unusual in that we're using the i_get structure again,
1210 // so we need to reset some parts of it and not others.
1211 				nzFree(g->buffer);
1212 				g->buffer = 0;
1213 // This 302 redirection could set content type = application/binary,
1214 // which in turn sets state = 1, which is ignored since 302 takes precedence.
1215 // So state might still be 1, set it back to 0.
1216 				g->down_state = 0;
1217 				g->code = 0;
1218 				g->csp = false;
1219 				nzFree(g->headers);
1220 				g->headers = 0;
1221 				g->headers_len = 0;
1222 				g->content[0] = 0;
1223 				g->charset = 0;
1224 				g->hcl = 0;
1225 				nzFree(g->cdfn);
1226 				g->cdfn = 0;
1227 				g->modtime = 0;
1228 				nzFree(g->etag);
1229 				g->etag = 0;
1230 				++redirect_count;
1231 				still_fetching = true;
1232 				debugPrint(2, "redirect %s", g->urlcopy);
1233 			}
1234 		}
1235 
1236 		else if (g->code == 401 && !proceed_unauthenticated) {
1237 			bool got_creds = false;
1238 
1239 			/* only try realm on first try - prevents loop */
1240 			if (stringEqual(creds_buf, ":"))
1241 				got_creds =
1242 				    getUserPassRealm(g->urlcopy, creds_buf,
1243 						     g->auth_realm);
1244 			if (!got_creds && g->foreground) {
1245 				i_printf(MSG_AuthRequired, g->urlcopy,
1246 					 g->auth_realm);
1247 				nl();
1248 				got_creds = read_credentials(creds_buf);
1249 			}
1250 			if (got_creds && g->foreground)
1251 				addWebAuthorization(g->urlcopy, creds_buf,
1252 						    false, g->auth_realm);
1253 			if (got_creds) {
1254 				curl_easy_setopt(h, CURLOPT_USERPWD, creds_buf);
1255 				nzFree(g->buffer);
1256 				g->buffer = 0;
1257 				g->length = 0;
1258 			} else {
1259 /* User aborted the login process, try and at least get something. */
1260 				proceed_unauthenticated = true;
1261 			}
1262 		} else {	/* not redirect, not 401 */
1263 			if (head_request) {
1264 				if (fetchCache
1265 				    (g->urlcopy, g->etag, g->modtime,
1266 				     &cacheData, &cacheDataLen)) {
1267 					nzFree(g->buffer);
1268 					g->buffer = cacheData;
1269 					g->length = cacheDataLen;
1270 					still_fetching = false;
1271 					transfer_status = true;
1272 				} else {
1273 /* Back through the loop,
1274  * now doing GET rather than HEAD. */
1275 					curl_easy_setopt(h, CURLOPT_NOBODY, 0l);
1276 					head_request = false;
1277 					--redirect_count;
1278 				}
1279 			} else {
1280 				if (g->code == 200 && g->cacheable &&
1281 				    (g->modtime || g->etag) &&
1282 				    g->down_state == 0)
1283 					storeCache(g->urlcopy, g->etag,
1284 						   g->modtime, g->buffer,
1285 						   g->length);
1286 				still_fetching = false;
1287 				transfer_status = true;
1288 			}
1289 		}
1290 	}
1291 
1292 curl_fail:
1293 	if (custom_headers)
1294 		curl_slist_free_all(custom_headers);
1295 	curl_easy_cleanup(h);
1296 	nzFree(postb);
1297 
1298 	if (curlret != CURLE_OK) {
1299 		ebcurl_setError(curlret, g->urlcopy, (g->foreground ? 0 : 1),
1300 				g->error);
1301 		nzFree(referrer);
1302 		i_get_free(g, true);
1303 		return false;
1304 	}
1305 
1306 	if (!transfer_status) {
1307 		nzFree(referrer);
1308 		i_get_free(g, true);
1309 		return false;
1310 	}
1311 
1312 	if ((g->code != 200 && g->code != 201 &&
1313 	     (g->foreground || debugLevel >= 2)) ||
1314 	    (g->code == 201 && debugLevel >= 3))
1315 		i_printf(MSG_HTTPError,
1316 			 g->code, message_for_response_code(g->code));
1317 
1318 // with lopping off post data, or encoding the url,
1319 // it's easier to just assume the name has changed,
1320 // even if there is no redirection.
1321 	g->cfn = g->urlcopy;
1322 	g->urlcopy = 0;
1323 
1324 /* see if http header has set the filename */
1325 	if (g->cdfn) {
1326 		nzFree(g->cfn);
1327 		g->cfn = g->cdfn;
1328 		g->cdfn = NULL;
1329 	}
1330 
1331 	if (g->headers_p) {
1332 		*g->headers_p = g->headers;
1333 // The string is your responsibility now.
1334 		g->headers = 0;
1335 	}
1336 
1337 	i_get_free(g, false);
1338 	g->referrer = referrer;
1339 	return transfer_status;
1340 }				/* httpConnect */
1341 
// Thread sequence number. Each httpConnectBack* entry point below bumps it
// and stores the new value in its own request, so debug output can tell the
// worker threads apart.
// NOTE(review): the increment is a plain ++ with no lock or atomic visible
// here — confirm two workers cannot start at the same moment.
static int tsn;
1343 
httpConnectBack1(void * ptr)1344 void *httpConnectBack1(void *ptr)
1345 {
1346 	struct i_get *g0 = ptr;
1347 	struct i_get g = *g0;	// structure copy
1348 	struct BG_JOB *job;
1349 	bool rc;
1350 	g.down_force = 1;
1351 	g.down_state = 4;
1352 // urlcopy will be recomputed on the next http call
1353 	nzFree(g.urlcopy);
1354 	g.urlcopy = 0;
1355 // Other things we should clean up?
1356 	g.tsn = ++tsn;
1357 	debugPrint(3, "bg thread %d", tsn);
1358 	i_puts(MSG_DownProgress);
1359 /* push job onto the list for tracking and display */
1360 	job = allocMem(sizeof(struct BG_JOB) + strlen(g.down_file));
1361 	job->state = 4;
1362 	strcpy(job->file, g.down_file);
1363 	job->file2 = g.down_file2 - g.down_file;
1364 // round file size up to the nearest chunk.
1365 // This will come out 0 only if the true size is 0.
1366 	job->fsize = ((g.hcl + (CHUNKSIZE - 1)) / CHUNKSIZE);
1367 	addToListBack(&down_jobs, job);
1368 	rc = httpConnect(&g);
1369 	job->state = (rc ? 0 : -1);
1370 	return NULL;
1371 }
1372 
httpConnectBack2(void * ptr)1373 void *httpConnectBack2(void *ptr)
1374 {
1375 	Tag *t = ptr;
1376 	bool rc;
1377 	struct i_get g;
1378 	memset(&g, 0, sizeof(g));
1379 	g.thisfile = cf->fileName;
1380 	g.uriEncoded = true;
1381 	g.url = t->href;
1382 	g.down_force = 2;
1383 	g.tsn = ++tsn;
1384 	debugPrint(3, "jsbg thread %d", tsn);
1385 	rc = httpConnect(&g);
1386 	t->loadsuccess = rc;
1387 	if (!rc)
1388 		t->hcode = g.code;
1389 	else {
1390 // Rarely, a js file is not in utf8; convert it here, inside the thread.
1391 		char *b = force_utf8(g.buffer, g.length);
1392 		if (!b)
1393 			b = g.buffer;
1394 		else
1395 			nzFree(g.buffer);
1396 // don't know why t->value would be anything
1397 		nzFree(t->value);
1398 		t->value = b;
1399 	}
1400 	return NULL;
1401 }
1402 
httpConnectBack3(void * ptr)1403 void *httpConnectBack3(void *ptr)
1404 {
1405 	Tag *t = ptr;
1406 	bool rc;
1407 	struct i_get g;
1408 	char *outgoing_body = 0, *outgoing_headers = 0;
1409 	memset(&g, 0, sizeof(g));
1410 	g.thisfile = cf->fileName;
1411 	g.uriEncoded = true;
1412 	g.url = t->href;
1413 	g.custom_h = t->innerHTML;
1414 	g.headers_p = &outgoing_headers;
1415 	g.down_force = 2;
1416 	g.tsn = ++tsn;
1417 	debugPrint(3, "xhr thread %d", tsn);
1418 	rc = httpConnect(&g);
1419 	outgoing_body = g.buffer;
1420 	t->loadsuccess = rc;
1421 	if (!rc)
1422 		t->hcode = g.code;
1423 	else {
1424 		char *a;
1425 		int l;
1426 // don't know why t->value would be anything
1427 		nzFree(t->value);
1428 		a = initString(&l);
1429 		if (outgoing_headers == 0)
1430 			outgoing_headers = emptyString;
1431 		if (outgoing_body == 0)
1432 			outgoing_body = emptyString;
1433 		stringAndNum(&a, &l, rc);
1434 		stringAndString(&a, &l, "\r\n\r\n");
1435 		stringAndNum(&a, &l, g.code);
1436 		stringAndString(&a, &l, "\r\n\r\n");
1437 		stringAndString(&a, &l, outgoing_headers);
1438 		stringAndString(&a, &l, outgoing_body);
1439 		while (l && isspace(a[l - 1]))
1440 			a[--l] = 0;
1441 		t->value = a;
1442 	}
1443 	nzFree(outgoing_headers);
1444 	nzFree(outgoing_body);
1445 	nzFree(t->innerHTML);
1446 	t->innerHTML = 0;
1447 	return NULL;
1448 }
1449 
1450 // copy text over to the buffer but change < to &lt; etc,
1451 // since this data will be browsed as if it were html.
prepHtmlString(struct i_get * g,const char * q)1452 static void prepHtmlString(struct i_get *g, const char *q)
1453 {
1454 	char c;
1455 	if (!strpbrk(q, "<>&")) {	// no bad characters
1456 		stringAndString(&g->buffer, &g->length, q);
1457 		return;
1458 	}
1459 	for (; (c = *q); ++q) {
1460 		char *meta = 0;
1461 		if (c == '<')
1462 			meta = "&lt;";
1463 		if (c == '>')
1464 			meta = "&gt;";
1465 		if (c == '&')
1466 			meta = "&amp;";
1467 		if (meta)
1468 			stringAndString(&g->buffer, &g->length, meta);
1469 		else
1470 			stringAndChar(&g->buffer, &g->length, c);
1471 	}
1472 }
1473 
1474 /* Format a line from an ftp directory. */
ftp_ls_line(struct i_get * g,char * line)1475 static void ftp_ls_line(struct i_get *g, char *line)
1476 {
1477 	int l = strlen(line);
1478 	int j;
1479 	if (l && line[l - 1] == '\r')
1480 		line[--l] = 0;
1481 
1482 /* blank line becomes paragraph break */
1483 	if (!l || (memEqualCI(line, "total ", 6) && stringIsNum(line + 6))) {
1484 		stringAndString(&g->buffer, &g->length, "<P>\n");
1485 		return;
1486 	}
1487 	stringAndString(&g->buffer, &g->length, "<br>");
1488 
1489 	for (j = 0; line[j]; ++j)
1490 		if (!strchr("-rwxdlsS", line[j]))
1491 			break;
1492 
1493 	if (j == 10 && line[j] == ' ') {	/* long list */
1494 		int fsize, nlinks;
1495 		char user[42], group[42];
1496 		char month[8];
1497 		int day;
1498 		char *q, *t;
1499 		sscanf(line + j, " %d %40s %40s %d %3s %d",
1500 		       &nlinks, user, group, &fsize, month + 1, &day);
1501 		q = strchr(line, ':');
1502 		if (q) {
1503 			for (++q; isdigitByte(*q) || *q == ':'; ++q) ;
1504 			while (*q == ' ')
1505 				++q;
1506 		} else {
1507 /* old files won't have the time, but instead, they have the year. */
1508 /* bad news for us; no good/easy way to glom onto this one. */
1509 			month[0] = month[4] = ' ';
1510 			month[5] = 0;
1511 			q = strstr(line, month);
1512 			if (q) {
1513 				q += 8;
1514 				while (*q == ' ')
1515 					++q;
1516 				while (isdigitByte(*q))
1517 					++q;
1518 				while (*q == ' ')
1519 					++q;
1520 			}
1521 		}
1522 
1523 		if (q && *q) {
1524 			char qc = '"';
1525 			if (strchr(q, qc))
1526 				qc = '\'';
1527 			stringAndString(&g->buffer, &g->length, "<A HREF=x");
1528 			g->buffer[g->length - 1] = qc;
1529 			t = strstr(q, " -> ");
1530 			if (t)
1531 				stringAndBytes(&g->buffer, &g->length, q,
1532 					       t - q);
1533 			else
1534 				stringAndString(&g->buffer, &g->length, q);
1535 			stringAndChar(&g->buffer, &g->length, qc);
1536 			stringAndChar(&g->buffer, &g->length, '>');
1537 			stringAndString(&g->buffer, &g->length, q);
1538 			stringAndString(&g->buffer, &g->length, "</A>");
1539 			if (line[0] == 'd')
1540 				stringAndChar(&g->buffer, &g->length, '/');
1541 			stringAndString(&g->buffer, &g->length, ": ");
1542 			stringAndNum(&g->buffer, &g->length, fsize);
1543 			stringAndChar(&g->buffer, &g->length, '\n');
1544 			return;
1545 		}
1546 	}
1547 
1548 	prepHtmlString(g, line);
1549 	stringAndChar(&g->buffer, &g->length, '\n');
1550 }				/* ftp_ls_line */
1551 
1552 /* ftp_listing: convert an FTP-style listing to html. */
1553 /* Repeatedly calls ftp_ls_line to parse each line of the data. */
ftp_listing(struct i_get * g)1554 static void ftp_listing(struct i_get *g)
1555 {
1556 	char *s, *t;
1557 	char *incomingData = g->buffer;
1558 	int incomingLen = g->length;
1559 	g->buffer = initString(&g->length);
1560 	stringAndString(&g->buffer, &g->length, "<html>\n<body>\n");
1561 
1562 	if (!incomingLen) {
1563 		i_stringAndMessage(&g->buffer, &g->length, MSG_FTPEmptyDir);
1564 	} else {
1565 
1566 		s = incomingData;
1567 		while (s < incomingData + incomingLen) {
1568 			t = strchr(s, '\n');
1569 			if (!t || t >= incomingData + incomingLen)
1570 				break;	/* should never happen */
1571 			*t = 0;
1572 			ftp_ls_line(g, s);
1573 			s = t + 1;
1574 		}
1575 	}
1576 
1577 	stringAndString(&g->buffer, &g->length, "</body></html>\n");
1578 	nzFree(incomingData);
1579 }				/* ftp_listing */
1580 
1581 /* Format a line from a gopher directory. */
gopher_ls_line(struct i_get * g,char * line)1582 static void gopher_ls_line(struct i_get *g, char *line)
1583 {
1584 	int port;
1585 	char first, *text, *pathname, *host, *s, *plus;
1586 	int l = strlen(line);
1587 	if (l && line[l - 1] == '\r')
1588 		line[--l] = 0;
1589 
1590 // first character is the type of line
1591 	first = 'i';
1592 	if (line[0])
1593 		first = *line++;
1594 // . alone ends the listing
1595 	if (first == '.')
1596 		return;
1597 
1598 // cut into pieces by tabs.
1599 	pathname = host = 0;
1600 	text = line;
1601 	s = strchr(line, '\t');
1602 	if (s) {
1603 		*s++ = 0;
1604 		pathname = s;
1605 		s = strchr(pathname, '\t');
1606 		if (s) {
1607 			*s++ = 0;
1608 			host = s;
1609 			s = strchr(host, '\t');
1610 			if (s) {
1611 				*s++ = 0;
1612 				if (*s) {
1613 					// Gopher+ servers add an extra \t+,
1614 					// which we need to truncate
1615 					plus = strchr(s, '\t');
1616 					if (plus)
1617 						*plus = 0;
1618 					port = atoi(s);
1619 				}
1620 			}
1621 		}
1622 	}
1623 
1624 	while (*text == ' ')
1625 		++text;
1626 
1627 // gopher is very much line oriented.
1628 	stringAndString(&g->buffer, &g->length, "<br>\n");
1629 
1630 // i or 3 is informational, 3 being an error.
1631 	if (first == 'i' || first == '3') {
1632 		prepHtmlString(g, text);
1633 		stringAndChar(&g->buffer, &g->length, '\n');
1634 		return;
1635 	}
1636 // everything else becomes hyperlink apart from item type 7 which becomes form
1637 	if (host) {
1638 		char qc = '"';
1639 // I just assume host and path can be quoted with either " or '
1640 		if (strchr(host, qc)	// should never happen
1641 		    || strchr(pathname, qc))
1642 			qc = '\'';
1643 		if (first != '7')
1644 			stringAndString(&g->buffer, &g->length, "<a href=x");
1645 		else
1646 			stringAndString(&g->buffer, &g->length,
1647 					"<form method='get' action=x");
1648 		g->buffer[g->length - 1] = qc;
1649 
1650 		if (!strncmp(pathname, "URL:", 4)) {
1651 // Full URL in path so use it unencoded
1652 			stringAndString(&g->buffer, &g->length, pathname + 4);
1653 			pathname = 0;
1654 		} else {
1655 // Just a path
1656 			pathname = encodePostData(pathname, "./-_$");
1657 			stringAndString(&g->buffer, &g->length, "gopher://");
1658 			stringAndString(&g->buffer, &g->length, host);
1659 			if (port && port != 70) {
1660 				stringAndChar(&g->buffer, &g->length, ':');
1661 				stringAndNum(&g->buffer, &g->length, port);
1662 			}
1663 // gopher requires us to inject the  "first" directive into the path. Wow.
1664 			stringAndChar(&g->buffer, &g->length, '/');
1665 			stringAndChar(&g->buffer, &g->length, first);
1666 			stringAndString(&g->buffer, &g->length, pathname);
1667 		}
1668 		nzFree(pathname);
1669 		stringAndChar(&g->buffer, &g->length, qc);
1670 		stringAndChar(&g->buffer, &g->length, '>');
1671 	}
1672 
1673 	s = strchr(text, '(');
1674 	if (s && s == text)
1675 		s = 0;
1676 	if (s)
1677 		*s = 0;
1678 
1679 	prepHtmlString(g, text);
1680 	if (host) {
1681 		if (first == '7')
1682 			stringAndString(&g->buffer, &g->length,
1683 					" <input type='text' /> <input type='submit' /></form>");
1684 		else
1685 			stringAndString(&g->buffer, &g->length, "</a>");
1686 	}
1687 	if (s) {
1688 		*s = '(';
1689 		prepHtmlString(g, s);
1690 	}
1691 	stringAndChar(&g->buffer, &g->length, '\n');
1692 }				/* gopher_ls_line */
1693 
1694 /* gopher_listing: convert a gopher-style listing to html. */
1695 /* Repeatedly calls gopher_ls_line to parse each line of the data. */
gopher_listing(struct i_get * g)1696 static void gopher_listing(struct i_get *g)
1697 {
1698 	char *s, *t;
1699 	char *incomingData = g->buffer;
1700 	int incomingLen = g->length;
1701 	g->buffer = initString(&g->length);
1702 	stringAndString(&g->buffer, &g->length, "<html>\n<body>\n");
1703 
1704 	if (!incomingLen) {
1705 		i_stringAndMessage(&g->buffer, &g->length, MSG_GopherEmptyDir);
1706 	} else {
1707 
1708 		s = incomingData;
1709 		while (s < incomingData + incomingLen) {
1710 			t = strchr(s, '\n');
1711 			if (!t || t >= incomingData + incomingLen)
1712 				break;	/* should never happen */
1713 			*t = 0;
1714 			gopher_ls_line(g, s);
1715 			s = t + 1;
1716 		}
1717 	}
1718 
1719 	stringAndString(&g->buffer, &g->length, "</body></html>\n");
1720 	nzFree(incomingData);
1721 }				/* gopher_listing */
1722 
1723 // action: 0 traditional set, 1 print, 2 print and exit
ebcurl_setError(CURLcode curlret,const char * url,int action,const char * curl_error)1724 void ebcurl_setError(CURLcode curlret, const char *url, int action,
1725 		     const char *curl_error)
1726 {
1727 	char prot[MAXPROTLEN], host[MAXHOSTLEN];
1728 	void (*fn) (int, ...);
1729 
1730 	if (!getProtHostURL(url, prot, host)) {
1731 /* this should never happen */
1732 		prot[0] = host[0] = 0;
1733 	}
1734 
1735 	fn = (action ? i_printf : setError);
1736 
1737 	switch (curlret) {
1738 	case CURLE_UNSUPPORTED_PROTOCOL:
1739 		(*fn) (MSG_WebProtBad, prot);
1740 		break;
1741 
1742 	case CURLE_URL_MALFORMAT:
1743 		(*fn) (MSG_BadURL, url);
1744 		break;
1745 
1746 	case CURLE_COULDNT_RESOLVE_HOST:
1747 		(*fn) (MSG_IdentifyHost, host);
1748 		break;
1749 
1750 	case CURLE_REMOTE_ACCESS_DENIED:
1751 		(*fn) (MSG_RemoteAccessDenied);
1752 		break;
1753 
1754 	case CURLE_TOO_MANY_REDIRECTS:
1755 		(*fn) (MSG_RedirectMany);
1756 		break;
1757 
1758 	case CURLE_OPERATION_TIMEDOUT:
1759 		(*fn) (MSG_Timeout);
1760 		break;
1761 
1762 	case CURLE_PEER_FAILED_VERIFICATION:
1763 #if LIBCURL_VERSION_NUM < 0x073e00
1764 	case CURLE_SSL_CACERT:
1765 #endif
1766 		(*fn) (MSG_NoCertify, host);
1767 		break;
1768 
1769 	case CURLE_GOT_NOTHING:
1770 	case CURLE_RECV_ERROR:
1771 		(*fn) (MSG_WebRead);
1772 		break;
1773 
1774 	case CURLE_SEND_ERROR:
1775 		(*fn) (MSG_CurlSendData);
1776 		break;
1777 
1778 	case CURLE_COULDNT_CONNECT:
1779 		(*fn) (MSG_WebConnect, host);
1780 		break;
1781 
1782 	case CURLE_FTP_CANT_GET_HOST:
1783 		(*fn) (MSG_FTPConnect);
1784 		break;
1785 
1786 	case CURLE_ABORTED_BY_CALLBACK:
1787 #if 0
1788 // this is printed by the callback function
1789 		(*fn) (MSG_Interrupted);
1790 #endif
1791 		break;
1792 
1793 /* These all look like session initiation failures. */
1794 	case CURLE_FTP_WEIRD_SERVER_REPLY:
1795 	case CURLE_FTP_WEIRD_PASS_REPLY:
1796 	case CURLE_FTP_WEIRD_PASV_REPLY:
1797 	case CURLE_FTP_WEIRD_227_FORMAT:
1798 	case CURLE_FTP_COULDNT_SET_ASCII:
1799 	case CURLE_FTP_COULDNT_SET_BINARY:
1800 	case CURLE_FTP_PORT_FAILED:
1801 		(*fn) (MSG_FTPSession);
1802 		break;
1803 
1804 	case CURLE_FTP_USER_PASSWORD_INCORRECT:
1805 		(*fn) (MSG_LogPass);
1806 		break;
1807 
1808 	case CURLE_FTP_COULDNT_RETR_FILE:
1809 		(*fn) (MSG_FTPTransfer);
1810 		break;
1811 
1812 	case CURLE_SSL_CONNECT_ERROR:
1813 		(*fn) (MSG_SSLConnectError, curl_error);
1814 		break;
1815 
1816 	case CURLE_LOGIN_DENIED:
1817 		(*fn) (MSG_LogPass);
1818 		break;
1819 
1820 	default:
1821 		(*fn) (MSG_CurlCatchAll, curl_easy_strerror(curlret));
1822 		break;
1823 	}
1824 
1825 	if (action)
1826 		nl();
1827 	if (action == 2)
1828 		exit(2);
1829 }				/* ebcurl_setError */
1830 
1831 /* Like httpConnect, but for ftp */
ftpConnect(struct i_get * g,char * creds_buf)1832 static bool ftpConnect(struct i_get *g, char *creds_buf)
1833 {
1834 	CURL *h;		// the curl handle for ftp
1835 	int protLength;		/* length of "ftp://" */
1836 	bool transfer_success = false;
1837 	bool has_slash, is_scp;
1838 	CURLcode curlret = CURLE_OK;
1839 	const char *url = g->url;
1840 
1841 	protLength = strchr(url, ':') - url + 3;
1842 /* scp is somewhat unique among the protocols handled here */
1843 	is_scp = memEqualCI(url, "scp", 3);
1844 
1845 	if (stringEqual(creds_buf, ":") && memEqualCI(url, "ftp", 3))
1846 		strcpy(creds_buf, "anonymous:ftp@example.com");
1847 
1848 	h = http_curl_init(g);
1849 	if (!h)
1850 		goto ftp_transfer_fail;
1851 	curlret = curl_easy_setopt(h, CURLOPT_USERPWD, creds_buf);
1852 	if (curlret != CURLE_OK)
1853 		goto ftp_transfer_fail;
1854 
1855 	urlSanitize(g, 0);
1856 
1857 /* libcurl appends an implicit slash to URLs like "ftp://foo.com".
1858 * Be explicit, so that edbrowse knows that we have a directory. */
1859 	if (!strchr(g->urlcopy + protLength, '/'))
1860 		strcpy(g->urlcopy + g->urlcopy_l++, "/");
1861 
1862 	curlret = setCurlURL(h, g->urlcopy);
1863 	if (curlret != CURLE_OK)
1864 		goto ftp_transfer_fail;
1865 
1866 	has_slash = g->urlcopy[g->urlcopy_l - 1] == '/';
1867 /* don't download a directory listing, we want to see that */
1868 /* Fetching a directory will fail in the special case of scp. */
1869 	if (!g->down_force)
1870 		g->down_state = (has_slash ? 0 : 1);
1871 	g->down_length = 0;
1872 	g->down_msg = MSG_FTPDownload;
1873 	if (is_scp)
1874 		g->down_msg = MSG_SCPDownload;
1875 
1876 	curlret = fetch_internet(g);
1877 
1878 	if (g->down_state == 5) {
1879 /* user has directed a download of this file in the background. */
1880 /* We spawn a thread to do this, then return, but g could go away */
1881 /* before the child thread has a chance to read its contents. */
1882 		struct i_get g0;
1883 		pthread_t tid;
1884 		nzFree(g->buffer);
1885 		g->buffer = NULL;
1886 		g->length = 0;
1887 		g0 = *g;	// structure copy
1888 		curl_easy_cleanup(h);
1889 		pthread_create(&tid, NULL, httpConnectBack1, (void *)&g0);
1890 // I will assume the thread was created.
1891 // Don't call i_get_free(g); the child thread is using those strings.
1892 		return true;
1893 	}
1894 
1895 	if (g->down_state == 3 || g->down_state == -1) {
1896 		i_get_free(g, true);
1897 		curl_easy_cleanup(h);
1898 		return false;
1899 	}
1900 
1901 	if (g->down_state == 4) {
1902 		bool r = true;
1903 		if (curlret != CURLE_OK) {
1904 			r = false;
1905 			ebcurl_setError(curlret, g->urlcopy, 1, g->error);
1906 		} else {
1907 			i_printf(MSG_DownSuccess);
1908 			printf(": %s\n", g->down_file2);
1909 		}
1910 		curl_easy_cleanup(h);
1911 		i_get_free(g, true);
1912 		return r;
1913 	}
1914 
1915 	if (g->length >= CHUNKSIZE && showProgress == 'd')
1916 		nl();		/* We printed dots, so terminate them with newline */
1917 
1918 	if (g->down_state == 2) {
1919 		close(g->down_fd);
1920 		setError(MSG_DownSuccess);
1921 		i_get_free(g, true);
1922 		curl_easy_cleanup(h);
1923 		return false;
1924 	}
1925 
1926 /* Should we run this code on any error condition? */
1927 /* The SSH error pops up under sftp. */
1928 	if (curlret == CURLE_FTP_COULDNT_RETR_FILE ||
1929 	    curlret == CURLE_REMOTE_FILE_NOT_FOUND || curlret == CURLE_SSH) {
1930 		if (has_slash | is_scp)
1931 			transfer_success = false;
1932 		else {		/* try appending a slash. */
1933 			strcpy(g->urlcopy + g->urlcopy_l++, "/");
1934 			g->down_state = 0;
1935 			cnzFree(g->down_file);
1936 			g->down_file = 0;
1937 			curlret = setCurlURL(h, g->urlcopy);
1938 			if (curlret != CURLE_OK)
1939 				goto ftp_transfer_fail;
1940 
1941 			curlret = fetch_internet(g);
1942 			if (curlret != CURLE_OK)
1943 				transfer_success = false;
1944 			else {
1945 				ftp_listing(g);
1946 				transfer_success = true;
1947 			}
1948 		}
1949 	} else if (curlret == CURLE_OK) {
1950 		if (has_slash)
1951 			ftp_listing(g);
1952 		transfer_success = true;
1953 	} else
1954 		transfer_success = false;
1955 
1956 ftp_transfer_fail:
1957 	if (h)
1958 		curl_easy_cleanup(h);
1959 	if (transfer_success == false) {
1960 		if (curlret != CURLE_OK)
1961 			ebcurl_setError(curlret, g->urlcopy,
1962 					(g->foreground ? 0 : 1), g->error);
1963 	}
1964 	if (transfer_success == true && !stringEqual(url, g->urlcopy))
1965 		g->cfn = g->urlcopy;
1966 	else
1967 		nzFree(g->urlcopy);
1968 	g->urlcopy = 0;
1969 
1970 	i_get_free(g, !transfer_success);
1971 
1972 	return transfer_success;
1973 }				/* ftpConnect */
1974 
1975 /* Like httpConnect, but for gopher */
gopherConnect(struct i_get * g)1976 static bool gopherConnect(struct i_get *g)
1977 {
1978 	CURL *h;		// the curl handle for gopher
1979 	int protLength;		/* length of "gopher://" */
1980 	bool transfer_success = false;
1981 	bool has_slash;
1982 	char first = 0;
1983 	char *s;
1984 	CURLcode curlret = CURLE_OK;
1985 	const char *url = g->url;
1986 
1987 	protLength = strchr(url, ':') - url + 3;
1988 	h = http_curl_init(g);
1989 	if (!h)
1990 		goto gopher_transfer_fail;
1991 	urlSanitize(g, 0);
1992 
1993 /* libcurl appends an implicit slash to URLs like "gopher://foo.com".
1994 * Be explicit, so that edbrowse knows if we have a directory. */
1995 	if (!strchr(g->urlcopy + protLength, '/'))
1996 		strcpy(g->urlcopy + g->urlcopy_l, "/");
1997 	curlret = setCurlURL(h, g->urlcopy);
1998 	if (curlret != CURLE_OK)
1999 		goto gopher_transfer_fail;
2000 
2001 	has_slash = g->urlcopy[strlen(g->urlcopy) - 1] == '/';
2002 /* don't download a directory listing, we want to see that */
2003 	g->down_state = (has_slash ? 0 : 1);
2004 	g->down_length = 0;
2005 	g->down_msg = MSG_GopherDownload;
2006 // That's the default, let the leading character override
2007 	s = strchr(g->urlcopy + protLength, '/');
2008 	if (s && (first = s[1])) {
2009 // almost every file type downloads.
2010 		g->down_state = 1;
2011 // 0 is tricky because "05" and "09" can mean binary
2012 // in doubt, treat as integer and skip leading 0s
2013 		while (first == '0' && isdigit(s[2])) {
2014 			s++;
2015 			first = s[1];
2016 		}
2017 		if (strchr("017h", first))
2018 			g->down_state = 0;
2019 		if (first == '1' || first == '7')
2020 			has_slash = true;
2021 	}
2022 
2023 	if (g->down_force)
2024 		g->down_state = 4;
2025 
2026 	curlret = fetch_internet(g);
2027 
2028 	if (g->down_state == 5) {
2029 /* user has directed a download of this file in the background. */
2030 /* We spawn a thread to do this, then return, but g could go away */
2031 /* before the child thread has a chance to read its contents. */
2032 		struct i_get g0;
2033 		pthread_t tid;
2034 		nzFree(g->buffer);
2035 		g->buffer = NULL;
2036 		g->length = 0;
2037 		g0 = *g;	// structure copy
2038 		curl_easy_cleanup(h);
2039 		pthread_create(&tid, NULL, httpConnectBack1, (void *)&g0);
2040 // I will assume the thread was created.
2041 // Don't call i_get_free(g); the child thread is using those strings.
2042 		return true;
2043 	}
2044 
2045 	if (g->down_state == 3 || g->down_state == -1) {
2046 		i_get_free(g, true);
2047 		curl_easy_cleanup(h);
2048 		return false;
2049 	}
2050 
2051 	if (g->down_state == 4) {
2052 		bool r = true;
2053 		if (curlret != CURLE_OK) {
2054 			r = false;
2055 			ebcurl_setError(curlret, g->urlcopy, 1, g->error);
2056 		} else {
2057 			i_printf(MSG_DownSuccess);
2058 			printf(": %s\n", g->down_file2);
2059 		}
2060 		curl_easy_cleanup(h);
2061 		i_get_free(g, true);
2062 		return r;
2063 	}
2064 
2065 	if (g->length >= CHUNKSIZE && showProgress == 'd')
2066 		nl();		/* We printed dots, so terminate them with newline */
2067 
2068 	if (g->down_state == 2) {
2069 		close(g->down_fd);
2070 		setError(MSG_DownSuccess);
2071 		i_get_free(g, true);
2072 		curl_easy_cleanup(h);
2073 		return false;
2074 	}
2075 
2076 	if (curlret == CURLE_OK) {
2077 		if (has_slash)
2078 			gopher_listing(g);
2079 		transfer_success = true;
2080 	} else
2081 		transfer_success = false;
2082 
2083 gopher_transfer_fail:
2084 	if (h)
2085 		curl_easy_cleanup(h);
2086 	if (!transfer_success) {
2087 		if (curlret != CURLE_OK)
2088 			ebcurl_setError(curlret, g->urlcopy,
2089 					(g->foreground ? 0 : 1), g->error);
2090 		i_get_free(g, true);
2091 		return false;
2092 	}
2093 
2094 	if (!stringEqual(url, g->urlcopy))
2095 		g->cfn = g->urlcopy;
2096 	g->urlcopy = 0;
2097 
2098 	if (first == '0') {
2099 // it's a text file, neeed to undos.
2100 // The curl callback function always makes sure there is an extra byte at the end.
2101 		int i, j;
2102 		g->buffer[g->length] = 0;
2103 		for (i = j = 0; i < g->length; ++i) {
2104 			if (g->buffer[i] == '\r' && g->buffer[i + 1] == '\n')
2105 				continue;
2106 			g->buffer[j++] = g->buffer[i];
2107 		}
2108 		g->buffer[j] = 0;
2109 		g->length = j;
2110 	}
2111 
2112 	return true;
2113 }				/* gopherConnect */
2114 
2115 /* If the user has asked for locale-specific responses, then build an
2116  * appropriate Accept-Language: header. */
setHTTPLanguage(const char * lang)2117 void setHTTPLanguage(const char *lang)
2118 {
2119 	int httpLanguage_l;
2120 	char *s;
2121 
2122 	nzFree(httpLanguage);
2123 	httpLanguage = NULL;
2124 	if (!lang)
2125 		return;
2126 
2127 	httpLanguage = initString(&httpLanguage_l);
2128 	stringAndString(&httpLanguage, &httpLanguage_l, "Accept-Language: ");
2129 	stringAndString(&httpLanguage, &httpLanguage_l, lang);
2130 
2131 // Transliterate _ to -, some websites require this.
2132 // en-us not en_us
2133 	for (s = httpLanguage; *s; ++s)
2134 		if (*s == '_')
2135 			*s = '-';
2136 }				/* setHTTPLanguage */
2137 
2138 /* Set the FD_CLOEXEC flag on a socket newly-created by libcurl.
2139  * Let's not leak libcurl's sockets to child processes created by the
2140  * ! (escape-to-shell) command.
2141  * This is a callback.  It returns 0 on success, 1 on failure, per the
2142  * libcurl docs.
2143  */
2144 static int
my_curl_safeSocket(void * clientp,curl_socket_t socketfd,curlsocktype purpose)2145 my_curl_safeSocket(void *clientp, curl_socket_t socketfd, curlsocktype purpose)
2146 {
2147 #ifdef _MSC_VER
2148 	return 0;
2149 #else // !_MSC_VER for success = fcntl(socketfd, F_SETFD, FD_CLOEXEC);
2150 	int success = fcntl(socketfd, F_SETFD, FD_CLOEXEC);
2151 	if (success == -1)
2152 		success = 1;
2153 	else
2154 		success = 0;
2155 	return success;
2156 #endif // _MSC_VER y/n
2157 }
2158 
http_curl_init(struct i_get * g)2159 static CURL *http_curl_init(struct i_get *g)
2160 {
2161 	CURLcode curl_init_status = CURLE_OK;
2162 	int curl_auth;
2163 	CURL *h = curl_easy_init();
2164 	if (h == NULL)
2165 		goto libcurl_init_fail;
2166 	g->h = h;
2167 	curl_init_status =
2168 	    curl_easy_setopt(h, CURLOPT_SHARE, global_share_handle);
2169 	if (curl_init_status != CURLE_OK)
2170 		goto libcurl_init_fail;
2171 	curl_init_status = curl_easy_setopt(h, CURLOPT_COOKIEFILE, "");
2172 	if (curl_init_status != CURLE_OK)
2173 		goto libcurl_init_fail;
2174 /* Lots of these setopt calls shouldn't fail.  They just diddle a struct. */
2175 	curl_easy_setopt(h, CURLOPT_SOCKOPTFUNCTION, my_curl_safeSocket);
2176 	curl_easy_setopt(h, CURLOPT_WRITEFUNCTION, eb_curl_callback);
2177 	curl_easy_setopt(h, CURLOPT_WRITEDATA, g);
2178 	curl_easy_setopt(h, CURLOPT_HEADERFUNCTION, curl_header_callback);
2179 	curl_easy_setopt(h, CURLOPT_HEADERDATA, g);
2180 	if (debugLevel >= 4)
2181 		curl_easy_setopt(h, CURLOPT_VERBOSE, 1);
2182 	curl_easy_setopt(h, CURLOPT_DEBUGFUNCTION, ebcurl_debug_handler);
2183 	curl_easy_setopt(h, CURLOPT_DEBUGDATA, g);
2184 	curl_easy_setopt(h, CURLOPT_NOPROGRESS, 0);
2185 	curl_easy_setopt(h, CURLOPT_PROGRESSFUNCTION, curl_progress);
2186 	curl_easy_setopt(h, CURLOPT_PROGRESSDATA, g);
2187 	curl_easy_setopt(h, CURLOPT_CONNECTTIMEOUT, webTimeout);
2188 	curl_easy_setopt(h, CURLOPT_USERAGENT, currentAgent);
2189 	curl_easy_setopt(h, CURLOPT_SSLVERSION, CURL_SSLVERSION_DEFAULT);
2190 /* We're doing this manually for now.
2191 	curl_easy_setopt(h, CURLOPT_FOLLOWLOCATION, allowRedirection);
2192 */
2193 	curl_easy_setopt(h, CURLOPT_AUTOREFERER, sendReferrer);
2194 	if (ftpActive)
2195 		curl_easy_setopt(h, CURLOPT_FTPPORT, "-");
2196 	else
2197 		curl_easy_setopt(h, CURLOPT_FTPPORT, NULL);
2198 /* See "man curl_easy_setopt.3" for info on CURLOPT_FTPPORT.  Supplying
2199 * "-" makes libcurl select the best IP address for active ftp. */
2200 
2201 /*
2202 * tell libcurl to pick the strongest method from basic, digest and ntlm authentication
2203 * don't use any auth method by default as it will prefer Negotiate to NTLM,
2204 * and it looks like in most cases microsoft IIS says it supports both and libcurl
2205 * doesn't fall back to NTLM when it discovers that Negotiate isn't set up on a system
2206 */
2207 	curl_auth = CURLAUTH_BASIC | CURLAUTH_DIGEST | CURLAUTH_NTLM;
2208 	if (curlAuthNegotiate)
2209 #ifdef CURLAUTH_NEGOTIATE
2210 		curl_auth |= CURLAUTH_NEGOTIATE;
2211 #else
2212 		curl_auth |= CURLAUTH_GSSNEGOTIATE;	/* libcurl < 7.38 */
2213 #endif
2214 	curl_easy_setopt(h, CURLOPT_HTTPAUTH, curl_auth);
2215 
2216 #if 0
2217 // in case you run into DH key too small
2218 // This may not be portable, e.g. curl compiled with gnutls;
2219 // though it is usually compiled with openssl.
2220 // Not sure of the best solution here.
2221 	curl_easy_setopt(h, CURLOPT_SSL_CIPHER_LIST, "DEFAULT@SECLEVEL=1");
2222 #endif
2223 
2224 /* The next few setopt calls could allocate or perform file I/O. */
2225 	g->error[0] = '\0';
2226 	curl_init_status = curl_easy_setopt(h, CURLOPT_ERRORBUFFER, g->error);
2227 	if (curl_init_status != CURLE_OK)
2228 		goto libcurl_init_fail;
2229 	curl_init_status = curl_easy_setopt(h, CURLOPT_ENCODING, "");
2230 	if (curl_init_status != CURLE_OK)
2231 		goto libcurl_init_fail;
2232 
2233 	return h;
2234 
2235 libcurl_init_fail:
2236 	i_printf(MSG_LibcurlNoInit);
2237 	if (h)
2238 		curl_easy_cleanup(h);
2239 	return 0;
2240 }				/* http_curl_init */
2241 
2242 /*
2243  * There's no easy way to get at the server's response message from libcurl.
2244  * So here are some tables and a function for translating response codes to
2245  * messages.
2246 */
2247 
2248 static const char *response_codes_1xx[] = {
2249 	"Continue",
2250 	"Switching Protocols"
2251 };
2252 
2253 static const char *response_codes_2xx[] = {
2254 	"OK",
2255 	"Created" "Accepted",
2256 	"Non-Authoritative Information",
2257 	"No Content",
2258 	"Reset Content",
2259 	"Partial Content"
2260 };
2261 
2262 static const char *response_codes_3xx[] = {
2263 	"Multiple Choices",
2264 	"Moved Permanently",
2265 	"Found",
2266 	"See Other",
2267 	"Not Modified",
2268 	"Use Proxy",
2269 	"(Unused)",
2270 	"Temporary Redirect"
2271 };
2272 
2273 static const char *response_codes_4xx[] = {
2274 	"Bad Request",
2275 	"Unauthorized",
2276 	"Payment Required",
2277 	"Forbidden",
2278 	"Not Found",
2279 	"Method Not Allowed",
2280 	"Not Acceptable",
2281 	"Proxy Authentication Required",
2282 	"Request Timeout",
2283 	"Conflict",
2284 	"Gone",
2285 	"Length Required",
2286 	"Precondition Failed",
2287 	"Request Entity Too Large",
2288 	"Request-URI Too Long",
2289 	"Unsupported Media Type",
2290 	"Requested Range Not Satisfiable",
2291 	"Expectation Failed"
2292 };
2293 
2294 static const char *response_codes_5xx[] = {
2295 	"Internal Server Error",
2296 	"Not Implemented",
2297 	"Bad Gateway",
2298 	"Service Unavailable",
2299 	"Gateway Timeout",
2300 	"HTTP Version Not Supported"
2301 };
2302 
2303 static const char *unknown_http_response =
2304     "Unknown response when accessing webpage.";
2305 
2306 static int max_codes[] = {
2307 	0,
2308 	sizeof(response_codes_1xx) / sizeof(char *),
2309 	sizeof(response_codes_2xx) / sizeof(char *),
2310 	sizeof(response_codes_3xx) / sizeof(char *),
2311 	sizeof(response_codes_4xx) / sizeof(char *),
2312 	sizeof(response_codes_5xx) / sizeof(char *)
2313 };
2314 
2315 static const char **responses[] = {
2316 	NULL, response_codes_1xx, response_codes_2xx, response_codes_3xx,
2317 	response_codes_4xx, response_codes_5xx
2318 };
2319 
message_for_response_code(int code)2320 static const char *message_for_response_code(int code)
2321 {
2322 	const char *message = NULL;
2323 	if (code < 100 || code > 599)
2324 		message = unknown_http_response;
2325 	else {
2326 		int primary = code / 100;	/* Yields int in interval [1,6] */
2327 		int subcode = code % 100;
2328 		if (subcode >= max_codes[primary])
2329 			message = unknown_http_response;
2330 		else
2331 			message = responses[primary][subcode];
2332 	}
2333 	return message;
2334 }				/* message_for_response_code */
2335 
2336 /*
2337  * Function: prompt_and_read
2338  * Arguments:
2339   ** prompt: prompt that user should see.
2340   ** buffer: buffer into which the data should be stored.
2341   ** max_length: maximum allowable length of input.
2342   ** error_msg: message to display if input exceeds maximum length.
2343   ** hide_echo: whether to disable terminal echo (sensitive input)
2344  * Note: prompt and error_message should be message constants from messages.h.
2345  * Return value: none.  buffer contains input on return. */
2346 
2347 /* We need to read two things from the user while authenticating: a username
2348  * and a password.  Here, the task of prompting and reading is encapsulated
2349  * in a function, and we call that function twice.
2350  * After the call, the buffer contains the user's input, without a newline.
2351  * The return value is the length of the string in buffer. */
2352 int
prompt_and_read(int prompt,char * buffer,int buffer_length,int error_message,bool hide_echo)2353 prompt_and_read(int prompt, char *buffer, int buffer_length, int error_message,
2354 		bool hide_echo)
2355 {
2356 	bool reading = true;
2357 	int n = 0;
2358 
2359 	while (reading) {
2360 		char *s;
2361 		if (hide_echo)
2362 			ttySetEcho(false);
2363 		i_printf(prompt);
2364 		fflush(stdout);
2365 		s = fgets(buffer, buffer_length, stdin);
2366 		if (hide_echo)
2367 			ttySetEcho(true);
2368 		if (!s)
2369 			ebClose(0);
2370 		n = strlen(buffer);
2371 		if (n && buffer[n - 1] == '\n')
2372 			buffer[--n] = '\0';	/* replace newline with NUL */
2373 		if (n >= (MAXUSERPASS - 1)) {
2374 			i_printf(error_message, MAXUSERPASS - 2);
2375 			nl();
2376 		} else
2377 			reading = false;
2378 	}
2379 	return n;
2380 }				/* prompt_and_read */
2381 
2382 /*
2383  * Function: read_credentials
2384  * Arguments:
2385  ** buffer: buffer in which to place username and password.
2386  * Return value: true if credentials were read, false otherwise.
2387 
2388 * Behavior: read a username and password from the user.  Store them in
2389  * the buffer, separated by a colon.
2390  * This function returns false in two situations.
2391  * 1. The program is not being run interactively.  The error message is
2392  * set to indicate this.
2393  * 2. The user aborted the login process by typing x"x".
2394  * Again, the error message reflects this condition.
2395 */
2396 
read_credentials(char * buffer)2397 static bool read_credentials(char *buffer)
2398 {
2399 	int input_length = 0;
2400 	bool got_creds = false;
2401 
2402 	if (!isInteractive)
2403 		setError(MSG_Authorize2);
2404 	else {
2405 		i_puts(MSG_WebAuthorize);
2406 		input_length =
2407 		    prompt_and_read(MSG_UserName, buffer, MAXUSERPASS,
2408 				    MSG_UserNameLong, false);
2409 		if (!stringEqual(buffer, "x")) {
2410 			char *password_ptr = buffer + input_length + 1;
2411 			prompt_and_read(MSG_Password, password_ptr, MAXUSERPASS,
2412 					MSG_PasswordLong, true);
2413 			if (!stringEqual(password_ptr, "x")) {
2414 				got_creds = true;
2415 				*(password_ptr - 1) = ':';	/* separate user and password with colon. */
2416 			}
2417 		}
2418 
2419 		if (!got_creds)
2420 			setError(MSG_LoginAbort);
2421 	}
2422 
2423 	return got_creds;
2424 }				/* read_credentials */
2425 
2426 /* Callback used by libcurl.
2427  * Gather all the http headers into one long string. */
2428 static size_t
curl_header_callback(char * header_line,size_t size,size_t nmemb,struct i_get * g)2429 curl_header_callback(char *header_line, size_t size, size_t nmemb,
2430 		     struct i_get *g)
2431 {
2432 	const struct MIMETYPE *mt;
2433 	size_t bytes_in_line = size * nmemb;
2434 	stringAndBytes(&g->headers, &g->headers_len,
2435 		       header_line, bytes_in_line);
2436 
2437 	scan_http_headers(g, true);
2438 	mt = cf->mt;
2439 
2440 // a from-the-web mime type causes a download interrupt
2441 	if (g->pg_ok && mt && !(mt->down_url | mt->from_file) &&
2442 	    !(mt->outtype && g->playonly)) {
2443 		g->down_state = 6;
2444 		return -1;
2445 	}
2446 
2447 	if (g->down_ok && g->down_state == 0 &&
2448 	    !(mt && g->pg_ok && mt->down_url && !mt->from_file) &&
2449 	    g->content[0] && !memEqualCI(g->content, "text/", 5) &&
2450 	    !memEqualCI(g->content, "application/xhtml+xml", 21)) {
2451 		g->down_state = 1;
2452 		g->down_msg = MSG_Down;
2453 		debugPrint(3, "potential download based on type %s",
2454 			   g->content);
2455 	}
2456 
2457 	return bytes_in_line;
2458 }				/* curl_header_callback */
2459 
/* Copy size bytes of text to destination, leaving out every
 * carriage return character.  text need not be null terminated. */
static void
prettify_network_text(const char *text, size_t size, FILE * destination)
{
	const char *run = text;	/* start of the current stretch without \r */
	size_t left = size;	/* bytes not yet examined */

	while (left) {
		const char *cr = memchr(run, '\r', left);
		size_t keep = cr ? (size_t)(cr - run) : left;
		if (keep)
			fwrite(run, 1, keep, destination);
		if (!cr)
			break;
/* step over the carriage return itself */
		run = cr + 1;
		left -= keep + 1;
	}
}				/* prettify_network_text */
2470 
2471 /* Print incoming and outgoing headers.
2472  * Incoming headers are prefixed with curl<, and outgoing headers are
2473  * prefixed with curl>
2474  * We may support more of the curl_infotype values soon. */
2475 
2476 int
ebcurl_debug_handler(CURL * handle,curl_infotype info_desc,char * data,size_t size,struct i_get * g)2477 ebcurl_debug_handler(CURL * handle, curl_infotype info_desc, char *data,
2478 		     size_t size, struct i_get *g)
2479 {
2480 	FILE *f = debugFile ? debugFile : stdout;
2481 
2482 // There's a special case where this function is used
2483 // by the imap client to see if the server is move capable.
2484 	if (ismc & isimap && info_desc == CURLINFO_HEADER_IN &&
2485 	    size > 17 && !strncmp(data, "* CAPABILITY IMAP", 17)) {
2486 		char *s;
2487 // data may not be null terminated; can't use strstr
2488 		for (s = data; s < data + size - 6; ++s)
2489 			if (!strncmp(s, " MOVE", 5) && isspace(s[5])) {
2490 				g->move_capable = true;
2491 				break;
2492 			}
2493 	}
2494 	if (debugLevel < 4)
2495 		return 0;
2496 
2497 	if (info_desc == CURLINFO_HEADER_OUT) {
2498 		fprintf(f, "curl>\n");
2499 		prettify_network_text(data, size, f);
2500 	} else if (info_desc == CURLINFO_HEADER_IN) {
2501 		if (!g->last_curlin)
2502 			fprintf(f, "curl<\n");
2503 		prettify_network_text(data, size, f);
2504 	} else;			/* Do nothing.  We don't care about this piece of data. */
2505 
2506 	if (info_desc == CURLINFO_HEADER_IN)
2507 		g->last_curlin = true;
2508 	else if (info_desc)
2509 		g->last_curlin = false;
2510 
2511 	return 0;
2512 }				/* ebcurl_debug_handler */
2513 
2514 // At this point, down_state = 1
2515 // Only runs from the foreground thread, does not have to be threadsafe.
setup_download(struct i_get * g)2516 static void setup_download(struct i_get *g)
2517 {
2518 	const char *filepart;
2519 	const char *answer;
2520 	char *fp2, *s;
2521 
2522 /* if not run from a terminal then just return. */
2523 	if (!isInteractive) {
2524 		g->down_state = 0;
2525 		return;
2526 	}
2527 
2528 	if (g->cdfn)
2529 		filepart = g->cdfn;
2530 	else
2531 		filepart = getFileURL(g->urlcopy, true);
2532 // transliterate to get rid of /
2533 	fp2 = cloneString(filepart);
2534 	for (s = fp2; *s; ++s)
2535 		if (*s == '/' || *s == '\\')
2536 			*s = '_';
2537 
2538 top:
2539 	answer = getFileName(g->down_msg, fp2, false, true);
2540 /* space for a filename means read into memory */
2541 	if (stringEqual(answer, " ")) {
2542 		g->down_state = 0;	/* in memory download */
2543 		nzFree(fp2);
2544 		return;
2545 	}
2546 
2547 	if (stringEqual(answer, "x") || stringEqual(answer, "X")) {
2548 		g->down_state = -1;
2549 		setError(MSG_DownAbort);
2550 		nzFree(fp2);
2551 		return;
2552 	}
2553 
2554 	if (!envFileDown(answer, &answer)) {
2555 		showError();
2556 		goto top;
2557 	}
2558 
2559 	g->down_fd = creat(answer, MODE_rw);
2560 	if (g->down_fd < 0) {
2561 		i_printf(MSG_NoCreate2, answer);
2562 		nl();
2563 		goto top;
2564 	}
2565 
2566 	nzFree(fp2);
2567 
2568 // we will free down_file, but not down_file2
2569 	g->down_file = g->down_file2 = cloneString(answer);
2570 	if (downDir) {
2571 		int l = strlen(downDir);
2572 		if (!strncmp(g->down_file2, downDir, l)) {
2573 			g->down_file2 += l;
2574 			if (g->down_file2[0] == '/')
2575 				++g->down_file2;
2576 		}
2577 	}
2578 
2579 	g->down_state = (down_bg ? 5 : 2);
2580 }				/* setup_download */
2581 
2582 /* show background jobs and return the number of jobs pending */
2583 /* if iponly is true then just show in progress */
bg_jobs(bool iponly)2584 int bg_jobs(bool iponly)
2585 {
2586 	bool present = false, part;
2587 	int numback = 0;
2588 	struct BG_JOB *j;
2589 
2590 /* three passes */
2591 /* in progress */
2592 	part = false;
2593 	foreach(j, down_jobs) {
2594 		if (j->state != 4)
2595 			continue;
2596 		++numback;
2597 		if (!part) {
2598 			i_printf(MSG_InProgress);
2599 			puts(" {");
2600 			part = present = true;
2601 		}
2602 		printf("%s", j->file + j->file2);
2603 		if (j->fsize)
2604 			printf(" %d/%zu",
2605 			       (int)(fileSizeByName(j->file) / CHUNKSIZE),
2606 			       j->fsize);
2607 		nl();
2608 	}
2609 	if (part)
2610 		puts("}");
2611 
2612 	if (iponly)
2613 		return numback;
2614 
2615 /* complete */
2616 	part = false;
2617 	foreach(j, down_jobs) {
2618 		if (j->state != 0)
2619 			continue;
2620 		if (!part) {
2621 			i_printf(MSG_Complete);
2622 			puts(" {");
2623 			part = present = true;
2624 		}
2625 		puts(j->file + j->file2);
2626 	}
2627 	if (part)
2628 		puts("}");
2629 
2630 /* failed */
2631 	part = false;
2632 	foreach(j, down_jobs) {
2633 		if (j->state != -1)
2634 			continue;
2635 		if (!part) {
2636 			i_printf(MSG_Failed);
2637 			puts(" {");
2638 			part = present = true;
2639 		}
2640 		puts(j->file + j->file2);
2641 	}
2642 	if (part)
2643 		puts("}");
2644 
2645 	if (!present)
2646 		i_puts(MSG_Empty);
2647 
2648 	return numback;
2649 }
2650 
setCurlURL(CURL * h,const char * url)2651 CURLcode setCurlURL(CURL * h, const char *url)
2652 {
2653 	unsigned long verify = mustVerifyHost(url);
2654 	const char *proxy = findProxyForURL(url);
2655 	const char *agent = findAgentForURL(url);
2656 	if (!proxy)
2657 		proxy = "";
2658 	else
2659 		debugPrint(4, "proxy %s", proxy);
2660 	curl_easy_setopt(h, CURLOPT_PROXY, proxy);
2661 	if (agent) {
2662 		debugPrint(4, "agent %s", agent);
2663 		curl_easy_setopt(h, CURLOPT_USERAGENT, agent);
2664 	}
2665 	curl_easy_setopt(h, CURLOPT_SSL_VERIFYPEER, verify);
2666 	curl_easy_setopt(h, CURLOPT_SSL_VERIFYHOST, (verify ? 2 : 0));
2667 // certificate file is per handle, not global, so must be set here.
2668 // cookie file is however on the global handle, go figure.
2669 	if (sslCerts)
2670 		curl_easy_setopt(h, CURLOPT_CAINFO, sslCerts);
2671 	return curl_easy_setopt(h, CURLOPT_URL, url);
2672 }				/* setCurlURL */
2673 
2674 /* expand a frame inline.
2675  * Pass a range of lines; you can expand all the frames in one go.
2676  * Return false if there is a problem fetching a web page,
2677  * or if none of the lines are frames. */
2678 static int frameContractLine(int lineNumber);
2679 static const char *stringInBufLine(const char *s, const char *t);
frameExpand(bool expand,int ln1,int ln2)2680 bool frameExpand(bool expand, int ln1, int ln2)
2681 {
2682 	int ln;			/* line number */
2683 	int problem = 0, p;
2684 	bool something_worked = false;
2685 
2686 	for (ln = ln1; ln <= ln2; ++ln) {
2687 		if (expand)
2688 			p = frameExpandLine(ln, NULL);
2689 		else
2690 			p = frameContractLine(ln);
2691 		if (p > problem)
2692 			problem = p;
2693 		if (p == 0)
2694 			something_worked = true;
2695 	}
2696 
2697 	if (something_worked && problem < 3)
2698 		problem = 0;
2699 	if (problem == 1)
2700 		setError(expand ? MSG_NoFrame1 : MSG_NoFrame2);
2701 	if (problem == 2)
2702 		setError(MSG_FrameNoURL);
2703 	return (problem == 0);
2704 }				/* frameExpand */
2705 
/* Expand one frame: fetch its page, parse it into the tag tree under the
 * frame tag, and, if the new frame has a javascript document object,
 * decorate the tree and run its scripts.
 * The frame is found from the javascript object fo if that is given,
 * otherwise from the text of buffer line ln.
 * fo also changes some details: the frame is left contracted,
 * and a frame without a source is expanded into an empty body.
 * The global cf is switched to the new frame while it is being built,
 * and is put back before returning.
 * Return value: 0, frame expanded successfully, or already expanded,
 * or the fetch succeeded but produced no data.
 1 line is not a frame.
 2 frame doesn't have a valid url.
 3 Problem fetching the url or rendering the page.  */
int frameExpandLine(int ln, jsobjtype fo)
{
	pst line;
	int tagno, start;
	const char *s;
	char *a;
	char *jssrc = 0;
	Tag *t;
	Frame *save_cf, *new_cf, *last_f;
	uchar save_local;
	Tag *cdt;	// contentDocument tag

	if (fo) {
		t = tagFromJavaVar(fo);
		if (!t)
			return 1;
	} else {
// Look for the word Frame, then the internal code character
// followed by the tag number and a left brace.
		line = fetchLine(ln, -1);
		s = stringInBufLine((char *)line, "Frame ");
		if (!s)
			return 1;
		if ((s = strchr(s, InternalCodeChar)) == NULL)
			return 2;
		tagno = strtol(s + 1, (char **)&s, 10);
		if (tagno < 0 || tagno >= cw->numTags || *s != '{')
			return 2;
		t = tagList[tagno];
	}
	if (t->action != TAGACT_FRAME)
		return 1;

/* the easy case is if it's already been expanded before, we just unhide it. */
	if (t->f1) {
		if (!fo)
			t->contracted = false;
		return 0;
	}
// Check with js first, in case it changed.
	if (t->jv && (a = get_property_url(t->f0, t->jv, false)) && *a) {
		nzFree(t->href);
		t->href = a;
	}
	s = t->href;

// javascript in the src, what is this for?
// The code is run in the new frame further down, then freed.
	if (s && !strncmp(s, "javascript:", 11)) {
		jssrc = (char *)s;
		s = 0;
	}

	if (!s) {
// No source. If this is your request then return an error.
// But if we're dipping into the objects then it needs to expand
// into a separate window, a separate js space, with an empty body.
		if (!fo && !jssrc)
			return 2;
// After expansion we need to be able to expand it,
// because there's something there, well maybe.
		t->href = cloneString("#");
// jssrc is the old href and we are responsible for it
	}

	save_cf = cf = t->f0;
/* have to push a new frame before we read the web page */
/* it goes at the end of this window's list of frames */
	for (last_f = &(cw->f0); last_f->next; last_f = last_f->next) ;
	last_f->next = cf = allocZeroMem(sizeof(Frame));
	cf->owner = cw;
	cf->frametag = t;
	cf->gsn = ++gfsn;
	debugPrint(2, "fetch frame %s",
		   (s ? s : (jssrc ? "javascript" : "empty")));

	if (s) {
		bool rc = readFileArgv(s, (fo ? 2 : 1));
		if (!rc) {
/* serverData was never set, or was freed due to some other error. */
/* We just need to pop the frame and return. */
			fileSize = -1;	/* don't print 0 */
			nzFree(cf->fileName);
			free(cf);
			last_f->next = 0;
			cf = save_cf;
			return 3;
		}

/*********************************************************************
readFile could return success and yet serverData is null.
This happens if httpConnect did something other than fetching data,
like playing a stream. Does that happen, even in a frame?
It can, if the frame is a youtube video, which is not unusual at all.
So check for serverData null here. Once again we pop the frame.
*********************************************************************/

		if (serverData == NULL) {
			nzFree(cf->fileName);
			free(cf);
			last_f->next = 0;
			cf = save_cf;
			fileSize = -1;
			return 0;
		}
	} else {
// no source to fetch, so the frame starts out as an empty document
		serverData = cloneString("<body></body>");
		serverDataLen = strlen(serverData);
	}

	new_cf = cf;
// changeFileName, if set, takes over as the name of this frame's file
	if (changeFileName) {
		nzFree(cf->fileName);
		cf->fileName = changeFileName;
		cf->uriEncoded = true;
		changeFileName = 0;
	} else {
		cf->fileName = cloneString(s);
	}

/* don't print the size of what we just fetched */
	fileSize = -1;

/* If we got some data it has to be html.
 * I should check for that, something like htmlTest in html.c,
 * but I'm too lazy to do that right now, so I'll just assume it's good.
 * Also, we have verified content-type = text/html, so that's pretty good. */

	cf->hbase = cloneString(cf->fileName);
// browseLocal is set for this frame alone, and restored below
	save_local = browseLocal;
	browseLocal = !isURL(cf->fileName);
	prepareForBrowse(serverData, serverDataLen);
	if (javaOK(cf->fileName))
		createJavaContext();
	nzFree(newlocation);	/* should already be 0 */
	newlocation = 0;

// tags created from here on belong to the new frame
	start = cw->numTags;
/* call the tidy parser to build the html nodes */
	html2nodes(serverData, true);
	nzFree(serverData);	/* don't need it any more */
	serverData = 0;
	htmlGenerated = false;
// in the edbrowse world, the only child of the frame tag
// is the contentDocument tag.
	cdt = t->firstchild;
// the placeholder document node will soon be orphaned.
	delete_property(cdt->f0, cdt->jv, "parentNode");
	htmlNodesIntoTree(start, cdt);
	cdt->step = 0;
	prerender(0);

/*********************************************************************
At this point cdt->step is 1; the html tree is built, but not decorated.
Well I put the object on cdt manually. Besides, we don't want to set up
the fake cdt object and the getter that auto-expands the frame,
we did that before and now it's being expanded. So bump step up to 2.
*********************************************************************/
	cdt->step = 2;

	if (cf->docobj) {
		jsobjtype topobj;
		decorate(0);
		set_basehref(cf->hbase);
// parent points to the containing frame.
		set_property_object(cf, cf->winobj, "parent", save_cf->winobj);
// And top points to the top.
// top is read with cf set to the containing frame, then cf is switched back.
		cf = save_cf;
		topobj = get_property_object(cf, cf->winobj, "top");
		cf = new_cf;
		set_property_object(cf, cf->winobj, "top", topobj);
		set_property_object(cf, cf->winobj, "frameElement", t->jv);
		run_function_bool(cf, cf->winobj, "eb$qs$start");
		if (jssrc) {
			jsRunScript(cf, cf->winobj, jssrc, "frame.src", 1);
		}
		runScriptsPending(true);
		runOnload();
		runScriptsPending(false);
		set_property_string(cf, cf->docobj, "readyState", "complete");
		run_event_bool(cf, cf->docobj, "document", "onreadystatechange");
		runScriptsPending(false);
		rebuildSelectors();
	}
	nzFree(jssrc);

// add the .browse suffix; 8 more bytes holds the 7 characters and the null
	if (cf->fileName) {
		int j = strlen(cf->fileName);
		cf->fileName = reallocMem(cf->fileName, j + 8);
		strcat(cf->fileName, ".browse");
	}

// The frame is now expanded; return to the frame we came from.
	t->f1 = cf;
	cf = save_cf;
	browseLocal = save_local;
	if (fo)
		t->contracted = true;
	if (new_cf->docobj) {
		jsobjtype cdo;	// contentDocument object
		jsobjtype cwo;	// contentWindow object
		jsobjtype cna;	// childNodes array
		cdo = new_cf->docobj;
// the contentDocument tag now goes with the real document object
		disconnectTagObject(cdt);
		connectTagObject(cdt, cdo);
		cdt->style = 0;
// Should I switch this tag into the new frame? I don't really know.
		cdt->f0 = new_cf;
		set_property_object(new_cf, t->jv, "content$Document", cdo);
		cna = get_property_object(t->f0, t->jv, "childNodes");
		set_array_element_object(t->f0, cna, 0, cdo);
// Should we do this? For consistency I guess yes.
		set_property_object(t->f0, cdo, "parentNode", t->jv);
		cwo = new_cf->winobj;
		set_property_object(new_cf, t->jv, "content$Window", cwo);
// run the frame onload function if it is there.
// I assume it should run in the higher frame.
		run_event_bool(t->f0, t->jv, t->info->name, "onload");
	}

	return 0;
}				/* frameExpandLine */
2927 
frameContractLine(int ln)2928 static int frameContractLine(int ln)
2929 {
2930 	Tag *t = line2frame(ln);
2931 	if (!t)
2932 		return 1;
2933 	t->contracted = true;
2934 	return 0;
2935 }				/* frameContractLine */
2936 
line2frame(int ln)2937 Tag *line2frame(int ln)
2938 {
2939 	const char *line;
2940 	int n, opentag = 0, ln1 = ln;
2941 	const char *s;
2942 
2943 	for (; ln; --ln) {
2944 		line = (char *)fetchLine(ln, -1);
2945 		if (!opentag && ln < ln1
2946 		    && (s = stringInBufLine(line, "*--`\n"))) {
2947 			for (--s; s > line && *s != InternalCodeChar; --s) ;
2948 			if (*s == InternalCodeChar)
2949 				opentag = atoi(s + 1);
2950 			continue;
2951 		}
2952 		s = stringInBufLine(line, "*`--\n");
2953 		if (!s)
2954 			continue;
2955 		for (--s; s > line && *s != InternalCodeChar; --s) ;
2956 		if (*s != InternalCodeChar)
2957 			continue;
2958 		n = atoi(s + 1);
2959 		if (!opentag)
2960 			return tagList[n];
2961 		if (n == opentag)
2962 			opentag = 0;
2963 	}
2964 
2965 	return 0;
2966 }				/* line2frame */
2967 
/* Find the first occurrence of the string t in the buffer line s.
 * A text line in the buffer isn't a string; it ends in newline, not null,
 * so you can't use strstr.
 * The search starts no further right than the character before the
 * newline, though a match may run through the newline itself.
 * Returns a pointer to the match, or 0 if there is none. */
static const char *stringInBufLine(const char *s, const char *t)
{
	size_t len = strlen(t);
	const char *p = s;

	while (*p != '\n') {
		if (strncmp(p, t, len) == 0)
			return p;
		++p;
	}
	return 0;
}				/* stringInBufLine */
2978 
reexpandFrame(void)2979 bool reexpandFrame(void)
2980 {
2981 	int j, start;
2982 	Tag *frametag;
2983 	Tag *cdt;	// contentDocument tag
2984 	uchar save_local;
2985 	bool rc;
2986 	jsobjtype save_top, save_parent, save_fe;
2987 
2988 	cf = newloc_f;
2989 	frametag = cf->frametag;
2990 	cdt = frametag->firstchild;
2991 	save_top = get_property_object(cf, cf->winobj, "top");
2992 	save_parent = get_property_object(cf, cf->winobj, "parent");
2993 	save_fe = get_property_object(cf, cf->winobj, "frameElement");
2994 
2995 // Cut away our tree nodes from the previous document, which are now inaccessible.
2996 	underKill(cdt);
2997 
2998 // the previous document node will soon be orphaned.
2999 	delete_property(cf, cdt->jv, "parentNode");
3000 
3001 	delTimers(cf);
3002 	freeJavaContext(cf);
3003 	nzFree(cf->dw);
3004 	cf->dw = 0;
3005 	nzFree(cf->hbase);
3006 	cf->hbase = 0;
3007 	nzFree(cf->fileName);
3008 	cf->fileName = newlocation;
3009 	newlocation = 0;
3010 	cf->uriEncoded = false;
3011 	nzFree(cf->firstURL);
3012 	cf->firstURL = 0;
3013 	rc = readFileArgv(cf->fileName, 2);
3014 	if (!rc) {
3015 /* serverData was never set, or was freed do to some other error. */
3016 		fileSize = -1;	/* don't print 0 */
3017 		return false;
3018 	}
3019 
3020 	if (serverData == NULL) {
3021 /* frame replaced itself with a playable stream, what to do? */
3022 		fileSize = -1;
3023 		return true;
3024 	}
3025 
3026 	if (changeFileName) {
3027 		nzFree(cf->fileName);
3028 		cf->fileName = changeFileName;
3029 		cf->uriEncoded = true;
3030 		changeFileName = 0;
3031 	}
3032 
3033 	/* don't print the size of what we just fetched */
3034 	fileSize = -1;
3035 
3036 	cf->hbase = cloneString(cf->fileName);
3037 	save_local = browseLocal;
3038 	browseLocal = !isURL(cf->fileName);
3039 	prepareForBrowse(serverData, serverDataLen);
3040 	if (javaOK(cf->fileName))
3041 		createJavaContext();
3042 
3043 	start = cw->numTags;
3044 /* call the tidy parser to build the html nodes */
3045 	html2nodes(serverData, true);
3046 	nzFree(serverData);	/* don't need it any more */
3047 	serverData = 0;
3048 	htmlGenerated = false;
3049 	htmlNodesIntoTree(start, cdt);
3050 	cdt->step = 0;
3051 	prerender(0);
3052 	cdt->step = 2;
3053 	if (cf->docobj) {
3054 		decorate(0);
3055 		set_basehref(cf->hbase);
3056 		set_property_object(cf, cf->winobj, "top", save_top);
3057 		set_property_object(cf, cf->winobj, "parent", save_parent);
3058 		set_property_object(cf, cf->winobj, "frameElement", save_fe);
3059 		run_function_bool(cf, cf->winobj, "eb$qs$start");
3060 		runScriptsPending(true);
3061 		runOnload();
3062 		runScriptsPending(false);
3063 		set_property_string(cf, cf->docobj, "readyState", "complete");
3064 		run_event_bool(cf, cf->docobj, "document", "onreadystatechange");
3065 		runScriptsPending(false);
3066 		rebuildSelectors();
3067 	}
3068 
3069 	j = strlen(cf->fileName);
3070 	cf->fileName = reallocMem(cf->fileName, j + 8);
3071 	strcat(cf->fileName, ".browse");
3072 	browseLocal = save_local;
3073 
3074 	if (cf->docobj) {
3075 		Frame *save_cf;
3076 		jsobjtype cdo;	// contentDocument object
3077 		jsobjtype cwo;	// contentWindow object
3078 		jsobjtype cna;	// childNodes array
3079 		cdo = cf->docobj;
3080 		cwo = cf->winobj;
3081 		disconnectTagObject(cdt);
3082 		connectTagObject(cdt, cdo);
3083 		cdt->style = 0;
3084 // Should I switch this tag into the new frame? I don't really know.
3085 		cdt->f0 = cf;
3086 // have to point contentDocument to the new document object,
3087 // but that requires a change of context.
3088 		save_cf = cf;
3089 		cf = frametag->f0;
3090 		set_property_object(cf, frametag->jv, "content$Document", cdo);
3091 		cna = get_property_object(cf, frametag->jv, "childNodes");
3092 		set_array_element_object(cf, cna, 0, cdo);
3093 // Should we do this? For consistency I guess yes.
3094 		set_property_object(cf, cdo, "parentNode", frametag->jv);
3095 		set_property_object(cf, frametag->jv, "content$Window", cwo);
3096 		cf = save_cf;
3097 	}
3098 
3099 	return true;
3100 }				/* reexpandFrame */
3101 
3102 // Make sure a web page is not trying to read a local file.
frameSecurityFile(const char * thisfile)3103 bool frameSecurityFile(const char *thisfile)
3104 {
3105 	Frame *f = &cf->owner->f0;
3106 	for (; f != cf; f = f->next) {
3107 		if (!isURL(f->fileName))
3108 			continue;
3109 		setError(MSG_NoAccessSecure, thisfile);
3110 		return false;
3111 	}
3112 	return true;
3113 }
3114 
3115 static bool remember_contracted;
3116 
3117 // Undo the above,as though the frame were never expanded.
unframe(jsobjtype fobj,jsobjtype newdoc)3118 void unframe(jsobjtype fobj, jsobjtype newdoc)
3119 {
3120 	int i, n;
3121 	Tag *t, *cdt;
3122 	jsobjtype cdo;
3123 	Frame *f, *f1;
3124 
3125 	t = tagFromJavaVar(fobj);
3126 	if (!t) {
3127 		debugPrint(1, "unframe couldn't find tag");
3128 		return;
3129 	}
3130 	if (!(cdt = t->firstchild) || cdt->action != TAGACT_DOC || cdt->sibling
3131 	    || !(cdo = cdt->jv)) {
3132 		debugPrint(1, "unframe child tag isn't right");
3133 		return;
3134 	}
3135 	underKill(cdt);
3136 	disconnectTagObject(cdt);
3137 	connectTagObject(cdt, newdoc);
3138 
3139 	f1 = t->f1;
3140 	t->f1 = 0;
3141 	remember_contracted = t->contracted;
3142 	if (f1 == cf) {
3143 		debugPrint(1,
3144 			   "deleting the current frame, this shouldn't happen, edbrowse is corrupt");
3145 		return;
3146 	}
3147 	for (f = &(cw->f0); f; f = f->next)
3148 		if (f->next == f1)
3149 			break;
3150 	if (!f) {
3151 		debugPrint(1, "unframe can't find prior frame to relink");
3152 		return;
3153 	}
3154 	f->next = f1->next;
3155 	delTimers(f1);
3156 	freeJavaContext(f1);
3157 	nzFree(f1->dw);
3158 	nzFree(f1->hbase);
3159 	nzFree(f1->fileName);
3160 	nzFree(f1->firstURL);
3161 	free(f1);
3162 
3163 // cdt use to belong to f1, which no longer exists.
3164 	cdt->f0 = f;		// back to its parent frame
3165 
3166 // A running frame could create nodes in its parent frame, or any other frame.
3167 	n = 0;
3168 	for (i = 0; i < cw->numTags; ++i) {
3169 		t = tagList[i];
3170 		if (t->f0 == f1)
3171 			t->f0 = f, ++n;
3172 	}
3173 	if (n)
3174 		debugPrint(3, "%d nodes pushed up to the parent frame", n);
3175 }
3176 
/*
 * Copy the contracted flag that unframe() saved in remember_contracted
 * back onto the tag belonging to the js object fobj.
 * If no tag corresponds to fobj, a level 1 debug message is printed and
 * nothing is changed.
 */
void unframe2(jsobjtype fobj)
{
	Tag *t = tagFromJavaVar(fobj);

/* tagFromJavaVar can come back empty, as unframe() already allows for */
	if (!t) {
		debugPrint(1, "unframe2 couldn't find tag");
		return;
	}
	t->contracted = remember_contracted;
}
3182