1 /* http.c
2 * HTTP protocol client implementation
3 * This file is part of the edbrowse project, released under GPL.
4 */
5
6 #include "eb.h"
7
8 #ifdef _MSC_VER
9 #include <fcntl.h>
10 #else
11 #include <signal.h>
12 #endif
13 #include <time.h>
14
/* File-scope state. Everything except httpLanguage has external linkage. */
char *serverData;
int serverDataLen;
CURL *global_http_handle;
CURLSH *global_share_handle;
bool pluginsOn = true;
bool down_bg;			// download in background
bool down_jsbg = true;		// download js in background
/* Progress display while data arrives, as read by eb_curl_callback():
 * 'd' prints one dot per CHUNKSIZE bytes, 'c' prints chunks received
 * over chunks expected, 'q' prints nothing. */
char showProgress = 'd';	// dots
/* When set, httpConnect() appends this string to the request headers. */
static char *httpLanguage;	/* outgoing */
24
/* Bookkeeping record for one download job, with links for a doubly linked list.
 * NOTE(review): presumably these records live on down_jobs below - confirm
 * against the code that creates them. */
struct BG_JOB {
	struct BG_JOB *next, *prev;
	int state;
	size_t fsize;		// file size
	int file2;		// offset into filename
	/* NOTE(review): only 4 bytes are declared here; presumably the record is
	 * allocated with extra room for the whole name - confirm at the
	 * allocation site. */
	char file[4];
};
/* List head whose two links point back at itself. */
static struct listHead down_jobs = {
	&down_jobs, &down_jobs
};
35
/* Forward declarations of file-local functions, so they can be called
 * before the point where they are defined. */
static void setup_download(struct i_get *g);
static CURL *http_curl_init(struct i_get *g);
static size_t curl_header_callback(char *header_line, size_t size, size_t nmemb,
				   struct i_get *g);
static bool ftpConnect(struct i_get *g, char *creds_buf);
static bool gopherConnect(struct i_get *g);
static bool read_credentials(char *buffer);
static const char *message_for_response_code(int code);
44
45 /* string is allocated. Quotes are removed. No other processing is done.
46 * You may need to decode %xx bytes or such. */
find_http_header(struct i_get * g,const char * name)47 static char *find_http_header(struct i_get *g, const char *name)
48 {
49 char *s, *t, *u, *v;
50 int namelen = strlen(name);
51 char *h = g->headers;
52 if (!h)
53 return NULL;
54 for (s = h; *s; s = v) {
55 /* find start of next line */
56 v = strchr(s, '\n');
57 if (!v)
58 break;
59 ++v;
60
61 /* name: value */
62 t = strchr(s, ':');
63 if (!t || t >= v)
64 continue;
65 u = t;
66 while (u > s && isspace(u[-1]))
67 --u;
68 if (u - s != namelen)
69 continue;
70 if (!memEqualCI(s, name, namelen))
71 continue;
72
73 /* This is a match */
74 ++t;
75 while (t < v && isspace(*t))
76 ++t;
77 u = v;
78 while (u > t && isspace(u[-1]))
79 --u;
80 /* remove quotes */
81 if (u - t >= 2 && *t == u[-1] && (*t == '"' || *t == '\''))
82 ++t, --u;
83 if (u == t)
84 return NULL;
85 return pullString(t, u - t);
86 }
87
88 return NULL;
89 } /* find_http_header */
90
scan_http_headers(struct i_get * g,bool fromCallback)91 static void scan_http_headers(struct i_get *g, bool fromCallback)
92 {
93 char *v;
94
95 if (!g->content[0] && (v = find_http_header(g, "content-type"))) {
96 strncpy(g->content, v, sizeof(g->content) - 1);
97 caseShift(g->content, 'l');
98 nzFree(v);
99 debugPrint(3, "content %s", g->content);
100 g->charset = strchr(g->content, ';');
101 if (g->charset)
102 *(g->charset)++ = 0;
103 if (stringEqual(g->content, "text/html"))
104 g->csp = true;
105 else if (g->pg_ok && !cf->mt)
106 cf->mt = findMimeByContent(g->content);
107 }
108
109 if (!g->cdfn && (v = find_http_header(g, "content-disposition"))) {
110 char *s = strstrCI(v, "filename=");
111 if (s && !strncmp(v, "attachment", 10)) {
112 s += 9;
113 if (*s == '"') {
114 char *t;
115 ++s;
116 t = strchr(s, '"');
117 if (t)
118 *t = 0;
119 }
120 g->cdfn = cloneString(s);
121 debugPrint(4, "disposition filename %s", g->cdfn);
122 // I'm not ready to do this part yet.
123 #if 0
124 if (g->pg_ok && !cf->mt)
125 cf->mt = findMimeByFile(g->cdfn);
126 #endif
127 }
128 nzFree(v);
129 }
130
131 if (!g->hcl && (v = find_http_header(g, "content-length"))) {
132 sscanf(v, "%lld", &g->hcl);
133 nzFree(v);
134 if (g->hcl)
135 debugPrint(4, "content length %lld", g->hcl);
136 }
137
138 if (!g->etag && (v = find_http_header(g, "etag"))) {
139 g->etag = v;
140 debugPrint(4, "etag %s", g->etag);
141 }
142
143 if (g->cacheable && (v = find_http_header(g, "cache-control"))) {
144 caseShift(v, 'l');
145 if (strstr(v, "no-cache")) {
146 g->cacheable = false;
147 debugPrint(4, "no cache");
148 }
149 nzFree(v);
150 }
151
152 if (g->cacheable && (v = find_http_header(g, "pragma"))) {
153 caseShift(v, 'l');
154 if (strstr(v, "no-cache")) {
155 g->cacheable = false;
156 debugPrint(4, "no cache");
157 }
158 nzFree(v);
159 }
160
161 if (!g->modtime && (v = find_http_header(g, "last-modified"))) {
162 g->modtime = parseHeaderDate(v);
163 if (g->modtime)
164 debugPrint(4, "mod date %s", v);
165 nzFree(v);
166 }
167 if (!g->auth_realm[0] && (v = find_http_header(g, "WWW-Authenticate"))) {
168 char *realm, *end;
169 if ((realm = strstrCI(v, "realm="))) {
170 realm += 6;
171 if (realm[0] == '"' || realm[0] == '\'') {
172 end = strchr(realm + 1, realm[0]);
173 realm++;
174 } else {
175 /* look for space if unquoted */
176 end = strchr(realm, ' ');
177 }
178 if (end) {
179 int sz = end - realm;
180 if (sz > sizeof(g->auth_realm) - 1)
181 sz = sizeof(g->auth_realm) - 1;
182 memcpy(g->auth_realm, realm, sz);
183 g->auth_realm[sz] = 0;
184 } else {
185 strncpy(g->auth_realm, realm,
186 sizeof(g->auth_realm) - 1);
187 }
188 debugPrint(4, "auth realm %s", g->auth_realm);
189 }
190 nzFree(v);
191 }
192
193 if (fromCallback)
194 return;
195
196 if (!g->newloc && (v = find_http_header(g, "location"))) {
197 // as though a user had typed it in
198 unpercentURL(v);
199 g->newloc = v;
200 }
201
202 if (!g->newloc && (v = find_http_header(g, "refresh"))) {
203 int delay;
204 if (parseRefresh(v, &delay)) {
205 unpercentURL(v);
206 g->newloc = v;
207 g->newloc_d = delay;
208 v = NULL;
209 }
210 nzFree(v);
211 }
212 } /* scan_http_headers */
213
i_get_free(struct i_get * g,bool nodata)214 static void i_get_free(struct i_get *g, bool nodata)
215 {
216 if (nodata) {
217 nzFree(g->buffer);
218 g->buffer = 0;
219 g->length = 0;
220 }
221 nzFree(g->headers);
222 nzFree(g->urlcopy);
223 nzFree(g->cdfn);
224 nzFree(g->etag);
225 nzFree(g->newloc);
226 cnzFree(g->down_file);
227 // should not be necessary, but just to be safe:
228 g->headers = g->urlcopy = g->cdfn = g->etag = g->newloc = 0;
229 g->down_file = 0;
230 if (g->down_fd > 0) {
231 close(g->down_fd);
232 g->down_fd = 0;
233 }
234 }
235
236 /* actually run the curl request, http or ftp or whatever */
fetch_internet(struct i_get * g)237 static CURLcode fetch_internet(struct i_get *g)
238 {
239 CURLcode curlret;
240 g->buffer = initString(&g->length);
241 g->headers = initString(&g->headers_len);
242 curlret = curl_easy_perform(g->h);
243 if (g->is_http)
244 scan_http_headers(g, false);
245 return curlret;
246 } /* fetch_internet */
247
248 /* Callback used by libcurl. Captures data from http, ftp, pop3, gopher.
249 * download states:
250 * -1 user aborted the download
251 * 0 standard in-memory download
252 * 1 download but stop and ask user if he wants to download to disk
253 * 2 disk download in foreground
254 * 3 disk download parent thread
255 * 4 disk download child thread
256 * 5 disk download before the thread is spawned
257 * 6 mime type says this should be a stream */
258 size_t
eb_curl_callback(char * incoming,size_t size,size_t nitems,struct i_get * g)259 eb_curl_callback(char *incoming, size_t size, size_t nitems, struct i_get * g)
260 {
261 size_t num_bytes = nitems * size;
262 int dots1, dots2, rc;
263
264 if (g->down_state == 1 && g->is_http) {
265 /* don't do a download unless the code is 200. */
266 curl_easy_getinfo(g->h, CURLINFO_RESPONSE_CODE, &(g->code));
267 if (g->code != 200)
268 g->down_state = 0;
269 }
270
271 if (g->down_state == 1) {
272 if (g->hcl == 0) {
273 // http should always set http content length, this is just for ftp.
274 // And ftp downloading a file always has state = 1 on the first data block.
275 double d_size = 0.0; // download size, if we can get it
276 curl_easy_getinfo(g->h,
277 CURLINFO_CONTENT_LENGTH_DOWNLOAD,
278 &d_size);
279 g->hcl = d_size;
280 if (g->hcl < 0)
281 g->hcl = 0;
282 }
283
284 /* state 1, first data block, ask the user */
285 setup_download(g);
286 if (g->down_state == 0)
287 goto showdots;
288 if (g->down_state == -1 || g->down_state == 5)
289 return -1;
290 }
291
292 if (g->down_state == 2 || g->down_state == 4) { /* to disk */
293 rc = write(g->down_fd, incoming, num_bytes);
294 if (rc == num_bytes) {
295 if (g->down_state == 4) {
296 #if 0
297 // Deliberately delay background download, to get several running in parallel
298 // for testing purposes.
299 if (g->down_length == 0)
300 sleep(12);
301 g->down_length += rc;
302 #endif
303 return rc;
304 }
305 goto showdots;
306 }
307 if (g->down_state == 2) {
308 // has to be the foreground http thread, so ok to call setErro,
309 // which is not threadsafe.
310 setError(MSG_NoWrite2, g->down_file);
311 } else {
312 i_printf(MSG_NoWrite2, g->down_file);
313 printf(", ");
314 i_puts(MSG_DownAbort);
315 }
316 return -1;
317 }
318
319 showdots:
320 dots1 = g->length / CHUNKSIZE;
321 if (g->down_state == 0)
322 stringAndBytes(&g->buffer, &g->length, incoming, num_bytes);
323 else
324 g->length += num_bytes;
325 dots2 = g->length / CHUNKSIZE;
326 // showing dots in parallel background download threads
327 // gets jumbled and doesn't mean anything.
328 if (showProgress != 'q' && dots1 < dots2 && !g->down_force) {
329 if (showProgress == 'd') {
330 for (; dots1 < dots2; ++dots1)
331 putchar('.');
332 fflush(stdout);
333 }
334 if (showProgress == 'c' && g->hcl)
335 printf("%d/%d\n", dots2,
336 (int)((g->hcl + CHUNKSIZE - 1) / CHUNKSIZE));
337 }
338 return num_bytes;
339 }
340
341 /* We want to be able to abort transfers when SIGINT is received.
342 * During data transfers, libcurl ignores EINTR. So there's no obvious way
343 * to abort a transfer on SIGINT.
344 * However, libcurl does call a function periodically, to indicate the
345 * progress of the transfer. If the progress function returns a non-zero
346 * value, then libcurl aborts the transfer. The nice thing about libcurl
347 * is that it uses timeouts when reading and writing. It won't block
348 * forever in some system call.
349 * We can be certain that libcurl will, in fact, call the progress function
350 * periodically.
351 * Note: libcurl doesn't start calling the progress function until after the
352 * connection is made. So it can block indefinitely during connect().
353 * All of the progress arguments to the function are unused. */
354
355 static int
curl_progress(void * data_p,double dl_total,double dl_now,double ul_total,double ul_now)356 curl_progress(void *data_p, double dl_total, double dl_now,
357 double ul_total, double ul_now)
358 {
359 struct i_get *g = data_p;
360 int ret = 0;
361 // ^c will interrupt an http or ftp download but not a background download
362 if (intFlag && g->down_force != 1) {
363 if (g->down_force == 0)
364 i_puts(MSG_Interrupted);
365 ret = 1;
366 }
367 return ret;
368 } /* curl_progress */
369
370 static void
unpackUploadedFile(const char * post,const char * boundary,char ** postb,int * postb_l)371 unpackUploadedFile(const char *post, const char *boundary,
372 char **postb, int *postb_l)
373 {
374 static const char message64[] = "Content-Transfer-Encoding: base64";
375 const int boundlen = strlen(boundary);
376 const int m64len = strlen(message64);
377 char *post2;
378 char *b1, *b2, *b3, *b4; /* boundary points */
379 int unpack_ret;
380
381 *postb = 0;
382 *postb_l = 0;
383 if (!strstr(post, message64))
384 return;
385
386 post2 = cloneString(post);
387 b2 = strstr(post2, boundary);
388 while (true) {
389 b1 = b2 + boundlen;
390 if (*b1 != '\r')
391 break;
392 b1 += 2;
393 b1 = strstr(b1, "Content-Transfer");
394 b2 = strstr(b1, boundary);
395 if (memcmp(b1, message64, m64len))
396 continue;
397 b1 += m64len - 6;
398 strcpy(b1, "8bit\r\n\r\n");
399 b1 += 8;
400 b1[0] = b1[1] = ' ';
401 b3 = b2 - 4;
402
403 b4 = b3;
404 unpack_ret = base64Decode(b1, &b4);
405 if (unpack_ret != GOOD_BASE64_DECODE)
406 mail64Error(unpack_ret);
407 /* Should we *really* keep going at this point? */
408 strmove(b4, b3);
409 b2 = b4 + 4;
410 }
411
412 b1 += strlen(b1);
413 *postb = post2;
414 *postb_l = b1 - post2;
415 } /* unpackUploadedFile */
416
417 // Date format is: Mon, 03 Jan 2000 21:29:33 GMT|[+-]nnnn
418 // Or perhaps: Sun Nov 6 08:49:37 1994
419 // or perhaps: 1994-11-06 08:49:37.nnnnZ
420 // or perhaps 06-Jun-2018 21:47:09 +nnnn
parseHeaderDate(const char * date)421 time_t parseHeaderDate(const char *date)
422 {
423 static const char *const months[12] = {
424 "Jan", "Feb", "Mar", "Apr", "May", "Jun",
425 "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"
426 };
427 time_t t = 0;
428 int zone = 0;
429 time_t now = 0;
430 int y; // the type of format, 0 through 3
431 int m; // month
432 struct tm *temptm = NULL;
433 struct tm tm;
434 long utcoffset = 0;
435 const char *date0 = date; // remember for debugging
436 memset(&tm, 0, sizeof(struct tm));
437 tm.tm_isdst = -1;
438
439 now = time(NULL);
440 temptm = localtime(&now);
441 if (temptm == NULL)
442 goto fail;
443 #ifndef _MSC_VER
444 utcoffset = temptm->tm_gmtoff;
445 #endif
446
447 if (isdigitByte(date[0]) && isdigitByte(date[1]) &&
448 date[2] == '-' && isalphaByte(date[3])) {
449 y = 3;
450 tm.tm_mday = atoi(date);
451 date += 3;
452 for (m = 0; m < 12; m++)
453 if (memEqualCI(date, months[m], 3))
454 goto f5;
455 goto fail;
456 f5:
457 tm.tm_mon = m;
458 date += 3;
459 if (*date != '-' || !isdigitByte(date[1]))
460 goto fail;
461 tm.tm_year = atoi(date + 1) - 1900;
462 date += 5;
463 while (*date == ' ')
464 ++date;
465 goto f3;
466 }
467
468 if (isdigitByte(date[0]) && isdigitByte(date[1]) &&
469 isdigitByte(date[2]) && isdigitByte(date[3]) && date[4] == '-') {
470 y = 2;
471 tm.tm_year = atoi(date + 0) - 1900;
472 tm.tm_mon = atoi(date + 5) - 1;
473 tm.tm_mday = atoi(date + 8);
474 date += 11;
475 goto f3;
476 }
477
478 /* skip past day of the week */
479 date = strchr(date, ' ');
480 if (!date)
481 goto fail;
482 date++;
483
484 if (isdigitByte(*date)) { /* first format */
485 y = 0;
486 if (isdigitByte(date[1])) {
487 tm.tm_mday = (date[0] - '0') * 10 + date[1] - '0';
488 date += 2;
489 } else {
490 tm.tm_mday = *date - '0';
491 ++date;
492 }
493 if (*date != ' ' && *date != '-')
494 goto fail;
495 ++date;
496 for (m = 0; m < 12; m++)
497 if (memEqualCI(date, months[m], 3))
498 goto f1;
499 goto fail;
500 f1:
501 tm.tm_mon = m;
502 date += 3;
503 if (*date == ' ') {
504 date++;
505 if (!isdigitByte(date[0]) || !isdigitByte(date[1]) ||
506 !isdigitByte(date[2]) || !isdigitByte(date[3]))
507 goto fail;
508 tm.tm_year =
509 (date[0] - '0') * 1000 + (date[1] - '0') * 100 +
510 (date[2] - '0') * 10 + date[3] - '0' - 1900;
511 date += 4;
512 } else if (*date == '-') {
513 /* Sunday, 06-Nov-94 08:49:37 GMT */
514 date++;
515 if (!isdigitByte(date[0]) || !isdigitByte(date[1]))
516 goto fail;
517 if (!isdigitByte(date[2])) {
518 tm.tm_year =
519 (date[0] >=
520 '7' ? 1900 : 2000) + (date[0] - '0') * 10 +
521 date[1] - '0' - 1900;
522 date += 2;
523 } else {
524 tm.tm_year = atoi(date) - 1900;
525 date += 4;
526 }
527 } else
528 goto fail;
529 if (*date != ' ')
530 goto fail;
531 date++;
532 } else {
533 /* second format */
534 y = 1;
535 for (m = 0; m < 12; m++)
536 if (memEqualCI(date, months[m], 3))
537 goto f2;
538 goto fail;
539 f2:
540 tm.tm_mon = m;
541 date += 3;
542 while (*date == ' ')
543 date++;
544 if (!isdigitByte(date[0]))
545 goto fail;
546 tm.tm_mday = date[0] - '0';
547 date++;
548 if (*date != ' ') {
549 if (!isdigitByte(date[0]))
550 goto fail;
551 tm.tm_mday = tm.tm_mday * 10 + date[0] - '0';
552 date++;
553 }
554 if (*date != ' ')
555 goto fail;
556 date++;
557 }
558
559 f3:
560 /* ready to crack time */
561 if (!isdigitByte(date[0]) || !isdigitByte(date[1]))
562 goto fail;
563 tm.tm_hour = (date[0] - '0') * 10 + date[1] - '0';
564 date += 2;
565 if (*date != ':')
566 goto fail;
567 date++;
568 if (!isdigitByte(date[0]) || !isdigitByte(date[1]))
569 goto fail;
570 tm.tm_min = (date[0] - '0') * 10 + date[1] - '0';
571 date += 2;
572 if (*date != ':')
573 goto fail;
574 date++;
575 if (!isdigitByte(date[0]) || !isdigitByte(date[1]))
576 goto fail;
577 tm.tm_sec = (date[0] - '0') * 10 + date[1] - '0';
578 date += 2;
579 if (y == 2)
580 goto f4;
581
582 if (y == 1) {
583 /* year is at the end */
584 if (*date != ' ')
585 goto fail;
586 date++;
587 if (!isdigitByte(date[0]) || !isdigitByte(date[1]) ||
588 !isdigitByte(date[2]) || !isdigitByte(date[3]))
589 goto fail;
590 tm.tm_year =
591 (date[0] - '0') * 1000 + (date[1] - '0') * 100 + (date[2] -
592 '0') *
593 10 + date[3] - '0' - 1900;
594 date += 4;
595 }
596
597 if (*date != ' ' && *date)
598 goto fail;
599
600 while (*date == ' ')
601 ++date;
602 if ((*date == '+' || *date == '-') &&
603 isdigit(date[1]) && isdigit(date[2]) &&
604 isdigit(date[3]) && isdigit(date[4])) {
605 zone = 10 * (date[1] - '0') + date[2] - '0';
606 zone *= 60;
607 zone += 10 * (date[3] - '0') + date[4] - '0';
608 zone *= 60;
609 /* adjust to gmt */
610 if (*date == '+')
611 zone = -zone;
612 }
613
614 f4:
615 t = mktime(&tm);
616 if (t != (time_t) - 1)
617 return t + zone + utcoffset;
618
619 fail:
620 debugPrint(3, "parseHeaderDate fails on %s", date0);
621 return 0;
622 } /* parseHeaderDate */
623
parseRefresh(char * ref,int * delay_p)624 bool parseRefresh(char *ref, int *delay_p)
625 {
626 int delay = 0;
627 char *u = ref;
628 if (isdigitByte(*u))
629 delay = atoi(u);
630 while (isdigitByte(*u) || *u == '.')
631 ++u;
632 if (*u == ';')
633 ++u;
634 while (*u == ' ')
635 ++u;
636 if (memEqualCI(u, "url=", 4)) {
637 char qc;
638 u += 4;
639 while (isspace(*u))
640 ++u;
641 qc = *u;
642 if (qc == '"' || qc == '\'')
643 ++u;
644 else
645 qc = 0;
646 strmove(ref, u);
647 u = ref + strlen(ref);
648 if (u > ref && u[-1] == qc)
649 u[-1] = 0;
650 debugPrint(3, "delay %d %s", delay, ref);
651 /* avoid the obvious infinite loop */
652 if (sameURL(ref, cf->fileName)) {
653 *delay_p = 0;
654 return false;
655 }
656 *delay_p = delay;
657 return true;
658 }
659 i_printf(MSG_GarbledRefresh, ref);
660 *delay_p = 0;
661 return false;
662 } /* parseRefresh */
663
shortRefreshDelay(const char * r,int d)664 bool shortRefreshDelay(const char *r, int d)
665 {
666 /* the value 10 seconds is somewhat arbitrary */
667 if (d < 10)
668 return true;
669 i_printf(MSG_RedirectDelayed, r, d);
670 return false;
671 } /* shortRefreshDelay */
672
673 // encode the url, if it was supplied by the user.
674 // Otherwise just make a copy.
675 // Either way there is room for one more char at the end.
urlSanitize(struct i_get * g,const char * post)676 static void urlSanitize(struct i_get *g, const char *post)
677 {
678 const char *portloc;
679 const char *url = g->url;
680
681 if (g->uriEncoded && !looksPercented(url, post)) {
682 debugPrint(2, "Warning, url %s doesn't look encoded", url);
683 g->uriEncoded = false;
684 }
685
686 if (!g->uriEncoded) {
687 g->urlcopy = percentURL(url, post);
688 g->urlcopy_l = strlen(g->urlcopy);
689 } else {
690 char *frag;
691 if (post)
692 g->urlcopy_l = post - url;
693 else
694 g->urlcopy_l = strlen(url);
695 g->urlcopy = allocMem(g->urlcopy_l + 2);
696 strncpy(g->urlcopy, url, g->urlcopy_l);
697 g->urlcopy[g->urlcopy_l] = 0;
698 // percentURL strips off the hash, so we need to here.
699 frag = findHash(g->urlcopy);
700 if (frag)
701 *frag = 0;
702 }
703
704 // get rid of : in http://this.that.com:/path, curl can't handle it.
705 getPortLocURL(g->urlcopy, &portloc, 0);
706 if (portloc && !isdigit(portloc[1])) {
707 const char *s = portloc + strcspn(portloc, "/?#\1");
708 strmove((char *)portloc, s);
709 g->urlcopy_l = strlen(g->urlcopy);
710 }
711 } /* urlSanitize */
712
httpConnect(struct i_get * g)713 bool httpConnect(struct i_get *g)
714 {
715 const char *url = g->url;
716 char *cacheData = NULL;
717 int cacheDataLen = 0;
718 CURL *h; // the curl http handle
719 char *referrer = NULL;
720 CURLcode curlret = CURLE_OK;
721 struct curl_slist *custom_headers = NULL;
722 struct curl_slist *tmp_headers = NULL;
723 const struct MIMETYPE *mt;
724 char creds_buf[MAXUSERPASS * 2 + 2]; /* creds abr. for credentials */
725 bool still_fetching = true;
726 char prot[MAXPROTLEN], host[MAXHOSTLEN];
727 const char *post, *s;
728 char *postb = NULL;
729 int postb_l = 0;
730 bool transfer_status = false;
731 bool proceed_unauthenticated = false;
732 int redirect_count = 0;
733 bool post_request = false;
734 bool head_request = false;
735 uchar sxfirst = 0;
736 int n;
737
738 if (!getProtHostURL(url, prot, host)) {
739 // only the foreground http thread uses setError,
740 // the traditional /bin/ed error system.
741 if (g->foreground)
742 setError(MSG_DomainEmpty);
743 return false;
744 }
745 // plugins can only be ok from one thread, the interactive thread
746 // that calls up web pages at the user's behest.
747 // None of this machinery need be threadsafe.
748 if (g->pg_ok && (cf->mt = mt = findMimeByURL(url, &sxfirst)) &&
749 !(mt->from_file | mt->down_url) && !(mt->outtype && g->playonly)) {
750 char *f;
751 urlSanitize(g, 0);
752 mimestream:
753 // don't have to fetch the data, the program can handle it.
754 nzFree(g->buffer);
755 g->buffer = 0;
756 g->code = 200;
757 f = g->urlcopy;
758 if (mt->outtype) {
759 runPluginCommand(mt, f, 0, 0, 0, &g->buffer,
760 &g->length);
761 cf->render1 = true;
762 if (sxfirst)
763 cf->render2 = true;
764 i_get_free(g, false);
765 } else {
766 runPluginCommand(mt, f, 0, 0, 0, 0, 0);
767 i_get_free(g, true);
768 }
769 return true;
770 }
771
772 /* Pull user password out of the url */
773 n = getCredsURL(url, creds_buf);
774 if (n == 1) {
775 if (g->foreground)
776 setError(MSG_UserNameLong, MAXUSERPASS);
777 return false;
778 }
779 if (n == 2) {
780 if (g->foreground)
781 setError(MSG_PasswordLong, MAXUSERPASS);
782 return false;
783 }
784 unpercentString(creds_buf);
785
786 if (!curlActive) {
787 eb_curl_global_init();
788 cookiesFromJar();
789 setupEdbrowseCache();
790 }
791
792 if (stringEqualCI(prot, "http") || stringEqualCI(prot, "https")) {
793 ; /* ok for now */
794 } else if (stringEqualCI(prot, "gopher")) {
795 return gopherConnect(g);
796 } else if (stringEqualCI(prot, "ftp") ||
797 stringEqualCI(prot, "ftps") ||
798 stringEqualCI(prot, "scp") ||
799 stringEqualCI(prot, "tftp") || stringEqualCI(prot, "sftp")) {
800 return ftpConnect(g, creds_buf);
801 } else {
802 if (g->foreground)
803 setError(MSG_WebProtBad, prot);
804 else if (debugLevel >= 3) {
805 i_printf(MSG_WebProtBad, prot);
806 nl();
807 }
808 return false;
809 }
810
811 h = http_curl_init(g);
812 if (!h) { // should never happen
813 i_get_free(g, false);
814 return false;
815 }
816
817 /* "Expect:" header causes some servers to lose. Disable it. */
818 tmp_headers = curl_slist_append(custom_headers, "Expect:");
819 if (tmp_headers == NULL)
820 i_printfExit(MSG_NoMem);
821 custom_headers = tmp_headers;
822 if (httpLanguage) {
823 custom_headers =
824 curl_slist_append(custom_headers, httpLanguage);
825 if (custom_headers == NULL)
826 i_printfExit(MSG_NoMem);
827 }
828
829 post = strchr(url, '\1');
830 postb = 0;
831 urlSanitize(g, post);
832
833 if (post) {
834 post_request = true;
835 post++;
836
837 if (strncmp(post, "`mfd~", 5) == 0) {
838 int multipart_header_len = 0;
839 char *multipart_header =
840 initString(&multipart_header_len);
841 char thisbound[24];
842 post += 5;
843 stringAndString(&multipart_header,
844 &multipart_header_len,
845 "Content-Type: multipart/form-data; boundary=");
846 s = strchr(post, '\r');
847 stringAndBytes(&multipart_header, &multipart_header_len,
848 post, s - post);
849 tmp_headers =
850 curl_slist_append(custom_headers, multipart_header);
851 if (tmp_headers == NULL)
852 i_printfExit(MSG_NoMem);
853 custom_headers = tmp_headers;
854 /* curl_slist_append made a copy of multipart_header. */
855 nzFree(multipart_header);
856 memcpy(thisbound, post, s - post);
857 thisbound[s - post] = 0;
858 post = s + 2;
859 unpackUploadedFile(post, thisbound, &postb, &postb_l);
860 }
861 curlret = curl_easy_setopt(h, CURLOPT_POSTFIELDS,
862 (postb_l ? postb : post));
863 if (curlret != CURLE_OK)
864 goto curl_fail;
865 curlret =
866 curl_easy_setopt(h, CURLOPT_POSTFIELDSIZE,
867 postb_l ? postb_l : strlen(post));
868 if (curlret != CURLE_OK)
869 goto curl_fail;
870 } else {
871 curlret = curl_easy_setopt(h, CURLOPT_HTTPGET, 1);
872 if (curlret != CURLE_OK)
873 goto curl_fail;
874 }
875
876 if (sendReferrer && isURL(g->thisfile) &&
877 (memEqualCI(g->thisfile, "http:", 5)
878 || memEqualCI(g->thisfile, "https:", 6))) {
879 char *p, *p2, *p3;
880 referrer = cloneString(g->thisfile);
881 // lop off post data
882 p = strchr(referrer, '\1');
883 if (p)
884 *p = 0;
885 // lop off .browse
886 p = referrer + strlen(referrer);
887 if (p - referrer > 7 && !memcmp(p - 7, ".browse", 7))
888 p[-7] = 0;
889 // excise login:password
890 p = strchr(referrer, ':');
891 ++p;
892 if (*p == '/')
893 ++p;
894 if (*p == '/')
895 ++p;
896 p2 = strchr(p, '@');
897 p3 = strchr(p, '/');
898 if (p2 && (!p3 || p2 < p3))
899 strmove(p, p2 + 1);
900 // The current protocol should be http or https, we cleared out everything else.
901 // But https to http is not allowed. RFC 2616, section 15.1.3
902 p = strchr(referrer, ':');
903 if (strlen(prot) == 4 && p - referrer == 5) {
904 nzFree(referrer);
905 referrer = NULL;
906 }
907 }
908 // We keep the same referrer even after redirections, which I think is right.
909 // That's why it's here instead of inside the loop.
910 curlret = curl_easy_setopt(h, CURLOPT_REFERER, referrer);
911 if (curlret != CURLE_OK)
912 goto curl_fail;
913
914 // look for custom headers from the calling function
915 if (g->custom_h) {
916 const char *u, *v;
917 u = g->custom_h;
918 while (*u) {
919 int d;
920 char *w;
921 v = strchr(u, '\n');
922 if (!v)
923 break;
924 d = v - u;
925 w = allocMem(d + 1);
926 memcpy(w, u, d);
927 w[d] = 0;
928 tmp_headers = curl_slist_append(custom_headers, w);
929 if (tmp_headers == NULL)
930 i_printfExit(MSG_NoMem);
931 custom_headers = tmp_headers;
932 debugPrint(4, "custom %s", w);
933 nzFree(w);
934 u = v + 1;
935 }
936 }
937
938 curlret = curl_easy_setopt(h, CURLOPT_HTTPHEADER, custom_headers);
939 if (curlret != CURLE_OK)
940 goto curl_fail;
941 curlret = setCurlURL(h, g->urlcopy);
942 if (curlret != CURLE_OK)
943 goto curl_fail;
944
945 /* If we have a username and password, then tell libcurl about it.
946 * libcurl won't send it to the server unless server gave a 401 response.
947 * Libcurl selects the most secure form of auth provided by server. */
948
949 if (stringEqual(creds_buf, ":"))
950 getUserPass(g->urlcopy, creds_buf, false);
951 // If the URL didn't have user and password, and getUserPass failed,
952 // then creds_buf = ":".
953 curlret = curl_easy_setopt(h, CURLOPT_USERPWD, creds_buf);
954 if (curlret != CURLE_OK)
955 goto curl_fail;
956
957 /* We are ready to make a transfer. Here is where it gets complicated.
958 * At the top of the loop, we perform the HTTP request. It may fail entirely
959 * (I.E., libcurl returns an indicator other than CURLE_OK).
960 * We may be redirected. Edbrowse needs finer control over the redirection
961 * process than libcurl gives us.
962 * Decide whether to accept the redirection, using the following criteria.
963 * Does user permit redirects? Will we exceed maximum allowable redirects?
964 * We may be asked for authentication. In that case, grab username and
965 * password from the user. If the server accepts the username and password,
966 * then add it to the list of authentication records. */
967
968 still_fetching = true;
969
970 if (!post_request && presentInCache(g->urlcopy)) {
971 head_request = true;
972 curl_easy_setopt(h, CURLOPT_NOBODY, 1l);
973 }
974
975 while (still_fetching == true) {
976 char *redir = NULL;
977
978 // recheck the url after a redirect
979 if (redirect_count && g->pg_ok &&
980 (cf->mt = mt = findMimeByURL(g->urlcopy, &sxfirst)) &&
981 !(mt->from_file | mt->down_url) &&
982 !(mt->outtype && g->playonly)) {
983 curl_easy_cleanup(h);
984 goto mimestream;
985 }
986
987 if (head_request && g->down_force == 1) {
988 curl_easy_setopt(h, CURLOPT_NOBODY, 0l);
989 head_request = false;
990 }
991
992 if (g->down_force == 1)
993 truncate0(g->down_file, g->down_fd);
994
995 perform:
996 g->is_http = g->cacheable = true;
997 curlret = fetch_internet(g);
998
999 /*********************************************************************
1000 This is a one line workaround for an apparent bug in curl.
1001 The return CURLE_WRITE_ERROR means the data fetched from the internet
1002 could not be written to disk. And how does curl know?
1003 Because the callback function returns a lesser number of bytes.
1004 This is like write(), if it returns a lesser number
1005 of bytes then it was unable to write the entire block to disk.
1006 Ok, but I never return fewer bytes than was passed to me.
1007 I return the expected number of bytes, or -1 in the rare case
1008 that I want to abort the download.
1009 So you see, curl should never return this WRITE error.
1010 Yet it does, in version 7.58.0-2, on debian.
1011 And only on one page we have found so far:
1012 https://www.literotica.com/stories/new_submissions.php
1013 The entire page is downloaded, down to the very last byte,
1014 then the WRITE error is passed back.
1015 Well if it happens once it will happen elsewhere.
1016 Users will not be able to fetch pages from the internet, and not know why.
1017 The error message, can't write to disk, is not helpful at all.
1018 So this is a simple workaround.
1019 *********************************************************************/
1020
1021 if (curlret == CURLE_WRITE_ERROR)
1022 curlret = CURLE_OK;
1023
1024 if (g->down_state == 6) {
1025 // Header has indicated a plugin by content type or protocol or suffix.
1026 curl_easy_cleanup(h);
1027 goto mimestream;
1028 }
1029
1030 /*********************************************************************
1031 If the desired file is in cache for some reason, and we issued the head request,
1032 and it is application, or some such that triggers a download, then state = 1,
1033 but no data is forthcoming, and the user was never asked if he wants
1034 to download, so state is still 1.
1035 So ask, and then look at state.
1036 If state is nonzero, sorry, I'm not taking the file from cache,
1037 not yet, just because it's a whole bunch of new code.
1038 We don't generally store our downloaded files in cache anyways,
1039 they go where they go, so this doesn't come up very often.
1040 *********************************************************************/
1041
1042 if (head_request) {
1043 if (g->down_state == 1) {
1044 setup_download(g);
1045 /* now we have our answer */
1046 }
1047
1048 if (g->down_state != 0) {
1049 curl_easy_setopt(h, CURLOPT_NOBODY, 0l);
1050 head_request = false;
1051 debugPrint(3, "switch to get for download %d",
1052 g->down_state);
1053 }
1054
1055 if (g->down_state == 2) {
1056 curl_easy_getinfo(h, CURLINFO_RESPONSE_CODE,
1057 &g->code);
1058 if (g->code == 200)
1059 goto perform;
1060 g->down_state = 0;
1061 }
1062 }
1063
1064 if (g->down_state == 5) {
1065 /* user has directed a download of this file in the background. */
1066 /* We spawn a thread to do this, then return, but g could go away */
1067 /* before the child thread has a chance to read its contents. */
1068 struct i_get g0;
1069 pthread_t tid;
1070 nzFree(g->buffer);
1071 g->buffer = NULL;
1072 g->length = 0;
1073 g0 = *g; // structure copy
1074 if (custom_headers)
1075 curl_slist_free_all(custom_headers);
1076 curl_easy_cleanup(h);
1077 nzFree(postb);
1078 nzFree(referrer);
1079 pthread_create(&tid, NULL, httpConnectBack1,
1080 (void *)&g0);
1081 // I will assume the thread was created.
1082 // Don't call i_get_free(g); the child thread is using those strings.
1083 return true;
1084 }
1085
1086 if (g->down_state == 3 || g->down_state == -1) {
1087 i_get_free(g, true);
1088 curl_easy_cleanup(h);
1089 nzFree(referrer);
1090 return false;
1091 }
1092
1093 if (g->down_state == 4) {
1094 bool r = true;
1095 if (curlret != CURLE_OK) {
1096 r = false;
1097 ebcurl_setError(curlret, g->urlcopy, 1,
1098 g->error);
1099 } else {
1100 curl_easy_getinfo(h, CURLINFO_RESPONSE_CODE,
1101 &(g->code));
1102 if (g->code != 200) {
1103 r = false;
1104 } else {
1105 i_printf(MSG_DownSuccess);
1106 printf(": %s\n", g->down_file2);
1107 }
1108 }
1109 if (custom_headers)
1110 curl_slist_free_all(custom_headers);
1111 curl_easy_cleanup(h);
1112 nzFree(postb);
1113 nzFree(referrer);
1114 i_get_free(g, true);
1115 return r;
1116 }
1117
1118 if (g->length >= CHUNKSIZE && showProgress == 'd'
1119 && !g->down_force)
1120 nl(); /* We printed dots, so terminate them with newline */
1121
1122 if (g->down_state == 2) {
1123 close(g->down_fd);
1124 i_get_free(g, true);
1125 setError(MSG_DownSuccess);
1126 curl_easy_cleanup(h);
1127 nzFree(referrer);
1128 return false;
1129 }
1130
1131 if (curlret != CURLE_OK) {
1132 if (!head_request)
1133 goto curl_fail;
1134 ebcurl_setError(curlret, g->urlcopy, 1, g->error);
1135 debugPrint(3, "switch from head to get");
1136 curl_easy_setopt(h, CURLOPT_NOBODY, 0l);
1137 head_request = false;
1138 goto perform;
1139 }
1140 // get http code
1141 curl_easy_getinfo(h, CURLINFO_RESPONSE_CODE, &g->code);
1142 if (curlret != CURLE_OK)
1143 goto curl_fail;
1144
1145 if (g->tsn)
1146 debugPrint(3, "thread %d http code %ld", g->tsn,
1147 g->code);
1148 else
1149 debugPrint(3, "http code %ld", g->code);
1150
1151 /* refresh header is an alternate form of redirection */
1152 if (g->newloc && g->newloc_d >= 0) {
1153 if (shortRefreshDelay(g->newloc, g->newloc_d)) {
1154 g->code = 302;
1155 } else {
1156 nzFree(g->newloc);
1157 g->newloc = 0;
1158 }
1159 }
1160
1161 redir = g->newloc;
1162 g->newloc = 0;
1163
1164 if (allowRedirection &&
1165 ((g->code >= 301 && g->code <= 303) ||
1166 (g->code >= 307 && g->code <= 308))) {
1167 if (redir)
1168 redir = resolveURL(g->urlcopy, redir);
1169 still_fetching = false;
1170 if (redir == NULL) {
1171 /* Redirected, but we don't know where to go. */
1172 i_printf(MSG_RedirectNoURL, g->code);
1173 transfer_status = true;
1174 } else if (redirect_count >= 10) {
1175 i_puts(MSG_RedirectMany);
1176 transfer_status = true;
1177 nzFree(redir);
1178 } else { /* redirection looks good. */
1179 strcpy(creds_buf, ":"); /* Flush stale data. */
1180 nzFree(g->urlcopy);
1181 g->urlcopy = redir;
1182 g->urlcopy_l = strlen(g->urlcopy);
1183 redir = NULL;
1184
1185 /* Convert POST request to GET request after redirection. */
1186 /* This should only be done for 301 through 303 */
1187 if (g->code < 307) {
1188 curl_easy_setopt(h, CURLOPT_HTTPGET, 1);
1189 post_request = false;
1190 }
1191 /* I think there is more work to do for 307 308,
1192 * pasting the prior post string onto the new URL. Not sure about this. */
1193
1194 getUserPass(g->urlcopy, creds_buf, false);
1195 curlret =
1196 curl_easy_setopt(h, CURLOPT_USERPWD,
1197 creds_buf);
1198 if (curlret != CURLE_OK)
1199 goto curl_fail;
1200
1201 curlret = setCurlURL(h, g->urlcopy);
1202 if (curlret != CURLE_OK)
1203 goto curl_fail;
1204
1205 if (!post_request && presentInCache(g->urlcopy)) {
1206 head_request = true;
1207 curl_easy_setopt(h, CURLOPT_NOBODY, 1l);
1208 }
1209 // This is unusual in that we're using the i_get structure again,
1210 // so we need to reset some parts of it and not others.
1211 nzFree(g->buffer);
1212 g->buffer = 0;
1213 // This 302 redirection could set content type = application/binary,
1214 // which in turn sets state = 1, which is ignored since 302 takes precedence.
1215 // So state might still be 1, set it back to 0.
1216 g->down_state = 0;
1217 g->code = 0;
1218 g->csp = false;
1219 nzFree(g->headers);
1220 g->headers = 0;
1221 g->headers_len = 0;
1222 g->content[0] = 0;
1223 g->charset = 0;
1224 g->hcl = 0;
1225 nzFree(g->cdfn);
1226 g->cdfn = 0;
1227 g->modtime = 0;
1228 nzFree(g->etag);
1229 g->etag = 0;
1230 ++redirect_count;
1231 still_fetching = true;
1232 debugPrint(2, "redirect %s", g->urlcopy);
1233 }
1234 }
1235
1236 else if (g->code == 401 && !proceed_unauthenticated) {
1237 bool got_creds = false;
1238
1239 /* only try realm on first try - prevents loop */
1240 if (stringEqual(creds_buf, ":"))
1241 got_creds =
1242 getUserPassRealm(g->urlcopy, creds_buf,
1243 g->auth_realm);
1244 if (!got_creds && g->foreground) {
1245 i_printf(MSG_AuthRequired, g->urlcopy,
1246 g->auth_realm);
1247 nl();
1248 got_creds = read_credentials(creds_buf);
1249 }
1250 if (got_creds && g->foreground)
1251 addWebAuthorization(g->urlcopy, creds_buf,
1252 false, g->auth_realm);
1253 if (got_creds) {
1254 curl_easy_setopt(h, CURLOPT_USERPWD, creds_buf);
1255 nzFree(g->buffer);
1256 g->buffer = 0;
1257 g->length = 0;
1258 } else {
1259 /* User aborted the login process, try and at least get something. */
1260 proceed_unauthenticated = true;
1261 }
1262 } else { /* not redirect, not 401 */
1263 if (head_request) {
1264 if (fetchCache
1265 (g->urlcopy, g->etag, g->modtime,
1266 &cacheData, &cacheDataLen)) {
1267 nzFree(g->buffer);
1268 g->buffer = cacheData;
1269 g->length = cacheDataLen;
1270 still_fetching = false;
1271 transfer_status = true;
1272 } else {
1273 /* Back through the loop,
1274 * now doing GET rather than HEAD. */
1275 curl_easy_setopt(h, CURLOPT_NOBODY, 0l);
1276 head_request = false;
1277 --redirect_count;
1278 }
1279 } else {
1280 if (g->code == 200 && g->cacheable &&
1281 (g->modtime || g->etag) &&
1282 g->down_state == 0)
1283 storeCache(g->urlcopy, g->etag,
1284 g->modtime, g->buffer,
1285 g->length);
1286 still_fetching = false;
1287 transfer_status = true;
1288 }
1289 }
1290 }
1291
1292 curl_fail:
1293 if (custom_headers)
1294 curl_slist_free_all(custom_headers);
1295 curl_easy_cleanup(h);
1296 nzFree(postb);
1297
1298 if (curlret != CURLE_OK) {
1299 ebcurl_setError(curlret, g->urlcopy, (g->foreground ? 0 : 1),
1300 g->error);
1301 nzFree(referrer);
1302 i_get_free(g, true);
1303 return false;
1304 }
1305
1306 if (!transfer_status) {
1307 nzFree(referrer);
1308 i_get_free(g, true);
1309 return false;
1310 }
1311
1312 if ((g->code != 200 && g->code != 201 &&
1313 (g->foreground || debugLevel >= 2)) ||
1314 (g->code == 201 && debugLevel >= 3))
1315 i_printf(MSG_HTTPError,
1316 g->code, message_for_response_code(g->code));
1317
1318 // with lopping off post data, or encoding the url,
1319 // it's easier to just assume the name has changed,
1320 // even if there is no redirection.
1321 g->cfn = g->urlcopy;
1322 g->urlcopy = 0;
1323
1324 /* see if http header has set the filename */
1325 if (g->cdfn) {
1326 nzFree(g->cfn);
1327 g->cfn = g->cdfn;
1328 g->cdfn = NULL;
1329 }
1330
1331 if (g->headers_p) {
1332 *g->headers_p = g->headers;
1333 // The string is your responsibility now.
1334 g->headers = 0;
1335 }
1336
1337 i_get_free(g, false);
1338 g->referrer = referrer;
1339 return transfer_status;
1340 } /* httpConnect */
1341
1342 static int tsn; // thread sequence number
1343
httpConnectBack1(void * ptr)1344 void *httpConnectBack1(void *ptr)
1345 {
1346 struct i_get *g0 = ptr;
1347 struct i_get g = *g0; // structure copy
1348 struct BG_JOB *job;
1349 bool rc;
1350 g.down_force = 1;
1351 g.down_state = 4;
1352 // urlcopy will be recomputed on the next http call
1353 nzFree(g.urlcopy);
1354 g.urlcopy = 0;
1355 // Other things we should clean up?
1356 g.tsn = ++tsn;
1357 debugPrint(3, "bg thread %d", tsn);
1358 i_puts(MSG_DownProgress);
1359 /* push job onto the list for tracking and display */
1360 job = allocMem(sizeof(struct BG_JOB) + strlen(g.down_file));
1361 job->state = 4;
1362 strcpy(job->file, g.down_file);
1363 job->file2 = g.down_file2 - g.down_file;
1364 // round file size up to the nearest chunk.
1365 // This will come out 0 only if the true size is 0.
1366 job->fsize = ((g.hcl + (CHUNKSIZE - 1)) / CHUNKSIZE);
1367 addToListBack(&down_jobs, job);
1368 rc = httpConnect(&g);
1369 job->state = (rc ? 0 : -1);
1370 return NULL;
1371 }
1372
httpConnectBack2(void * ptr)1373 void *httpConnectBack2(void *ptr)
1374 {
1375 Tag *t = ptr;
1376 bool rc;
1377 struct i_get g;
1378 memset(&g, 0, sizeof(g));
1379 g.thisfile = cf->fileName;
1380 g.uriEncoded = true;
1381 g.url = t->href;
1382 g.down_force = 2;
1383 g.tsn = ++tsn;
1384 debugPrint(3, "jsbg thread %d", tsn);
1385 rc = httpConnect(&g);
1386 t->loadsuccess = rc;
1387 if (!rc)
1388 t->hcode = g.code;
1389 else {
1390 // Rarely, a js file is not in utf8; convert it here, inside the thread.
1391 char *b = force_utf8(g.buffer, g.length);
1392 if (!b)
1393 b = g.buffer;
1394 else
1395 nzFree(g.buffer);
1396 // don't know why t->value would be anything
1397 nzFree(t->value);
1398 t->value = b;
1399 }
1400 return NULL;
1401 }
1402
httpConnectBack3(void * ptr)1403 void *httpConnectBack3(void *ptr)
1404 {
1405 Tag *t = ptr;
1406 bool rc;
1407 struct i_get g;
1408 char *outgoing_body = 0, *outgoing_headers = 0;
1409 memset(&g, 0, sizeof(g));
1410 g.thisfile = cf->fileName;
1411 g.uriEncoded = true;
1412 g.url = t->href;
1413 g.custom_h = t->innerHTML;
1414 g.headers_p = &outgoing_headers;
1415 g.down_force = 2;
1416 g.tsn = ++tsn;
1417 debugPrint(3, "xhr thread %d", tsn);
1418 rc = httpConnect(&g);
1419 outgoing_body = g.buffer;
1420 t->loadsuccess = rc;
1421 if (!rc)
1422 t->hcode = g.code;
1423 else {
1424 char *a;
1425 int l;
1426 // don't know why t->value would be anything
1427 nzFree(t->value);
1428 a = initString(&l);
1429 if (outgoing_headers == 0)
1430 outgoing_headers = emptyString;
1431 if (outgoing_body == 0)
1432 outgoing_body = emptyString;
1433 stringAndNum(&a, &l, rc);
1434 stringAndString(&a, &l, "\r\n\r\n");
1435 stringAndNum(&a, &l, g.code);
1436 stringAndString(&a, &l, "\r\n\r\n");
1437 stringAndString(&a, &l, outgoing_headers);
1438 stringAndString(&a, &l, outgoing_body);
1439 while (l && isspace(a[l - 1]))
1440 a[--l] = 0;
1441 t->value = a;
1442 }
1443 nzFree(outgoing_headers);
1444 nzFree(outgoing_body);
1445 nzFree(t->innerHTML);
1446 t->innerHTML = 0;
1447 return NULL;
1448 }
1449
1450 // copy text over to the buffer but change < to < etc,
1451 // since this data will be browsed as if it were html.
prepHtmlString(struct i_get * g,const char * q)1452 static void prepHtmlString(struct i_get *g, const char *q)
1453 {
1454 char c;
1455 if (!strpbrk(q, "<>&")) { // no bad characters
1456 stringAndString(&g->buffer, &g->length, q);
1457 return;
1458 }
1459 for (; (c = *q); ++q) {
1460 char *meta = 0;
1461 if (c == '<')
1462 meta = "<";
1463 if (c == '>')
1464 meta = ">";
1465 if (c == '&')
1466 meta = "&";
1467 if (meta)
1468 stringAndString(&g->buffer, &g->length, meta);
1469 else
1470 stringAndChar(&g->buffer, &g->length, c);
1471 }
1472 }
1473
1474 /* Format a line from an ftp directory. */
ftp_ls_line(struct i_get * g,char * line)1475 static void ftp_ls_line(struct i_get *g, char *line)
1476 {
1477 int l = strlen(line);
1478 int j;
1479 if (l && line[l - 1] == '\r')
1480 line[--l] = 0;
1481
1482 /* blank line becomes paragraph break */
1483 if (!l || (memEqualCI(line, "total ", 6) && stringIsNum(line + 6))) {
1484 stringAndString(&g->buffer, &g->length, "<P>\n");
1485 return;
1486 }
1487 stringAndString(&g->buffer, &g->length, "<br>");
1488
1489 for (j = 0; line[j]; ++j)
1490 if (!strchr("-rwxdlsS", line[j]))
1491 break;
1492
1493 if (j == 10 && line[j] == ' ') { /* long list */
1494 int fsize, nlinks;
1495 char user[42], group[42];
1496 char month[8];
1497 int day;
1498 char *q, *t;
1499 sscanf(line + j, " %d %40s %40s %d %3s %d",
1500 &nlinks, user, group, &fsize, month + 1, &day);
1501 q = strchr(line, ':');
1502 if (q) {
1503 for (++q; isdigitByte(*q) || *q == ':'; ++q) ;
1504 while (*q == ' ')
1505 ++q;
1506 } else {
1507 /* old files won't have the time, but instead, they have the year. */
1508 /* bad news for us; no good/easy way to glom onto this one. */
1509 month[0] = month[4] = ' ';
1510 month[5] = 0;
1511 q = strstr(line, month);
1512 if (q) {
1513 q += 8;
1514 while (*q == ' ')
1515 ++q;
1516 while (isdigitByte(*q))
1517 ++q;
1518 while (*q == ' ')
1519 ++q;
1520 }
1521 }
1522
1523 if (q && *q) {
1524 char qc = '"';
1525 if (strchr(q, qc))
1526 qc = '\'';
1527 stringAndString(&g->buffer, &g->length, "<A HREF=x");
1528 g->buffer[g->length - 1] = qc;
1529 t = strstr(q, " -> ");
1530 if (t)
1531 stringAndBytes(&g->buffer, &g->length, q,
1532 t - q);
1533 else
1534 stringAndString(&g->buffer, &g->length, q);
1535 stringAndChar(&g->buffer, &g->length, qc);
1536 stringAndChar(&g->buffer, &g->length, '>');
1537 stringAndString(&g->buffer, &g->length, q);
1538 stringAndString(&g->buffer, &g->length, "</A>");
1539 if (line[0] == 'd')
1540 stringAndChar(&g->buffer, &g->length, '/');
1541 stringAndString(&g->buffer, &g->length, ": ");
1542 stringAndNum(&g->buffer, &g->length, fsize);
1543 stringAndChar(&g->buffer, &g->length, '\n');
1544 return;
1545 }
1546 }
1547
1548 prepHtmlString(g, line);
1549 stringAndChar(&g->buffer, &g->length, '\n');
1550 } /* ftp_ls_line */
1551
1552 /* ftp_listing: convert an FTP-style listing to html. */
1553 /* Repeatedly calls ftp_ls_line to parse each line of the data. */
ftp_listing(struct i_get * g)1554 static void ftp_listing(struct i_get *g)
1555 {
1556 char *s, *t;
1557 char *incomingData = g->buffer;
1558 int incomingLen = g->length;
1559 g->buffer = initString(&g->length);
1560 stringAndString(&g->buffer, &g->length, "<html>\n<body>\n");
1561
1562 if (!incomingLen) {
1563 i_stringAndMessage(&g->buffer, &g->length, MSG_FTPEmptyDir);
1564 } else {
1565
1566 s = incomingData;
1567 while (s < incomingData + incomingLen) {
1568 t = strchr(s, '\n');
1569 if (!t || t >= incomingData + incomingLen)
1570 break; /* should never happen */
1571 *t = 0;
1572 ftp_ls_line(g, s);
1573 s = t + 1;
1574 }
1575 }
1576
1577 stringAndString(&g->buffer, &g->length, "</body></html>\n");
1578 nzFree(incomingData);
1579 } /* ftp_listing */
1580
1581 /* Format a line from a gopher directory. */
gopher_ls_line(struct i_get * g,char * line)1582 static void gopher_ls_line(struct i_get *g, char *line)
1583 {
1584 int port;
1585 char first, *text, *pathname, *host, *s, *plus;
1586 int l = strlen(line);
1587 if (l && line[l - 1] == '\r')
1588 line[--l] = 0;
1589
1590 // first character is the type of line
1591 first = 'i';
1592 if (line[0])
1593 first = *line++;
1594 // . alone ends the listing
1595 if (first == '.')
1596 return;
1597
1598 // cut into pieces by tabs.
1599 pathname = host = 0;
1600 text = line;
1601 s = strchr(line, '\t');
1602 if (s) {
1603 *s++ = 0;
1604 pathname = s;
1605 s = strchr(pathname, '\t');
1606 if (s) {
1607 *s++ = 0;
1608 host = s;
1609 s = strchr(host, '\t');
1610 if (s) {
1611 *s++ = 0;
1612 if (*s) {
1613 // Gopher+ servers add an extra \t+,
1614 // which we need to truncate
1615 plus = strchr(s, '\t');
1616 if (plus)
1617 *plus = 0;
1618 port = atoi(s);
1619 }
1620 }
1621 }
1622 }
1623
1624 while (*text == ' ')
1625 ++text;
1626
1627 // gopher is very much line oriented.
1628 stringAndString(&g->buffer, &g->length, "<br>\n");
1629
1630 // i or 3 is informational, 3 being an error.
1631 if (first == 'i' || first == '3') {
1632 prepHtmlString(g, text);
1633 stringAndChar(&g->buffer, &g->length, '\n');
1634 return;
1635 }
1636 // everything else becomes hyperlink apart from item type 7 which becomes form
1637 if (host) {
1638 char qc = '"';
1639 // I just assume host and path can be quoted with either " or '
1640 if (strchr(host, qc) // should never happen
1641 || strchr(pathname, qc))
1642 qc = '\'';
1643 if (first != '7')
1644 stringAndString(&g->buffer, &g->length, "<a href=x");
1645 else
1646 stringAndString(&g->buffer, &g->length,
1647 "<form method='get' action=x");
1648 g->buffer[g->length - 1] = qc;
1649
1650 if (!strncmp(pathname, "URL:", 4)) {
1651 // Full URL in path so use it unencoded
1652 stringAndString(&g->buffer, &g->length, pathname + 4);
1653 pathname = 0;
1654 } else {
1655 // Just a path
1656 pathname = encodePostData(pathname, "./-_$");
1657 stringAndString(&g->buffer, &g->length, "gopher://");
1658 stringAndString(&g->buffer, &g->length, host);
1659 if (port && port != 70) {
1660 stringAndChar(&g->buffer, &g->length, ':');
1661 stringAndNum(&g->buffer, &g->length, port);
1662 }
1663 // gopher requires us to inject the "first" directive into the path. Wow.
1664 stringAndChar(&g->buffer, &g->length, '/');
1665 stringAndChar(&g->buffer, &g->length, first);
1666 stringAndString(&g->buffer, &g->length, pathname);
1667 }
1668 nzFree(pathname);
1669 stringAndChar(&g->buffer, &g->length, qc);
1670 stringAndChar(&g->buffer, &g->length, '>');
1671 }
1672
1673 s = strchr(text, '(');
1674 if (s && s == text)
1675 s = 0;
1676 if (s)
1677 *s = 0;
1678
1679 prepHtmlString(g, text);
1680 if (host) {
1681 if (first == '7')
1682 stringAndString(&g->buffer, &g->length,
1683 " <input type='text' /> <input type='submit' /></form>");
1684 else
1685 stringAndString(&g->buffer, &g->length, "</a>");
1686 }
1687 if (s) {
1688 *s = '(';
1689 prepHtmlString(g, s);
1690 }
1691 stringAndChar(&g->buffer, &g->length, '\n');
1692 } /* gopher_ls_line */
1693
1694 /* gopher_listing: convert a gopher-style listing to html. */
1695 /* Repeatedly calls gopher_ls_line to parse each line of the data. */
gopher_listing(struct i_get * g)1696 static void gopher_listing(struct i_get *g)
1697 {
1698 char *s, *t;
1699 char *incomingData = g->buffer;
1700 int incomingLen = g->length;
1701 g->buffer = initString(&g->length);
1702 stringAndString(&g->buffer, &g->length, "<html>\n<body>\n");
1703
1704 if (!incomingLen) {
1705 i_stringAndMessage(&g->buffer, &g->length, MSG_GopherEmptyDir);
1706 } else {
1707
1708 s = incomingData;
1709 while (s < incomingData + incomingLen) {
1710 t = strchr(s, '\n');
1711 if (!t || t >= incomingData + incomingLen)
1712 break; /* should never happen */
1713 *t = 0;
1714 gopher_ls_line(g, s);
1715 s = t + 1;
1716 }
1717 }
1718
1719 stringAndString(&g->buffer, &g->length, "</body></html>\n");
1720 nzFree(incomingData);
1721 } /* gopher_listing */
1722
1723 // action: 0 traditional set, 1 print, 2 print and exit
ebcurl_setError(CURLcode curlret,const char * url,int action,const char * curl_error)1724 void ebcurl_setError(CURLcode curlret, const char *url, int action,
1725 const char *curl_error)
1726 {
1727 char prot[MAXPROTLEN], host[MAXHOSTLEN];
1728 void (*fn) (int, ...);
1729
1730 if (!getProtHostURL(url, prot, host)) {
1731 /* this should never happen */
1732 prot[0] = host[0] = 0;
1733 }
1734
1735 fn = (action ? i_printf : setError);
1736
1737 switch (curlret) {
1738 case CURLE_UNSUPPORTED_PROTOCOL:
1739 (*fn) (MSG_WebProtBad, prot);
1740 break;
1741
1742 case CURLE_URL_MALFORMAT:
1743 (*fn) (MSG_BadURL, url);
1744 break;
1745
1746 case CURLE_COULDNT_RESOLVE_HOST:
1747 (*fn) (MSG_IdentifyHost, host);
1748 break;
1749
1750 case CURLE_REMOTE_ACCESS_DENIED:
1751 (*fn) (MSG_RemoteAccessDenied);
1752 break;
1753
1754 case CURLE_TOO_MANY_REDIRECTS:
1755 (*fn) (MSG_RedirectMany);
1756 break;
1757
1758 case CURLE_OPERATION_TIMEDOUT:
1759 (*fn) (MSG_Timeout);
1760 break;
1761
1762 case CURLE_PEER_FAILED_VERIFICATION:
1763 #if LIBCURL_VERSION_NUM < 0x073e00
1764 case CURLE_SSL_CACERT:
1765 #endif
1766 (*fn) (MSG_NoCertify, host);
1767 break;
1768
1769 case CURLE_GOT_NOTHING:
1770 case CURLE_RECV_ERROR:
1771 (*fn) (MSG_WebRead);
1772 break;
1773
1774 case CURLE_SEND_ERROR:
1775 (*fn) (MSG_CurlSendData);
1776 break;
1777
1778 case CURLE_COULDNT_CONNECT:
1779 (*fn) (MSG_WebConnect, host);
1780 break;
1781
1782 case CURLE_FTP_CANT_GET_HOST:
1783 (*fn) (MSG_FTPConnect);
1784 break;
1785
1786 case CURLE_ABORTED_BY_CALLBACK:
1787 #if 0
1788 // this is printed by the callback function
1789 (*fn) (MSG_Interrupted);
1790 #endif
1791 break;
1792
1793 /* These all look like session initiation failures. */
1794 case CURLE_FTP_WEIRD_SERVER_REPLY:
1795 case CURLE_FTP_WEIRD_PASS_REPLY:
1796 case CURLE_FTP_WEIRD_PASV_REPLY:
1797 case CURLE_FTP_WEIRD_227_FORMAT:
1798 case CURLE_FTP_COULDNT_SET_ASCII:
1799 case CURLE_FTP_COULDNT_SET_BINARY:
1800 case CURLE_FTP_PORT_FAILED:
1801 (*fn) (MSG_FTPSession);
1802 break;
1803
1804 case CURLE_FTP_USER_PASSWORD_INCORRECT:
1805 (*fn) (MSG_LogPass);
1806 break;
1807
1808 case CURLE_FTP_COULDNT_RETR_FILE:
1809 (*fn) (MSG_FTPTransfer);
1810 break;
1811
1812 case CURLE_SSL_CONNECT_ERROR:
1813 (*fn) (MSG_SSLConnectError, curl_error);
1814 break;
1815
1816 case CURLE_LOGIN_DENIED:
1817 (*fn) (MSG_LogPass);
1818 break;
1819
1820 default:
1821 (*fn) (MSG_CurlCatchAll, curl_easy_strerror(curlret));
1822 break;
1823 }
1824
1825 if (action)
1826 nl();
1827 if (action == 2)
1828 exit(2);
1829 } /* ebcurl_setError */
1830
1831 /* Like httpConnect, but for ftp */
ftpConnect(struct i_get * g,char * creds_buf)1832 static bool ftpConnect(struct i_get *g, char *creds_buf)
1833 {
1834 CURL *h; // the curl handle for ftp
1835 int protLength; /* length of "ftp://" */
1836 bool transfer_success = false;
1837 bool has_slash, is_scp;
1838 CURLcode curlret = CURLE_OK;
1839 const char *url = g->url;
1840
1841 protLength = strchr(url, ':') - url + 3;
1842 /* scp is somewhat unique among the protocols handled here */
1843 is_scp = memEqualCI(url, "scp", 3);
1844
1845 if (stringEqual(creds_buf, ":") && memEqualCI(url, "ftp", 3))
1846 strcpy(creds_buf, "anonymous:ftp@example.com");
1847
1848 h = http_curl_init(g);
1849 if (!h)
1850 goto ftp_transfer_fail;
1851 curlret = curl_easy_setopt(h, CURLOPT_USERPWD, creds_buf);
1852 if (curlret != CURLE_OK)
1853 goto ftp_transfer_fail;
1854
1855 urlSanitize(g, 0);
1856
1857 /* libcurl appends an implicit slash to URLs like "ftp://foo.com".
1858 * Be explicit, so that edbrowse knows that we have a directory. */
1859 if (!strchr(g->urlcopy + protLength, '/'))
1860 strcpy(g->urlcopy + g->urlcopy_l++, "/");
1861
1862 curlret = setCurlURL(h, g->urlcopy);
1863 if (curlret != CURLE_OK)
1864 goto ftp_transfer_fail;
1865
1866 has_slash = g->urlcopy[g->urlcopy_l - 1] == '/';
1867 /* don't download a directory listing, we want to see that */
1868 /* Fetching a directory will fail in the special case of scp. */
1869 if (!g->down_force)
1870 g->down_state = (has_slash ? 0 : 1);
1871 g->down_length = 0;
1872 g->down_msg = MSG_FTPDownload;
1873 if (is_scp)
1874 g->down_msg = MSG_SCPDownload;
1875
1876 curlret = fetch_internet(g);
1877
1878 if (g->down_state == 5) {
1879 /* user has directed a download of this file in the background. */
1880 /* We spawn a thread to do this, then return, but g could go away */
1881 /* before the child thread has a chance to read its contents. */
1882 struct i_get g0;
1883 pthread_t tid;
1884 nzFree(g->buffer);
1885 g->buffer = NULL;
1886 g->length = 0;
1887 g0 = *g; // structure copy
1888 curl_easy_cleanup(h);
1889 pthread_create(&tid, NULL, httpConnectBack1, (void *)&g0);
1890 // I will assume the thread was created.
1891 // Don't call i_get_free(g); the child thread is using those strings.
1892 return true;
1893 }
1894
1895 if (g->down_state == 3 || g->down_state == -1) {
1896 i_get_free(g, true);
1897 curl_easy_cleanup(h);
1898 return false;
1899 }
1900
1901 if (g->down_state == 4) {
1902 bool r = true;
1903 if (curlret != CURLE_OK) {
1904 r = false;
1905 ebcurl_setError(curlret, g->urlcopy, 1, g->error);
1906 } else {
1907 i_printf(MSG_DownSuccess);
1908 printf(": %s\n", g->down_file2);
1909 }
1910 curl_easy_cleanup(h);
1911 i_get_free(g, true);
1912 return r;
1913 }
1914
1915 if (g->length >= CHUNKSIZE && showProgress == 'd')
1916 nl(); /* We printed dots, so terminate them with newline */
1917
1918 if (g->down_state == 2) {
1919 close(g->down_fd);
1920 setError(MSG_DownSuccess);
1921 i_get_free(g, true);
1922 curl_easy_cleanup(h);
1923 return false;
1924 }
1925
1926 /* Should we run this code on any error condition? */
1927 /* The SSH error pops up under sftp. */
1928 if (curlret == CURLE_FTP_COULDNT_RETR_FILE ||
1929 curlret == CURLE_REMOTE_FILE_NOT_FOUND || curlret == CURLE_SSH) {
1930 if (has_slash | is_scp)
1931 transfer_success = false;
1932 else { /* try appending a slash. */
1933 strcpy(g->urlcopy + g->urlcopy_l++, "/");
1934 g->down_state = 0;
1935 cnzFree(g->down_file);
1936 g->down_file = 0;
1937 curlret = setCurlURL(h, g->urlcopy);
1938 if (curlret != CURLE_OK)
1939 goto ftp_transfer_fail;
1940
1941 curlret = fetch_internet(g);
1942 if (curlret != CURLE_OK)
1943 transfer_success = false;
1944 else {
1945 ftp_listing(g);
1946 transfer_success = true;
1947 }
1948 }
1949 } else if (curlret == CURLE_OK) {
1950 if (has_slash)
1951 ftp_listing(g);
1952 transfer_success = true;
1953 } else
1954 transfer_success = false;
1955
1956 ftp_transfer_fail:
1957 if (h)
1958 curl_easy_cleanup(h);
1959 if (transfer_success == false) {
1960 if (curlret != CURLE_OK)
1961 ebcurl_setError(curlret, g->urlcopy,
1962 (g->foreground ? 0 : 1), g->error);
1963 }
1964 if (transfer_success == true && !stringEqual(url, g->urlcopy))
1965 g->cfn = g->urlcopy;
1966 else
1967 nzFree(g->urlcopy);
1968 g->urlcopy = 0;
1969
1970 i_get_free(g, !transfer_success);
1971
1972 return transfer_success;
1973 } /* ftpConnect */
1974
1975 /* Like httpConnect, but for gopher */
gopherConnect(struct i_get * g)1976 static bool gopherConnect(struct i_get *g)
1977 {
1978 CURL *h; // the curl handle for gopher
1979 int protLength; /* length of "gopher://" */
1980 bool transfer_success = false;
1981 bool has_slash;
1982 char first = 0;
1983 char *s;
1984 CURLcode curlret = CURLE_OK;
1985 const char *url = g->url;
1986
1987 protLength = strchr(url, ':') - url + 3;
1988 h = http_curl_init(g);
1989 if (!h)
1990 goto gopher_transfer_fail;
1991 urlSanitize(g, 0);
1992
1993 /* libcurl appends an implicit slash to URLs like "gopher://foo.com".
1994 * Be explicit, so that edbrowse knows if we have a directory. */
1995 if (!strchr(g->urlcopy + protLength, '/'))
1996 strcpy(g->urlcopy + g->urlcopy_l, "/");
1997 curlret = setCurlURL(h, g->urlcopy);
1998 if (curlret != CURLE_OK)
1999 goto gopher_transfer_fail;
2000
2001 has_slash = g->urlcopy[strlen(g->urlcopy) - 1] == '/';
2002 /* don't download a directory listing, we want to see that */
2003 g->down_state = (has_slash ? 0 : 1);
2004 g->down_length = 0;
2005 g->down_msg = MSG_GopherDownload;
2006 // That's the default, let the leading character override
2007 s = strchr(g->urlcopy + protLength, '/');
2008 if (s && (first = s[1])) {
2009 // almost every file type downloads.
2010 g->down_state = 1;
2011 // 0 is tricky because "05" and "09" can mean binary
2012 // in doubt, treat as integer and skip leading 0s
2013 while (first == '0' && isdigit(s[2])) {
2014 s++;
2015 first = s[1];
2016 }
2017 if (strchr("017h", first))
2018 g->down_state = 0;
2019 if (first == '1' || first == '7')
2020 has_slash = true;
2021 }
2022
2023 if (g->down_force)
2024 g->down_state = 4;
2025
2026 curlret = fetch_internet(g);
2027
2028 if (g->down_state == 5) {
2029 /* user has directed a download of this file in the background. */
2030 /* We spawn a thread to do this, then return, but g could go away */
2031 /* before the child thread has a chance to read its contents. */
2032 struct i_get g0;
2033 pthread_t tid;
2034 nzFree(g->buffer);
2035 g->buffer = NULL;
2036 g->length = 0;
2037 g0 = *g; // structure copy
2038 curl_easy_cleanup(h);
2039 pthread_create(&tid, NULL, httpConnectBack1, (void *)&g0);
2040 // I will assume the thread was created.
2041 // Don't call i_get_free(g); the child thread is using those strings.
2042 return true;
2043 }
2044
2045 if (g->down_state == 3 || g->down_state == -1) {
2046 i_get_free(g, true);
2047 curl_easy_cleanup(h);
2048 return false;
2049 }
2050
2051 if (g->down_state == 4) {
2052 bool r = true;
2053 if (curlret != CURLE_OK) {
2054 r = false;
2055 ebcurl_setError(curlret, g->urlcopy, 1, g->error);
2056 } else {
2057 i_printf(MSG_DownSuccess);
2058 printf(": %s\n", g->down_file2);
2059 }
2060 curl_easy_cleanup(h);
2061 i_get_free(g, true);
2062 return r;
2063 }
2064
2065 if (g->length >= CHUNKSIZE && showProgress == 'd')
2066 nl(); /* We printed dots, so terminate them with newline */
2067
2068 if (g->down_state == 2) {
2069 close(g->down_fd);
2070 setError(MSG_DownSuccess);
2071 i_get_free(g, true);
2072 curl_easy_cleanup(h);
2073 return false;
2074 }
2075
2076 if (curlret == CURLE_OK) {
2077 if (has_slash)
2078 gopher_listing(g);
2079 transfer_success = true;
2080 } else
2081 transfer_success = false;
2082
2083 gopher_transfer_fail:
2084 if (h)
2085 curl_easy_cleanup(h);
2086 if (!transfer_success) {
2087 if (curlret != CURLE_OK)
2088 ebcurl_setError(curlret, g->urlcopy,
2089 (g->foreground ? 0 : 1), g->error);
2090 i_get_free(g, true);
2091 return false;
2092 }
2093
2094 if (!stringEqual(url, g->urlcopy))
2095 g->cfn = g->urlcopy;
2096 g->urlcopy = 0;
2097
2098 if (first == '0') {
2099 // it's a text file, neeed to undos.
2100 // The curl callback function always makes sure there is an extra byte at the end.
2101 int i, j;
2102 g->buffer[g->length] = 0;
2103 for (i = j = 0; i < g->length; ++i) {
2104 if (g->buffer[i] == '\r' && g->buffer[i + 1] == '\n')
2105 continue;
2106 g->buffer[j++] = g->buffer[i];
2107 }
2108 g->buffer[j] = 0;
2109 g->length = j;
2110 }
2111
2112 return true;
2113 } /* gopherConnect */
2114
2115 /* If the user has asked for locale-specific responses, then build an
2116 * appropriate Accept-Language: header. */
setHTTPLanguage(const char * lang)2117 void setHTTPLanguage(const char *lang)
2118 {
2119 int httpLanguage_l;
2120 char *s;
2121
2122 nzFree(httpLanguage);
2123 httpLanguage = NULL;
2124 if (!lang)
2125 return;
2126
2127 httpLanguage = initString(&httpLanguage_l);
2128 stringAndString(&httpLanguage, &httpLanguage_l, "Accept-Language: ");
2129 stringAndString(&httpLanguage, &httpLanguage_l, lang);
2130
2131 // Transliterate _ to -, some websites require this.
2132 // en-us not en_us
2133 for (s = httpLanguage; *s; ++s)
2134 if (*s == '_')
2135 *s = '-';
2136 } /* setHTTPLanguage */
2137
2138 /* Set the FD_CLOEXEC flag on a socket newly-created by libcurl.
2139 * Let's not leak libcurl's sockets to child processes created by the
2140 * ! (escape-to-shell) command.
2141 * This is a callback. It returns 0 on success, 1 on failure, per the
2142 * libcurl docs.
2143 */
2144 static int
my_curl_safeSocket(void * clientp,curl_socket_t socketfd,curlsocktype purpose)2145 my_curl_safeSocket(void *clientp, curl_socket_t socketfd, curlsocktype purpose)
2146 {
2147 #ifdef _MSC_VER
2148 return 0;
2149 #else // !_MSC_VER for success = fcntl(socketfd, F_SETFD, FD_CLOEXEC);
2150 int success = fcntl(socketfd, F_SETFD, FD_CLOEXEC);
2151 if (success == -1)
2152 success = 1;
2153 else
2154 success = 0;
2155 return success;
2156 #endif // _MSC_VER y/n
2157 }
2158
http_curl_init(struct i_get * g)2159 static CURL *http_curl_init(struct i_get *g)
2160 {
2161 CURLcode curl_init_status = CURLE_OK;
2162 int curl_auth;
2163 CURL *h = curl_easy_init();
2164 if (h == NULL)
2165 goto libcurl_init_fail;
2166 g->h = h;
2167 curl_init_status =
2168 curl_easy_setopt(h, CURLOPT_SHARE, global_share_handle);
2169 if (curl_init_status != CURLE_OK)
2170 goto libcurl_init_fail;
2171 curl_init_status = curl_easy_setopt(h, CURLOPT_COOKIEFILE, "");
2172 if (curl_init_status != CURLE_OK)
2173 goto libcurl_init_fail;
2174 /* Lots of these setopt calls shouldn't fail. They just diddle a struct. */
2175 curl_easy_setopt(h, CURLOPT_SOCKOPTFUNCTION, my_curl_safeSocket);
2176 curl_easy_setopt(h, CURLOPT_WRITEFUNCTION, eb_curl_callback);
2177 curl_easy_setopt(h, CURLOPT_WRITEDATA, g);
2178 curl_easy_setopt(h, CURLOPT_HEADERFUNCTION, curl_header_callback);
2179 curl_easy_setopt(h, CURLOPT_HEADERDATA, g);
2180 if (debugLevel >= 4)
2181 curl_easy_setopt(h, CURLOPT_VERBOSE, 1);
2182 curl_easy_setopt(h, CURLOPT_DEBUGFUNCTION, ebcurl_debug_handler);
2183 curl_easy_setopt(h, CURLOPT_DEBUGDATA, g);
2184 curl_easy_setopt(h, CURLOPT_NOPROGRESS, 0);
2185 curl_easy_setopt(h, CURLOPT_PROGRESSFUNCTION, curl_progress);
2186 curl_easy_setopt(h, CURLOPT_PROGRESSDATA, g);
2187 curl_easy_setopt(h, CURLOPT_CONNECTTIMEOUT, webTimeout);
2188 curl_easy_setopt(h, CURLOPT_USERAGENT, currentAgent);
2189 curl_easy_setopt(h, CURLOPT_SSLVERSION, CURL_SSLVERSION_DEFAULT);
2190 /* We're doing this manually for now.
2191 curl_easy_setopt(h, CURLOPT_FOLLOWLOCATION, allowRedirection);
2192 */
2193 curl_easy_setopt(h, CURLOPT_AUTOREFERER, sendReferrer);
2194 if (ftpActive)
2195 curl_easy_setopt(h, CURLOPT_FTPPORT, "-");
2196 else
2197 curl_easy_setopt(h, CURLOPT_FTPPORT, NULL);
2198 /* See "man curl_easy_setopt.3" for info on CURLOPT_FTPPORT. Supplying
2199 * "-" makes libcurl select the best IP address for active ftp. */
2200
2201 /*
2202 * tell libcurl to pick the strongest method from basic, digest and ntlm authentication
2203 * don't use any auth method by default as it will prefer Negotiate to NTLM,
2204 * and it looks like in most cases microsoft IIS says it supports both and libcurl
2205 * doesn't fall back to NTLM when it discovers that Negotiate isn't set up on a system
2206 */
2207 curl_auth = CURLAUTH_BASIC | CURLAUTH_DIGEST | CURLAUTH_NTLM;
2208 if (curlAuthNegotiate)
2209 #ifdef CURLAUTH_NEGOTIATE
2210 curl_auth |= CURLAUTH_NEGOTIATE;
2211 #else
2212 curl_auth |= CURLAUTH_GSSNEGOTIATE; /* libcurl < 7.38 */
2213 #endif
2214 curl_easy_setopt(h, CURLOPT_HTTPAUTH, curl_auth);
2215
2216 #if 0
2217 // in case you run into DH key too small
2218 // This may not be portable, e.g. curl compiled with gnutls;
2219 // though it is usually compiled with openssl.
2220 // Not sure of the best solution here.
2221 curl_easy_setopt(h, CURLOPT_SSL_CIPHER_LIST, "DEFAULT@SECLEVEL=1");
2222 #endif
2223
2224 /* The next few setopt calls could allocate or perform file I/O. */
2225 g->error[0] = '\0';
2226 curl_init_status = curl_easy_setopt(h, CURLOPT_ERRORBUFFER, g->error);
2227 if (curl_init_status != CURLE_OK)
2228 goto libcurl_init_fail;
2229 curl_init_status = curl_easy_setopt(h, CURLOPT_ENCODING, "");
2230 if (curl_init_status != CURLE_OK)
2231 goto libcurl_init_fail;
2232
2233 return h;
2234
2235 libcurl_init_fail:
2236 i_printf(MSG_LibcurlNoInit);
2237 if (h)
2238 curl_easy_cleanup(h);
2239 return 0;
2240 } /* http_curl_init */
2241
/*
 * There's no easy way to get at the server's response message from libcurl.
 * So here are some tables and a function for translating response codes to
 * messages.
 *
 * Each table is indexed by the last two digits of the status code,
 * so entry k of response_codes_Nxx is the message for code N*100 + k.
 * Every entry must therefore be followed by a comma; adjacent string
 * literals would silently merge and shift the rest of the table.
 */

static const char *response_codes_1xx[] = {
	"Continue",
	"Switching Protocols"
};

static const char *response_codes_2xx[] = {
	"OK",
	"Created",
	"Accepted",
	"Non-Authoritative Information",
	"No Content",
	"Reset Content",
	"Partial Content"
};

static const char *response_codes_3xx[] = {
	"Multiple Choices",
	"Moved Permanently",
	"Found",
	"See Other",
	"Not Modified",
	"Use Proxy",
	"(Unused)",
	"Temporary Redirect"
};

static const char *response_codes_4xx[] = {
	"Bad Request",
	"Unauthorized",
	"Payment Required",
	"Forbidden",
	"Not Found",
	"Method Not Allowed",
	"Not Acceptable",
	"Proxy Authentication Required",
	"Request Timeout",
	"Conflict",
	"Gone",
	"Length Required",
	"Precondition Failed",
	"Request Entity Too Large",
	"Request-URI Too Long",
	"Unsupported Media Type",
	"Requested Range Not Satisfiable",
	"Expectation Failed"
};

static const char *response_codes_5xx[] = {
	"Internal Server Error",
	"Not Implemented",
	"Bad Gateway",
	"Service Unavailable",
	"Gateway Timeout",
	"HTTP Version Not Supported"
};

static const char *unknown_http_response =
    "Unknown response when accessing webpage.";

/* Number of messages in each table, indexed by the hundreds digit. */
static int max_codes[] = {
	0,
	sizeof(response_codes_1xx) / sizeof(char *),
	sizeof(response_codes_2xx) / sizeof(char *),
	sizeof(response_codes_3xx) / sizeof(char *),
	sizeof(response_codes_4xx) / sizeof(char *),
	sizeof(response_codes_5xx) / sizeof(char *)
};

/* The tables themselves, indexed by the hundreds digit; slot 0 is unused. */
static const char **responses[] = {
	NULL, response_codes_1xx, response_codes_2xx, response_codes_3xx,
	response_codes_4xx, response_codes_5xx
};
2319
message_for_response_code(int code)2320 static const char *message_for_response_code(int code)
2321 {
2322 const char *message = NULL;
2323 if (code < 100 || code > 599)
2324 message = unknown_http_response;
2325 else {
2326 int primary = code / 100; /* Yields int in interval [1,6] */
2327 int subcode = code % 100;
2328 if (subcode >= max_codes[primary])
2329 message = unknown_http_response;
2330 else
2331 message = responses[primary][subcode];
2332 }
2333 return message;
2334 } /* message_for_response_code */
2335
2336 /*
2337 * Function: prompt_and_read
2338 * Arguments:
2339 ** prompt: prompt that user should see.
2340 ** buffer: buffer into which the data should be stored.
2341 ** max_length: maximum allowable length of input.
2342 ** error_msg: message to display if input exceeds maximum length.
2343 ** hide_echo: whether to disable terminal echo (sensitive input)
2344 * Note: prompt and error_message should be message constants from messages.h.
2345 * Return value: none. buffer contains input on return. */
2346
2347 /* We need to read two things from the user while authenticating: a username
2348 * and a password. Here, the task of prompting and reading is encapsulated
2349 * in a function, and we call that function twice.
2350 * After the call, the buffer contains the user's input, without a newline.
2351 * The return value is the length of the string in buffer. */
2352 int
prompt_and_read(int prompt,char * buffer,int buffer_length,int error_message,bool hide_echo)2353 prompt_and_read(int prompt, char *buffer, int buffer_length, int error_message,
2354 bool hide_echo)
2355 {
2356 bool reading = true;
2357 int n = 0;
2358
2359 while (reading) {
2360 char *s;
2361 if (hide_echo)
2362 ttySetEcho(false);
2363 i_printf(prompt);
2364 fflush(stdout);
2365 s = fgets(buffer, buffer_length, stdin);
2366 if (hide_echo)
2367 ttySetEcho(true);
2368 if (!s)
2369 ebClose(0);
2370 n = strlen(buffer);
2371 if (n && buffer[n - 1] == '\n')
2372 buffer[--n] = '\0'; /* replace newline with NUL */
2373 if (n >= (MAXUSERPASS - 1)) {
2374 i_printf(error_message, MAXUSERPASS - 2);
2375 nl();
2376 } else
2377 reading = false;
2378 }
2379 return n;
2380 } /* prompt_and_read */
2381
2382 /*
2383 * Function: read_credentials
2384 * Arguments:
2385 ** buffer: buffer in which to place username and password.
2386 * Return value: true if credentials were read, false otherwise.
2387
2388 * Behavior: read a username and password from the user. Store them in
2389 * the buffer, separated by a colon.
2390 * This function returns false in two situations.
2391 * 1. The program is not being run interactively. The error message is
2392 * set to indicate this.
2393 * 2. The user aborted the login process by typing x"x".
2394 * Again, the error message reflects this condition.
2395 */
2396
read_credentials(char * buffer)2397 static bool read_credentials(char *buffer)
2398 {
2399 int input_length = 0;
2400 bool got_creds = false;
2401
2402 if (!isInteractive)
2403 setError(MSG_Authorize2);
2404 else {
2405 i_puts(MSG_WebAuthorize);
2406 input_length =
2407 prompt_and_read(MSG_UserName, buffer, MAXUSERPASS,
2408 MSG_UserNameLong, false);
2409 if (!stringEqual(buffer, "x")) {
2410 char *password_ptr = buffer + input_length + 1;
2411 prompt_and_read(MSG_Password, password_ptr, MAXUSERPASS,
2412 MSG_PasswordLong, true);
2413 if (!stringEqual(password_ptr, "x")) {
2414 got_creds = true;
2415 *(password_ptr - 1) = ':'; /* separate user and password with colon. */
2416 }
2417 }
2418
2419 if (!got_creds)
2420 setError(MSG_LoginAbort);
2421 }
2422
2423 return got_creds;
2424 } /* read_credentials */
2425
2426 /* Callback used by libcurl.
2427 * Gather all the http headers into one long string. */
2428 static size_t
curl_header_callback(char * header_line,size_t size,size_t nmemb,struct i_get * g)2429 curl_header_callback(char *header_line, size_t size, size_t nmemb,
2430 struct i_get *g)
2431 {
2432 const struct MIMETYPE *mt;
2433 size_t bytes_in_line = size * nmemb;
2434 stringAndBytes(&g->headers, &g->headers_len,
2435 header_line, bytes_in_line);
2436
2437 scan_http_headers(g, true);
2438 mt = cf->mt;
2439
2440 // a from-the-web mime type causes a download interrupt
2441 if (g->pg_ok && mt && !(mt->down_url | mt->from_file) &&
2442 !(mt->outtype && g->playonly)) {
2443 g->down_state = 6;
2444 return -1;
2445 }
2446
2447 if (g->down_ok && g->down_state == 0 &&
2448 !(mt && g->pg_ok && mt->down_url && !mt->from_file) &&
2449 g->content[0] && !memEqualCI(g->content, "text/", 5) &&
2450 !memEqualCI(g->content, "application/xhtml+xml", 21)) {
2451 g->down_state = 1;
2452 g->down_msg = MSG_Down;
2453 debugPrint(3, "potential download based on type %s",
2454 g->content);
2455 }
2456
2457 return bytes_in_line;
2458 } /* curl_header_callback */
2459
/*
 * Write the first size bytes of text to destination, leaving out every
 * carriage return.  text need not be NUL terminated.
 * The bytes between carriage returns are written a run at a time.
 */
static void
prettify_network_text(const char *text, size_t size, FILE * destination)
{
	const char *p, *end;

	if (size == 0)
		return;

	p = text;
	end = text + size;
	while (p < end) {
		const char *cr = memchr(p, '\r', (size_t)(end - p));
		size_t run = (size_t)((cr ? cr : end) - p);
		if (run)
			fwrite(p, 1, run, destination);
		if (!cr)
			break;
		p = cr + 1;	/* step over the carriage return */
	}
}				/* prettify_network_text */
2470
2471 /* Print incoming and outgoing headers.
2472 * Incoming headers are prefixed with curl<, and outgoing headers are
2473 * prefixed with curl>
2474 * We may support more of the curl_infotype values soon. */
2475
2476 int
ebcurl_debug_handler(CURL * handle,curl_infotype info_desc,char * data,size_t size,struct i_get * g)2477 ebcurl_debug_handler(CURL * handle, curl_infotype info_desc, char *data,
2478 size_t size, struct i_get *g)
2479 {
2480 FILE *f = debugFile ? debugFile : stdout;
2481
2482 // There's a special case where this function is used
2483 // by the imap client to see if the server is move capable.
2484 if (ismc & isimap && info_desc == CURLINFO_HEADER_IN &&
2485 size > 17 && !strncmp(data, "* CAPABILITY IMAP", 17)) {
2486 char *s;
2487 // data may not be null terminated; can't use strstr
2488 for (s = data; s < data + size - 6; ++s)
2489 if (!strncmp(s, " MOVE", 5) && isspace(s[5])) {
2490 g->move_capable = true;
2491 break;
2492 }
2493 }
2494 if (debugLevel < 4)
2495 return 0;
2496
2497 if (info_desc == CURLINFO_HEADER_OUT) {
2498 fprintf(f, "curl>\n");
2499 prettify_network_text(data, size, f);
2500 } else if (info_desc == CURLINFO_HEADER_IN) {
2501 if (!g->last_curlin)
2502 fprintf(f, "curl<\n");
2503 prettify_network_text(data, size, f);
2504 } else; /* Do nothing. We don't care about this piece of data. */
2505
2506 if (info_desc == CURLINFO_HEADER_IN)
2507 g->last_curlin = true;
2508 else if (info_desc)
2509 g->last_curlin = false;
2510
2511 return 0;
2512 } /* ebcurl_debug_handler */
2513
2514 // At this point, down_state = 1
2515 // Only runs from the foreground thread, does not have to be threadsafe.
setup_download(struct i_get * g)2516 static void setup_download(struct i_get *g)
2517 {
2518 const char *filepart;
2519 const char *answer;
2520 char *fp2, *s;
2521
2522 /* if not run from a terminal then just return. */
2523 if (!isInteractive) {
2524 g->down_state = 0;
2525 return;
2526 }
2527
2528 if (g->cdfn)
2529 filepart = g->cdfn;
2530 else
2531 filepart = getFileURL(g->urlcopy, true);
2532 // transliterate to get rid of /
2533 fp2 = cloneString(filepart);
2534 for (s = fp2; *s; ++s)
2535 if (*s == '/' || *s == '\\')
2536 *s = '_';
2537
2538 top:
2539 answer = getFileName(g->down_msg, fp2, false, true);
2540 /* space for a filename means read into memory */
2541 if (stringEqual(answer, " ")) {
2542 g->down_state = 0; /* in memory download */
2543 nzFree(fp2);
2544 return;
2545 }
2546
2547 if (stringEqual(answer, "x") || stringEqual(answer, "X")) {
2548 g->down_state = -1;
2549 setError(MSG_DownAbort);
2550 nzFree(fp2);
2551 return;
2552 }
2553
2554 if (!envFileDown(answer, &answer)) {
2555 showError();
2556 goto top;
2557 }
2558
2559 g->down_fd = creat(answer, MODE_rw);
2560 if (g->down_fd < 0) {
2561 i_printf(MSG_NoCreate2, answer);
2562 nl();
2563 goto top;
2564 }
2565
2566 nzFree(fp2);
2567
2568 // we will free down_file, but not down_file2
2569 g->down_file = g->down_file2 = cloneString(answer);
2570 if (downDir) {
2571 int l = strlen(downDir);
2572 if (!strncmp(g->down_file2, downDir, l)) {
2573 g->down_file2 += l;
2574 if (g->down_file2[0] == '/')
2575 ++g->down_file2;
2576 }
2577 }
2578
2579 g->down_state = (down_bg ? 5 : 2);
2580 } /* setup_download */
2581
2582 /* show background jobs and return the number of jobs pending */
2583 /* if iponly is true then just show in progress */
bg_jobs(bool iponly)2584 int bg_jobs(bool iponly)
2585 {
2586 bool present = false, part;
2587 int numback = 0;
2588 struct BG_JOB *j;
2589
2590 /* three passes */
2591 /* in progress */
2592 part = false;
2593 foreach(j, down_jobs) {
2594 if (j->state != 4)
2595 continue;
2596 ++numback;
2597 if (!part) {
2598 i_printf(MSG_InProgress);
2599 puts(" {");
2600 part = present = true;
2601 }
2602 printf("%s", j->file + j->file2);
2603 if (j->fsize)
2604 printf(" %d/%zu",
2605 (int)(fileSizeByName(j->file) / CHUNKSIZE),
2606 j->fsize);
2607 nl();
2608 }
2609 if (part)
2610 puts("}");
2611
2612 if (iponly)
2613 return numback;
2614
2615 /* complete */
2616 part = false;
2617 foreach(j, down_jobs) {
2618 if (j->state != 0)
2619 continue;
2620 if (!part) {
2621 i_printf(MSG_Complete);
2622 puts(" {");
2623 part = present = true;
2624 }
2625 puts(j->file + j->file2);
2626 }
2627 if (part)
2628 puts("}");
2629
2630 /* failed */
2631 part = false;
2632 foreach(j, down_jobs) {
2633 if (j->state != -1)
2634 continue;
2635 if (!part) {
2636 i_printf(MSG_Failed);
2637 puts(" {");
2638 part = present = true;
2639 }
2640 puts(j->file + j->file2);
2641 }
2642 if (part)
2643 puts("}");
2644
2645 if (!present)
2646 i_puts(MSG_Empty);
2647
2648 return numback;
2649 }
2650
setCurlURL(CURL * h,const char * url)2651 CURLcode setCurlURL(CURL * h, const char *url)
2652 {
2653 unsigned long verify = mustVerifyHost(url);
2654 const char *proxy = findProxyForURL(url);
2655 const char *agent = findAgentForURL(url);
2656 if (!proxy)
2657 proxy = "";
2658 else
2659 debugPrint(4, "proxy %s", proxy);
2660 curl_easy_setopt(h, CURLOPT_PROXY, proxy);
2661 if (agent) {
2662 debugPrint(4, "agent %s", agent);
2663 curl_easy_setopt(h, CURLOPT_USERAGENT, agent);
2664 }
2665 curl_easy_setopt(h, CURLOPT_SSL_VERIFYPEER, verify);
2666 curl_easy_setopt(h, CURLOPT_SSL_VERIFYHOST, (verify ? 2 : 0));
2667 // certificate file is per handle, not global, so must be set here.
2668 // cookie file is however on the global handle, go figure.
2669 if (sslCerts)
2670 curl_easy_setopt(h, CURLOPT_CAINFO, sslCerts);
2671 return curl_easy_setopt(h, CURLOPT_URL, url);
2672 } /* setCurlURL */
2673
2674 /* expand a frame inline.
2675 * Pass a range of lines; you can expand all the frames in one go.
2676 * Return false if there is a problem fetching a web page,
2677 * or if none of the lines are frames. */
2678 static int frameContractLine(int lineNumber);
2679 static const char *stringInBufLine(const char *s, const char *t);
frameExpand(bool expand,int ln1,int ln2)2680 bool frameExpand(bool expand, int ln1, int ln2)
2681 {
2682 int ln; /* line number */
2683 int problem = 0, p;
2684 bool something_worked = false;
2685
2686 for (ln = ln1; ln <= ln2; ++ln) {
2687 if (expand)
2688 p = frameExpandLine(ln, NULL);
2689 else
2690 p = frameContractLine(ln);
2691 if (p > problem)
2692 problem = p;
2693 if (p == 0)
2694 something_worked = true;
2695 }
2696
2697 if (something_worked && problem < 3)
2698 problem = 0;
2699 if (problem == 1)
2700 setError(expand ? MSG_NoFrame1 : MSG_NoFrame2);
2701 if (problem == 2)
2702 setError(MSG_FrameNoURL);
2703 return (problem == 0);
2704 } /* frameExpand */
2705
2706 /* Problems: 0, frame expanded successfully.
2707 1 line is not a frame.
2708 2 frame doesn't have a valid url.
2709 3 Problem fetching the rul or rendering the page. */
frameExpandLine(int ln,jsobjtype fo)2710 int frameExpandLine(int ln, jsobjtype fo)
2711 {
2712 pst line;
2713 int tagno, start;
2714 const char *s;
2715 char *a;
2716 char *jssrc = 0;
2717 Tag *t;
2718 Frame *save_cf, *new_cf, *last_f;
2719 uchar save_local;
2720 Tag *cdt; // contentDocument tag
2721
2722 if (fo) {
2723 t = tagFromJavaVar(fo);
2724 if (!t)
2725 return 1;
2726 } else {
2727 line = fetchLine(ln, -1);
2728 s = stringInBufLine((char *)line, "Frame ");
2729 if (!s)
2730 return 1;
2731 if ((s = strchr(s, InternalCodeChar)) == NULL)
2732 return 2;
2733 tagno = strtol(s + 1, (char **)&s, 10);
2734 if (tagno < 0 || tagno >= cw->numTags || *s != '{')
2735 return 2;
2736 t = tagList[tagno];
2737 }
2738 if (t->action != TAGACT_FRAME)
2739 return 1;
2740
2741 /* the easy case is if it's already been expanded before, we just unhide it. */
2742 if (t->f1) {
2743 if (!fo)
2744 t->contracted = false;
2745 return 0;
2746 }
2747 // Check with js first, in case it changed.
2748 if (t->jv && (a = get_property_url(t->f0, t->jv, false)) && *a) {
2749 nzFree(t->href);
2750 t->href = a;
2751 }
2752 s = t->href;
2753
2754 // javascript in the src, what is this for?
2755 if (s && !strncmp(s, "javascript:", 11)) {
2756 jssrc = (char *)s;
2757 s = 0;
2758 }
2759
2760 if (!s) {
2761 // No source. If this is your request then return an error.
2762 // But if we're dipping into the objects then it needs to expand
2763 // into a separate window, a separate js space, with an empty body.
2764 if (!fo && !jssrc)
2765 return 2;
2766 // After expansion we need to be able to expand it,
2767 // because there's something there, well maybe.
2768 t->href = cloneString("#");
2769 // jssrc is the old href and we are responsible for it
2770 }
2771
2772 save_cf = cf = t->f0;
2773 /* have to push a new frame before we read the web page */
2774 for (last_f = &(cw->f0); last_f->next; last_f = last_f->next) ;
2775 last_f->next = cf = allocZeroMem(sizeof(Frame));
2776 cf->owner = cw;
2777 cf->frametag = t;
2778 cf->gsn = ++gfsn;
2779 debugPrint(2, "fetch frame %s",
2780 (s ? s : (jssrc ? "javascript" : "empty")));
2781
2782 if (s) {
2783 bool rc = readFileArgv(s, (fo ? 2 : 1));
2784 if (!rc) {
2785 /* serverData was never set, or was freed do to some other error. */
2786 /* We just need to pop the frame and return. */
2787 fileSize = -1; /* don't print 0 */
2788 nzFree(cf->fileName);
2789 free(cf);
2790 last_f->next = 0;
2791 cf = save_cf;
2792 return 3;
2793 }
2794
2795 /*********************************************************************
2796 readFile could return success and yet serverData is null.
2797 This happens if httpConnect did something other than fetching data,
2798 like playing a stream. Does that happen, even in a frame?
2799 It can, if the frame is a youtube video, which is not unusual at all.
2800 So check for serverData null here. Once again we pop the frame.
2801 *********************************************************************/
2802
2803 if (serverData == NULL) {
2804 nzFree(cf->fileName);
2805 free(cf);
2806 last_f->next = 0;
2807 cf = save_cf;
2808 fileSize = -1;
2809 return 0;
2810 }
2811 } else {
2812 serverData = cloneString("<body></body>");
2813 serverDataLen = strlen(serverData);
2814 }
2815
2816 new_cf = cf;
2817 if (changeFileName) {
2818 nzFree(cf->fileName);
2819 cf->fileName = changeFileName;
2820 cf->uriEncoded = true;
2821 changeFileName = 0;
2822 } else {
2823 cf->fileName = cloneString(s);
2824 }
2825
2826 /* don't print the size of what we just fetched */
2827 fileSize = -1;
2828
2829 /* If we got some data it has to be html.
2830 * I should check for that, something like htmlTest in html.c,
2831 * but I'm too lazy to do that right now, so I'll just assume it's good.
2832 * Also, we have verified content-type = text/html, so that's pretty good. */
2833
2834 cf->hbase = cloneString(cf->fileName);
2835 save_local = browseLocal;
2836 browseLocal = !isURL(cf->fileName);
2837 prepareForBrowse(serverData, serverDataLen);
2838 if (javaOK(cf->fileName))
2839 createJavaContext();
2840 nzFree(newlocation); /* should already be 0 */
2841 newlocation = 0;
2842
2843 start = cw->numTags;
2844 /* call the tidy parser to build the html nodes */
2845 html2nodes(serverData, true);
2846 nzFree(serverData); /* don't need it any more */
2847 serverData = 0;
2848 htmlGenerated = false;
2849 // in the edbrowse world, the only child of the frame tag
2850 // is the contentDocument tag.
2851 cdt = t->firstchild;
2852 // the placeholder document node will soon be orphaned.
2853 delete_property(cdt->f0, cdt->jv, "parentNode");
2854 htmlNodesIntoTree(start, cdt);
2855 cdt->step = 0;
2856 prerender(0);
2857
2858 /*********************************************************************
2859 At this point cdt->step is 1; the html tree is built, but not decorated.
2860 Well I put the object on cdt manually. Besides, we don't want to set up
2861 the fake cdt object and the getter that auto-expands the frame,
2862 we did that before and now it's being expanded. So bump step up to 2.
2863 *********************************************************************/
2864 cdt->step = 2;
2865
2866 if (cf->docobj) {
2867 jsobjtype topobj;
2868 decorate(0);
2869 set_basehref(cf->hbase);
2870 // parent points to the containing frame.
2871 set_property_object(cf, cf->winobj, "parent", save_cf->winobj);
2872 // And top points to the top.
2873 cf = save_cf;
2874 topobj = get_property_object(cf, cf->winobj, "top");
2875 cf = new_cf;
2876 set_property_object(cf, cf->winobj, "top", topobj);
2877 set_property_object(cf, cf->winobj, "frameElement", t->jv);
2878 run_function_bool(cf, cf->winobj, "eb$qs$start");
2879 if (jssrc) {
2880 jsRunScript(cf, cf->winobj, jssrc, "frame.src", 1);
2881 }
2882 runScriptsPending(true);
2883 runOnload();
2884 runScriptsPending(false);
2885 set_property_string(cf, cf->docobj, "readyState", "complete");
2886 run_event_bool(cf, cf->docobj, "document", "onreadystatechange");
2887 runScriptsPending(false);
2888 rebuildSelectors();
2889 }
2890 nzFree(jssrc);
2891
2892 if (cf->fileName) {
2893 int j = strlen(cf->fileName);
2894 cf->fileName = reallocMem(cf->fileName, j + 8);
2895 strcat(cf->fileName, ".browse");
2896 }
2897
2898 t->f1 = cf;
2899 cf = save_cf;
2900 browseLocal = save_local;
2901 if (fo)
2902 t->contracted = true;
2903 if (new_cf->docobj) {
2904 jsobjtype cdo; // contentDocument object
2905 jsobjtype cwo; // contentWindow object
2906 jsobjtype cna; // childNodes array
2907 cdo = new_cf->docobj;
2908 disconnectTagObject(cdt);
2909 connectTagObject(cdt, cdo);
2910 cdt->style = 0;
2911 // Should I switch this tag into the new frame? I don't really know.
2912 cdt->f0 = new_cf;
2913 set_property_object(new_cf, t->jv, "content$Document", cdo);
2914 cna = get_property_object(t->f0, t->jv, "childNodes");
2915 set_array_element_object(t->f0, cna, 0, cdo);
2916 // Should we do this? For consistency I guess yes.
2917 set_property_object(t->f0, cdo, "parentNode", t->jv);
2918 cwo = new_cf->winobj;
2919 set_property_object(new_cf, t->jv, "content$Window", cwo);
2920 // run the frame onload function if it is there.
2921 // I assume it should run in the higher frame.
2922 run_event_bool(t->f0, t->jv, t->info->name, "onload");
2923 }
2924
2925 return 0;
2926 } /* frameExpandLine */
2927
frameContractLine(int ln)2928 static int frameContractLine(int ln)
2929 {
2930 Tag *t = line2frame(ln);
2931 if (!t)
2932 return 1;
2933 t->contracted = true;
2934 return 0;
2935 } /* frameContractLine */
2936
line2frame(int ln)2937 Tag *line2frame(int ln)
2938 {
2939 const char *line;
2940 int n, opentag = 0, ln1 = ln;
2941 const char *s;
2942
2943 for (; ln; --ln) {
2944 line = (char *)fetchLine(ln, -1);
2945 if (!opentag && ln < ln1
2946 && (s = stringInBufLine(line, "*--`\n"))) {
2947 for (--s; s > line && *s != InternalCodeChar; --s) ;
2948 if (*s == InternalCodeChar)
2949 opentag = atoi(s + 1);
2950 continue;
2951 }
2952 s = stringInBufLine(line, "*`--\n");
2953 if (!s)
2954 continue;
2955 for (--s; s > line && *s != InternalCodeChar; --s) ;
2956 if (*s != InternalCodeChar)
2957 continue;
2958 n = atoi(s + 1);
2959 if (!opentag)
2960 return tagList[n];
2961 if (n == opentag)
2962 opentag = 0;
2963 }
2964
2965 return 0;
2966 } /* line2frame */
2967
/* a text line in the buffer isn't a string; you can't use strstr */
/* The line s is ended by newline. Return the first place in the line,
 * before that newline, where t begins, or null if t is not there.
 * t may itself end in newline, to anchor the match at end of line. */
static const char *stringInBufLine(const char *s, const char *t)
{
	const size_t want = strlen(t);
	const char *p = s;

	while (*p != '\n') {
		if (strncmp(p, t, want) == 0)
			return p;
		++p;
	}
	return NULL;
}				/* stringInBufLine */
2978
reexpandFrame(void)2979 bool reexpandFrame(void)
2980 {
2981 int j, start;
2982 Tag *frametag;
2983 Tag *cdt; // contentDocument tag
2984 uchar save_local;
2985 bool rc;
2986 jsobjtype save_top, save_parent, save_fe;
2987
2988 cf = newloc_f;
2989 frametag = cf->frametag;
2990 cdt = frametag->firstchild;
2991 save_top = get_property_object(cf, cf->winobj, "top");
2992 save_parent = get_property_object(cf, cf->winobj, "parent");
2993 save_fe = get_property_object(cf, cf->winobj, "frameElement");
2994
2995 // Cut away our tree nodes from the previous document, which are now inaccessible.
2996 underKill(cdt);
2997
2998 // the previous document node will soon be orphaned.
2999 delete_property(cf, cdt->jv, "parentNode");
3000
3001 delTimers(cf);
3002 freeJavaContext(cf);
3003 nzFree(cf->dw);
3004 cf->dw = 0;
3005 nzFree(cf->hbase);
3006 cf->hbase = 0;
3007 nzFree(cf->fileName);
3008 cf->fileName = newlocation;
3009 newlocation = 0;
3010 cf->uriEncoded = false;
3011 nzFree(cf->firstURL);
3012 cf->firstURL = 0;
3013 rc = readFileArgv(cf->fileName, 2);
3014 if (!rc) {
3015 /* serverData was never set, or was freed do to some other error. */
3016 fileSize = -1; /* don't print 0 */
3017 return false;
3018 }
3019
3020 if (serverData == NULL) {
3021 /* frame replaced itself with a playable stream, what to do? */
3022 fileSize = -1;
3023 return true;
3024 }
3025
3026 if (changeFileName) {
3027 nzFree(cf->fileName);
3028 cf->fileName = changeFileName;
3029 cf->uriEncoded = true;
3030 changeFileName = 0;
3031 }
3032
3033 /* don't print the size of what we just fetched */
3034 fileSize = -1;
3035
3036 cf->hbase = cloneString(cf->fileName);
3037 save_local = browseLocal;
3038 browseLocal = !isURL(cf->fileName);
3039 prepareForBrowse(serverData, serverDataLen);
3040 if (javaOK(cf->fileName))
3041 createJavaContext();
3042
3043 start = cw->numTags;
3044 /* call the tidy parser to build the html nodes */
3045 html2nodes(serverData, true);
3046 nzFree(serverData); /* don't need it any more */
3047 serverData = 0;
3048 htmlGenerated = false;
3049 htmlNodesIntoTree(start, cdt);
3050 cdt->step = 0;
3051 prerender(0);
3052 cdt->step = 2;
3053 if (cf->docobj) {
3054 decorate(0);
3055 set_basehref(cf->hbase);
3056 set_property_object(cf, cf->winobj, "top", save_top);
3057 set_property_object(cf, cf->winobj, "parent", save_parent);
3058 set_property_object(cf, cf->winobj, "frameElement", save_fe);
3059 run_function_bool(cf, cf->winobj, "eb$qs$start");
3060 runScriptsPending(true);
3061 runOnload();
3062 runScriptsPending(false);
3063 set_property_string(cf, cf->docobj, "readyState", "complete");
3064 run_event_bool(cf, cf->docobj, "document", "onreadystatechange");
3065 runScriptsPending(false);
3066 rebuildSelectors();
3067 }
3068
3069 j = strlen(cf->fileName);
3070 cf->fileName = reallocMem(cf->fileName, j + 8);
3071 strcat(cf->fileName, ".browse");
3072 browseLocal = save_local;
3073
3074 if (cf->docobj) {
3075 Frame *save_cf;
3076 jsobjtype cdo; // contentDocument object
3077 jsobjtype cwo; // contentWindow object
3078 jsobjtype cna; // childNodes array
3079 cdo = cf->docobj;
3080 cwo = cf->winobj;
3081 disconnectTagObject(cdt);
3082 connectTagObject(cdt, cdo);
3083 cdt->style = 0;
3084 // Should I switch this tag into the new frame? I don't really know.
3085 cdt->f0 = cf;
3086 // have to point contentDocument to the new document object,
3087 // but that requires a change of context.
3088 save_cf = cf;
3089 cf = frametag->f0;
3090 set_property_object(cf, frametag->jv, "content$Document", cdo);
3091 cna = get_property_object(cf, frametag->jv, "childNodes");
3092 set_array_element_object(cf, cna, 0, cdo);
3093 // Should we do this? For consistency I guess yes.
3094 set_property_object(cf, cdo, "parentNode", frametag->jv);
3095 set_property_object(cf, frametag->jv, "content$Window", cwo);
3096 cf = save_cf;
3097 }
3098
3099 return true;
3100 } /* reexpandFrame */
3101
3102 // Make sure a web page is not trying to read a local file.
frameSecurityFile(const char * thisfile)3103 bool frameSecurityFile(const char *thisfile)
3104 {
3105 Frame *f = &cf->owner->f0;
3106 for (; f != cf; f = f->next) {
3107 if (!isURL(f->fileName))
3108 continue;
3109 setError(MSG_NoAccessSecure, thisfile);
3110 return false;
3111 }
3112 return true;
3113 }
3114
3115 static bool remember_contracted;
3116
3117 // Undo the above,as though the frame were never expanded.
unframe(jsobjtype fobj,jsobjtype newdoc)3118 void unframe(jsobjtype fobj, jsobjtype newdoc)
3119 {
3120 int i, n;
3121 Tag *t, *cdt;
3122 jsobjtype cdo;
3123 Frame *f, *f1;
3124
3125 t = tagFromJavaVar(fobj);
3126 if (!t) {
3127 debugPrint(1, "unframe couldn't find tag");
3128 return;
3129 }
3130 if (!(cdt = t->firstchild) || cdt->action != TAGACT_DOC || cdt->sibling
3131 || !(cdo = cdt->jv)) {
3132 debugPrint(1, "unframe child tag isn't right");
3133 return;
3134 }
3135 underKill(cdt);
3136 disconnectTagObject(cdt);
3137 connectTagObject(cdt, newdoc);
3138
3139 f1 = t->f1;
3140 t->f1 = 0;
3141 remember_contracted = t->contracted;
3142 if (f1 == cf) {
3143 debugPrint(1,
3144 "deleting the current frame, this shouldn't happen, edbrowse is corrupt");
3145 return;
3146 }
3147 for (f = &(cw->f0); f; f = f->next)
3148 if (f->next == f1)
3149 break;
3150 if (!f) {
3151 debugPrint(1, "unframe can't find prior frame to relink");
3152 return;
3153 }
3154 f->next = f1->next;
3155 delTimers(f1);
3156 freeJavaContext(f1);
3157 nzFree(f1->dw);
3158 nzFree(f1->hbase);
3159 nzFree(f1->fileName);
3160 nzFree(f1->firstURL);
3161 free(f1);
3162
3163 // cdt use to belong to f1, which no longer exists.
3164 cdt->f0 = f; // back to its parent frame
3165
3166 // A running frame could create nodes in its parent frame, or any other frame.
3167 n = 0;
3168 for (i = 0; i < cw->numTags; ++i) {
3169 t = tagList[i];
3170 if (t->f0 == f1)
3171 t->f0 = f, ++n;
3172 }
3173 if (n)
3174 debugPrint(3, "%d nodes pushed up to the parent frame", n);
3175 }
3176
// Second half of unframe: give the frame tag behind fobj the contracted
// flag that unframe() saved in remember_contracted.
// Does nothing, beyond a debug message, if fobj has no tag.
void unframe2(jsobjtype fobj)
{
	Tag *t = tagFromJavaVar(fobj);
	if (!t) {
		// unframe() guards against this as well
		debugPrint(1, "unframe2 couldn't find tag");
		return;
	}
	t->contracted = remember_contracted;
}
3182