1 #include <u.h>
2 #include <libc.h>
3 #include <ctype.h>
4 #include <bio.h>
5 #include <ip.h>
6 #include <libsec.h>
7 #include <auth.h>
8 #include <thread.h>
9 
/*
 * A cracked URL plus what has been learned about the object
 * from the server's response headers.
 */
typedef struct URL URL;
struct URL
{
	int	method;		/* index into method[] (Http, Https, Ftp, Other); set by crackurl */
	char	*host;		/* strdup'd host name; crackurl cuts it at ':' when a port follows */
	char	*port;		/* points into host after the ':' or at method[].name; never freed on its own */
	char	*page;		/* strdup'd path, "/" when the URL has none */
	char	*etag;		/* strdup'd value of the etag: header (hhetag) */
	char	*redirect;	/* strdup'd target taken from a location: or uri: header */
	char	*postbody;	/* body to POST; nil means GET */
	char	*cred;		/* base64 user:password for Basic authorization (hhauth) */
	long	mtime;		/* time from the last-modified: header (hhmtime) */
};
23 
/*
 * Byte range of the transfer.  threadmain starts with start = 0
 * (or the length of an existing output file) and end = -1; end is
 * later filled in from content-length:/content-range: headers or
 * the FTP SIZE reply.
 */
typedef struct Range Range;
struct Range
{
	long	start;	/* only 2 gig supported, tdb */
	long	end;
};
30 
/*
 * Output stream state used by setoffset() and output() to skip
 * bytes that were already written and to check, by MD5, that data
 * fetched again matches what was written before.
 */
typedef struct Out Out;
struct Out
{
	int fd;
	int offset;				/* notional current offset in output */
	int written;			/* number of bytes successfully transferred to output */
	DigestState *curr;		/* digest state up to offset (if known) */
	DigestState *hiwat;		/* digest state of all bytes written */
};
40 
/*
 * Protocol numbers.  crackurl stores the index of the matching
 * method[] entry in URL.method, so the order here must follow
 * the order of that table.
 */
enum
{
	Http,
	Https,
	Ftp,
	Other
};
48 
/*
 * Results of a transfer routine as interpreted by threadmain:
 * a positive value is the number of bytes of progress made.
 */
enum
{
	Eof = 0,		/* nothing more to fetch; threadmain exits */
	Error = -1,		/* failure; threadmain retries a limited number of times */
	Server = -2,	/* the server refused; threadmain gives up with the error string */
	Changed = -3
};
56 
int debug;		/* -d: trace protocol traffic on standard error */
char *ofile;	/* -o: output file name; nil means write to standard output */


/* transfer routines, URL and header parsing, buffered input and checked output */
int	doftp(URL*, URL*, Range*, Out*, long);
int	dohttp(URL*, URL*,  Range*, Out*, long);
int	crackurl(URL*, char*);
Range*	crackrange(char*);
int	getheader(int, char*, int);
int	httpheaders(int, int, URL*, Range*);
int	httprcode(int);
int	cistrncmp(char*, char*, int);
int	cistrcmp(char*, char*);
void	initibuf(void);
int	readline(int, char*, int);
int	readibuf(int, char*, int);
int	dfprint(int, char*, ...);
void	unreadline(char*);
int	output(Out*, char*, int);
void	setoffset(Out*, int);

int	verbose;	/* -v: report progress (bytes so far, expected end) on standard error */
char	*net;		/* -x: network mount point; nil for the default */
char	tcpdir[NETPATHLEN];	/* tcp directory given to netmkaddr: "tcp" or net/tcp */
int	headerprint;	/* -h: getheader prints each response header on standard output */
82 
/*
 * URL schemes and the routine that transfers each one, indexed
 * by the Http/Https/Ftp/Other enum.  The name doubles as the
 * default port (service name) when the URL gives none.  The last
 * entry is a placeholder for Other; crackurl rejects such URLs,
 * so its nil function is not called.
 */
struct {
	char	*name;
	int	(*f)(URL*, URL*, Range*, Out*, long);
} method[] = {
	{ "http",	dohttp },
	{ "https",	dohttp },
	{ "ftp",	doftp },
	{ "_______",	nil },
};
92 
93 void
usage(void)94 usage(void)
95 {
96 	fprint(2, "usage: %s [-hv] [-o outfile] [-p body] [-x netmtpt] url\n", argv0);
97 	threadexitsall("usage");
98 }
99 
100 void
threadmain(int argc,char ** argv)101 threadmain(int argc, char **argv)
102 {
103 	URL u;
104 	Range r;
105 	int errs, n;
106 	ulong mtime;
107 	Dir *d;
108 	char postbody[4096], *p, *e, *t, *hpx;
109 	URL px; /* Proxy */
110 	Out out;
111 
112 	ofile = nil;
113 	p = postbody;
114 	e = p + sizeof(postbody);
115 	r.start = 0;
116 	r.end = -1;
117 	mtime = 0;
118 	memset(&u, 0, sizeof(u));
119 	memset(&px, 0, sizeof(px));
120 	hpx = getenv("httpproxy");
121 
122 	ARGBEGIN {
123 	case 'o':
124 		ofile = ARGF();
125 		break;
126 	case 'd':
127 		debug = 1;
128 		break;
129 	case 'h':
130 		headerprint = 1;
131 		break;
132 	case 'v':
133 		verbose = 1;
134 		break;
135 	case 'x':
136 		net = ARGF();
137 		if(net == nil)
138 			usage();
139 		break;
140 	case 'p':
141 		t = ARGF();
142 		if(t == nil)
143 			usage();
144 		if(p != postbody)
145 			p = seprint(p, e, "&%s", t);
146 		else
147 			p = seprint(p, e, "%s", t);
148 		u.postbody = postbody;
149 
150 		break;
151 	default:
152 		usage();
153 	} ARGEND;
154 
155 	if(net != nil){
156 		if(strlen(net) > sizeof(tcpdir)-5)
157 			sysfatal("network mount point too long");
158 		snprint(tcpdir, sizeof(tcpdir), "%s/tcp", net);
159 	} else
160 		snprint(tcpdir, sizeof(tcpdir), "tcp");
161 
162 	if(argc != 1)
163 		usage();
164 
165 
166 	out.fd = 1;
167 	out.written = 0;
168 	out.offset = 0;
169 	out.curr = nil;
170 	out.hiwat = nil;
171 	if(ofile != nil){
172 		d = dirstat(ofile);
173 		if(d == nil){
174 			out.fd = create(ofile, OWRITE, 0664);
175 			if(out.fd < 0)
176 				sysfatal("creating %s: %r", ofile);
177 		} else {
178 			out.fd = open(ofile, OWRITE);
179 			if(out.fd < 0)
180 				sysfatal("can't open %s: %r", ofile);
181 			r.start = d->length;
182 			mtime = d->mtime;
183 			free(d);
184 		}
185 	}
186 
187 	errs = 0;
188 
189 	if(crackurl(&u, argv[0]) < 0)
190 		sysfatal("%r");
191 	if(hpx && crackurl(&px, hpx) < 0)
192 		sysfatal("%r");
193 
194 	for(;;){
195 		setoffset(&out, 0);
196 		/* transfer data */
197 		werrstr("");
198 		n = (*method[u.method].f)(&u, &px, &r, &out, mtime);
199 
200 		switch(n){
201 		case Eof:
202 			threadexitsall(0);
203 			break;
204 		case Error:
205 			if(errs++ < 10)
206 				continue;
207 			sysfatal("too many errors with no progress %r");
208 			break;
209 		case Server:
210 			sysfatal("server returned: %r");
211 			break;
212 		}
213 
214 		/* forward progress */
215 		errs = 0;
216 		r.start += n;
217 		if(r.start >= r.end)
218 			break;
219 	}
220 
221 	threadexitsall(0);
222 }
223 
224 int
crackurl(URL * u,char * s)225 crackurl(URL *u, char *s)
226 {
227 	char *p;
228 	int i;
229 
230 	if(u->host != nil){
231 		free(u->host);
232 		u->host = nil;
233 	}
234 	if(u->page != nil){
235 		free(u->page);
236 		u->page = nil;
237 	}
238 
239 	/* get type */
240 	u->method = Other;
241 	for(p = s; *p; p++){
242 		if(*p == '/'){
243 			u->method = Http;
244 			p = s;
245 			break;
246 		}
247 		if(*p == ':' && *(p+1)=='/' && *(p+2)=='/'){
248 			*p = 0;
249 			p += 3;
250 			for(i = 0; i < nelem(method); i++){
251 				if(cistrcmp(s, method[i].name) == 0){
252 					u->method = i;
253 					break;
254 				}
255 			}
256 			break;
257 		}
258 	}
259 
260 	if(u->method == Other){
261 		werrstr("unsupported URL type %s", s);
262 		return -1;
263 	}
264 
265 	/* get system */
266 	s = p;
267 	p = strchr(s, '/');
268 	if(p == nil){
269 		u->host = strdup(s);
270 		u->page = strdup("/");
271 	} else {
272 		u->page = strdup(p);
273 		*p = 0;
274 		u->host = strdup(s);
275 		*p = '/';
276 	}
277 
278 	if(p = strchr(u->host, ':')) {
279 		*p++ = 0;
280 		u->port = p;
281 	} else
282 		u->port = method[u->method].name;
283 
284 	if(*(u->host) == 0){
285 		werrstr("bad url, null host");
286 		return -1;
287 	}
288 
289 	return 0;
290 }
291 
/* day and month abbreviations, indexed by Tm.wday and Tm.mon, for the If-range date sent by dohttp */
char *day[] = {
	"Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat"
};

char *month[] = {
	"Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"
};
299 
/* what catch() needs to stamp the output file: filled in by dohttp before notify(catch) */
struct
{
	int	fd;		/* output file descriptor */
	long	mtime;	/* modification time to set on it */
} note;
305 
306 void
catch(void * v,char * s)307 catch(void *v, char *s)
308 {
309 	Dir d;
310 
311 	USED(v);
312 	USED(s);
313 
314 	nulldir(&d);
315 	d.mtime = note.mtime;
316 	if(dirfwstat(note.fd, &d) < 0)
317 		sysfatal("catch: can't dirfwstat: %r");
318 	noted(NDFLT);
319 }
320 
321 int
dohttp(URL * u,URL * px,Range * r,Out * out,long mtime)322 dohttp(URL *u, URL *px, Range *r, Out *out, long mtime)
323 {
324 	int fd, cfd;
325 	int redirect, auth, loop;
326 	int n, rv, code;
327 	long tot, vtime;
328 	Tm *tm;
329 	char buf[1024];
330 	char err[ERRMAX];
331 
332 
333 	/*  always move back to a previous 512 byte bound because some
334 	 *  servers can't seem to deal with requests that start at the
335 	 *  end of the file
336 	 */
337 	if(r->start)
338 		r->start = ((r->start-1)/512)*512;
339 
340 	/* loop for redirects, requires reading both response code and headers */
341 	fd = -1;
342 	for(loop = 0; loop < 32; loop++){
343 		if(px->host == nil){
344 			fd = dial(netmkaddr(u->host, tcpdir, u->port), 0, 0, 0);
345 		} else {
346 			fd = dial(netmkaddr(px->host, tcpdir, px->port), 0, 0, 0);
347 		}
348 		if(fd < 0)
349 			return Error;
350 
351 		if(u->method == Https){
352 			int tfd;
353 			TLSconn conn;
354 
355 			memset(&conn, 0, sizeof conn);
356 			tfd = tlsClient(fd, &conn);
357 			if(tfd < 0){
358 				fprint(2, "tlsClient: %r\n");
359 				close(fd);
360 				return Error;
361 			}
362 			/* BUG: check cert here? */
363 			if(conn.cert)
364 				free(conn.cert);
365 			close(fd);
366 			fd = tfd;
367 		}
368 
369 		/* write request, use range if not start of file */
370 		if(u->postbody == nil){
371 			if(px->host == nil){
372 				dfprint(fd,	"GET %s HTTP/1.0\r\n"
373 						"Host: %s\r\n"
374 						"User-agent: Plan9/hget\r\n"
375 						"Cache-Control: no-cache\r\n"
376 						"Pragma: no-cache\r\n",
377 						u->page, u->host);
378 			} else {
379 				dfprint(fd,	"GET http://%s%s HTTP/1.0\r\n"
380 						"Host: %s\r\n"
381 						"User-agent: Plan9/hget\r\n"
382 						"Cache-Control: no-cache\r\n"
383 						"Pragma: no-cache\r\n",
384 						u->host, u->page, u->host);
385 			}
386 			if(u->cred)
387 				dfprint(fd,	"Authorization: Basic %s\r\n",
388 						u->cred);
389 		} else {
390 			if(px->host == nil){
391 				dfprint(fd,	"POST %s HTTP/1.0\r\n"
392 						"Host: %s\r\n"
393 						"Content-type: application/x-www-form-urlencoded\r\n"
394 						"Content-length: %d\r\n"
395 						"User-agent: Plan9/hget\r\n"
396 						"\r\n",
397 						u->page, u->host, strlen(u->postbody));
398 			} else {
399 				dfprint(fd, "POST http://%s%s HTTP/1.0\r\n"
400 						"Host: %s\r\n"
401 						"Content-type: application/x-www-form-urlencoded\r\n"
402 						"Content-length: %d\r\n"
403 						"User-agent: Plan9/hget\r\n"
404 						"\r\n",
405 						u->host, u->page, u->host, strlen(u->postbody));
406 			}
407 			dfprint(fd,	"%s", u->postbody);
408 		}
409 		if(r->start != 0){
410 			dfprint(fd, "Range: bytes=%d-\n", r->start);
411 			if(u->etag != nil){
412 				dfprint(fd, "If-range: %s\n", u->etag);
413 			} else {
414 				tm = gmtime(mtime);
415 				dfprint(fd, "If-range: %s, %d %s %d %2d:%2.2d:%2.2d GMT\n",
416 					day[tm->wday], tm->mday, month[tm->mon],
417 					tm->year+1900, tm->hour, tm->min, tm->sec);
418 			}
419 		}
420 		if((cfd = open("/mnt/webcookies/http", ORDWR)) >= 0){
421 			if(fprint(cfd, "http://%s%s", u->host, u->page) > 0){
422 				while((n = read(cfd, buf, sizeof buf)) > 0){
423 					if(debug)
424 						write(2, buf, n);
425 					write(fd, buf, n);
426 				}
427 			}else{
428 				close(cfd);
429 				cfd = -1;
430 			}
431 		}
432 
433 		dfprint(fd, "\r\n", u->host);
434 
435 		auth = 0;
436 		redirect = 0;
437 		initibuf();
438 		code = httprcode(fd);
439 		switch(code){
440 		case Error:	/* connection timed out */
441 		case Eof:
442 			close(fd);
443 			close(cfd);
444 			return code;
445 
446 		case 200:	/* OK */
447 		case 201:	/* Created */
448 		case 202:	/* Accepted */
449 			if(ofile == nil && r->start != 0)
450 				sysfatal("page changed underfoot");
451 			break;
452 
453 		case 204:	/* No Content */
454 			sysfatal("No Content");
455 
456 		case 206:	/* Partial Content */
457 			setoffset(out, r->start);
458 			break;
459 
460 		case 301:	/* Moved Permanently */
461 		case 302:	/* Moved Temporarily */
462 			redirect = 1;
463 			u->postbody = nil;
464 			break;
465 
466 		case 304:	/* Not Modified */
467 			break;
468 
469 		case 400:	/* Bad Request */
470 			sysfatal("Bad Request");
471 
472 		case 401:	/* Unauthorized */
473 			if (auth)
474 				sysfatal("Authentication failed");
475 			auth = 1;
476 			break;
477 
478 		case 402:	/* ??? */
479 			sysfatal("Unauthorized");
480 
481 		case 403:	/* Forbidden */
482 			sysfatal("Forbidden by server");
483 
484 		case 404:	/* Not Found */
485 			sysfatal("Not found on server");
486 
487 		case 407:	/* Proxy Authentication */
488 			sysfatal("Proxy authentication required");
489 
490 		case 500:	/* Internal server error */
491 			sysfatal("Server choked");
492 
493 		case 501:	/* Not implemented */
494 			sysfatal("Server can't do it!");
495 
496 		case 502:	/* Bad gateway */
497 			sysfatal("Bad gateway");
498 
499 		case 503:	/* Service unavailable */
500 			sysfatal("Service unavailable");
501 
502 		default:
503 			sysfatal("Unknown response code %d", code);
504 		}
505 
506 		if(u->redirect != nil){
507 			free(u->redirect);
508 			u->redirect = nil;
509 		}
510 
511 		rv = httpheaders(fd, cfd, u, r);
512 		close(cfd);
513 		if(rv != 0){
514 			close(fd);
515 			return rv;
516 		}
517 
518 		if(!redirect && !auth)
519 			break;
520 
521 		if (redirect){
522 			if(u->redirect == nil)
523 				sysfatal("redirect: no URL");
524 			if(crackurl(u, u->redirect) < 0)
525 				sysfatal("redirect: %r");
526 		}
527 	}
528 
529 	/* transfer whatever you get */
530 	if(ofile != nil && u->mtime != 0){
531 		note.fd = out->fd;
532 		note.mtime = u->mtime;
533 		notify(catch);
534 	}
535 
536 	tot = 0;
537 	vtime = 0;
538 	for(;;){
539 		n = readibuf(fd, buf, sizeof(buf));
540 		if(n <= 0)
541 			break;
542 		if(output(out, buf, n) != n)
543 			break;
544 		tot += n;
545 		if(verbose && (vtime != time(0) || r->start == r->end)) {
546 			vtime = time(0);
547 			fprint(2, "%ld %ld\n", r->start+tot, r->end);
548 		}
549 	}
550 	notify(nil);
551 	close(fd);
552 
553 	if(ofile != nil && u->mtime != 0){
554 		Dir d;
555 
556 		rerrstr(err, sizeof err);
557 		nulldir(&d);
558 		d.mtime = u->mtime;
559 		if(dirfwstat(out->fd, &d) < 0)
560 			fprint(2, "couldn't set mtime: %r\n");
561 		errstr(err, sizeof err);
562 	}
563 
564 	return tot;
565 }
566 
567 /* get the http response code */
568 int
httprcode(int fd)569 httprcode(int fd)
570 {
571 	int n;
572 	char *p;
573 	char buf[256];
574 
575 	n = readline(fd, buf, sizeof(buf)-1);
576 	if(n <= 0)
577 		return n;
578 	if(debug)
579 		fprint(2, "%d <- %s\n", fd, buf);
580 	p = strchr(buf, ' ');
581 	if(strncmp(buf, "HTTP/", 5) != 0 || p == nil){
582 		werrstr("bad response from server");
583 		return -1;
584 	}
585 	buf[n] = 0;
586 	return atoi(p+1);
587 }
588 
/* read in and crack the http headers, update u and r */
void	hhetag(char*, URL*, Range*);
void	hhmtime(char*, URL*, Range*);
void	hhclen(char*, URL*, Range*);
void	hhcrange(char*, URL*, Range*);
void	hhuri(char*, URL*, Range*);
void	hhlocation(char*, URL*, Range*);
void	hhauth(char*, URL*, Range*);

/*
 * Response headers hget acts on.  httpheaders compares names with
 * cistrncmp (so case does not matter) and calls f with the header
 * value, leading blanks and tabs skipped.
 */
struct {
	char *name;
	void (*f)(char*, URL*, Range*);
} headers[] = {
	{ "etag:", hhetag },
	{ "last-modified:", hhmtime },
	{ "content-length:", hhclen },
	{ "content-range:", hhcrange },
	{ "uri:", hhuri },
	{ "location:", hhlocation },
	{ "WWW-Authenticate:", hhauth },
};
610 int
httpheaders(int fd,int cfd,URL * u,Range * r)611 httpheaders(int fd, int cfd, URL *u, Range *r)
612 {
613 	char buf[2048];
614 	char *p;
615 	int i, n;
616 
617 	for(;;){
618 		n = getheader(fd, buf, sizeof(buf));
619 		if(n <= 0)
620 			break;
621 		if(cfd >= 0)
622 			fprint(cfd, "%s\n", buf);
623 		for(i = 0; i < nelem(headers); i++){
624 			n = strlen(headers[i].name);
625 			if(cistrncmp(buf, headers[i].name, n) == 0){
626 				/* skip field name and leading white */
627 				p = buf + n;
628 				while(*p == ' ' || *p == '\t')
629 					p++;
630 
631 				(*headers[i].f)(p, u, r);
632 				break;
633 			}
634 		}
635 	}
636 	return n;
637 }
638 
639 /*
640  *  read a single mime header, collect continuations.
641  *
642  *  this routine assumes that there is a blank line twixt
643  *  the header and the message body, otherwise bytes will
644  *  be lost.
645  */
646 int
getheader(int fd,char * buf,int n)647 getheader(int fd, char *buf, int n)
648 {
649 	char *p, *e;
650 	int i;
651 
652 	n--;
653 	p = buf;
654 	for(e = p + n; ; p += i){
655 		i = readline(fd, p, e-p);
656 		if(i < 0)
657 			return i;
658 
659 		if(p == buf){
660 			/* first line */
661 			if(strchr(buf, ':') == nil)
662 				break;		/* end of headers */
663 		} else {
664 			/* continuation line */
665 			if(*p != ' ' && *p != '\t'){
666 				unreadline(p);
667 				*p = 0;
668 				break;		/* end of this header */
669 			}
670 		}
671 	}
672 	if(headerprint)
673 		print("%s\n", buf);
674 
675 	if(debug)
676 		fprint(2, "%d <- %s\n", fd, buf);
677 	return p-buf;
678 }
679 
680 void
hhetag(char * p,URL * u,Range * r)681 hhetag(char *p, URL *u, Range *r)
682 {
683 	USED(r);
684 
685 	if(u->etag != nil){
686 		if(strcmp(u->etag, p) != 0)
687 			sysfatal("file changed underfoot");
688 	} else
689 		u->etag = strdup(p);
690 }
691 
692 char*	monthchars = "janfebmaraprmayjunjulaugsepoctnovdec";
693 
694 void
hhmtime(char * p,URL * u,Range * r)695 hhmtime(char *p, URL *u, Range *r)
696 {
697 	char *month, *day, *yr, *hms;
698 	char *fields[6];
699 	Tm tm, now;
700 	int i;
701 
702 	USED(r);
703 
704 	i = getfields(p, fields, 6, 1, " \t");
705 	if(i < 5)
706 		return;
707 
708 	day = fields[1];
709 	month = fields[2];
710 	yr = fields[3];
711 	hms = fields[4];
712 
713 	/* default time */
714 	now = *gmtime(time(0));
715 	tm = now;
716 	tm.yday = 0;
717 
718 	/* convert ascii month to a number twixt 1 and 12 */
719 	if(*month >= '0' && *month <= '9'){
720 		tm.mon = atoi(month) - 1;
721 		if(tm.mon < 0 || tm.mon > 11)
722 			tm.mon = 5;
723 	} else {
724 		for(p = month; *p; p++)
725 			*p = tolower((uchar)*p);
726 		for(i = 0; i < 12; i++)
727 			if(strncmp(&monthchars[i*3], month, 3) == 0){
728 				tm.mon = i;
729 				break;
730 			}
731 	}
732 
733 	tm.mday = atoi(day);
734 
735 	if(hms) {
736 		tm.hour = strtoul(hms, &p, 10);
737 		if(*p == ':') {
738 			p++;
739 			tm.min = strtoul(p, &p, 10);
740 			if(*p == ':') {
741 				p++;
742 				tm.sec = strtoul(p, &p, 10);
743 			}
744 		}
745 		if(tolower((uchar)*p) == 'p')
746 			tm.hour += 12;
747 	}
748 
749 	if(yr) {
750 		tm.year = atoi(yr);
751 		if(tm.year >= 1900)
752 			tm.year -= 1900;
753 	} else {
754 		if(tm.mon > now.mon || (tm.mon == now.mon && tm.mday > now.mday+1))
755 			tm.year--;
756 	}
757 
758 	strcpy(tm.zone, "GMT");
759 	/* convert to epoch seconds */
760 	u->mtime = tm2sec(&tm);
761 }
762 
763 void
hhclen(char * p,URL * u,Range * r)764 hhclen(char *p, URL *u, Range *r)
765 {
766 	USED(u);
767 
768 	r->end = atoi(p);
769 }
770 
771 void
hhcrange(char * p,URL * u,Range * r)772 hhcrange(char *p, URL *u, Range *r)
773 {
774 	char *x;
775 	vlong l;
776 
777 	USED(u);
778 	l = 0;
779 	x = strchr(p, '/');
780 	if(x)
781 		l = atoll(x+1);
782 	if(l == 0)
783 	x = strchr(p, '-');
784 	if(x)
785 		l = atoll(x+1);
786 	if(l)
787 		r->end = l;
788 }
789 
790 void
hhuri(char * p,URL * u,Range * r)791 hhuri(char *p, URL *u, Range *r)
792 {
793 	USED(r);
794 
795 	if(*p != '<')
796 		return;
797 	u->redirect = strdup(p+1);
798 	p = strchr(u->redirect, '>');
799 	if(p != nil)
800 		*p = 0;
801 }
802 
803 void
hhlocation(char * p,URL * u,Range * r)804 hhlocation(char *p, URL *u, Range *r)
805 {
806 	USED(r);
807 
808 	u->redirect = strdup(p);
809 }
810 
811 void
hhauth(char * p,URL * u,Range * r)812 hhauth(char *p, URL *u, Range *r)
813 {
814 	char *f[4];
815 	UserPasswd *up;
816 	char *s, cred[64];
817 
818 	USED(r);
819 
820 	if (cistrncmp(p, "basic ", 6) != 0)
821 		sysfatal("only Basic authentication supported");
822 
823 	if (gettokens(p, f, nelem(f), "\"") < 2)
824 		sysfatal("garbled auth data");
825 
826 	if ((up = auth_getuserpasswd(auth_getkey, "proto=pass service=http dom=%q relm=%q",
827 	    	u->host, f[1])) == nil)
828 			sysfatal("cannot authenticate");
829 
830 	s = smprint("%s:%s", up->user, up->passwd);
831 	if(enc64(cred, sizeof(cred), (uchar *)s, strlen(s)) == -1)
832 		sysfatal("enc64");
833   		free(s);
834 
835 	assert(u->cred = strdup(cred));
836 }
837 
enum
{
	/* ftp return codes: the first digit of the reply, as returned by ftprcode */
	Extra=		1,
	Success=	2,
	Incomplete=	3,
	TempFail=	4,
	PermFail=	5,

	Nnetdir=	64,	/* max length of network directory paths */
	Ndialstr=	64		/* max length of dial strings */
};
850 
/* ftp helpers: control-connection commands and replies, login, data connection setup, transfer */
int ftpcmd(int, char*, ...);
int ftprcode(int, char*, int);
int hello(int);
int logon(int);
int xfertype(int, char*);
int passive(int, URL*);
int active(int, URL*);
int ftpxfer(int, Out*, Range*);
int terminateftp(int, int);
int getaddrport(char*, uchar*, uchar*);
int ftprestart(int, Out*, URL*, Range*, long);
862 
863 int
doftp(URL * u,URL * px,Range * r,Out * out,long mtime)864 doftp(URL *u, URL *px, Range *r, Out *out, long mtime)
865 {
866 	int pid, ctl, data, rv;
867 	Waitmsg *w;
868 	char msg[64];
869 
870 	/* untested, proxy dosn't work with ftp (I think) */
871 	if(px->host == nil){
872 		ctl = dial(netmkaddr(u->host, tcpdir, u->port), 0, 0, 0);
873 	} else {
874 		ctl = dial(netmkaddr(px->host, tcpdir, px->port), 0, 0, 0);
875 	}
876 
877 	if(ctl < 0)
878 		return Error;
879 	if(net == nil)
880 		strcpy(tcpdir, "tcp");
881 
882 	initibuf();
883 
884 	rv = hello(ctl);
885 	if(rv < 0)
886 		return terminateftp(ctl, rv);
887 
888 	rv = logon(ctl);
889 	if(rv < 0)
890 		return terminateftp(ctl, rv);
891 
892 	rv = xfertype(ctl, "I");
893 	if(rv < 0)
894 		return terminateftp(ctl, rv);
895 
896 	/* if file is up to date and the right size, stop */
897 	if(ftprestart(ctl, out, u, r, mtime) > 0){
898 		close(ctl);
899 		return Eof;
900 	}
901 
902 	/* first try passive mode, then active */
903 	data = passive(ctl, u);
904 	if(data < 0){
905 		data = active(ctl, u);
906 		if(data < 0)
907 			return Error;
908 	}
909 
910 	/* fork */
911 	switch(pid = fork()){
912 	case -1:
913 		close(data);
914 		return terminateftp(ctl, Error);
915 	case 0:
916 		ftpxfer(data, out, r);
917 		close(data);
918 		#undef _exits
919 		_exits(0);
920 	default:
921 		close(data);
922 		break;
923 	}
924 
925 	/* wait for reply message */
926 	rv = ftprcode(ctl, msg, sizeof(msg));
927 	close(ctl);
928 
929 	/* wait for process to terminate */
930 	w = nil;
931 	for(;;){
932 		free(w);
933 		w = wait();
934 		if(w == nil)
935 			return Error;
936 		if(w->pid == pid){
937 			if(w->msg[0] == 0){
938 				free(w);
939 				break;
940 			}
941 			werrstr("xfer: %s", w->msg);
942 			free(w);
943 			return Error;
944 		}
945 	}
946 
947 	switch(rv){
948 	case Success:
949 		return Eof;
950 	case TempFail:
951 		return Server;
952 	default:
953 		return Error;
954 	}
955 }
956 
957 int
ftpcmd(int ctl,char * fmt,...)958 ftpcmd(int ctl, char *fmt, ...)
959 {
960 	va_list arg;
961 	char buf[2*1024], *s;
962 
963 	va_start(arg, fmt);
964 	s = vseprint(buf, buf + (sizeof(buf)-4) / sizeof(*buf), fmt, arg);
965 	va_end(arg);
966 	if(debug)
967 		fprint(2, "%d -> %s\n", ctl, buf);
968 	*s++ = '\r';
969 	*s++ = '\n';
970 	if(write(ctl, buf, s - buf) != s - buf)
971 		return -1;
972 	return 0;
973 }
974 
975 int
ftprcode(int ctl,char * msg,int len)976 ftprcode(int ctl, char *msg, int len)
977 {
978 	int rv;
979 	int i;
980 	char *p;
981 
982 	len--;	/* room for terminating null */
983 	for(;;){
984 		*msg = 0;
985 		i = readline(ctl, msg, len);
986 		if(i < 0)
987 			break;
988 		if(debug)
989 			fprint(2, "%d <- %s\n", ctl, msg);
990 
991 		/* stop if not a continuation */
992 		rv = strtol(msg, &p, 10);
993 		if(rv >= 100 && rv < 600 && p==msg+3 && *p == ' ')
994 			return rv/100;
995 	}
996 	*msg = 0;
997 
998 	return -1;
999 }
1000 
1001 int
hello(int ctl)1002 hello(int ctl)
1003 {
1004 	char msg[1024];
1005 
1006 	/* wait for hello from other side */
1007 	if(ftprcode(ctl, msg, sizeof(msg)) != Success){
1008 		werrstr("HELLO: %s", msg);
1009 		return Server;
1010 	}
1011 	return 0;
1012 }
1013 
/*
 * Convert exactly n characters at p from decimal.  The characters
 * are not checked to be digits.  Returns 0 when n <= 0.
 */
int
getdec(char *p, int n)
{
	int val;

	val = 0;
	while(n-- > 0)
		val = val*10 + (*p++ - '0');
	return val;
}
1024 
1025 int
ftprestart(int ctl,Out * out,URL * u,Range * r,long mtime)1026 ftprestart(int ctl, Out *out, URL *u, Range *r, long mtime)
1027 {
1028 	Tm tm;
1029 	char msg[1024];
1030 	long x, rmtime;
1031 
1032 	ftpcmd(ctl, "MDTM %s", u->page);
1033 	if(ftprcode(ctl, msg, sizeof(msg)) != Success){
1034 		r->start = 0;
1035 		return 0;		/* need to do something */
1036 	}
1037 
1038 	/* decode modification time */
1039 	if(strlen(msg) < 4 + 4 + 2 + 2 + 2 + 2 + 2){
1040 		r->start = 0;
1041 		return 0;		/* need to do something */
1042 	}
1043 	memset(&tm, 0, sizeof(tm));
1044 	tm.year = getdec(msg+4, 4) - 1900;
1045 	tm.mon = getdec(msg+4+4, 2) - 1;
1046 	tm.mday = getdec(msg+4+4+2, 2);
1047 	tm.hour = getdec(msg+4+4+2+2, 2);
1048 	tm.min = getdec(msg+4+4+2+2+2, 2);
1049 	tm.sec = getdec(msg+4+4+2+2+2+2, 2);
1050 	strcpy(tm.zone, "GMT");
1051 	rmtime = tm2sec(&tm);
1052 	if(rmtime > mtime)
1053 		r->start = 0;
1054 
1055 	/* get size */
1056 	ftpcmd(ctl, "SIZE %s", u->page);
1057 	if(ftprcode(ctl, msg, sizeof(msg)) == Success){
1058 		x = atol(msg+4);
1059 		if(r->start == x)
1060 			return 1;	/* we're up to date */
1061 		r->end = x;
1062 	}
1063 
1064 	/* seek to restart point */
1065 	if(r->start > 0){
1066 		ftpcmd(ctl, "REST %lud", r->start);
1067 		if(ftprcode(ctl, msg, sizeof(msg)) == Incomplete){
1068 			setoffset(out, r->start);
1069 		}else
1070 			r->start = 0;
1071 	}
1072 
1073 	return 0;	/* need to do something */
1074 }
1075 
1076 int
logon(int ctl)1077 logon(int ctl)
1078 {
1079 	char msg[1024];
1080 
1081 	/* login anonymous */
1082 	ftpcmd(ctl, "USER anonymous");
1083 	switch(ftprcode(ctl, msg, sizeof(msg))){
1084 	case Success:
1085 		return 0;
1086 	case Incomplete:
1087 		break;	/* need password */
1088 	default:
1089 		werrstr("USER: %s", msg);
1090 		return Server;
1091 	}
1092 
1093 	/* send user id as password */
1094 	sprint(msg, "%s@closedmind.org", getuser());
1095 	ftpcmd(ctl, "PASS %s", msg);
1096 	if(ftprcode(ctl, msg, sizeof(msg)) != Success){
1097 		werrstr("PASS: %s", msg);
1098 		return Server;
1099 	}
1100 
1101 	return 0;
1102 }
1103 
1104 int
xfertype(int ctl,char * t)1105 xfertype(int ctl, char *t)
1106 {
1107 	char msg[1024];
1108 
1109 	ftpcmd(ctl, "TYPE %s", t);
1110 	if(ftprcode(ctl, msg, sizeof(msg)) != Success){
1111 		werrstr("TYPE %s: %s", t, msg);
1112 		return Server;
1113 	}
1114 
1115 	return 0;
1116 }
1117 
1118 int
passive(int ctl,URL * u)1119 passive(int ctl, URL *u)
1120 {
1121 	char msg[1024];
1122 	char ipaddr[32];
1123 	char *f[6];
1124 	char *p;
1125 	int fd;
1126 	int port;
1127 	char aport[12];
1128 
1129 	ftpcmd(ctl, "PASV");
1130 	if(ftprcode(ctl, msg, sizeof(msg)) != Success)
1131 		return Error;
1132 
1133 	/* get address and port number from reply, this is AI */
1134 	p = strchr(msg, '(');
1135 	if(p == nil){
1136 		for(p = msg+3; *p; p++)
1137 			if(isdigit((uchar)*p))
1138 				break;
1139 	} else
1140 		p++;
1141 	if(getfields(p, f, 6, 0, ",)") < 6){
1142 		werrstr("ftp protocol botch");
1143 		return Server;
1144 	}
1145 	snprint(ipaddr, sizeof(ipaddr), "%s.%s.%s.%s",
1146 		f[0], f[1], f[2], f[3]);
1147 	port = ((atoi(f[4])&0xff)<<8) + (atoi(f[5])&0xff);
1148 	sprint(aport, "%d", port);
1149 
1150 	/* open data connection */
1151 	fd = dial(netmkaddr(ipaddr, tcpdir, aport), 0, 0, 0);
1152 	if(fd < 0){
1153 		werrstr("passive mode failed: %r");
1154 		return Error;
1155 	}
1156 
1157 	/* tell remote to send a file */
1158 	ftpcmd(ctl, "RETR %s", u->page);
1159 	if(ftprcode(ctl, msg, sizeof(msg)) != Extra){
1160 		werrstr("RETR %s: %s", u->page, msg);
1161 		return Error;
1162 	}
1163 	return fd;
1164 }
1165 
1166 int
active(int ctl,URL * u)1167 active(int ctl, URL *u)
1168 {
1169 	char msg[1024];
1170 	char dir[40], ldir[40];
1171 	uchar ipaddr[4];
1172 	uchar port[2];
1173 	int lcfd, dfd, afd;
1174 
1175 	/* announce a port for the call back */
1176 	snprint(msg, sizeof(msg), "%s!*!0", tcpdir);
1177 	afd = announce(msg, dir);
1178 	if(afd < 0)
1179 		return Error;
1180 
1181 	/* get a local address/port of the annoucement */
1182 	if(getaddrport(dir, ipaddr, port) < 0){
1183 		close(afd);
1184 		return Error;
1185 	}
1186 
1187 	/* tell remote side address and port*/
1188 	ftpcmd(ctl, "PORT %d,%d,%d,%d,%d,%d", ipaddr[0], ipaddr[1], ipaddr[2],
1189 		ipaddr[3], port[0], port[1]);
1190 	if(ftprcode(ctl, msg, sizeof(msg)) != Success){
1191 		close(afd);
1192 		werrstr("active: %s", msg);
1193 		return Error;
1194 	}
1195 
1196 	/* tell remote to send a file */
1197 	ftpcmd(ctl, "RETR %s", u->page);
1198 	if(ftprcode(ctl, msg, sizeof(msg)) != Extra){
1199 		close(afd);
1200 		werrstr("RETR: %s", msg);
1201 		return Server;
1202 	}
1203 
1204 	/* wait for a connection */
1205 	lcfd = listen(dir, ldir);
1206 	if(lcfd < 0){
1207 		close(afd);
1208 		return Error;
1209 	}
1210 	dfd = accept(lcfd, ldir);
1211 	if(dfd < 0){
1212 		close(afd);
1213 		close(lcfd);
1214 		return Error;
1215 	}
1216 	close(afd);
1217 	close(lcfd);
1218 
1219 	return dfd;
1220 }
1221 
1222 int
ftpxfer(int in,Out * out,Range * r)1223 ftpxfer(int in, Out *out, Range *r)
1224 {
1225 	char buf[1024];
1226 	long vtime;
1227 	int i, n;
1228 
1229 	vtime = 0;
1230 	for(n = 0;;n += i){
1231 		i = read(in, buf, sizeof(buf));
1232 		if(i == 0)
1233 			break;
1234 		if(i < 0)
1235 			return Error;
1236 		if(output(out, buf, i) != i)
1237 			return Error;
1238 		r->start += i;
1239 		if(verbose && (vtime != time(0) || r->start == r->end)) {
1240 			vtime = time(0);
1241 			fprint(2, "%ld %ld\n", r->start, r->end);
1242 		}
1243 	}
1244 	return n;
1245 }
1246 
/*
 * Close the control connection and pass rv through, so callers
 * can write "return terminateftp(ctl, rv)" on their error paths.
 */
int
terminateftp(int ctl, int rv)
{
	close(ctl);

	return rv;
}
1253 
/*
 * case insensitive strncmp (why aren't these in libc?)
 *
 * Compare at most n characters of a and b, ignoring case.
 * Returns 0 if they match, -1 if they differ.  The comparison
 * stops at the end of the strings, so equal strings shorter than
 * n are never read past their terminating NUL.
 */
int
cistrncmp(char *a, char *b, int n)
{
	int ca, cb;

	while(n-- > 0){
		ca = tolower((uchar)*a++);
		cb = tolower((uchar)*b++);
		if(ca != cb)
			return -1;
		if(ca == 0)
			break;	/* both ended together */
	}
	return 0;
}
1266 
/*
 * Compare the whole of a and b, ignoring case.
 * Returns 0 if they match, -1 if they differ (including in length).
 */
int
cistrcmp(char *a, char *b)
{
	for(; *a != 0 || *b != 0; a++, b++)
		if(tolower((uchar)*a) != tolower((uchar)*b))
			return -1;

	return 0;
}
1276 
/*
 *  buffered io
 *
 *  a single input buffer shared by readline, unreadline and
 *  readibuf; initibuf empties it.  unread bytes are those from
 *  rp up to (not including) wp.
 */
struct
{
	char *rp;		/* next byte to hand out */
	char *wp;		/* end of the bytes read so far */
	char buf[4*1024];
} b;
1286 
1287 void
initibuf(void)1288 initibuf(void)
1289 {
1290 	b.rp = b.wp = b.buf;
1291 }
1292 
1293 /*
1294  *  read a possibly buffered line, strip off trailing while
1295  */
1296 int
readline(int fd,char * buf,int len)1297 readline(int fd, char *buf, int len)
1298 {
1299 	int n;
1300 	char *p;
1301 	int eof = 0;
1302 
1303 	len--;
1304 
1305 	for(p = buf;;){
1306 		if(b.rp >= b.wp){
1307 			n = read(fd, b.wp, sizeof(b.buf)/2);
1308 			if(n < 0)
1309 				return -1;
1310 			if(n == 0){
1311 				eof = 1;
1312 				break;
1313 			}
1314 			b.wp += n;
1315 		}
1316 		n = *b.rp++;
1317 		if(len > 0){
1318 			*p++ = n;
1319 			len--;
1320 		}
1321 		if(n == '\n')
1322 			break;
1323 	}
1324 
1325 	/* drop trailing white */
1326 	for(;;){
1327 		if(p <= buf)
1328 			break;
1329 		n = *(p-1);
1330 		if(n != ' ' && n != '\t' && n != '\r' && n != '\n')
1331 			break;
1332 		p--;
1333 	}
1334 	*p = 0;
1335 
1336 	if(eof && p == buf)
1337 		return -1;
1338 
1339 	return p-buf;
1340 }
1341 
1342 void
unreadline(char * line)1343 unreadline(char *line)
1344 {
1345 	int i, n;
1346 
1347 	i = strlen(line);
1348 	n = b.wp-b.rp;
1349 	memmove(&b.buf[i+1], b.rp, n);
1350 	memmove(b.buf, line, i);
1351 	b.buf[i] = '\n';
1352 	b.rp = b.buf;
1353 	b.wp = b.rp + i + 1 + n;
1354 }
1355 
1356 int
readibuf(int fd,char * buf,int len)1357 readibuf(int fd, char *buf, int len)
1358 {
1359 	int n;
1360 
1361 	n = b.wp-b.rp;
1362 	if(n > 0){
1363 		if(n > len)
1364 			n = len;
1365 		memmove(buf, b.rp, n);
1366 		b.rp += n;
1367 		return n;
1368 	}
1369 	return read(fd, buf, len);
1370 }
1371 
1372 int
dfprint(int fd,char * fmt,...)1373 dfprint(int fd, char *fmt, ...)
1374 {
1375 	char buf[4*1024];
1376 	va_list arg;
1377 
1378 	va_start(arg, fmt);
1379 	vseprint(buf, buf+sizeof(buf), fmt, arg);
1380 	va_end(arg);
1381 	if(debug)
1382 		fprint(2, "%d -> %s", fd, buf);
1383 	return fprint(fd, "%s", buf);
1384 }
1385 
1386 int
getaddrport(char * dir,uchar * ipaddr,uchar * port)1387 getaddrport(char *dir, uchar *ipaddr, uchar *port)
1388 {
1389 	char buf[256];
1390 	int fd, i;
1391 	char *p;
1392 
1393 	snprint(buf, sizeof(buf), "%s/local", dir);
1394 	fd = open(buf, OREAD);
1395 	if(fd < 0)
1396 		return -1;
1397 	i = read(fd, buf, sizeof(buf)-1);
1398 	close(fd);
1399 	if(i <= 0)
1400 		return -1;
1401 	buf[i] = 0;
1402 	p = strchr(buf, '!');
1403 	if(p != nil)
1404 		*p++ = 0;
1405 	v4parseip(ipaddr, buf);
1406 	i = atoi(p);
1407 	port[0] = i>>8;
1408 	port[1] = i;
1409 	return 0;
1410 }
1411 
1412 void
md5free(DigestState * state)1413 md5free(DigestState *state)
1414 {
1415 	uchar x[MD5dlen];
1416 	md5(nil, 0, x, state);
1417 }
1418 
1419 DigestState*
md5dup(DigestState * state)1420 md5dup(DigestState *state)
1421 {
1422 	DigestState *s2;
1423 
1424 	s2 = malloc(sizeof(DigestState));
1425 	if(s2 == nil)
1426 		sysfatal("malloc: %r");
1427 	*s2 = *state;
1428 	s2->malloced = 1;
1429 	return s2;
1430 }
1431 
1432 void
setoffset(Out * out,int offset)1433 setoffset(Out *out, int offset)
1434 {
1435 	md5free(out->curr);
1436 	if(offset == 0)
1437 		out->curr = md5(nil, 0, nil, nil);
1438 	else
1439 		out->curr = nil;
1440 	out->offset = offset;
1441 }
1442 
1443 /*
1444  * write some output, discarding it (but keeping track)
1445  * if we've already written it. if we've gone backwards,
1446  * verify that everything previously written matches
1447  * that which would have been written from the current
1448  * output.
1449  */
1450 int
output(Out * out,char * buf,int nb)1451 output(Out *out, char *buf, int nb)
1452 {
1453 	int n, d;
1454 	uchar m0[MD5dlen], m1[MD5dlen];
1455 
1456 	n = nb;
1457 	d = out->written - out->offset;
1458 	assert(d >= 0);
1459 	if(d > 0){
1460 		if(n < d){
1461 			if(out->curr != nil)
1462 				md5((uchar*)buf, n, nil, out->curr);
1463 			out->offset += n;
1464 			return n;
1465 		}
1466 		if(out->curr != nil){
1467 			md5((uchar*)buf, d, m0, out->curr);
1468 			out->curr = nil;
1469 			md5(nil, 0, m1, md5dup(out->hiwat));
1470 			if(memcmp(m0, m1, MD5dlen) != 0){
1471 				fprint(2, "integrity check failure at offset %d\n", out->written);
1472 				return -1;
1473 			}
1474 		}
1475 		buf += d;
1476 		n -= d;
1477 		out->offset += d;
1478 	}
1479 	if(n > 0){
1480 		out->hiwat = md5((uchar*)buf, n, nil, out->hiwat);
1481 		n = write(out->fd, buf, n);
1482 		if(n > 0){
1483 			out->offset += n;
1484 			out->written += n;
1485 		}
1486 	}
1487 	return n + d;
1488 }
1489