xref: /dragonfly/lib/libfetch/http.c (revision 650094e1)
1 /*-
2  * Copyright (c) 2000-2004 Dag-Erling Coïdan Smørgrav
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer
10  *    in this position and unchanged.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  * 3. The name of the author may not be used to endorse or promote products
15  *    derived from this software without specific prior written permission.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  *
28  * $FreeBSD: src/lib/libfetch/http.c,v 1.86 2008/12/15 08:27:44 murray Exp $
29  * $DragonFly: src/lib/libfetch/http.c,v 1.4 2007/08/05 21:48:12 swildner Exp $
30  */
31 
32 /*
33  * The following copyright applies to the base64 code:
34  *
35  *-
36  * Copyright 1997 Massachusetts Institute of Technology
37  *
38  * Permission to use, copy, modify, and distribute this software and
39  * its documentation for any purpose and without fee is hereby
40  * granted, provided that both the above copyright notice and this
41  * permission notice appear in all copies, that both the above
42  * copyright notice and this permission notice appear in all
43  * supporting documentation, and that the name of M.I.T. not be used
44  * in advertising or publicity pertaining to distribution of the
45  * software without specific, written prior permission.  M.I.T. makes
46  * no representations about the suitability of this software for any
47  * purpose.  It is provided "as is" without express or implied
48  * warranty.
49  *
50  * THIS SOFTWARE IS PROVIDED BY M.I.T. ``AS IS''.  M.I.T. DISCLAIMS
51  * ALL EXPRESS OR IMPLIED WARRANTIES WITH REGARD TO THIS SOFTWARE,
52  * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
53  * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. IN NO EVENT
54  * SHALL M.I.T. BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
55  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
56  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
57  * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
58  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
59  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
60  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
61  * SUCH DAMAGE.
62  */
63 
64 #include <sys/param.h>
65 #include <sys/socket.h>
66 #include <sys/time.h>
67 
68 #include <ctype.h>
69 #include <err.h>
70 #include <errno.h>
71 #include <locale.h>
72 #include <netdb.h>
73 #include <stdarg.h>
74 #include <stdio.h>
75 #include <stdlib.h>
76 #include <string.h>
77 #include <time.h>
78 #include <unistd.h>
79 
80 #include <netinet/in.h>
81 #include <netinet/tcp.h>
82 
83 #include "fetch.h"
84 #include "common.h"
85 #include "httperr.h"
86 
/* Maximum number of redirects to follow */
#define MAX_REDIRECT 5

/* Symbolic names for reply codes we care about */
#define HTTP_OK			200
#define HTTP_PARTIAL		206
#define HTTP_MOVED_PERM		301
#define HTTP_MOVED_TEMP		302
#define HTTP_SEE_OTHER		303
#define HTTP_NOT_MODIFIED	304
#define HTTP_TEMP_REDIRECT	307
#define HTTP_NEED_AUTH		401
#define HTTP_NEED_PROXY_AUTH	407
#define HTTP_BAD_RANGE		416
#define HTTP_PROTOCOL_ERROR	999

/* True if xyz is one of the redirect reply codes listed above */
#define HTTP_REDIRECT(xyz) ((xyz) == HTTP_MOVED_PERM \
			    || (xyz) == HTTP_MOVED_TEMP \
			    || (xyz) == HTTP_TEMP_REDIRECT \
			    || (xyz) == HTTP_SEE_OTHER)

/*
 * True if xyz is a client (4xx) or server (5xx) error code.  Both bounds
 * are inclusive so that 400 and 599 are classified as errors as well.
 */
#define HTTP_ERROR(xyz) ((xyz) >= 400 && (xyz) <= 599)
109 
110 
111 /*****************************************************************************
112  * I/O functions for decoding chunked streams
113  */
114 
115 struct httpio
116 {
117 	conn_t		*conn;		/* connection */
118 	int		 chunked;	/* chunked mode */
119 	char		*buf;		/* chunk buffer */
120 	size_t		 bufsize;	/* size of chunk buffer */
121 	ssize_t		 buflen;	/* amount of data currently in buffer */
122 	int		 bufpos;	/* current read offset in buffer */
123 	int		 eof;		/* end-of-file flag */
124 	int		 error;		/* error flag */
125 	size_t		 chunksize;	/* remaining size of current chunk */
126 #ifndef NDEBUG
127 	size_t		 total;
128 #endif
129 };
130 
131 static int http_cmd(conn_t *, const char *, ...) __printflike(2, 3);
132 
133 /*
134  * Get next chunk header
135  */
136 static int
137 http_new_chunk(struct httpio *io)
138 {
139 	char *p;
140 
141 	if (fetch_getln(io->conn) == -1)
142 		return (-1);
143 
144 	if (io->conn->buflen < 2 || !isxdigit((unsigned char)*io->conn->buf))
145 		return (-1);
146 
147 	for (p = io->conn->buf; *p && !isspace((unsigned char)*p); ++p) {
148 		if (*p == ';')
149 			break;
150 		if (!isxdigit((unsigned char)*p))
151 			return (-1);
152 		if (isdigit((unsigned char)*p)) {
153 			io->chunksize = io->chunksize * 16 +
154 			    *p - '0';
155 		} else {
156 			io->chunksize = io->chunksize * 16 +
157 			    10 + tolower((unsigned char)*p) - 'a';
158 		}
159 	}
160 
161 #ifndef NDEBUG
162 	if (fetchDebug) {
163 		io->total += io->chunksize;
164 		if (io->chunksize == 0)
165 			fprintf(stderr, "%s(): end of last chunk\n", __func__);
166 		else
167 			fprintf(stderr, "%s(): new chunk: %lu (%lu)\n",
168 			    __func__, (unsigned long)io->chunksize,
169 			    (unsigned long)io->total);
170 	}
171 #endif
172 
173 	return (io->chunksize);
174 }
175 
176 /*
177  * Grow the input buffer to at least len bytes
178  */
179 static inline int
180 http_growbuf(struct httpio *io, size_t len)
181 {
182 	char *tmp;
183 
184 	if (io->bufsize >= len)
185 		return (0);
186 
187 	if ((tmp = realloc(io->buf, len)) == NULL)
188 		return (-1);
189 	io->buf = tmp;
190 	io->bufsize = len;
191 	return (0);
192 }
193 
194 /*
195  * Fill the input buffer, do chunk decoding on the fly
196  */
197 static int
198 http_fillbuf(struct httpio *io, size_t len)
199 {
200 	if (io->error)
201 		return (-1);
202 	if (io->eof)
203 		return (0);
204 
205 	if (io->chunked == 0) {
206 		if (http_growbuf(io, len) == -1)
207 			return (-1);
208 		if ((io->buflen = fetch_read(io->conn, io->buf, len)) == -1) {
209 			io->error = 1;
210 			return (-1);
211 		}
212 		io->bufpos = 0;
213 		return (io->buflen);
214 	}
215 
216 	if (io->chunksize == 0) {
217 		switch (http_new_chunk(io)) {
218 		case -1:
219 			io->error = 1;
220 			return (-1);
221 		case 0:
222 			io->eof = 1;
223 			return (0);
224 		}
225 	}
226 
227 	if (len > io->chunksize)
228 		len = io->chunksize;
229 	if (http_growbuf(io, len) == -1)
230 		return (-1);
231 	if ((io->buflen = fetch_read(io->conn, io->buf, len)) == -1) {
232 		io->error = 1;
233 		return (-1);
234 	}
235 	io->chunksize -= io->buflen;
236 
237 	if (io->chunksize == 0) {
238 		char endl[2];
239 
240 		if (fetch_read(io->conn, endl, 2) != 2 ||
241 		    endl[0] != '\r' || endl[1] != '\n')
242 			return (-1);
243 	}
244 
245 	io->bufpos = 0;
246 
247 	return (io->buflen);
248 }
249 
250 /*
251  * Read function
252  */
253 static int
254 http_readfn(void *v, char *buf, int len)
255 {
256 	struct httpio *io = (struct httpio *)v;
257 	int l, pos;
258 
259 	if (io->error)
260 		return (-1);
261 	if (io->eof)
262 		return (0);
263 
264 	for (pos = 0; len > 0; pos += l, len -= l) {
265 		/* empty buffer */
266 		if (!io->buf || io->bufpos == io->buflen)
267 			if (http_fillbuf(io, len) < 1)
268 				break;
269 		l = io->buflen - io->bufpos;
270 		if (len < l)
271 			l = len;
272 		memcpy(buf + pos, io->buf + io->bufpos, l);
273 		io->bufpos += l;
274 	}
275 
276 	if (!pos && io->error)
277 		return (-1);
278 	return (pos);
279 }
280 
281 /*
282  * Write function
283  */
284 static int
285 http_writefn(void *v, const char *buf, int len)
286 {
287 	struct httpio *io = (struct httpio *)v;
288 
289 	return (fetch_write(io->conn, buf, len));
290 }
291 
292 /*
293  * Close function
294  */
295 static int
296 http_closefn(void *v)
297 {
298 	struct httpio *io = (struct httpio *)v;
299 	int r;
300 
301 	r = fetch_close(io->conn);
302 	if (io->buf)
303 		free(io->buf);
304 	free(io);
305 	return (r);
306 }
307 
308 /*
309  * Wrap a file descriptor up
310  */
311 static FILE *
312 http_funopen(conn_t *conn, int chunked)
313 {
314 	struct httpio *io;
315 	FILE *f;
316 
317 	if ((io = calloc(1, sizeof(*io))) == NULL) {
318 		fetch_syserr();
319 		return (NULL);
320 	}
321 	io->conn = conn;
322 	io->chunked = chunked;
323 	f = funopen(io, http_readfn, http_writefn, NULL, http_closefn);
324 	if (f == NULL) {
325 		fetch_syserr();
326 		free(io);
327 		return (NULL);
328 	}
329 	return (f);
330 }
331 
332 
333 /*****************************************************************************
334  * Helper functions for talking to the server and parsing its replies
335  */
336 
/*
 * Header types.  Negative values report failures, zero marks the end
 * of the header section, and positive values identify a header line.
 */
typedef enum {
	hdr_syserror		= -2,
	hdr_error		= -1,
	hdr_end			= 0,
	hdr_unknown		= 1,
	hdr_content_length	= 2,
	hdr_content_range	= 3,
	hdr_last_modified	= 4,
	hdr_location		= 5,
	hdr_transfer_encoding	= 6,
	hdr_www_authenticate	= 7
} hdr_t;

/* Names of interesting headers; the hdr_unknown entry ends the table */
static struct {
	hdr_t		 num;
	const char	*name;
} hdr_names[] = {
	{ .num = hdr_content_length,	.name = "Content-Length" },
	{ .num = hdr_content_range,	.name = "Content-Range" },
	{ .num = hdr_last_modified,	.name = "Last-Modified" },
	{ .num = hdr_location,		.name = "Location" },
	{ .num = hdr_transfer_encoding,	.name = "Transfer-Encoding" },
	{ .num = hdr_www_authenticate,	.name = "WWW-Authenticate" },
	{ .num = hdr_unknown,		.name = NULL },
};
364 
365 /*
366  * Send a formatted line; optionally echo to terminal
367  */
368 static int
369 http_cmd(conn_t *conn, const char *fmt, ...)
370 {
371 	va_list ap;
372 	size_t len;
373 	char *msg;
374 	int r;
375 
376 	va_start(ap, fmt);
377 	len = vasprintf(&msg, fmt, ap);
378 	va_end(ap);
379 
380 	if (msg == NULL) {
381 		errno = ENOMEM;
382 		fetch_syserr();
383 		return (-1);
384 	}
385 
386 	r = fetch_putln(conn, msg, len);
387 	free(msg);
388 
389 	if (r == -1) {
390 		fetch_syserr();
391 		return (-1);
392 	}
393 
394 	return (0);
395 }
396 
397 /*
398  * Get and parse status line
399  */
400 static int
401 http_get_reply(conn_t *conn)
402 {
403 	char *p;
404 
405 	if (fetch_getln(conn) == -1)
406 		return (-1);
407 	/*
408 	 * A valid status line looks like "HTTP/m.n xyz reason" where m
409 	 * and n are the major and minor protocol version numbers and xyz
410 	 * is the reply code.
411 	 * Unfortunately, there are servers out there (NCSA 1.5.1, to name
412 	 * just one) that do not send a version number, so we can't rely
413 	 * on finding one, but if we do, insist on it being 1.0 or 1.1.
414 	 * We don't care about the reason phrase.
415 	 */
416 	if (strncmp(conn->buf, "HTTP", 4) != 0)
417 		return (HTTP_PROTOCOL_ERROR);
418 	p = conn->buf + 4;
419 	if (*p == '/') {
420 		if (p[1] != '1' || p[2] != '.' || (p[3] != '0' && p[3] != '1'))
421 			return (HTTP_PROTOCOL_ERROR);
422 		p += 4;
423 	}
424 	if (*p != ' ' ||
425 	    !isdigit((unsigned char)p[1]) ||
426 	    !isdigit((unsigned char)p[2]) ||
427 	    !isdigit((unsigned char)p[3]))
428 		return (HTTP_PROTOCOL_ERROR);
429 
430 	conn->err = (p[1] - '0') * 100 + (p[2] - '0') * 10 + (p[3] - '0');
431 	return (conn->err);
432 }
433 
/*
 * Check a header; if the type matches the given string, return a pointer
 * to the beginning of the value.
 *
 * str is the header name to look for and hdr is the complete header
 * line.  The name is compared case-insensitively and must be followed
 * immediately by a colon.  Returns a pointer into hdr just past the
 * colon and any whitespace after it, or NULL if the name does not
 * match.
 */
static const char *
http_match(const char *str, const char *hdr)
{
	/*
	 * Advance only past characters that actually matched, so that a
	 * mismatch in the last character of the name is not skipped.
	 */
	while (*str != '\0' && *hdr != '\0' &&
	    tolower((unsigned char)*str) == tolower((unsigned char)*hdr)) {
		++str;
		++hdr;
	}
	if (*str != '\0' || *hdr != ':')
		return (NULL);

	/* step over the colon, then over any whitespace */
	do {
		++hdr;
	} while (*hdr != '\0' && isspace((unsigned char)*hdr));
	return (hdr);
}
450 
451 /*
452  * Get the next header and return the appropriate symbolic code.
453  */
454 static hdr_t
455 http_next_header(conn_t *conn, const char **p)
456 {
457 	int i;
458 
459 	if (fetch_getln(conn) == -1)
460 		return (hdr_syserror);
461 	while (conn->buflen && isspace((unsigned char)conn->buf[conn->buflen - 1]))
462 		conn->buflen--;
463 	conn->buf[conn->buflen] = '\0';
464 	if (conn->buflen == 0)
465 		return (hdr_end);
466 	/*
467 	 * We could check for malformed headers but we don't really care.
468 	 * A valid header starts with a token immediately followed by a
469 	 * colon; a token is any sequence of non-control, non-whitespace
470 	 * characters except "()<>@,;:\\\"{}".
471 	 */
472 	for (i = 0; hdr_names[i].num != hdr_unknown; i++)
473 		if ((*p = http_match(hdr_names[i].name, conn->buf)) != NULL)
474 			return (hdr_names[i].num);
475 	return (hdr_unknown);
476 }
477 
/*
 * Parse a last-modified header
 *
 * p is the header value, expected in the form
 * "Day, DD Mon YYYY HH:MM:SS GMT".  The LC_TIME locale is switched to
 * "C" while parsing and restored afterwards.  On success the time is
 * stored in *mtime and 0 is returned; -1 is returned if the current
 * locale cannot be saved or the value does not parse.
 */
static int
http_parse_mtime(const char *p, time_t *mtime)
{
	const char *cur;
	char *locale, *r;
	struct tm tm;

	/*
	 * Keep a private copy of the locale name: the string returned
	 * by setlocale() may be overwritten by the next call, and a
	 * fixed-size buffer could truncate it.
	 */
	cur = setlocale(LC_TIME, NULL);
	if (cur == NULL || (locale = strdup(cur)) == NULL)
		return (-1);
	setlocale(LC_TIME, "C");
	/* start from a known state for fields the format does not set */
	memset(&tm, 0, sizeof(tm));
	r = strptime(p, "%a, %d %b %Y %H:%M:%S GMT", &tm);
	/* XXX should add support for date-2 and date-3 */
	setlocale(LC_TIME, locale);
	free(locale);
	if (r == NULL)
		return (-1);
	DEBUG(fprintf(stderr, "last modified: [%04d-%02d-%02d "
		  "%02d:%02d:%02d]\n",
		  tm.tm_year + 1900, tm.tm_mon + 1, tm.tm_mday,
		  tm.tm_hour, tm.tm_min, tm.tm_sec));
	*mtime = timegm(&tm);
	return (0);
}
501 
/*
 * Parse a content-length header
 *
 * p is the header value, which must consist of decimal digits only.
 * On success the value is stored in *length and 0 is returned.
 * Returns -1, leaving *length untouched, if the value is empty,
 * contains anything other than digits, or does not fit in an off_t.
 * errno is preserved.
 */
static int
http_parse_length(const char *p, off_t *length)
{
	long long val;
	char *end;
	int overflow, serrno;
	off_t len;

	/* strtoll() would accept leading whitespace and a sign */
	if (!isdigit((unsigned char)*p))
		return (-1);

	serrno = errno;
	errno = 0;
	val = strtoll(p, &end, 10);
	overflow = (errno == ERANGE);
	errno = serrno;

	len = (off_t)val;
	if (overflow || *end != '\0' || (long long)len != val)
		return (-1);
	DEBUG(fprintf(stderr, "content length: [%lld]\n",
	    (long long)len));
	*length = len;
	return (0);
}
519 
/*
 * Parse a content-range header of the form "bytes first-last/size" or
 * "bytes * /size" (without the space).
 *
 * On success *offset receives the first byte position (-1 for the "*"
 * form), *length the number of bytes in the range (0 for the "*" form)
 * and *size the total size, and 0 is returned.  Returns -1 if the
 * value is malformed or inconsistent.
 */
static int
http_parse_range(const char *p, off_t *offset, off_t *length, off_t *size)
{
	off_t first, last, len;

	if (strncasecmp(p, "bytes ", 6) != 0)
		return (-1);
	p += 6;

	if (*p != '*') {
		first = 0;
		while (isdigit((unsigned char)*p)) {
			first = first * 10 + *p - '0';
			++p;
		}
		if (*p != '-')
			return (-1);
		++p;
		last = 0;
		while (isdigit((unsigned char)*p)) {
			last = last * 10 + *p - '0';
			++p;
		}
	} else {
		first = last = -1;
		++p;
	}

	if (*p != '/' || first > last)
		return (-1);
	++p;
	len = 0;
	while (isdigit((unsigned char)*p)) {
		len = len * 10 + *p - '0';
		++p;
	}
	/* nothing may follow, and the range must fit inside the total */
	if (*p != '\0' || len < last - first + 1)
		return (-1);

	if (first == -1) {
		DEBUG(fprintf(stderr, "content range: [*/%lld]\n",
		    (long long)len));
		*length = 0;
	} else {
		DEBUG(fprintf(stderr, "content range: [%lld-%lld/%lld]\n",
		    (long long)first, (long long)last, (long long)len));
		*length = last - first + 1;
	}
	*offset = first;
	*size = len;
	return (0);
}
561 
562 
563 /*****************************************************************************
564  * Helper functions for authorization
565  */
566 
/*
 * Base64 encoding
 *
 * Encodes the NUL-terminated string src and returns the result as a
 * newly allocated NUL-terminated string which the caller must free(),
 * or NULL if memory cannot be allocated.  The output is padded with
 * '=' to a multiple of four characters.
 */
static char *
http_base64(const char *src)
{
	static const char base64[] =
	    "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
	    "abcdefghijklmnopqrstuvwxyz"
	    "0123456789+/";
	/*
	 * Read the input as unsigned bytes: with a signed char type,
	 * bytes >= 0x80 would sign-extend and corrupt the other bits.
	 */
	const unsigned char *in = (const unsigned char *)src;
	char *str, *dst;
	size_t l;
	unsigned long t;

	l = strlen(src);
	if ((str = malloc(((l + 2) / 3) * 4 + 1)) == NULL)
		return (NULL);
	dst = str;

	/* full groups: three input bytes become four output characters */
	while (l >= 3) {
		t = ((unsigned long)in[0] << 16) |
		    ((unsigned long)in[1] << 8) | in[2];
		dst[0] = base64[(t >> 18) & 0x3f];
		dst[1] = base64[(t >> 12) & 0x3f];
		dst[2] = base64[(t >> 6) & 0x3f];
		dst[3] = base64[(t >> 0) & 0x3f];
		in += 3; l -= 3;
		dst += 4;
	}

	/* trailing one or two bytes, padded with '=' */
	switch (l) {
	case 2:
		t = ((unsigned long)in[0] << 16) |
		    ((unsigned long)in[1] << 8);
		dst[0] = base64[(t >> 18) & 0x3f];
		dst[1] = base64[(t >> 12) & 0x3f];
		dst[2] = base64[(t >> 6) & 0x3f];
		dst[3] = '=';
		dst += 4;
		break;
	case 1:
		t = (unsigned long)in[0] << 16;
		dst[0] = base64[(t >> 18) & 0x3f];
		dst[1] = base64[(t >> 12) & 0x3f];
		dst[2] = dst[3] = '=';
		dst += 4;
		break;
	case 0:
		break;
	}

	*dst = 0;
	return (str);
}
622 
623 /*
624  * Encode username and password
625  */
626 static int
627 http_basic_auth(conn_t *conn, const char *hdr, const char *usr, const char *pwd)
628 {
629 	char *upw, *auth;
630 	int r;
631 
632 	DEBUG(fprintf(stderr, "usr: [%s]\n", usr));
633 	DEBUG(fprintf(stderr, "pwd: [%s]\n", pwd));
634 	if (asprintf(&upw, "%s:%s", usr, pwd) == -1)
635 		return (-1);
636 	auth = http_base64(upw);
637 	free(upw);
638 	if (auth == NULL)
639 		return (-1);
640 	r = http_cmd(conn, "%s: Basic %s", hdr, auth);
641 	free(auth);
642 	return (r);
643 }
644 
645 /*
646  * Send an authorization header
647  */
648 static int
649 http_authorize(conn_t *conn, const char *hdr, const char *p)
650 {
651 	/* basic authorization */
652 	if (strncasecmp(p, "basic:", 6) == 0) {
653 		char *user, *pwd, *str;
654 		int r;
655 
656 		/* skip realm */
657 		for (p += 6; *p && *p != ':'; ++p)
658 			/* nothing */ ;
659 		if (!*p || strchr(++p, ':') == NULL)
660 			return (-1);
661 		if ((str = strdup(p)) == NULL)
662 			return (-1); /* XXX */
663 		user = str;
664 		pwd = strchr(str, ':');
665 		*pwd++ = '\0';
666 		r = http_basic_auth(conn, hdr, user, pwd);
667 		free(str);
668 		return (r);
669 	}
670 	return (-1);
671 }
672 
673 
674 /*****************************************************************************
675  * Helper functions for connecting to a server or proxy
676  */
677 
678 /*
679  * Connect to the correct HTTP server or proxy.
680  */
681 static conn_t *
682 http_connect(struct url *URL, struct url *purl, const char *flags)
683 {
684 	conn_t *conn;
685 	int verbose;
686 	int af, val;
687 
688 #ifdef INET6
689 	af = AF_UNSPEC;
690 #else
691 	af = AF_INET;
692 #endif
693 
694 	verbose = CHECK_FLAG('v');
695 	if (CHECK_FLAG('4'))
696 		af = AF_INET;
697 #ifdef INET6
698 	else if (CHECK_FLAG('6'))
699 		af = AF_INET6;
700 #endif
701 
702 	if (purl && strcasecmp(URL->scheme, SCHEME_HTTPS) != 0) {
703 		URL = purl;
704 	} else if (strcasecmp(URL->scheme, SCHEME_FTP) == 0) {
705 		/* can't talk http to an ftp server */
706 		/* XXX should set an error code */
707 		return (NULL);
708 	}
709 
710 	if ((conn = fetch_connect(URL->host, URL->port, af, verbose)) == NULL)
711 		/* fetch_connect() has already set an error code */
712 		return (NULL);
713 	if (strcasecmp(URL->scheme, SCHEME_HTTPS) == 0 &&
714 	    fetch_ssl(conn, verbose) == -1) {
715 		fetch_close(conn);
716 		/* grrr */
717 		errno = EAUTH;
718 		fetch_syserr();
719 		return (NULL);
720 	}
721 
722 	val = 1;
723 	setsockopt(conn->sd, IPPROTO_TCP, TCP_NOPUSH, &val, sizeof(val));
724 
725 	return (conn);
726 }
727 
728 static struct url *
729 http_get_proxy(struct url * url, const char *flags)
730 {
731 	struct url *purl;
732 	char *p;
733 
734 	if (flags != NULL && strchr(flags, 'd') != NULL)
735 		return (NULL);
736 	if (fetch_no_proxy_match(url->host))
737 		return (NULL);
738 	if (((p = getenv("HTTP_PROXY")) || (p = getenv("http_proxy"))) &&
739 	    *p && (purl = fetchParseURL(p))) {
740 		if (!*purl->scheme)
741 			strcpy(purl->scheme, SCHEME_HTTP);
742 		if (!purl->port)
743 			purl->port = fetch_default_proxy_port(purl->scheme);
744 		if (strcasecmp(purl->scheme, SCHEME_HTTP) == 0)
745 			return (purl);
746 		fetchFreeURL(purl);
747 	}
748 	return (NULL);
749 }
750 
/*
 * Copy the text of an HTML document from in to out, leaving out
 * everything between '<' and '>' as well as comments.
 *
 * The input is processed line by line; trailing whitespace is removed
 * from each line and a newline is written after it.  Tag and comment
 * state carries over from one line to the next.
 */
static void
http_print_html(FILE *out, FILE *in)
{
	size_t len;
	char *line, *end, *p, *q;
	int comment, tag;

	comment = tag = 0;
	while ((line = fgetln(in, &len)) != NULL) {
		while (len && isspace((unsigned char)line[len - 1]))
			--len;
		end = line + len;
		/* p marks the start of text that is still to be written */
		for (p = q = line; q < end; ++q) {
			if (comment && *q == '-') {
				/*
				 * The line is not NUL-terminated and the
				 * marker is only a prefix of what is left,
				 * so compare a bounded number of bytes.
				 */
				if (q + 2 < end &&
				    strncmp(q, "-->", 3) == 0) {
					tag = comment = 0;
					q += 2;
					/* text resumes after the comment */
					p = q + 1;
				}
			} else if (tag && !comment && *q == '>') {
				p = q + 1;
				tag = 0;
			} else if (!tag && *q == '<') {
				if (q > p)
					fwrite(p, q - p, 1, out);
				tag = 1;
				if (q + 3 < end &&
				    strncmp(q, "<!--", 4) == 0) {
					comment = 1;
					q += 3;
				}
			}
		}
		if (!tag && q > p)
			fwrite(p, q - p, 1, out);
		fputc('\n', out);
	}
}
788 
789 
790 /*****************************************************************************
791  * Core
792  */
793 
794 /*
795  * Send a request and process the reply
796  *
797  * XXX This function is way too long, the do..while loop should be split
798  * XXX off into a separate function.
799  */
800 FILE *
801 http_request(struct url *URL, const char *op, struct url_stat *us,
802     struct url *purl, const char *flags)
803 {
804 	char timebuf[80];
805 	char hbuf[MAXHOSTNAMELEN + 7], *host;
806 	conn_t *conn;
807 	struct url *url, *new;
808 	int chunked, direct, ims, need_auth, noredirect, verbose;
809 	int e, i, n, val;
810 	off_t offset, clength, length, size;
811 	time_t mtime;
812 	const char *p;
813 	FILE *f;
814 	hdr_t h;
815 	struct tm *timestruct;
816 
817 	direct = CHECK_FLAG('d');
818 	noredirect = CHECK_FLAG('A');
819 	verbose = CHECK_FLAG('v');
820 	ims = CHECK_FLAG('i');
821 
822 	if (direct && purl) {
823 		fetchFreeURL(purl);
824 		purl = NULL;
825 	}
826 
827 	/* try the provided URL first */
828 	url = URL;
829 
830 	/* if the A flag is set, we only get one try */
831 	n = noredirect ? 1 : MAX_REDIRECT;
832 	i = 0;
833 
834 	e = HTTP_PROTOCOL_ERROR;
835 	need_auth = 0;
836 	do {
837 		new = NULL;
838 		chunked = 0;
839 		offset = 0;
840 		clength = -1;
841 		length = -1;
842 		size = -1;
843 		mtime = 0;
844 
845 		/* check port */
846 		if (!url->port)
847 			url->port = fetch_default_port(url->scheme);
848 
849 		/* were we redirected to an FTP URL? */
850 		if (purl == NULL && strcmp(url->scheme, SCHEME_FTP) == 0) {
851 			if (strcmp(op, "GET") == 0)
852 				return (ftp_request(url, "RETR", us, purl, flags));
853 			else if (strcmp(op, "HEAD") == 0)
854 				return (ftp_request(url, "STAT", us, purl, flags));
855 		}
856 
857 		/* connect to server or proxy */
858 		if ((conn = http_connect(url, purl, flags)) == NULL)
859 			goto ouch;
860 
861 		host = url->host;
862 #ifdef INET6
863 		if (strchr(url->host, ':')) {
864 			snprintf(hbuf, sizeof(hbuf), "[%s]", url->host);
865 			host = hbuf;
866 		}
867 #endif
868 		if (url->port != fetch_default_port(url->scheme)) {
869 			if (host != hbuf) {
870 				strcpy(hbuf, host);
871 				host = hbuf;
872 			}
873 			snprintf(hbuf + strlen(hbuf),
874 			    sizeof(hbuf) - strlen(hbuf), ":%d", url->port);
875 		}
876 
877 		/* send request */
878 		if (verbose)
879 			fetch_info("requesting %s://%s%s",
880 			    url->scheme, host, url->doc);
881 		if (purl) {
882 			http_cmd(conn, "%s %s://%s%s HTTP/1.1",
883 			    op, url->scheme, host, url->doc);
884 		} else {
885 			http_cmd(conn, "%s %s HTTP/1.1",
886 			    op, url->doc);
887 		}
888 
889 		if (ims && url->ims_time) {
890 			timestruct = gmtime((time_t *)&url->ims_time);
891 			(void)strftime(timebuf, 80, "%a, %d %b %Y %T GMT",
892 			    timestruct);
893 			if (verbose)
894 				fetch_info("If-Modified-Since: %s", timebuf);
895 			http_cmd(conn, "If-Modified-Since: %s", timebuf);
896 		}
897 		/* virtual host */
898 		http_cmd(conn, "Host: %s", host);
899 
900 		/* proxy authorization */
901 		if (purl) {
902 			if (*purl->user || *purl->pwd)
903 				http_basic_auth(conn, "Proxy-Authorization",
904 				    purl->user, purl->pwd);
905 			else if ((p = getenv("HTTP_PROXY_AUTH")) != NULL && *p != '\0')
906 				http_authorize(conn, "Proxy-Authorization", p);
907 		}
908 
909 		/* server authorization */
910 		if (need_auth || *url->user || *url->pwd) {
911 			if (*url->user || *url->pwd)
912 				http_basic_auth(conn, "Authorization", url->user, url->pwd);
913 			else if ((p = getenv("HTTP_AUTH")) != NULL && *p != '\0')
914 				http_authorize(conn, "Authorization", p);
915 			else if (fetchAuthMethod && fetchAuthMethod(url) == 0) {
916 				http_basic_auth(conn, "Authorization", url->user, url->pwd);
917 			} else {
918 				http_seterr(HTTP_NEED_AUTH);
919 				goto ouch;
920 			}
921 		}
922 
923 		/* other headers */
924 		if ((p = getenv("HTTP_REFERER")) != NULL && *p != '\0') {
925 			if (strcasecmp(p, "auto") == 0)
926 				http_cmd(conn, "Referer: %s://%s%s",
927 				    url->scheme, host, url->doc);
928 			else
929 				http_cmd(conn, "Referer: %s", p);
930 		}
931 		if ((p = getenv("HTTP_USER_AGENT")) != NULL && *p != '\0')
932 			http_cmd(conn, "User-Agent: %s", p);
933 		else
934 			http_cmd(conn, "User-Agent: %s " _LIBFETCH_VER, getprogname());
935 		if (url->offset > 0)
936 			http_cmd(conn, "Range: bytes=%lld-", (long long)url->offset);
937 		http_cmd(conn, "Connection: close");
938 		http_cmd(conn, "%s", "");
939 
940 		/*
941 		 * Force the queued request to be dispatched.  Normally, one
942 		 * would do this with shutdown(2) but squid proxies can be
943 		 * configured to disallow such half-closed connections.  To
944 		 * be compatible with such configurations, fiddle with socket
945 		 * options to force the pending data to be written.
946 		 */
947 		val = 0;
948 		setsockopt(conn->sd, IPPROTO_TCP, TCP_NOPUSH, &val,
949 			   sizeof(val));
950 		val = 1;
951 		setsockopt(conn->sd, IPPROTO_TCP, TCP_NODELAY, &val,
952 			   sizeof(val));
953 
954 		/* get reply */
955 		switch (http_get_reply(conn)) {
956 		case HTTP_OK:
957 		case HTTP_PARTIAL:
958 		case HTTP_NOT_MODIFIED:
959 			/* fine */
960 			break;
961 		case HTTP_MOVED_PERM:
962 		case HTTP_MOVED_TEMP:
963 		case HTTP_SEE_OTHER:
964 			/*
965 			 * Not so fine, but we still have to read the
966 			 * headers to get the new location.
967 			 */
968 			break;
969 		case HTTP_NEED_AUTH:
970 			if (need_auth) {
971 				/*
972 				 * We already sent out authorization code,
973 				 * so there's nothing more we can do.
974 				 */
975 				http_seterr(conn->err);
976 				goto ouch;
977 			}
978 			/* try again, but send the password this time */
979 			if (verbose)
980 				fetch_info("server requires authorization");
981 			break;
982 		case HTTP_NEED_PROXY_AUTH:
983 			/*
984 			 * If we're talking to a proxy, we already sent
985 			 * our proxy authorization code, so there's
986 			 * nothing more we can do.
987 			 */
988 			http_seterr(conn->err);
989 			goto ouch;
990 		case HTTP_BAD_RANGE:
991 			/*
992 			 * This can happen if we ask for 0 bytes because
993 			 * we already have the whole file.  Consider this
994 			 * a success for now, and check sizes later.
995 			 */
996 			break;
997 		case HTTP_PROTOCOL_ERROR:
998 			/* fall through */
999 		case -1:
1000 			fetch_syserr();
1001 			goto ouch;
1002 		default:
1003 			http_seterr(conn->err);
1004 			if (!verbose)
1005 				goto ouch;
1006 			/* fall through so we can get the full error message */
1007 		}
1008 
1009 		/* get headers */
1010 		do {
1011 			switch ((h = http_next_header(conn, &p))) {
1012 			case hdr_syserror:
1013 				fetch_syserr();
1014 				goto ouch;
1015 			case hdr_error:
1016 				http_seterr(HTTP_PROTOCOL_ERROR);
1017 				goto ouch;
1018 			case hdr_content_length:
1019 				http_parse_length(p, &clength);
1020 				break;
1021 			case hdr_content_range:
1022 				http_parse_range(p, &offset, &length, &size);
1023 				break;
1024 			case hdr_last_modified:
1025 				http_parse_mtime(p, &mtime);
1026 				break;
1027 			case hdr_location:
1028 				if (!HTTP_REDIRECT(conn->err))
1029 					break;
1030 				if (new)
1031 					free(new);
1032 				if (verbose)
1033 					fetch_info("%d redirect to %s", conn->err, p);
1034 				if (*p == '/')
1035 					/* absolute path */
1036 					new = fetchMakeURL(url->scheme, url->host, url->port, p,
1037 					    url->user, url->pwd);
1038 				else
1039 					new = fetchParseURL(p);
1040 				if (new == NULL) {
1041 					/* XXX should set an error code */
1042 					DEBUG(fprintf(stderr, "failed to parse new URL\n"));
1043 					goto ouch;
1044 				}
1045 				if (!*new->user && !*new->pwd) {
1046 					strcpy(new->user, url->user);
1047 					strcpy(new->pwd, url->pwd);
1048 				}
1049 				new->offset = url->offset;
1050 				new->length = url->length;
1051 				break;
1052 			case hdr_transfer_encoding:
1053 				/* XXX weak test*/
1054 				chunked = (strcasecmp(p, "chunked") == 0);
1055 				break;
1056 			case hdr_www_authenticate:
1057 				if (conn->err != HTTP_NEED_AUTH)
1058 					break;
1059 				/* if we were smarter, we'd check the method and realm */
1060 				break;
1061 			case hdr_end:
1062 				/* fall through */
1063 			case hdr_unknown:
1064 				/* ignore */
1065 				break;
1066 			}
1067 		} while (h > hdr_end);
1068 
1069 		/* we need to provide authentication */
1070 		if (conn->err == HTTP_NEED_AUTH) {
1071 			e = conn->err;
1072 			need_auth = 1;
1073 			fetch_close(conn);
1074 			conn = NULL;
1075 			continue;
1076 		}
1077 
1078 		/* requested range not satisfiable */
1079 		if (conn->err == HTTP_BAD_RANGE) {
1080 			if (url->offset == size && url->length == 0) {
1081 				/* asked for 0 bytes; fake it */
1082 				offset = url->offset;
1083 				clength = -1;
1084 				conn->err = HTTP_OK;
1085 				break;
1086 			} else {
1087 				http_seterr(conn->err);
1088 				goto ouch;
1089 			}
1090 		}
1091 
1092 		/* we have a hit or an error */
1093 		if (conn->err == HTTP_OK
1094 		    || conn->err == HTTP_NOT_MODIFIED
1095 		    || conn->err == HTTP_PARTIAL
1096 		    || HTTP_ERROR(conn->err))
1097 			break;
1098 
1099 		/* all other cases: we got a redirect */
1100 		e = conn->err;
1101 		need_auth = 0;
1102 		fetch_close(conn);
1103 		conn = NULL;
1104 		if (!new) {
1105 			DEBUG(fprintf(stderr, "redirect with no new location\n"));
1106 			break;
1107 		}
1108 		if (url != URL)
1109 			fetchFreeURL(url);
1110 		url = new;
1111 	} while (++i < n);
1112 
1113 	/* we failed, or ran out of retries */
1114 	if (conn == NULL) {
1115 		http_seterr(e);
1116 		goto ouch;
1117 	}
1118 
1119 	DEBUG(fprintf(stderr, "offset %lld, length %lld,"
1120 		  " size %lld, clength %lld\n",
1121 		  (long long)offset, (long long)length,
1122 		  (long long)size, (long long)clength));
1123 
1124 	if (conn->err == HTTP_NOT_MODIFIED) {
1125 		http_seterr(HTTP_NOT_MODIFIED);
1126 		return (NULL);
1127 	}
1128 
1129 	/* check for inconsistencies */
1130 	if (clength != -1 && length != -1 && clength != length) {
1131 		http_seterr(HTTP_PROTOCOL_ERROR);
1132 		goto ouch;
1133 	}
1134 	if (clength == -1)
1135 		clength = length;
1136 	if (clength != -1)
1137 		length = offset + clength;
1138 	if (length != -1 && size != -1 && length != size) {
1139 		http_seterr(HTTP_PROTOCOL_ERROR);
1140 		goto ouch;
1141 	}
1142 	if (size == -1)
1143 		size = length;
1144 
1145 	/* fill in stats */
1146 	if (us) {
1147 		us->size = size;
1148 		us->atime = us->mtime = mtime;
1149 	}
1150 
1151 	/* too far? */
1152 	if (URL->offset > 0 && offset > URL->offset) {
1153 		http_seterr(HTTP_PROTOCOL_ERROR);
1154 		goto ouch;
1155 	}
1156 
1157 	/* report back real offset and size */
1158 	URL->offset = offset;
1159 	URL->length = clength;
1160 
1161 	/* wrap it up in a FILE */
1162 	if ((f = http_funopen(conn, chunked)) == NULL) {
1163 		fetch_syserr();
1164 		goto ouch;
1165 	}
1166 
1167 	if (url != URL)
1168 		fetchFreeURL(url);
1169 	if (purl)
1170 		fetchFreeURL(purl);
1171 
1172 	if (HTTP_ERROR(conn->err)) {
1173 		http_print_html(stderr, f);
1174 		fclose(f);
1175 		f = NULL;
1176 	}
1177 
1178 	return (f);
1179 
1180 ouch:
1181 	if (url != URL)
1182 		fetchFreeURL(url);
1183 	if (purl)
1184 		fetchFreeURL(purl);
1185 	if (conn != NULL)
1186 		fetch_close(conn);
1187 	return (NULL);
1188 }
1189 
1190 
1191 /*****************************************************************************
1192  * Entry points
1193  */
1194 
/*
 * Retrieve a file by HTTP, filling in *us (if not NULL) with its
 * metadata.  Returns the stream produced by http_request().
 */
FILE *
fetchXGetHTTP(struct url *URL, struct url_stat *us, const char *flags)
{
	struct url *purl;

	purl = http_get_proxy(URL, flags);
	return (http_request(URL, "GET", us, purl, flags));
}
1203 
/*
 * Retrieve a file by HTTP without collecting metadata; this is
 * fetchXGetHTTP() with a NULL stat argument.
 */
FILE *
fetchGetHTTP(struct url *URL, const char *flags)
{
	FILE *f;

	f = fetchXGetHTTP(URL, NULL, flags);
	return (f);
}
1212 
1213 /*
1214  * Store a file by HTTP
1215  */
1216 FILE *
1217 fetchPutHTTP(struct url *URL __unused, const char *flags __unused)
1218 {
1219 	warnx("fetchPutHTTP(): not implemented");
1220 	return (NULL);
1221 }
1222 
/*
 * Get an HTTP document's metadata by issuing a HEAD request.  The
 * stream returned by http_request() is closed straight away.  Returns
 * 0 on success and -1 if the request fails.
 */
int
fetchStatHTTP(struct url *URL, struct url_stat *us, const char *flags)
{
	struct url *purl;
	FILE *f;

	purl = http_get_proxy(URL, flags);
	f = http_request(URL, "HEAD", us, purl, flags);
	if (f != NULL) {
		fclose(f);
		return (0);
	}
	return (-1);
}
1237 
1238 /*
1239  * List a directory
1240  */
1241 struct url_ent *
1242 fetchListHTTP(struct url *url __unused, const char *flags __unused)
1243 {
1244 	warnx("fetchListHTTP(): not implemented");
1245 	return (NULL);
1246 }
1247