xref: /netbsd/external/bsd/fetch/dist/libfetch/http.c (revision 75975423)
1 /*	$NetBSD: http.c,v 1.4 2020/06/01 00:55:24 kamil Exp $	*/
2 /*-
3  * Copyright (c) 2000-2004 Dag-Erling Coïdan Smørgrav
4  * Copyright (c) 2003 Thomas Klausner <wiz@NetBSD.org>
5  * Copyright (c) 2008, 2009 Joerg Sonnenberger <joerg@NetBSD.org>
6  * All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer
13  *    in this position and unchanged.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  * 3. The name of the author may not be used to endorse or promote products
18  *    derived from this software without specific prior written permission.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
21  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
22  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
23  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
24  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
25  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
29  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30  *
31  * $FreeBSD: http.c,v 1.83 2008/02/06 11:39:55 des Exp $
32  */
33 
34 /*
35  * The following copyright applies to the base64 code:
36  *
37  *-
38  * Copyright 1997 Massachusetts Institute of Technology
39  *
40  * Permission to use, copy, modify, and distribute this software and
41  * its documentation for any purpose and without fee is hereby
42  * granted, provided that both the above copyright notice and this
43  * permission notice appear in all copies, that both the above
44  * copyright notice and this permission notice appear in all
45  * supporting documentation, and that the name of M.I.T. not be used
46  * in advertising or publicity pertaining to distribution of the
47  * software without specific, written prior permission.  M.I.T. makes
48  * no representations about the suitability of this software for any
49  * purpose.  It is provided "as is" without express or implied
50  * warranty.
51  *
52  * THIS SOFTWARE IS PROVIDED BY M.I.T. ``AS IS''.  M.I.T. DISCLAIMS
53  * ALL EXPRESS OR IMPLIED WARRANTIES WITH REGARD TO THIS SOFTWARE,
54  * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
55  * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. IN NO EVENT
56  * SHALL M.I.T. BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
57  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
58  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
59  * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
60  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
61  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
62  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
63  * SUCH DAMAGE.
64  */
65 
66 #if defined(__linux__) || defined(__MINT__)
67 /* Keep this down to Linux or MiNT, it can create surprises elsewhere. */
68 #define _GNU_SOURCE
69 #endif
70 
71 #ifndef _REENTRANT
72 /* Needed for gmtime_r on Interix */
73 #define _REENTRANT
74 #endif
75 
76 #if HAVE_CONFIG_H
77 #include "config.h"
78 #endif
79 #ifndef NETBSD
80 #include <nbcompat.h>
81 #endif
82 
83 #include <sys/types.h>
84 #include <sys/socket.h>
85 
86 #include <ctype.h>
87 #include <errno.h>
88 #include <locale.h>
89 #include <stdarg.h>
90 #ifndef NETBSD
91 #include <nbcompat/stdio.h>
92 #else
93 #include <stdio.h>
94 #endif
95 #include <stdlib.h>
96 #include <string.h>
97 #include <time.h>
98 #include <unistd.h>
99 
100 #include <netinet/in.h>
101 #include <netinet/tcp.h>
102 
103 #ifndef NETBSD
104 #include <nbcompat/netdb.h>
105 #else
106 #include <netdb.h>
107 #endif
108 
109 #include <arpa/inet.h>
110 
111 #include "fetch.h"
112 #include "common.h"
113 #include "httperr.h"
114 
/* Maximum number of redirects to follow before giving up */
#define MAX_REDIRECT 5

/* Symbolic names for the HTTP reply codes we care about */
#define HTTP_OK			200
#define HTTP_PARTIAL		206
#define HTTP_MOVED_PERM		301
#define HTTP_MOVED_TEMP		302
#define HTTP_SEE_OTHER		303
#define HTTP_NOT_MODIFIED	304
#define HTTP_TEMP_REDIRECT	307
#define HTTP_NEED_AUTH		401
#define HTTP_NEED_PROXY_AUTH	407
#define HTTP_BAD_RANGE		416
/*
 * Not a real HTTP status: http_get_reply() returns this value when the
 * status line cannot be parsed.
 */
#define HTTP_PROTOCOL_ERROR	999

/* True for the reply codes that are handled as redirects (301/302/303/307) */
#define HTTP_REDIRECT(xyz) ((xyz) == HTTP_MOVED_PERM \
			    || (xyz) == HTTP_MOVED_TEMP \
			    || (xyz) == HTTP_TEMP_REDIRECT \
			    || (xyz) == HTTP_SEE_OTHER)

/*
 * True for reply codes strictly between 400 and 599.
 * NOTE(review): both bounds are exclusive, so 400 and 599 themselves are
 * not classified as errors by this macro -- confirm that is intended.
 */
#define HTTP_ERROR(xyz) ((xyz) > 400 && (xyz) < 599)
137 
138 
139 /*****************************************************************************
140  * I/O functions for decoding chunked streams
141  */
142 
/*
 * Per-stream state for the fetchIO callbacks below (http_readfn,
 * http_writefn, http_closefn).  Allocated zero-filled by http_funopen()
 * and released by http_closefn().
 */
struct httpio
{
	conn_t		*conn;		/* underlying connection */
	int		 chunked;	/* non-zero: body uses chunked encoding */
	int		 keep_alive;	/* non-zero: hand conn to the cache on close */
	char		*buf;		/* chunk buffer, grown by http_growbuf() */
	size_t		 bufsize;	/* allocated size of chunk buffer */
	ssize_t		 buflen;	/* amount of data currently in buffer */
	size_t		 bufpos;	/* current read offset in buffer */
	int		 eof;		/* end-of-file flag */
	int		 error;		/* sticky error flag; reads fail once set */
	size_t		 chunksize;	/* remaining size of current chunk */
	off_t		 contentlength;	/* remaining size of the content;
					   only honoured while >= 0 */
};
157 
158 /*
159  * Get next chunk header
160  */
161 static ssize_t
http_new_chunk(struct httpio * io)162 http_new_chunk(struct httpio *io)
163 {
164 	char *p;
165 
166 	if (fetch_getln(io->conn) == -1)
167 		return (-1);
168 
169 	if (io->conn->buflen < 2 || !isxdigit((unsigned char)*io->conn->buf))
170 		return (-1);
171 
172 	for (p = io->conn->buf; *p && !isspace((unsigned char)*p); ++p) {
173 		if (*p == ';')
174 			break;
175 		if (!isxdigit((unsigned char)*p))
176 			return (-1);
177 		if (isdigit((unsigned char)*p)) {
178 			io->chunksize = io->chunksize * 16 +
179 			    *p - '0';
180 		} else {
181 			io->chunksize = io->chunksize * 16 +
182 			    10 + tolower((unsigned char)*p) - 'a';
183 		}
184 	}
185 
186 	return (io->chunksize);
187 }
188 
189 /*
190  * Grow the input buffer to at least len bytes
191  */
192 static int
http_growbuf(struct httpio * io,size_t len)193 http_growbuf(struct httpio *io, size_t len)
194 {
195 	char *tmp;
196 
197 	if (io->bufsize >= len)
198 		return (0);
199 
200 	if ((tmp = realloc(io->buf, len)) == NULL)
201 		return (-1);
202 	io->buf = tmp;
203 	io->bufsize = len;
204 	return (0);
205 }
206 
207 /*
208  * Fill the input buffer, do chunk decoding on the fly
209  */
210 static ssize_t
http_fillbuf(struct httpio * io,size_t len)211 http_fillbuf(struct httpio *io, size_t len)
212 {
213 	if (io->error)
214 		return (-1);
215 	if (io->eof)
216 		return (0);
217 
218 	if (io->contentlength >= 0 && (off_t)len > io->contentlength)
219 		len = io->contentlength;
220 
221 	if (io->chunked == 0) {
222 		if (http_growbuf(io, len) == -1)
223 			return (-1);
224 		if ((io->buflen = fetch_read(io->conn, io->buf, len)) == -1) {
225 			io->error = 1;
226 			return (-1);
227 		}
228 		if (io->contentlength)
229 			io->contentlength -= io->buflen;
230 		io->bufpos = 0;
231 		return (io->buflen);
232 	}
233 
234 	if (io->chunksize == 0) {
235 		switch (http_new_chunk(io)) {
236 		case -1:
237 			io->error = 1;
238 			return (-1);
239 		case 0:
240 			io->eof = 1;
241 			if (fetch_getln(io->conn) == -1)
242 				return (-1);
243 			return (0);
244 		}
245 	}
246 
247 	if (len > io->chunksize)
248 		len = io->chunksize;
249 	if (http_growbuf(io, len) == -1)
250 		return (-1);
251 	if ((io->buflen = fetch_read(io->conn, io->buf, len)) == -1) {
252 		io->error = 1;
253 		return (-1);
254 	}
255 	io->chunksize -= io->buflen;
256 	if (io->contentlength >= 0)
257 		io->contentlength -= io->buflen;
258 
259 	if (io->chunksize == 0) {
260 		char endl[2];
261 		ssize_t len2;
262 
263 		len2 = fetch_read(io->conn, endl, 2);
264 		if (len2 == 1 && fetch_read(io->conn, endl + 1, 1) != 1)
265 			return (-1);
266 		if (len2 == -1 || endl[0] != '\r' || endl[1] != '\n')
267 			return (-1);
268 	}
269 
270 	io->bufpos = 0;
271 
272 	return (io->buflen);
273 }
274 
275 /*
276  * Read function
277  */
278 static ssize_t
http_readfn(void * v,void * buf,size_t len)279 http_readfn(void *v, void *buf, size_t len)
280 {
281 	struct httpio *io = (struct httpio *)v;
282 	size_t l, pos;
283 
284 	if (io->error)
285 		return (-1);
286 	if (io->eof)
287 		return (0);
288 
289 	for (pos = 0; len > 0; pos += l, len -= l) {
290 		/* empty buffer */
291 		if (!io->buf || (ssize_t)io->bufpos == io->buflen)
292 			if (http_fillbuf(io, len) < 1)
293 				break;
294 		l = io->buflen - io->bufpos;
295 		if (len < l)
296 			l = len;
297 		memcpy((char *)buf + pos, io->buf + io->bufpos, l);
298 		io->bufpos += l;
299 	}
300 
301 	if (!pos && io->error)
302 		return (-1);
303 	return (pos);
304 }
305 
306 /*
307  * Write function
308  */
309 static ssize_t
http_writefn(void * v,const void * buf,size_t len)310 http_writefn(void *v, const void *buf, size_t len)
311 {
312 	struct httpio *io = (struct httpio *)v;
313 
314 	return (fetch_write(io->conn, buf, len));
315 }
316 
317 /*
318  * Close function
319  */
320 static void
http_closefn(void * v)321 http_closefn(void *v)
322 {
323 	struct httpio *io = (struct httpio *)v;
324 
325 	if (io->keep_alive) {
326 		int val;
327 
328 		val = 0;
329 		setsockopt(io->conn->sd, IPPROTO_TCP, TCP_NODELAY, &val,
330 			   (socklen_t)sizeof(val));
331 			  fetch_cache_put(io->conn, fetch_close);
332 #ifdef TCP_NOPUSH
333 		val = 1;
334 		setsockopt(io->conn->sd, IPPROTO_TCP, TCP_NOPUSH, &val,
335 		    sizeof(val));
336 #endif
337 	} else {
338 		fetch_close(io->conn);
339 	}
340 
341 	free(io->buf);
342 	free(io);
343 }
344 
345 /*
346  * Wrap a file descriptor up
347  */
348 static fetchIO *
http_funopen(conn_t * conn,int chunked,int keep_alive,off_t clength)349 http_funopen(conn_t *conn, int chunked, int keep_alive, off_t clength)
350 {
351 	struct httpio *io;
352 	fetchIO *f;
353 
354 	if ((io = calloc(1, sizeof(*io))) == NULL) {
355 		fetch_syserr();
356 		return (NULL);
357 	}
358 	io->conn = conn;
359 	io->chunked = chunked;
360 	io->contentlength = clength;
361 	io->keep_alive = keep_alive;
362 	f = fetchIO_unopen(io, http_readfn, http_writefn, http_closefn);
363 	if (f == NULL) {
364 		fetch_syserr();
365 		free(io);
366 		return (NULL);
367 	}
368 	return (f);
369 }
370 
371 
372 /*****************************************************************************
373  * Helper functions for talking to the server and parsing its replies
374  */
375 
/*
 * Header types returned by http_next_header().
 *
 * Values at or below hdr_end terminate the header loop in
 * http_request(), which keeps reading while the result is > hdr_end.
 */
typedef enum {
	hdr_syserror = -2,	/* the header line could not be read */
	hdr_error = -1,		/* treated as a protocol error by the caller */
	hdr_end = 0,		/* empty line: end of the header section */
	hdr_unknown = 1,	/* a header we have no interest in */
	hdr_connection,
	hdr_content_length,
	hdr_content_range,
	hdr_last_modified,
	hdr_location,
	hdr_transfer_encoding,
	hdr_www_authenticate
} hdr_t;
390 
/*
 * Names of interesting headers, matched case-insensitively by
 * http_next_header().  The table is terminated by the hdr_unknown
 * entry, whose name is NULL.
 */
static struct {
	hdr_t		 num;
	const char	*name;
} hdr_names[] = {
	{ hdr_connection,		"Connection" },
	{ hdr_content_length,		"Content-Length" },
	{ hdr_content_range,		"Content-Range" },
	{ hdr_last_modified,		"Last-Modified" },
	{ hdr_location,			"Location" },
	{ hdr_transfer_encoding,	"Transfer-Encoding" },
	{ hdr_www_authenticate,		"WWW-Authenticate" },
	{ hdr_unknown,			NULL },	/* sentinel */
};
405 
406 /*
407  * Send a formatted line; optionally echo to terminal
408  */
409 __printflike(2, 3)
410 static int
http_cmd(conn_t * conn,const char * fmt,...)411 http_cmd(conn_t *conn, const char *fmt, ...)
412 {
413 	va_list ap;
414 	size_t len;
415 	char *msg;
416 	ssize_t r;
417 
418 	va_start(ap, fmt);
419 	len = vasprintf(&msg, fmt, ap);
420 	va_end(ap);
421 
422 	if (msg == NULL) {
423 		errno = ENOMEM;
424 		fetch_syserr();
425 		return (-1);
426 	}
427 
428 	r = fetch_write(conn, msg, len);
429 	free(msg);
430 
431 	if (r == -1) {
432 		fetch_syserr();
433 		return (-1);
434 	}
435 
436 	return (0);
437 }
438 
439 /*
440  * Get and parse status line
441  */
442 static int
http_get_reply(conn_t * conn)443 http_get_reply(conn_t *conn)
444 {
445 	char *p;
446 
447 	if (fetch_getln(conn) == -1)
448 		return (-1);
449 	/*
450 	 * A valid status line looks like "HTTP/m.n xyz reason" where m
451 	 * and n are the major and minor protocol version numbers and xyz
452 	 * is the reply code.
453 	 * Unfortunately, there are servers out there (NCSA 1.5.1, to name
454 	 * just one) that do not send a version number, so we can't rely
455 	 * on finding one, but if we do, insist on it being 1.0 or 1.1.
456 	 * We don't care about the reason phrase.
457 	 */
458 	if (strncmp(conn->buf, "HTTP", 4) != 0)
459 		return (HTTP_PROTOCOL_ERROR);
460 	p = conn->buf + 4;
461 	if (*p == '/') {
462 		if (p[1] != '1' || p[2] != '.' || (p[3] != '0' && p[3] != '1'))
463 			return (HTTP_PROTOCOL_ERROR);
464 		p += 4;
465 	}
466 	if (*p != ' ' ||
467 	    !isdigit((unsigned char)p[1]) ||
468 	    !isdigit((unsigned char)p[2]) ||
469 	    !isdigit((unsigned char)p[3]))
470 		return (HTTP_PROTOCOL_ERROR);
471 
472 	conn->err = (p[1] - '0') * 100 + (p[2] - '0') * 10 + (p[3] - '0');
473 	return (conn->err);
474 }
475 
/*
 * Check a header line against a header name.
 *
 * str is the header name to look for and hdr is the received line.
 * The name is compared case-insensitively and must be followed
 * immediately by a colon.  On a match, returns a pointer to the first
 * non-whitespace character after the colon (possibly the terminating
 * NUL); otherwise returns NULL.
 */
static const char *
http_match(const char *str, const char *hdr)
{
	/*
	 * Advance only past characters that really matched; stepping
	 * over a mismatch would let "ConnectioX:" pass for "Connection".
	 */
	while (*str && *hdr &&
	    tolower((unsigned char)*str) == tolower((unsigned char)*hdr)) {
		++str;
		++hdr;
	}
	if (*str || *hdr != ':')
		return (NULL);
	/* skip the colon and any whitespace in front of the value */
	do {
		++hdr;
	} while (isspace((unsigned char)*hdr));
	return (hdr);
}
492 
493 /*
494  * Get the next header and return the appropriate symbolic code.
495  */
496 static hdr_t
http_next_header(conn_t * conn,const char ** p)497 http_next_header(conn_t *conn, const char **p)
498 {
499 	int i;
500 
501 	if (fetch_getln(conn) == -1)
502 		return (hdr_syserror);
503 	while (conn->buflen && isspace((unsigned char)conn->buf[conn->buflen - 1]))
504 		conn->buflen--;
505 	conn->buf[conn->buflen] = '\0';
506 	if (conn->buflen == 0)
507 		return (hdr_end);
508 	/*
509 	 * We could check for malformed headers but we don't really care.
510 	 * A valid header starts with a token immediately followed by a
511 	 * colon; a token is any sequence of non-control, non-whitespace
512 	 * characters except "()<>@,;:\\\"{}".
513 	 */
514 	for (i = 0; hdr_names[i].num != hdr_unknown; i++)
515 		if ((*p = http_match(hdr_names[i].name, conn->buf)) != NULL)
516 			return (hdr_names[i].num);
517 	return (hdr_unknown);
518 }
519 
/*
 * Parse a Last-Modified header value.
 *
 * Only the fixed-length form "Sun, 06 Nov 1994 08:49:37 GMT" is
 * understood (XXX the two obsolete HTTP date forms are not supported).
 * Day and month names are English, so LC_TIME is switched to "C" for
 * the duration of the parse and restored afterwards.
 *
 * Returns 0 and stores the timestamp in *mtime on success, -1 if the
 * value does not parse; *mtime is untouched in that case.
 */
static int
http_parse_mtime(const char *p, time_t *mtime)
{
	const char *cur;
	char *saved, *r;
	struct tm tm;

	/*
	 * Keep a private copy of the locale name: the string returned
	 * by setlocale() may be overwritten by the next call.  If it
	 * cannot be saved, the locale is left alone rather than changed
	 * without a way back.
	 */
	cur = setlocale(LC_TIME, NULL);
	saved = (cur != NULL) ? strdup(cur) : NULL;
	if (saved != NULL)
		setlocale(LC_TIME, "C");

	/* strptime() only fills in the fields it parses */
	memset(&tm, 0, sizeof(tm));
	r = strptime(p, "%a, %d %b %Y %H:%M:%S GMT", &tm);

	if (saved != NULL) {
		setlocale(LC_TIME, saved);
		free(saved);
	}
	if (r == NULL)
		return (-1);
	*mtime = timegm(&tm);
	return (0);
}
539 
/*
 * Parse a Content-Length header value.
 *
 * The value must consist of decimal digits only (an empty value is
 * accepted as 0).  Returns 0 and stores the result in *length, or -1 if
 * the value contains any other character or does not fit in an off_t;
 * *length is untouched on failure.
 */
static int
http_parse_length(const char *p, off_t *length)
{
	/*
	 * Largest value an off_t can hold.  There is no standard
	 * OFF_MAX, so build it from the type's width without ever
	 * overflowing (assumes 8-bit bytes and no padding bits).
	 */
	const off_t off_max =
	    ((((off_t)1 << (sizeof(off_t) * 8 - 2)) - 1) * 2) + 1;
	off_t len;
	int digit;

	for (len = 0; isdigit((unsigned char)*p); ++p) {
		digit = *p - '0';
		/* the value comes from the server: refuse to overflow */
		if (len > (off_max - digit) / 10)
			return (-1);
		len = len * 10 + digit;
	}
	if (*p)
		return (-1);
	*length = len;
	return (0);
}
555 
/*
 * Parse a run of decimal digits at *pp into *out and advance *pp past
 * them.  An empty run yields 0.  Returns -1, leaving *pp and *out
 * untouched, if the value does not fit in an off_t.
 */
static int
http_range_number(const char **pp, off_t *out)
{
	/* largest off_t, built without overflow (no standard OFF_MAX) */
	const off_t off_max =
	    ((((off_t)1 << (sizeof(off_t) * 8 - 2)) - 1) * 2) + 1;
	const char *p;
	off_t val;
	int digit;

	for (p = *pp, val = 0; isdigit((unsigned char)*p); ++p) {
		digit = *p - '0';
		if (val > (off_max - digit) / 10)
			return (-1);
		val = val * 10 + digit;
	}
	*pp = p;
	*out = val;
	return (0);
}

/*
 * Parse a Content-Range header value.
 *
 * Accepted forms are "bytes first-last/size" and "bytes * /size"
 * (written without the space).  On success returns 0 and stores:
 *   *offset  first byte position, or -1 for the "*" form
 *   *length  last - first + 1, or 0 for the "*" form
 *   *size    the total size after the slash
 * Returns -1, leaving the outputs untouched, if the value is malformed,
 * if first > last, if the range is longer than the total size, or if
 * any number does not fit in an off_t.  An unknown total size ("*"
 * after the slash) is not supported.
 */
static int
http_parse_range(const char *p, off_t *offset, off_t *length, off_t *size)
{
	off_t first, last, len;

	if (strncasecmp(p, "bytes ", 6) != 0)
		return (-1);
	p += 6;
	if (*p == '*') {
		first = last = -1;
		++p;
	} else {
		if (http_range_number(&p, &first) == -1 || *p != '-')
			return (-1);
		++p;
		if (http_range_number(&p, &last) == -1)
			return (-1);
	}
	if (first > last || *p != '/')
		return (-1);
	++p;
	if (http_range_number(&p, &len) == -1 || *p != '\0')
		return (-1);

	if (first == -1) {
		/* no range was given, so any total size (even 0) is fine */
		*length = 0;
	} else {
		/*
		 * Reject ranges longer than the total size.  This is
		 * "last - first + 1 > len" phrased so that it cannot
		 * overflow when last is the largest off_t.
		 */
		if (last - first >= len)
			return (-1);
		*length = last - first + 1;
	}
	*offset = first;
	*size = len;
	return (0);
}
592 
593 
594 /*****************************************************************************
595  * Helper functions for authorization
596  */
597 
/*
 * Base64 encoding
 *
 * Encodes the NUL-terminated string src using the standard alphabet
 * with '=' padding.  Returns a newly allocated, NUL-terminated string
 * that the caller must free(), or NULL if the allocation fails.
 */
static char *
http_base64(const char *src)
{
	static const char base64[] =
	    "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
	    "abcdefghijklmnopqrstuvwxyz"
	    "0123456789+/";
	/*
	 * Read the input as unsigned bytes: with a signed plain char,
	 * bytes >= 0x80 would sign-extend and corrupt the shifted
	 * 24-bit group.
	 */
	const unsigned char *in = (const unsigned char *)src;
	char *str, *dst;
	size_t l;
	unsigned int t;

	l = strlen(src);
	/* four output characters per (possibly partial) group of three */
	if ((str = malloc(((l + 2) / 3) * 4 + 1)) == NULL)
		return (NULL);
	dst = str;

	while (l >= 3) {
		t = ((unsigned int)in[0] << 16) |
		    ((unsigned int)in[1] << 8) | in[2];
		dst[0] = base64[(t >> 18) & 0x3f];
		dst[1] = base64[(t >> 12) & 0x3f];
		dst[2] = base64[(t >> 6) & 0x3f];
		dst[3] = base64[(t >> 0) & 0x3f];
		in += 3; l -= 3;
		dst += 4;
	}

	/* encode the one or two leftover bytes and pad with '=' */
	switch (l) {
	case 2:
		t = ((unsigned int)in[0] << 16) | ((unsigned int)in[1] << 8);
		dst[0] = base64[(t >> 18) & 0x3f];
		dst[1] = base64[(t >> 12) & 0x3f];
		dst[2] = base64[(t >> 6) & 0x3f];
		dst[3] = '=';
		dst += 4;
		break;
	case 1:
		t = (unsigned int)in[0] << 16;
		dst[0] = base64[(t >> 18) & 0x3f];
		dst[1] = base64[(t >> 12) & 0x3f];
		dst[2] = dst[3] = '=';
		dst += 4;
		break;
	default:
		break;
	}

	*dst = '\0';
	return (str);
}
653 
654 /*
655  * Encode username and password
656  */
657 static int
http_basic_auth(conn_t * conn,const char * hdr,const char * usr,const char * pwd)658 http_basic_auth(conn_t *conn, const char *hdr, const char *usr, const char *pwd)
659 {
660 	char *upw, *auth;
661 	int r;
662 
663 	if (asprintf(&upw, "%s:%s", usr, pwd) == -1)
664 		return (-1);
665 	auth = http_base64(upw);
666 	free(upw);
667 	if (auth == NULL)
668 		return (-1);
669 	r = http_cmd(conn, "%s: Basic %s\r\n", hdr, auth);
670 	free(auth);
671 	return (r);
672 }
673 
674 /*
675  * Send an authorization header
676  */
677 static int
http_authorize(conn_t * conn,const char * hdr,const char * p)678 http_authorize(conn_t *conn, const char *hdr, const char *p)
679 {
680 	/* basic authorization */
681 	if (strncasecmp(p, "basic:", 6) == 0) {
682 		char *user, *pwd, *str;
683 		int r;
684 
685 		/* skip realm */
686 		for (p += 6; *p && *p != ':'; ++p)
687 			/* nothing */ ;
688 		if (!*p || strchr(++p, ':') == NULL)
689 			return (-1);
690 		if ((str = strdup(p)) == NULL)
691 			return (-1); /* XXX */
692 		user = str;
693 		pwd = strchr(str, ':');
694 		*pwd++ = '\0';
695 		r = http_basic_auth(conn, hdr, user, pwd);
696 		free(str);
697 		return (r);
698 	}
699 	return (-1);
700 }
701 
702 
703 /*****************************************************************************
704  * Helper functions for connecting to a server or proxy
705  */
706 
707 /*
708  * Connect to the correct HTTP server or proxy.
709  */
710 static conn_t *
http_connect(struct url * URL,struct url * purl,const char * flags,int * cached)711 http_connect(struct url *URL, struct url *purl, const char *flags, int *cached)
712 {
713 	conn_t *conn;
714 	int af, verbose;
715 #ifdef TCP_NOPUSH
716 	int val;
717 #endif
718 
719 	*cached = 1;
720 
721 #ifdef INET6
722 	af = AF_UNSPEC;
723 #else
724 	af = AF_INET;
725 #endif
726 
727 	verbose = CHECK_FLAG('v');
728 	if (CHECK_FLAG('4'))
729 		af = AF_INET;
730 #ifdef INET6
731 	else if (CHECK_FLAG('6'))
732 		af = AF_INET6;
733 #endif
734 
735 	if (purl && strcasecmp(URL->scheme, SCHEME_HTTPS) != 0) {
736 		URL = purl;
737 	} else if (strcasecmp(URL->scheme, SCHEME_FTP) == 0) {
738 		/* can't talk http to an ftp server */
739 		/* XXX should set an error code */
740 		return (NULL);
741 	}
742 
743 	if ((conn = fetch_cache_get(URL, af)) != NULL) {
744 		*cached = 1;
745 		return (conn);
746 	}
747 
748 	if ((conn = fetch_connect(URL, af, verbose)) == NULL)
749 		/* fetch_connect() has already set an error code */
750 		return (NULL);
751 	if (strcasecmp(URL->scheme, SCHEME_HTTPS) == 0 &&
752 	    fetch_ssl(conn, verbose) == -1) {
753 		fetch_close(conn);
754 		/* grrr */
755 #ifdef EAUTH
756 		errno = EAUTH;
757 #else
758 		errno = EPERM;
759 #endif
760 		fetch_syserr();
761 		return (NULL);
762 	}
763 
764 #ifdef TCP_NOPUSH
765 	val = 1;
766 	setsockopt(conn->sd, IPPROTO_TCP, TCP_NOPUSH, &val, sizeof(val));
767 #endif
768 
769 	return (conn);
770 }
771 
772 static struct url *
http_get_proxy(struct url * url,const char * flags)773 http_get_proxy(struct url * url, const char *flags)
774 {
775 	struct url *purl;
776 	char *p;
777 
778 	if (flags != NULL && strchr(flags, 'd') != NULL)
779 		return (NULL);
780 	if (fetch_no_proxy_match(url->host))
781 		return (NULL);
782 	if (((p = getenv("HTTP_PROXY")) || (p = getenv("http_proxy"))) &&
783 	    *p && (purl = fetchParseURL(p))) {
784 		if (!*purl->scheme)
785 			strcpy(purl->scheme, SCHEME_HTTP);
786 		if (!purl->port)
787 			purl->port = fetch_default_proxy_port(purl->scheme);
788 		if (strcasecmp(purl->scheme, SCHEME_HTTP) == 0)
789 			return (purl);
790 		fetchFreeURL(purl);
791 	}
792 	return (NULL);
793 }
794 
795 static void
set_if_modified_since(conn_t * conn,time_t last_modified)796 set_if_modified_since(conn_t *conn, time_t last_modified)
797 {
798 	static const char weekdays[] = "SunMonTueWedThuFriSat";
799 	static const char months[] = "JanFebMarAprMayJunJulAugSepOctNovDec";
800 	struct tm tm;
801 	char buf[80];
802 	gmtime_r(&last_modified, &tm);
803 	snprintf(buf, sizeof(buf), "%.3s, %02d %.3s %4d %02d:%02d:%02d GMT",
804 	    weekdays + tm.tm_wday * 3, tm.tm_mday, months + tm.tm_mon * 3,
805 	    tm.tm_year + 1900, tm.tm_hour, tm.tm_min, tm.tm_sec);
806 	http_cmd(conn, "If-Modified-Since: %s\r\n", buf);
807 }
808 
809 
810 /*****************************************************************************
811  * Core
812  */
813 
814 /*
815  * Send a request and process the reply
816  *
817  * XXX This function is way too long, the do..while loop should be split
818  * XXX off into a separate function.
819  */
820 fetchIO *
http_request(struct url * URL,const char * op,struct url_stat * us,struct url * purl,const char * flags)821 http_request(struct url *URL, const char *op, struct url_stat *us,
822     struct url *purl, const char *flags)
823 {
824 	conn_t *conn;
825 	struct url *url, *new;
826 	int chunked, direct, if_modified_since, need_auth, noredirect;
827 	int keep_alive, verbose, cached;
828 	int e, i, n, val;
829 	off_t offset, clength, length, size;
830 	time_t mtime;
831 	const char *p;
832 	fetchIO *f;
833 	hdr_t h;
834 	char hbuf[URL_HOSTLEN + 7], *host;
835 
836 	direct = CHECK_FLAG('d');
837 	noredirect = CHECK_FLAG('A');
838 	verbose = CHECK_FLAG('v');
839 	if_modified_since = CHECK_FLAG('i');
840 	keep_alive = 0;
841 
842 	if (direct && purl) {
843 		fetchFreeURL(purl);
844 		purl = NULL;
845 	}
846 
847 	/* try the provided URL first */
848 	url = URL;
849 
850 	/* if the A flag is set, we only get one try */
851 	n = noredirect ? 1 : MAX_REDIRECT;
852 	i = 0;
853 
854 	e = HTTP_PROTOCOL_ERROR;
855 	need_auth = 0;
856 	do {
857 		new = NULL;
858 		chunked = 0;
859 		offset = 0;
860 		clength = -1;
861 		length = -1;
862 		size = -1;
863 		mtime = 0;
864 
865 		/* check port */
866 		if (!url->port)
867 			url->port = fetch_default_port(url->scheme);
868 
869 		/* were we redirected to an FTP URL? */
870 		if (purl == NULL && strcmp(url->scheme, SCHEME_FTP) == 0) {
871 			if (strcmp(op, "GET") == 0)
872 				return (ftp_request(url, "RETR", NULL, us, purl, flags));
873 			else if (strcmp(op, "HEAD") == 0)
874 				return (ftp_request(url, "STAT", NULL, us, purl, flags));
875 		}
876 
877 		/* connect to server or proxy */
878 		if ((conn = http_connect(url, purl, flags, &cached)) == NULL)
879 			goto ouch;
880 
881 		host = url->host;
882 #ifdef INET6
883 		if (strchr(url->host, ':')) {
884 			snprintf(hbuf, sizeof(hbuf), "[%s]", url->host);
885 			host = hbuf;
886 		}
887 #endif
888 		if (url->port != fetch_default_port(url->scheme)) {
889 			if (host != hbuf) {
890 				strcpy(hbuf, host);
891 				host = hbuf;
892 			}
893 			snprintf(hbuf + strlen(hbuf),
894 			    sizeof(hbuf) - strlen(hbuf), ":%d", url->port);
895 		}
896 
897 		/* send request */
898 		if (verbose)
899 			fetch_info("requesting %s://%s%s",
900 			    url->scheme, host, url->doc);
901 		if (purl) {
902 			http_cmd(conn, "%s %s://%s%s HTTP/1.1\r\n",
903 			    op, url->scheme, host, url->doc);
904 		} else {
905 			http_cmd(conn, "%s %s HTTP/1.1\r\n",
906 			    op, url->doc);
907 		}
908 
909 		if (if_modified_since && url->last_modified > 0)
910 			set_if_modified_since(conn, url->last_modified);
911 
912 		/* virtual host */
913 		http_cmd(conn, "Host: %s\r\n", host);
914 
915 		/* proxy authorization */
916 		if (purl) {
917 			if (*purl->user || *purl->pwd)
918 				http_basic_auth(conn, "Proxy-Authorization",
919 				    purl->user, purl->pwd);
920 			else if ((p = getenv("HTTP_PROXY_AUTH")) != NULL && *p != '\0')
921 				http_authorize(conn, "Proxy-Authorization", p);
922 		}
923 
924 		/* server authorization */
925 		if (need_auth || *url->user || *url->pwd) {
926 			if (*url->user || *url->pwd)
927 				http_basic_auth(conn, "Authorization", url->user, url->pwd);
928 			else if ((p = getenv("HTTP_AUTH")) != NULL && *p != '\0')
929 				http_authorize(conn, "Authorization", p);
930 			else if (fetchAuthMethod && fetchAuthMethod(url) == 0) {
931 				http_basic_auth(conn, "Authorization", url->user, url->pwd);
932 			} else {
933 				http_seterr(HTTP_NEED_AUTH);
934 				goto ouch;
935 			}
936 		}
937 
938 		/* other headers */
939 		if ((p = getenv("HTTP_REFERER")) != NULL && *p != '\0') {
940 			if (strcasecmp(p, "auto") == 0)
941 				http_cmd(conn, "Referer: %s://%s%s\r\n",
942 				    url->scheme, host, url->doc);
943 			else
944 				http_cmd(conn, "Referer: %s\r\n", p);
945 		}
946 		if ((p = getenv("HTTP_USER_AGENT")) != NULL && *p != '\0')
947 			http_cmd(conn, "User-Agent: %s\r\n", p);
948 		else
949 			http_cmd(conn, "User-Agent: %s\r\n", _LIBFETCH_VER);
950 		if (url->offset > 0)
951 			http_cmd(conn, "Range: bytes=%lld-\r\n", (long long)url->offset);
952 		http_cmd(conn, "\r\n");
953 
954 		/*
955 		 * Force the queued request to be dispatched.  Normally, one
956 		 * would do this with shutdown(2) but squid proxies can be
957 		 * configured to disallow such half-closed connections.  To
958 		 * be compatible with such configurations, fiddle with socket
959 		 * options to force the pending data to be written.
960 		 */
961 #ifdef TCP_NOPUSH
962 		val = 0;
963 		setsockopt(conn->sd, IPPROTO_TCP, TCP_NOPUSH, &val,
964 			   sizeof(val));
965 #endif
966 		val = 1;
967 		setsockopt(conn->sd, IPPROTO_TCP, TCP_NODELAY, &val,
968 		    (socklen_t)sizeof(val));
969 
970 		/* get reply */
971 		switch (http_get_reply(conn)) {
972 		case HTTP_OK:
973 		case HTTP_PARTIAL:
974 		case HTTP_NOT_MODIFIED:
975 			/* fine */
976 			break;
977 		case HTTP_MOVED_PERM:
978 		case HTTP_MOVED_TEMP:
979 		case HTTP_SEE_OTHER:
980 			/*
981 			 * Not so fine, but we still have to read the
982 			 * headers to get the new location.
983 			 */
984 			break;
985 		case HTTP_NEED_AUTH:
986 			if (need_auth) {
987 				/*
988 				 * We already sent out authorization code,
989 				 * so there's nothing more we can do.
990 				 */
991 				http_seterr(conn->err);
992 				goto ouch;
993 			}
994 			/* try again, but send the password this time */
995 			if (verbose)
996 				fetch_info("server requires authorization");
997 			break;
998 		case HTTP_NEED_PROXY_AUTH:
999 			/*
1000 			 * If we're talking to a proxy, we already sent
1001 			 * our proxy authorization code, so there's
1002 			 * nothing more we can do.
1003 			 */
1004 			http_seterr(conn->err);
1005 			goto ouch;
1006 		case HTTP_BAD_RANGE:
1007 			/*
1008 			 * This can happen if we ask for 0 bytes because
1009 			 * we already have the whole file.  Consider this
1010 			 * a success for now, and check sizes later.
1011 			 */
1012 			break;
1013 		case HTTP_PROTOCOL_ERROR:
1014 			/* fall through */
1015 		case -1:
1016 			--i;
1017 			if (cached)
1018 				continue;
1019 			fetch_syserr();
1020 			goto ouch;
1021 		default:
1022 			http_seterr(conn->err);
1023 			if (!verbose)
1024 				goto ouch;
1025 			/* fall through so we can get the full error message */
1026 		}
1027 
1028 		/* get headers */
1029 		do {
1030 			switch ((h = http_next_header(conn, &p))) {
1031 			case hdr_syserror:
1032 				fetch_syserr();
1033 				goto ouch;
1034 			case hdr_error:
1035 				http_seterr(HTTP_PROTOCOL_ERROR);
1036 				goto ouch;
1037 			case hdr_connection:
1038 				/* XXX too weak? */
1039 				keep_alive = (strcasecmp(p, "keep-alive") == 0);
1040 				break;
1041 			case hdr_content_length:
1042 				http_parse_length(p, &clength);
1043 				break;
1044 			case hdr_content_range:
1045 				http_parse_range(p, &offset, &length, &size);
1046 				break;
1047 			case hdr_last_modified:
1048 				http_parse_mtime(p, &mtime);
1049 				break;
1050 			case hdr_location:
1051 				if (!HTTP_REDIRECT(conn->err))
1052 					break;
1053 				if (new)
1054 					free(new);
1055 				if (verbose)
1056 					fetch_info("%d redirect to %s", conn->err, p);
1057 				if (*p == '/')
1058 					/* absolute path */
1059 					new = fetchMakeURL(url->scheme, url->host, url->port, p,
1060 					    url->user, url->pwd);
1061 				else
1062 					new = fetchParseURL(p);
1063 				if (new == NULL) {
1064 					/* XXX should set an error code */
1065 					goto ouch;
1066 				}
1067 				if (!*new->user && !*new->pwd) {
1068 					strcpy(new->user, url->user);
1069 					strcpy(new->pwd, url->pwd);
1070 				}
1071 				new->offset = url->offset;
1072 				new->length = url->length;
1073 				break;
1074 			case hdr_transfer_encoding:
1075 				/* XXX weak test*/
1076 				chunked = (strcasecmp(p, "chunked") == 0);
1077 				break;
1078 			case hdr_www_authenticate:
1079 				if (conn->err != HTTP_NEED_AUTH)
1080 					break;
1081 				/* if we were smarter, we'd check the method and realm */
1082 				break;
1083 			case hdr_end:
1084 				/* fall through */
1085 			case hdr_unknown:
1086 				/* ignore */
1087 				break;
1088 			}
1089 		} while (h > hdr_end);
1090 
1091 		/* we need to provide authentication */
1092 		if (conn->err == HTTP_NEED_AUTH) {
1093 			e = conn->err;
1094 			need_auth = 1;
1095 			fetch_close(conn);
1096 			conn = NULL;
1097 			continue;
1098 		}
1099 
1100 		/* requested range not satisfiable */
1101 		if (conn->err == HTTP_BAD_RANGE) {
1102 			if (url->offset == size && url->length == 0) {
1103 				/* asked for 0 bytes; fake it */
1104 				offset = url->offset;
1105 				conn->err = HTTP_OK;
1106 				break;
1107 			} else {
1108 				http_seterr(conn->err);
1109 				goto ouch;
1110 			}
1111 		}
1112 
1113 		/* we have a hit or an error */
1114 		if (conn->err == HTTP_OK ||
1115 		    conn->err == HTTP_PARTIAL ||
1116 		    conn->err == HTTP_NOT_MODIFIED ||
1117 		    HTTP_ERROR(conn->err))
1118 			break;
1119 
1120 		/* all other cases: we got a redirect */
1121 		e = conn->err;
1122 		need_auth = 0;
1123 		fetch_close(conn);
1124 		conn = NULL;
1125 		if (!new)
1126 			break;
1127 		if (url != URL)
1128 			fetchFreeURL(url);
1129 		url = new;
1130 	} while (++i < n);
1131 
1132 	/* we failed, or ran out of retries */
1133 	if (conn == NULL) {
1134 		http_seterr(e);
1135 		goto ouch;
1136 	}
1137 
1138 	/* check for inconsistencies */
1139 	if (clength != -1 && length != -1 && clength != length) {
1140 		http_seterr(HTTP_PROTOCOL_ERROR);
1141 		goto ouch;
1142 	}
1143 	if (clength == -1)
1144 		clength = length;
1145 	if (clength != -1)
1146 		length = offset + clength;
1147 	if (length != -1 && size != -1 && length != size) {
1148 		http_seterr(HTTP_PROTOCOL_ERROR);
1149 		goto ouch;
1150 	}
1151 	if (size == -1)
1152 		size = length;
1153 
1154 	/* fill in stats */
1155 	if (us) {
1156 		us->size = size;
1157 		us->atime = us->mtime = mtime;
1158 	}
1159 
1160 	/* too far? */
1161 	if (URL->offset > 0 && offset > URL->offset) {
1162 		http_seterr(HTTP_PROTOCOL_ERROR);
1163 		goto ouch;
1164 	}
1165 
1166 	/* report back real offset and size */
1167 	URL->offset = offset;
1168 	URL->length = clength;
1169 
1170 	if (clength == -1 && !chunked)
1171 		keep_alive = 0;
1172 
1173 	if (conn->err == HTTP_NOT_MODIFIED) {
1174 		http_seterr(HTTP_NOT_MODIFIED);
1175 		if (keep_alive) {
1176 			fetch_cache_put(conn, fetch_close);
1177 			conn = NULL;
1178 		}
1179 		goto ouch;
1180 	}
1181 
1182 	/* wrap it up in a fetchIO */
1183 	if ((f = http_funopen(conn, chunked, keep_alive, clength)) == NULL) {
1184 		fetch_syserr();
1185 		goto ouch;
1186 	}
1187 
1188 	if (url != URL)
1189 		fetchFreeURL(url);
1190 	if (purl)
1191 		fetchFreeURL(purl);
1192 
1193 	if (HTTP_ERROR(conn->err)) {
1194 
1195 		if (keep_alive) {
1196 			char buf[512];
1197 			do {
1198 			} while (fetchIO_read(f, buf, sizeof(buf)) > 0);
1199 		}
1200 
1201 		fetchIO_close(f);
1202 		f = NULL;
1203 	}
1204 
1205 	return (f);
1206 
1207 ouch:
1208 	if (url != URL)
1209 		fetchFreeURL(url);
1210 	if (purl)
1211 		fetchFreeURL(purl);
1212 	if (conn != NULL)
1213 		fetch_close(conn);
1214 	return (NULL);
1215 }
1216 
1217 
1218 /*****************************************************************************
1219  * Entry points
1220  */
1221 
1222 /*
1223  * Retrieve and stat a file by HTTP
1224  */
1225 fetchIO *
fetchXGetHTTP(struct url * URL,struct url_stat * us,const char * flags)1226 fetchXGetHTTP(struct url *URL, struct url_stat *us, const char *flags)
1227 {
1228 	return (http_request(URL, "GET", us, http_get_proxy(URL, flags), flags));
1229 }
1230 
1231 /*
1232  * Retrieve a file by HTTP
1233  */
1234 fetchIO *
fetchGetHTTP(struct url * URL,const char * flags)1235 fetchGetHTTP(struct url *URL, const char *flags)
1236 {
1237 	return (fetchXGetHTTP(URL, NULL, flags));
1238 }
1239 
1240 /*
1241  * Store a file by HTTP
1242  */
1243 fetchIO *
1244 /*ARGSUSED*/
fetchPutHTTP(struct url * URL __unused,const char * flags __unused)1245 fetchPutHTTP(struct url *URL __unused, const char *flags __unused)
1246 {
1247 	fprintf(stderr, "fetchPutHTTP(): not implemented\n");
1248 	return (NULL);
1249 }
1250 
1251 /*
1252  * Get an HTTP document's metadata
1253  */
1254 int
fetchStatHTTP(struct url * URL,struct url_stat * us,const char * flags)1255 fetchStatHTTP(struct url *URL, struct url_stat *us, const char *flags)
1256 {
1257 	fetchIO *f;
1258 
1259 	f = http_request(URL, "HEAD", us, http_get_proxy(URL, flags), flags);
1260 	if (f == NULL)
1261 		return (-1);
1262 	fetchIO_close(f);
1263 	return (0);
1264 }
1265 
/*
 * States of the HTML index scanner driven by parse_index().
 * The order (and therefore the numeric values) must not change.
 */
enum http_states {
	ST_NONE = 0,	/* plain text, not in markup */
	ST_LT,		/* "<" seen */
	ST_LTA,		/* "<a" seen */
	ST_TAGA,	/* in a-tag, "<a " seen, between attributes */
	ST_H,		/* "<a h" seen */
	ST_R,		/* "<a hr" seen */
	ST_E,		/* "<a hre" seen */
	ST_F,		/* "<a href" seen */
	ST_HREF,	/* "<a href=" seen */
	ST_HREFQ,	/* inside the double-quoted href value */
	ST_TAG,		/* in a tag other than "<a" -- disregarded */
	ST_TAGAX,	/* in an unknown keyword of the a-tag */
	ST_TAGAQ	/* in a double-quoted value of an unknown keyword */
};
1281 
1282 struct index_parser {
1283 	struct url_list *ue;
1284 	struct url *url;
1285 	enum http_states state;
1286 };
1287 
1288 static ssize_t
parse_index(struct index_parser * parser,const char * buf,size_t len)1289 parse_index(struct index_parser *parser, const char *buf, size_t len)
1290 {
1291 	char *end_attr, p = *buf;
1292 
1293 	switch (parser->state) {
1294 	case ST_NONE:
1295 		/* Plain text, not in markup */
1296 		if (p == '<')
1297 			parser->state = ST_LT;
1298 		return 1;
1299 	case ST_LT:
1300 		/* In tag -- "<" already found */
1301 		if (p == '>')
1302 			parser->state = ST_NONE;
1303 		else if (p == 'a' || p == 'A')
1304 			parser->state = ST_LTA;
1305 		else if (!isspace((unsigned char)p))
1306 			parser->state = ST_TAG;
1307 		return 1;
1308 	case ST_LTA:
1309 		/* In tag -- "<a" already found */
1310 		if (p == '>')
1311 			parser->state = ST_NONE;
1312 		else if (p == '"')
1313 			parser->state = ST_TAGAQ;
1314 		else if (isspace((unsigned char)p))
1315 			parser->state = ST_TAGA;
1316 		else
1317 			parser->state = ST_TAG;
1318 		return 1;
1319 	case ST_TAG:
1320 		/* In tag, but not "<a" -- disregard */
1321 		if (p == '>')
1322 			parser->state = ST_NONE;
1323 		return 1;
1324 	case ST_TAGA:
1325 		/* In a-tag -- "<a " already found */
1326 		if (p == '>')
1327 			parser->state = ST_NONE;
1328 		else if (p == '"')
1329 			parser->state = ST_TAGAQ;
1330 		else if (p == 'h' || p == 'H')
1331 			parser->state = ST_H;
1332 		else if (!isspace((unsigned char)p))
1333 			parser->state = ST_TAGAX;
1334 		return 1;
1335 	case ST_TAGAX:
1336 		/* In unknown keyword in a-tag */
1337 		if (p == '>')
1338 			parser->state = ST_NONE;
1339 		else if (p == '"')
1340 			parser->state = ST_TAGAQ;
1341 		else if (isspace((unsigned char)p))
1342 			parser->state = ST_TAGA;
1343 		return 1;
1344 	case ST_TAGAQ:
1345 		/* In a-tag, unknown argument for keys. */
1346 		if (p == '>')
1347 			parser->state = ST_NONE;
1348 		else if (p == '"')
1349 			parser->state = ST_TAGA;
1350 		return 1;
1351 	case ST_H:
1352 		/* In a-tag -- "<a h" already found */
1353 		if (p == '>')
1354 			parser->state = ST_NONE;
1355 		else if (p == '"')
1356 			parser->state = ST_TAGAQ;
1357 		else if (p == 'r' || p == 'R')
1358 			parser->state = ST_R;
1359 		else if (isspace((unsigned char)p))
1360 			parser->state = ST_TAGA;
1361 		else
1362 			parser->state = ST_TAGAX;
1363 		return 1;
1364 	case ST_R:
1365 		/* In a-tag -- "<a hr" already found */
1366 		if (p == '>')
1367 			parser->state = ST_NONE;
1368 		else if (p == '"')
1369 			parser->state = ST_TAGAQ;
1370 		else if (p == 'e' || p == 'E')
1371 			parser->state = ST_E;
1372 		else if (isspace((unsigned char)p))
1373 			parser->state = ST_TAGA;
1374 		else
1375 			parser->state = ST_TAGAX;
1376 		return 1;
1377 	case ST_E:
1378 		/* In a-tag -- "<a hre" already found */
1379 		if (p == '>')
1380 			parser->state = ST_NONE;
1381 		else if (p == '"')
1382 			parser->state = ST_TAGAQ;
1383 		else if (p == 'f' || p == 'F')
1384 			parser->state = ST_F;
1385 		else if (isspace((unsigned char)p))
1386 			parser->state = ST_TAGA;
1387 		else
1388 			parser->state = ST_TAGAX;
1389 		return 1;
1390 	case ST_F:
1391 		/* In a-tag -- "<a href" already found */
1392 		if (p == '>')
1393 			parser->state = ST_NONE;
1394 		else if (p == '"')
1395 			parser->state = ST_TAGAQ;
1396 		else if (p == '=')
1397 			parser->state = ST_HREF;
1398 		else if (!isspace((unsigned char)p))
1399 			parser->state = ST_TAGAX;
1400 		return 1;
1401 	case ST_HREF:
1402 		/* In a-tag -- "<a href=" already found */
1403 		if (p == '>')
1404 			parser->state = ST_NONE;
1405 		else if (p == '"')
1406 			parser->state = ST_HREFQ;
1407 		else if (!isspace((unsigned char)p))
1408 			parser->state = ST_TAGA;
1409 		return 1;
1410 	case ST_HREFQ:
1411 		/* In href of the a-tag */
1412 		end_attr = memchr(buf, '"', len);
1413 		if (end_attr == NULL)
1414 			return 0;
1415 		*end_attr = '\0';
1416 		parser->state = ST_TAGA;
1417 		if (fetch_add_entry(parser->ue, parser->url, buf, 1))
1418 			return -1;
1419 		return end_attr + 1 - buf;
1420 	}
1421 	/* NOTREACHED */
1422 	abort();
1423 }
1424 
1425 struct http_index_cache {
1426 	struct http_index_cache *next;
1427 	struct url *location;
1428 	struct url_list ue;
1429 };
1430 
1431 static struct http_index_cache *index_cache;
1432 
1433 /*
1434  * List a directory
1435  */
1436 int
1437 /*ARGSUSED*/
fetchListHTTP(struct url_list * ue,struct url * url,const char * pattern __unused,const char * flags)1438 fetchListHTTP(struct url_list *ue, struct url *url, const char *pattern __unused, const char *flags)
1439 {
1440 	fetchIO *f;
1441 	char buf[2 * PATH_MAX];
1442 	size_t buf_len, sum_processed;
1443 	ssize_t read_len, processed;
1444 	struct index_parser state;
1445 	struct http_index_cache *cache = NULL;
1446 	int do_cache, ret;
1447 
1448 	do_cache = CHECK_FLAG('c');
1449 
1450 	if (do_cache) {
1451 		for (cache = index_cache; cache != NULL; cache = cache->next) {
1452 			if (strcmp(cache->location->scheme, url->scheme))
1453 				continue;
1454 			if (strcmp(cache->location->user, url->user))
1455 				continue;
1456 			if (strcmp(cache->location->pwd, url->pwd))
1457 				continue;
1458 			if (strcmp(cache->location->host, url->host))
1459 				continue;
1460 			if (cache->location->port != url->port)
1461 				continue;
1462 			if (strcmp(cache->location->doc, url->doc))
1463 				continue;
1464 			return fetchAppendURLList(ue, &cache->ue);
1465 		}
1466 
1467 		cache = malloc(sizeof(*cache));
1468 		fetchInitURLList(&cache->ue);
1469 		cache->location = fetchCopyURL(url);
1470 	}
1471 
1472 	f = fetchGetHTTP(url, flags);
1473 	if (f == NULL) {
1474 		if (do_cache) {
1475 			fetchFreeURLList(&cache->ue);
1476 			fetchFreeURL(cache->location);
1477 			free(cache);
1478 		}
1479 		return -1;
1480 	}
1481 
1482 	state.url = url;
1483 	state.state = ST_NONE;
1484 	if (do_cache) {
1485 		state.ue = &cache->ue;
1486 	} else {
1487 		state.ue = ue;
1488 	}
1489 
1490 	buf_len = 0;
1491 
1492 	while ((read_len = fetchIO_read(f, buf + buf_len, sizeof(buf) - buf_len)) > 0) {
1493 		buf_len += read_len;
1494 		sum_processed = 0;
1495 		do {
1496 			processed = parse_index(&state, buf + sum_processed, buf_len);
1497 			if (processed == -1)
1498 				break;
1499 			buf_len -= processed;
1500 			sum_processed += processed;
1501 		} while (processed != 0 && buf_len > 0);
1502 		if (processed == -1) {
1503 			read_len = -1;
1504 			break;
1505 		}
1506 		memmove(buf, buf + sum_processed, buf_len);
1507 	}
1508 
1509 	fetchIO_close(f);
1510 
1511 	ret = read_len < 0 ? -1 : 0;
1512 
1513 	if (do_cache) {
1514 		if (ret == 0) {
1515 			cache->next = index_cache;
1516 			index_cache = cache;
1517 		}
1518 
1519 		if (fetchAppendURLList(ue, &cache->ue))
1520 			ret = -1;
1521 	}
1522 
1523 	return ret;
1524 }
1525