1 /* $NetBSD: http.c,v 1.4 2020/06/01 00:55:24 kamil Exp $ */
2 /*-
3 * Copyright (c) 2000-2004 Dag-Erling Coïdan Smørgrav
4 * Copyright (c) 2003 Thomas Klausner <wiz@NetBSD.org>
5 * Copyright (c) 2008, 2009 Joerg Sonnenberger <joerg@NetBSD.org>
6 * All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer
13 * in this position and unchanged.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 * 3. The name of the author may not be used to endorse or promote products
18 * derived from this software without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
21 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
22 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
23 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
24 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
25 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
29 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 *
31 * $FreeBSD: http.c,v 1.83 2008/02/06 11:39:55 des Exp $
32 */
33
34 /*
35 * The following copyright applies to the base64 code:
36 *
37 *-
38 * Copyright 1997 Massachusetts Institute of Technology
39 *
40 * Permission to use, copy, modify, and distribute this software and
41 * its documentation for any purpose and without fee is hereby
42 * granted, provided that both the above copyright notice and this
43 * permission notice appear in all copies, that both the above
44 * copyright notice and this permission notice appear in all
45 * supporting documentation, and that the name of M.I.T. not be used
46 * in advertising or publicity pertaining to distribution of the
47 * software without specific, written prior permission. M.I.T. makes
48 * no representations about the suitability of this software for any
49 * purpose. It is provided "as is" without express or implied
50 * warranty.
51 *
52 * THIS SOFTWARE IS PROVIDED BY M.I.T. ``AS IS''. M.I.T. DISCLAIMS
53 * ALL EXPRESS OR IMPLIED WARRANTIES WITH REGARD TO THIS SOFTWARE,
54 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
55 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. IN NO EVENT
56 * SHALL M.I.T. BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
57 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
58 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
59 * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
60 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
61 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
62 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
63 * SUCH DAMAGE.
64 */
65
66 #if defined(__linux__) || defined(__MINT__)
67 /* Keep this down to Linux or MiNT, it can create surprises elsewhere. */
68 #define _GNU_SOURCE
69 #endif
70
71 #ifndef _REENTRANT
72 /* Needed for gmtime_r on Interix */
73 #define _REENTRANT
74 #endif
75
76 #if HAVE_CONFIG_H
77 #include "config.h"
78 #endif
79 #ifndef NETBSD
80 #include <nbcompat.h>
81 #endif
82
83 #include <sys/types.h>
84 #include <sys/socket.h>
85
86 #include <ctype.h>
87 #include <errno.h>
88 #include <locale.h>
89 #include <stdarg.h>
90 #ifndef NETBSD
91 #include <nbcompat/stdio.h>
92 #else
93 #include <stdio.h>
94 #endif
95 #include <stdlib.h>
96 #include <string.h>
97 #include <time.h>
98 #include <unistd.h>
99
100 #include <netinet/in.h>
101 #include <netinet/tcp.h>
102
103 #ifndef NETBSD
104 #include <nbcompat/netdb.h>
105 #else
106 #include <netdb.h>
107 #endif
108
109 #include <arpa/inet.h>
110
111 #include "fetch.h"
112 #include "common.h"
113 #include "httperr.h"
114
/* Maximum number of redirects to follow */
#define MAX_REDIRECT 5

/* Symbolic names for reply codes we care about */
#define HTTP_OK			200
#define HTTP_PARTIAL		206
#define HTTP_MOVED_PERM		301
#define HTTP_MOVED_TEMP		302
#define HTTP_SEE_OTHER		303
#define HTTP_NOT_MODIFIED	304
#define HTTP_TEMP_REDIRECT	307
#define HTTP_NEED_AUTH		401
#define HTTP_NEED_PROXY_AUTH	407
#define HTTP_BAD_RANGE		416
/* Pseudo-code returned by http_get_reply() for an unparsable status line */
#define HTTP_PROTOCOL_ERROR	999

/* True for the redirect codes whose Location header we follow */
#define HTTP_REDIRECT(xyz) ((xyz) == HTTP_MOVED_PERM \
			    || (xyz) == HTTP_MOVED_TEMP \
			    || (xyz) == HTTP_TEMP_REDIRECT \
			    || (xyz) == HTTP_SEE_OTHER)

/*
 * True for every client (4xx) and server (5xx) error status.  Both ends
 * of the range are inclusive so that 400 (Bad Request) and 599 are
 * classified as errors as well.
 */
#define HTTP_ERROR(xyz) ((xyz) >= 400 && (xyz) <= 599)
137
138
139 /*****************************************************************************
140 * I/O functions for decoding chunked streams
141 */
142
143 struct httpio
144 {
145 conn_t *conn; /* connection */
146 int chunked; /* chunked mode */
147 int keep_alive; /* keep-alive mode */
148 char *buf; /* chunk buffer */
149 size_t bufsize; /* size of chunk buffer */
150 ssize_t buflen; /* amount of data currently in buffer */
151 size_t bufpos; /* current read offset in buffer */
152 int eof; /* end-of-file flag */
153 int error; /* error flag */
154 size_t chunksize; /* remaining size of current chunk */
155 off_t contentlength; /* remaining size of the content */
156 };
157
158 /*
159 * Get next chunk header
160 */
161 static ssize_t
http_new_chunk(struct httpio * io)162 http_new_chunk(struct httpio *io)
163 {
164 char *p;
165
166 if (fetch_getln(io->conn) == -1)
167 return (-1);
168
169 if (io->conn->buflen < 2 || !isxdigit((unsigned char)*io->conn->buf))
170 return (-1);
171
172 for (p = io->conn->buf; *p && !isspace((unsigned char)*p); ++p) {
173 if (*p == ';')
174 break;
175 if (!isxdigit((unsigned char)*p))
176 return (-1);
177 if (isdigit((unsigned char)*p)) {
178 io->chunksize = io->chunksize * 16 +
179 *p - '0';
180 } else {
181 io->chunksize = io->chunksize * 16 +
182 10 + tolower((unsigned char)*p) - 'a';
183 }
184 }
185
186 return (io->chunksize);
187 }
188
189 /*
190 * Grow the input buffer to at least len bytes
191 */
192 static int
http_growbuf(struct httpio * io,size_t len)193 http_growbuf(struct httpio *io, size_t len)
194 {
195 char *tmp;
196
197 if (io->bufsize >= len)
198 return (0);
199
200 if ((tmp = realloc(io->buf, len)) == NULL)
201 return (-1);
202 io->buf = tmp;
203 io->bufsize = len;
204 return (0);
205 }
206
207 /*
208 * Fill the input buffer, do chunk decoding on the fly
209 */
210 static ssize_t
http_fillbuf(struct httpio * io,size_t len)211 http_fillbuf(struct httpio *io, size_t len)
212 {
213 if (io->error)
214 return (-1);
215 if (io->eof)
216 return (0);
217
218 if (io->contentlength >= 0 && (off_t)len > io->contentlength)
219 len = io->contentlength;
220
221 if (io->chunked == 0) {
222 if (http_growbuf(io, len) == -1)
223 return (-1);
224 if ((io->buflen = fetch_read(io->conn, io->buf, len)) == -1) {
225 io->error = 1;
226 return (-1);
227 }
228 if (io->contentlength)
229 io->contentlength -= io->buflen;
230 io->bufpos = 0;
231 return (io->buflen);
232 }
233
234 if (io->chunksize == 0) {
235 switch (http_new_chunk(io)) {
236 case -1:
237 io->error = 1;
238 return (-1);
239 case 0:
240 io->eof = 1;
241 if (fetch_getln(io->conn) == -1)
242 return (-1);
243 return (0);
244 }
245 }
246
247 if (len > io->chunksize)
248 len = io->chunksize;
249 if (http_growbuf(io, len) == -1)
250 return (-1);
251 if ((io->buflen = fetch_read(io->conn, io->buf, len)) == -1) {
252 io->error = 1;
253 return (-1);
254 }
255 io->chunksize -= io->buflen;
256 if (io->contentlength >= 0)
257 io->contentlength -= io->buflen;
258
259 if (io->chunksize == 0) {
260 char endl[2];
261 ssize_t len2;
262
263 len2 = fetch_read(io->conn, endl, 2);
264 if (len2 == 1 && fetch_read(io->conn, endl + 1, 1) != 1)
265 return (-1);
266 if (len2 == -1 || endl[0] != '\r' || endl[1] != '\n')
267 return (-1);
268 }
269
270 io->bufpos = 0;
271
272 return (io->buflen);
273 }
274
275 /*
276 * Read function
277 */
278 static ssize_t
http_readfn(void * v,void * buf,size_t len)279 http_readfn(void *v, void *buf, size_t len)
280 {
281 struct httpio *io = (struct httpio *)v;
282 size_t l, pos;
283
284 if (io->error)
285 return (-1);
286 if (io->eof)
287 return (0);
288
289 for (pos = 0; len > 0; pos += l, len -= l) {
290 /* empty buffer */
291 if (!io->buf || (ssize_t)io->bufpos == io->buflen)
292 if (http_fillbuf(io, len) < 1)
293 break;
294 l = io->buflen - io->bufpos;
295 if (len < l)
296 l = len;
297 memcpy((char *)buf + pos, io->buf + io->bufpos, l);
298 io->bufpos += l;
299 }
300
301 if (!pos && io->error)
302 return (-1);
303 return (pos);
304 }
305
306 /*
307 * Write function
308 */
309 static ssize_t
http_writefn(void * v,const void * buf,size_t len)310 http_writefn(void *v, const void *buf, size_t len)
311 {
312 struct httpio *io = (struct httpio *)v;
313
314 return (fetch_write(io->conn, buf, len));
315 }
316
317 /*
318 * Close function
319 */
320 static void
http_closefn(void * v)321 http_closefn(void *v)
322 {
323 struct httpio *io = (struct httpio *)v;
324
325 if (io->keep_alive) {
326 int val;
327
328 val = 0;
329 setsockopt(io->conn->sd, IPPROTO_TCP, TCP_NODELAY, &val,
330 (socklen_t)sizeof(val));
331 fetch_cache_put(io->conn, fetch_close);
332 #ifdef TCP_NOPUSH
333 val = 1;
334 setsockopt(io->conn->sd, IPPROTO_TCP, TCP_NOPUSH, &val,
335 sizeof(val));
336 #endif
337 } else {
338 fetch_close(io->conn);
339 }
340
341 free(io->buf);
342 free(io);
343 }
344
345 /*
346 * Wrap a file descriptor up
347 */
348 static fetchIO *
http_funopen(conn_t * conn,int chunked,int keep_alive,off_t clength)349 http_funopen(conn_t *conn, int chunked, int keep_alive, off_t clength)
350 {
351 struct httpio *io;
352 fetchIO *f;
353
354 if ((io = calloc(1, sizeof(*io))) == NULL) {
355 fetch_syserr();
356 return (NULL);
357 }
358 io->conn = conn;
359 io->chunked = chunked;
360 io->contentlength = clength;
361 io->keep_alive = keep_alive;
362 f = fetchIO_unopen(io, http_readfn, http_writefn, http_closefn);
363 if (f == NULL) {
364 fetch_syserr();
365 free(io);
366 return (NULL);
367 }
368 return (f);
369 }
370
371
372 /*****************************************************************************
373 * Helper functions for talking to the server and parsing its replies
374 */
375
/* Header types, as reported by http_next_header() */
typedef enum {
	hdr_syserror		= -2,
	hdr_error		= -1,
	hdr_end			= 0,
	hdr_unknown		= 1,
	hdr_connection		= 2,
	hdr_content_length	= 3,
	hdr_content_range	= 4,
	hdr_last_modified	= 5,
	hdr_location		= 6,
	hdr_transfer_encoding	= 7,
	hdr_www_authenticate	= 8
} hdr_t;

/*
 * Names of interesting headers.  The table is scanned in order and is
 * terminated by the hdr_unknown entry.
 */
static struct {
	hdr_t		 num;
	const char	*name;
} hdr_names[] = {
	{ hdr_connection,		"Connection" },
	{ hdr_content_length,		"Content-Length" },
	{ hdr_content_range,		"Content-Range" },
	{ hdr_last_modified,		"Last-Modified" },
	{ hdr_location,			"Location" },
	{ hdr_transfer_encoding,	"Transfer-Encoding" },
	{ hdr_www_authenticate,		"WWW-Authenticate" },
	{ hdr_unknown,			NULL },
};
405
406 /*
407 * Send a formatted line; optionally echo to terminal
408 */
409 __printflike(2, 3)
410 static int
http_cmd(conn_t * conn,const char * fmt,...)411 http_cmd(conn_t *conn, const char *fmt, ...)
412 {
413 va_list ap;
414 size_t len;
415 char *msg;
416 ssize_t r;
417
418 va_start(ap, fmt);
419 len = vasprintf(&msg, fmt, ap);
420 va_end(ap);
421
422 if (msg == NULL) {
423 errno = ENOMEM;
424 fetch_syserr();
425 return (-1);
426 }
427
428 r = fetch_write(conn, msg, len);
429 free(msg);
430
431 if (r == -1) {
432 fetch_syserr();
433 return (-1);
434 }
435
436 return (0);
437 }
438
439 /*
440 * Get and parse status line
441 */
442 static int
http_get_reply(conn_t * conn)443 http_get_reply(conn_t *conn)
444 {
445 char *p;
446
447 if (fetch_getln(conn) == -1)
448 return (-1);
449 /*
450 * A valid status line looks like "HTTP/m.n xyz reason" where m
451 * and n are the major and minor protocol version numbers and xyz
452 * is the reply code.
453 * Unfortunately, there are servers out there (NCSA 1.5.1, to name
454 * just one) that do not send a version number, so we can't rely
455 * on finding one, but if we do, insist on it being 1.0 or 1.1.
456 * We don't care about the reason phrase.
457 */
458 if (strncmp(conn->buf, "HTTP", 4) != 0)
459 return (HTTP_PROTOCOL_ERROR);
460 p = conn->buf + 4;
461 if (*p == '/') {
462 if (p[1] != '1' || p[2] != '.' || (p[3] != '0' && p[3] != '1'))
463 return (HTTP_PROTOCOL_ERROR);
464 p += 4;
465 }
466 if (*p != ' ' ||
467 !isdigit((unsigned char)p[1]) ||
468 !isdigit((unsigned char)p[2]) ||
469 !isdigit((unsigned char)p[3]))
470 return (HTTP_PROTOCOL_ERROR);
471
472 conn->err = (p[1] - '0') * 100 + (p[2] - '0') * 10 + (p[3] - '0');
473 return (conn->err);
474 }
475
/*
 * Check a header; if the type matches the given string, return a pointer
 * to the beginning of the value.
 *
 * str is the header name to look for and hdr the complete header line.
 * The name comparison is case-insensitive and must cover the whole name
 * up to the colon.  Returns a pointer to the first non-whitespace
 * character after the colon (possibly the terminating NUL), or NULL if
 * the line is not a header of that name.
 */
static const char *
http_match(const char *str, const char *hdr)
{
	/*
	 * Advance only past characters that really matched.  Stepping over
	 * the mismatching pair as well would let a name that differs only
	 * in its last character ("ab" against "ac:") pass the test below.
	 */
	while (*str && *hdr &&
	    tolower((unsigned char)*str) == tolower((unsigned char)*hdr)) {
		++str;
		++hdr;
	}
	if (*str || *hdr != ':')
		return (NULL);
	/* step over the colon and any whitespace that follows it */
	do {
		++hdr;
	} while (isspace((unsigned char)*hdr));
	return (hdr);
}
492
493 /*
494 * Get the next header and return the appropriate symbolic code.
495 */
496 static hdr_t
http_next_header(conn_t * conn,const char ** p)497 http_next_header(conn_t *conn, const char **p)
498 {
499 int i;
500
501 if (fetch_getln(conn) == -1)
502 return (hdr_syserror);
503 while (conn->buflen && isspace((unsigned char)conn->buf[conn->buflen - 1]))
504 conn->buflen--;
505 conn->buf[conn->buflen] = '\0';
506 if (conn->buflen == 0)
507 return (hdr_end);
508 /*
509 * We could check for malformed headers but we don't really care.
510 * A valid header starts with a token immediately followed by a
511 * colon; a token is any sequence of non-control, non-whitespace
512 * characters except "()<>@,;:\\\"{}".
513 */
514 for (i = 0; hdr_names[i].num != hdr_unknown; i++)
515 if ((*p = http_match(hdr_names[i].name, conn->buf)) != NULL)
516 return (hdr_names[i].num);
517 return (hdr_unknown);
518 }
519
/*
 * Parse a last-modified header
 *
 * p is the header value, expected in the form
 * "Sun, 06 Nov 1994 08:49:37 GMT".  On success the time is stored in
 * *mtime and 0 is returned; -1 is returned (and *mtime left alone) if
 * the value does not parse.  LC_TIME is switched to "C" while parsing
 * so that day and month names are matched in English, and restored
 * afterwards.
 */
static int
http_parse_mtime(const char *p, time_t *mtime)
{
	char locale[64];
	const char *cur, *r;
	struct tm tm;

	/*
	 * Save the current locale name.  snprintf() always terminates the
	 * copy, and a NULL result from setlocale() is not dereferenced.
	 */
	cur = setlocale(LC_TIME, NULL);
	snprintf(locale, sizeof(locale), "%s", cur != NULL ? cur : "C");
	setlocale(LC_TIME, "C");
	/* start from a fully defined tm; strptime() only sets some fields */
	memset(&tm, 0, sizeof(tm));
	r = strptime(p, "%a, %d %b %Y %H:%M:%S GMT", &tm);
	/* XXX should add support for date-2 and date-3 */
	setlocale(LC_TIME, locale);
	if (r == NULL)
		return (-1);
	*mtime = timegm(&tm);
	return (0);
}
539
/*
 * Parse a content-length header
 *
 * p must consist of decimal digits only.  On success the value is
 * stored in *length and 0 is returned.  Returns -1, leaving *length
 * untouched, if p contains any other character or the value does not
 * fit in an off_t.
 */
static int
http_parse_length(const char *p, off_t *length)
{
	/*
	 * There is no standard OFF_MAX, so derive the largest off_t from
	 * its size (off_t is a signed type; POSIX fixes bytes at 8 bits).
	 */
	const off_t off_max =
	    (off_t)((1ULL << (sizeof(off_t) * 8 - 1)) - 1);
	off_t len;
	int digit;

	for (len = 0; isdigit((unsigned char)*p); ++p) {
		digit = *p - '0';
		/* stop before len * 10 + digit would overflow */
		if (len > (off_max - digit) / 10)
			return (-1);
		len = len * 10 + digit;
	}
	if (*p)
		return (-1);
	*length = len;
	return (0);
}
555
/*
 * Read a run of decimal digits starting at *pp into *val and leave *pp
 * on the first character that is not a digit.  An empty run yields 0.
 * Returns 0 on success, or -1 if the value does not fit in an off_t.
 */
static int
http_parse_range_num(const char **pp, off_t *val)
{
	/* largest off_t, derived from its size (signed, 8-bit bytes) */
	const off_t off_max =
	    (off_t)((1ULL << (sizeof(off_t) * 8 - 1)) - 1);
	const char *p;
	off_t n;
	int digit;

	n = 0;
	for (p = *pp; isdigit((unsigned char)*p); ++p) {
		digit = *p - '0';
		if (n > (off_max - digit) / 10)
			return (-1);
		n = n * 10 + digit;
	}
	*pp = p;
	*val = n;
	return (0);
}

/*
 * Parse a content-range header
 *
 * Accepts "bytes first-last/size" and "bytes * /size" (written here
 * with a space only to keep the comment well formed).  On success
 * *offset receives the first byte position (-1 for the '*' form),
 * *length the number of bytes in the range (0 for the '*' form) and
 * *size the total size, and 0 is returned.  Returns -1 without touching
 * the outputs if the value is malformed, inconsistent, or contains a
 * number too large for an off_t.
 */
static int
http_parse_range(const char *p, off_t *offset, off_t *length, off_t *size)
{
	off_t first, last, len;

	if (strncasecmp(p, "bytes ", 6) != 0)
		return (-1);
	p += 6;
	if (*p == '*') {
		first = last = -1;
		++p;
	} else {
		if (http_parse_range_num(&p, &first) == -1 || *p != '-')
			return (-1);
		++p;
		if (http_parse_range_num(&p, &last) == -1)
			return (-1);
	}
	if (first > last || *p != '/')
		return (-1);
	++p;
	if (http_parse_range_num(&p, &len) == -1 || *p)
		return (-1);
	/*
	 * Same test as "len < last - first + 1", arranged so that no
	 * intermediate result can overflow.
	 */
	if (len - 1 < last - first)
		return (-1);
	if (first == -1)
		*length = 0;
	else
		*length = last - first + 1;
	*offset = first;
	*size = len;
	return (0);
}
592
593
594 /*****************************************************************************
595 * Helper functions for authorization
596 */
597
/*
 * Base64 encoding
 *
 * Encodes the NUL-terminated string src and returns the result as a
 * newly allocated, NUL-terminated string that the caller must free().
 * Returns NULL if the allocation fails.
 */
static char *
http_base64(const char *src)
{
	static const char base64[] =
	    "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
	    "abcdefghijklmnopqrstuvwxyz"
	    "0123456789+/";
	/*
	 * Work on unsigned bytes: where plain char is signed, a byte of
	 * 0x80 or above would be sign-extended before the shifts below and
	 * corrupt the output.
	 */
	const unsigned char *in = (const unsigned char *)src;
	char *str, *dst;
	size_t l;
	unsigned int t;

	l = strlen(src);
	/* four output characters per (partial) group of three, plus NUL */
	if ((str = malloc(((l + 2) / 3) * 4 + 1)) == NULL)
		return (NULL);
	dst = str;

	while (l >= 3) {
		t = ((unsigned int)in[0] << 16) |
		    ((unsigned int)in[1] << 8) | in[2];
		dst[0] = base64[(t >> 18) & 0x3f];
		dst[1] = base64[(t >> 12) & 0x3f];
		dst[2] = base64[(t >> 6) & 0x3f];
		dst[3] = base64[(t >> 0) & 0x3f];
		in += 3; l -= 3;
		dst += 4;
	}

	/* encode the one or two leftover bytes and pad with '=' */
	switch (l) {
	case 2:
		t = ((unsigned int)in[0] << 16) | ((unsigned int)in[1] << 8);
		dst[0] = base64[(t >> 18) & 0x3f];
		dst[1] = base64[(t >> 12) & 0x3f];
		dst[2] = base64[(t >> 6) & 0x3f];
		dst[3] = '=';
		dst += 4;
		break;
	case 1:
		t = (unsigned int)in[0] << 16;
		dst[0] = base64[(t >> 18) & 0x3f];
		dst[1] = base64[(t >> 12) & 0x3f];
		dst[2] = dst[3] = '=';
		dst += 4;
		break;
	case 0:
		break;
	}

	*dst = 0;
	return (str);
}
653
654 /*
655 * Encode username and password
656 */
657 static int
http_basic_auth(conn_t * conn,const char * hdr,const char * usr,const char * pwd)658 http_basic_auth(conn_t *conn, const char *hdr, const char *usr, const char *pwd)
659 {
660 char *upw, *auth;
661 int r;
662
663 if (asprintf(&upw, "%s:%s", usr, pwd) == -1)
664 return (-1);
665 auth = http_base64(upw);
666 free(upw);
667 if (auth == NULL)
668 return (-1);
669 r = http_cmd(conn, "%s: Basic %s\r\n", hdr, auth);
670 free(auth);
671 return (r);
672 }
673
674 /*
675 * Send an authorization header
676 */
677 static int
http_authorize(conn_t * conn,const char * hdr,const char * p)678 http_authorize(conn_t *conn, const char *hdr, const char *p)
679 {
680 /* basic authorization */
681 if (strncasecmp(p, "basic:", 6) == 0) {
682 char *user, *pwd, *str;
683 int r;
684
685 /* skip realm */
686 for (p += 6; *p && *p != ':'; ++p)
687 /* nothing */ ;
688 if (!*p || strchr(++p, ':') == NULL)
689 return (-1);
690 if ((str = strdup(p)) == NULL)
691 return (-1); /* XXX */
692 user = str;
693 pwd = strchr(str, ':');
694 *pwd++ = '\0';
695 r = http_basic_auth(conn, hdr, user, pwd);
696 free(str);
697 return (r);
698 }
699 return (-1);
700 }
701
702
703 /*****************************************************************************
704 * Helper functions for connecting to a server or proxy
705 */
706
707 /*
708 * Connect to the correct HTTP server or proxy.
709 */
710 static conn_t *
http_connect(struct url * URL,struct url * purl,const char * flags,int * cached)711 http_connect(struct url *URL, struct url *purl, const char *flags, int *cached)
712 {
713 conn_t *conn;
714 int af, verbose;
715 #ifdef TCP_NOPUSH
716 int val;
717 #endif
718
719 *cached = 1;
720
721 #ifdef INET6
722 af = AF_UNSPEC;
723 #else
724 af = AF_INET;
725 #endif
726
727 verbose = CHECK_FLAG('v');
728 if (CHECK_FLAG('4'))
729 af = AF_INET;
730 #ifdef INET6
731 else if (CHECK_FLAG('6'))
732 af = AF_INET6;
733 #endif
734
735 if (purl && strcasecmp(URL->scheme, SCHEME_HTTPS) != 0) {
736 URL = purl;
737 } else if (strcasecmp(URL->scheme, SCHEME_FTP) == 0) {
738 /* can't talk http to an ftp server */
739 /* XXX should set an error code */
740 return (NULL);
741 }
742
743 if ((conn = fetch_cache_get(URL, af)) != NULL) {
744 *cached = 1;
745 return (conn);
746 }
747
748 if ((conn = fetch_connect(URL, af, verbose)) == NULL)
749 /* fetch_connect() has already set an error code */
750 return (NULL);
751 if (strcasecmp(URL->scheme, SCHEME_HTTPS) == 0 &&
752 fetch_ssl(conn, verbose) == -1) {
753 fetch_close(conn);
754 /* grrr */
755 #ifdef EAUTH
756 errno = EAUTH;
757 #else
758 errno = EPERM;
759 #endif
760 fetch_syserr();
761 return (NULL);
762 }
763
764 #ifdef TCP_NOPUSH
765 val = 1;
766 setsockopt(conn->sd, IPPROTO_TCP, TCP_NOPUSH, &val, sizeof(val));
767 #endif
768
769 return (conn);
770 }
771
772 static struct url *
http_get_proxy(struct url * url,const char * flags)773 http_get_proxy(struct url * url, const char *flags)
774 {
775 struct url *purl;
776 char *p;
777
778 if (flags != NULL && strchr(flags, 'd') != NULL)
779 return (NULL);
780 if (fetch_no_proxy_match(url->host))
781 return (NULL);
782 if (((p = getenv("HTTP_PROXY")) || (p = getenv("http_proxy"))) &&
783 *p && (purl = fetchParseURL(p))) {
784 if (!*purl->scheme)
785 strcpy(purl->scheme, SCHEME_HTTP);
786 if (!purl->port)
787 purl->port = fetch_default_proxy_port(purl->scheme);
788 if (strcasecmp(purl->scheme, SCHEME_HTTP) == 0)
789 return (purl);
790 fetchFreeURL(purl);
791 }
792 return (NULL);
793 }
794
795 static void
set_if_modified_since(conn_t * conn,time_t last_modified)796 set_if_modified_since(conn_t *conn, time_t last_modified)
797 {
798 static const char weekdays[] = "SunMonTueWedThuFriSat";
799 static const char months[] = "JanFebMarAprMayJunJulAugSepOctNovDec";
800 struct tm tm;
801 char buf[80];
802 gmtime_r(&last_modified, &tm);
803 snprintf(buf, sizeof(buf), "%.3s, %02d %.3s %4d %02d:%02d:%02d GMT",
804 weekdays + tm.tm_wday * 3, tm.tm_mday, months + tm.tm_mon * 3,
805 tm.tm_year + 1900, tm.tm_hour, tm.tm_min, tm.tm_sec);
806 http_cmd(conn, "If-Modified-Since: %s\r\n", buf);
807 }
808
809
810 /*****************************************************************************
811 * Core
812 */
813
814 /*
815 * Send a request and process the reply
816 *
817 * XXX This function is way too long, the do..while loop should be split
818 * XXX off into a separate function.
819 */
820 fetchIO *
http_request(struct url * URL,const char * op,struct url_stat * us,struct url * purl,const char * flags)821 http_request(struct url *URL, const char *op, struct url_stat *us,
822 struct url *purl, const char *flags)
823 {
824 conn_t *conn;
825 struct url *url, *new;
826 int chunked, direct, if_modified_since, need_auth, noredirect;
827 int keep_alive, verbose, cached;
828 int e, i, n, val;
829 off_t offset, clength, length, size;
830 time_t mtime;
831 const char *p;
832 fetchIO *f;
833 hdr_t h;
834 char hbuf[URL_HOSTLEN + 7], *host;
835
836 direct = CHECK_FLAG('d');
837 noredirect = CHECK_FLAG('A');
838 verbose = CHECK_FLAG('v');
839 if_modified_since = CHECK_FLAG('i');
840 keep_alive = 0;
841
842 if (direct && purl) {
843 fetchFreeURL(purl);
844 purl = NULL;
845 }
846
847 /* try the provided URL first */
848 url = URL;
849
850 /* if the A flag is set, we only get one try */
851 n = noredirect ? 1 : MAX_REDIRECT;
852 i = 0;
853
854 e = HTTP_PROTOCOL_ERROR;
855 need_auth = 0;
856 do {
857 new = NULL;
858 chunked = 0;
859 offset = 0;
860 clength = -1;
861 length = -1;
862 size = -1;
863 mtime = 0;
864
865 /* check port */
866 if (!url->port)
867 url->port = fetch_default_port(url->scheme);
868
869 /* were we redirected to an FTP URL? */
870 if (purl == NULL && strcmp(url->scheme, SCHEME_FTP) == 0) {
871 if (strcmp(op, "GET") == 0)
872 return (ftp_request(url, "RETR", NULL, us, purl, flags));
873 else if (strcmp(op, "HEAD") == 0)
874 return (ftp_request(url, "STAT", NULL, us, purl, flags));
875 }
876
877 /* connect to server or proxy */
878 if ((conn = http_connect(url, purl, flags, &cached)) == NULL)
879 goto ouch;
880
881 host = url->host;
882 #ifdef INET6
883 if (strchr(url->host, ':')) {
884 snprintf(hbuf, sizeof(hbuf), "[%s]", url->host);
885 host = hbuf;
886 }
887 #endif
888 if (url->port != fetch_default_port(url->scheme)) {
889 if (host != hbuf) {
890 strcpy(hbuf, host);
891 host = hbuf;
892 }
893 snprintf(hbuf + strlen(hbuf),
894 sizeof(hbuf) - strlen(hbuf), ":%d", url->port);
895 }
896
897 /* send request */
898 if (verbose)
899 fetch_info("requesting %s://%s%s",
900 url->scheme, host, url->doc);
901 if (purl) {
902 http_cmd(conn, "%s %s://%s%s HTTP/1.1\r\n",
903 op, url->scheme, host, url->doc);
904 } else {
905 http_cmd(conn, "%s %s HTTP/1.1\r\n",
906 op, url->doc);
907 }
908
909 if (if_modified_since && url->last_modified > 0)
910 set_if_modified_since(conn, url->last_modified);
911
912 /* virtual host */
913 http_cmd(conn, "Host: %s\r\n", host);
914
915 /* proxy authorization */
916 if (purl) {
917 if (*purl->user || *purl->pwd)
918 http_basic_auth(conn, "Proxy-Authorization",
919 purl->user, purl->pwd);
920 else if ((p = getenv("HTTP_PROXY_AUTH")) != NULL && *p != '\0')
921 http_authorize(conn, "Proxy-Authorization", p);
922 }
923
924 /* server authorization */
925 if (need_auth || *url->user || *url->pwd) {
926 if (*url->user || *url->pwd)
927 http_basic_auth(conn, "Authorization", url->user, url->pwd);
928 else if ((p = getenv("HTTP_AUTH")) != NULL && *p != '\0')
929 http_authorize(conn, "Authorization", p);
930 else if (fetchAuthMethod && fetchAuthMethod(url) == 0) {
931 http_basic_auth(conn, "Authorization", url->user, url->pwd);
932 } else {
933 http_seterr(HTTP_NEED_AUTH);
934 goto ouch;
935 }
936 }
937
938 /* other headers */
939 if ((p = getenv("HTTP_REFERER")) != NULL && *p != '\0') {
940 if (strcasecmp(p, "auto") == 0)
941 http_cmd(conn, "Referer: %s://%s%s\r\n",
942 url->scheme, host, url->doc);
943 else
944 http_cmd(conn, "Referer: %s\r\n", p);
945 }
946 if ((p = getenv("HTTP_USER_AGENT")) != NULL && *p != '\0')
947 http_cmd(conn, "User-Agent: %s\r\n", p);
948 else
949 http_cmd(conn, "User-Agent: %s\r\n", _LIBFETCH_VER);
950 if (url->offset > 0)
951 http_cmd(conn, "Range: bytes=%lld-\r\n", (long long)url->offset);
952 http_cmd(conn, "\r\n");
953
954 /*
955 * Force the queued request to be dispatched. Normally, one
956 * would do this with shutdown(2) but squid proxies can be
957 * configured to disallow such half-closed connections. To
958 * be compatible with such configurations, fiddle with socket
959 * options to force the pending data to be written.
960 */
961 #ifdef TCP_NOPUSH
962 val = 0;
963 setsockopt(conn->sd, IPPROTO_TCP, TCP_NOPUSH, &val,
964 sizeof(val));
965 #endif
966 val = 1;
967 setsockopt(conn->sd, IPPROTO_TCP, TCP_NODELAY, &val,
968 (socklen_t)sizeof(val));
969
970 /* get reply */
971 switch (http_get_reply(conn)) {
972 case HTTP_OK:
973 case HTTP_PARTIAL:
974 case HTTP_NOT_MODIFIED:
975 /* fine */
976 break;
977 case HTTP_MOVED_PERM:
978 case HTTP_MOVED_TEMP:
979 case HTTP_SEE_OTHER:
980 /*
981 * Not so fine, but we still have to read the
982 * headers to get the new location.
983 */
984 break;
985 case HTTP_NEED_AUTH:
986 if (need_auth) {
987 /*
988 * We already sent out authorization code,
989 * so there's nothing more we can do.
990 */
991 http_seterr(conn->err);
992 goto ouch;
993 }
994 /* try again, but send the password this time */
995 if (verbose)
996 fetch_info("server requires authorization");
997 break;
998 case HTTP_NEED_PROXY_AUTH:
999 /*
1000 * If we're talking to a proxy, we already sent
1001 * our proxy authorization code, so there's
1002 * nothing more we can do.
1003 */
1004 http_seterr(conn->err);
1005 goto ouch;
1006 case HTTP_BAD_RANGE:
1007 /*
1008 * This can happen if we ask for 0 bytes because
1009 * we already have the whole file. Consider this
1010 * a success for now, and check sizes later.
1011 */
1012 break;
1013 case HTTP_PROTOCOL_ERROR:
1014 /* fall through */
1015 case -1:
1016 --i;
1017 if (cached)
1018 continue;
1019 fetch_syserr();
1020 goto ouch;
1021 default:
1022 http_seterr(conn->err);
1023 if (!verbose)
1024 goto ouch;
1025 /* fall through so we can get the full error message */
1026 }
1027
1028 /* get headers */
1029 do {
1030 switch ((h = http_next_header(conn, &p))) {
1031 case hdr_syserror:
1032 fetch_syserr();
1033 goto ouch;
1034 case hdr_error:
1035 http_seterr(HTTP_PROTOCOL_ERROR);
1036 goto ouch;
1037 case hdr_connection:
1038 /* XXX too weak? */
1039 keep_alive = (strcasecmp(p, "keep-alive") == 0);
1040 break;
1041 case hdr_content_length:
1042 http_parse_length(p, &clength);
1043 break;
1044 case hdr_content_range:
1045 http_parse_range(p, &offset, &length, &size);
1046 break;
1047 case hdr_last_modified:
1048 http_parse_mtime(p, &mtime);
1049 break;
1050 case hdr_location:
1051 if (!HTTP_REDIRECT(conn->err))
1052 break;
1053 if (new)
1054 free(new);
1055 if (verbose)
1056 fetch_info("%d redirect to %s", conn->err, p);
1057 if (*p == '/')
1058 /* absolute path */
1059 new = fetchMakeURL(url->scheme, url->host, url->port, p,
1060 url->user, url->pwd);
1061 else
1062 new = fetchParseURL(p);
1063 if (new == NULL) {
1064 /* XXX should set an error code */
1065 goto ouch;
1066 }
1067 if (!*new->user && !*new->pwd) {
1068 strcpy(new->user, url->user);
1069 strcpy(new->pwd, url->pwd);
1070 }
1071 new->offset = url->offset;
1072 new->length = url->length;
1073 break;
1074 case hdr_transfer_encoding:
1075 /* XXX weak test*/
1076 chunked = (strcasecmp(p, "chunked") == 0);
1077 break;
1078 case hdr_www_authenticate:
1079 if (conn->err != HTTP_NEED_AUTH)
1080 break;
1081 /* if we were smarter, we'd check the method and realm */
1082 break;
1083 case hdr_end:
1084 /* fall through */
1085 case hdr_unknown:
1086 /* ignore */
1087 break;
1088 }
1089 } while (h > hdr_end);
1090
1091 /* we need to provide authentication */
1092 if (conn->err == HTTP_NEED_AUTH) {
1093 e = conn->err;
1094 need_auth = 1;
1095 fetch_close(conn);
1096 conn = NULL;
1097 continue;
1098 }
1099
1100 /* requested range not satisfiable */
1101 if (conn->err == HTTP_BAD_RANGE) {
1102 if (url->offset == size && url->length == 0) {
1103 /* asked for 0 bytes; fake it */
1104 offset = url->offset;
1105 conn->err = HTTP_OK;
1106 break;
1107 } else {
1108 http_seterr(conn->err);
1109 goto ouch;
1110 }
1111 }
1112
1113 /* we have a hit or an error */
1114 if (conn->err == HTTP_OK ||
1115 conn->err == HTTP_PARTIAL ||
1116 conn->err == HTTP_NOT_MODIFIED ||
1117 HTTP_ERROR(conn->err))
1118 break;
1119
1120 /* all other cases: we got a redirect */
1121 e = conn->err;
1122 need_auth = 0;
1123 fetch_close(conn);
1124 conn = NULL;
1125 if (!new)
1126 break;
1127 if (url != URL)
1128 fetchFreeURL(url);
1129 url = new;
1130 } while (++i < n);
1131
1132 /* we failed, or ran out of retries */
1133 if (conn == NULL) {
1134 http_seterr(e);
1135 goto ouch;
1136 }
1137
1138 /* check for inconsistencies */
1139 if (clength != -1 && length != -1 && clength != length) {
1140 http_seterr(HTTP_PROTOCOL_ERROR);
1141 goto ouch;
1142 }
1143 if (clength == -1)
1144 clength = length;
1145 if (clength != -1)
1146 length = offset + clength;
1147 if (length != -1 && size != -1 && length != size) {
1148 http_seterr(HTTP_PROTOCOL_ERROR);
1149 goto ouch;
1150 }
1151 if (size == -1)
1152 size = length;
1153
1154 /* fill in stats */
1155 if (us) {
1156 us->size = size;
1157 us->atime = us->mtime = mtime;
1158 }
1159
1160 /* too far? */
1161 if (URL->offset > 0 && offset > URL->offset) {
1162 http_seterr(HTTP_PROTOCOL_ERROR);
1163 goto ouch;
1164 }
1165
1166 /* report back real offset and size */
1167 URL->offset = offset;
1168 URL->length = clength;
1169
1170 if (clength == -1 && !chunked)
1171 keep_alive = 0;
1172
1173 if (conn->err == HTTP_NOT_MODIFIED) {
1174 http_seterr(HTTP_NOT_MODIFIED);
1175 if (keep_alive) {
1176 fetch_cache_put(conn, fetch_close);
1177 conn = NULL;
1178 }
1179 goto ouch;
1180 }
1181
1182 /* wrap it up in a fetchIO */
1183 if ((f = http_funopen(conn, chunked, keep_alive, clength)) == NULL) {
1184 fetch_syserr();
1185 goto ouch;
1186 }
1187
1188 if (url != URL)
1189 fetchFreeURL(url);
1190 if (purl)
1191 fetchFreeURL(purl);
1192
1193 if (HTTP_ERROR(conn->err)) {
1194
1195 if (keep_alive) {
1196 char buf[512];
1197 do {
1198 } while (fetchIO_read(f, buf, sizeof(buf)) > 0);
1199 }
1200
1201 fetchIO_close(f);
1202 f = NULL;
1203 }
1204
1205 return (f);
1206
1207 ouch:
1208 if (url != URL)
1209 fetchFreeURL(url);
1210 if (purl)
1211 fetchFreeURL(purl);
1212 if (conn != NULL)
1213 fetch_close(conn);
1214 return (NULL);
1215 }
1216
1217
1218 /*****************************************************************************
1219 * Entry points
1220 */
1221
1222 /*
1223 * Retrieve and stat a file by HTTP
1224 */
1225 fetchIO *
fetchXGetHTTP(struct url * URL,struct url_stat * us,const char * flags)1226 fetchXGetHTTP(struct url *URL, struct url_stat *us, const char *flags)
1227 {
1228 return (http_request(URL, "GET", us, http_get_proxy(URL, flags), flags));
1229 }
1230
1231 /*
1232 * Retrieve a file by HTTP
1233 */
1234 fetchIO *
fetchGetHTTP(struct url * URL,const char * flags)1235 fetchGetHTTP(struct url *URL, const char *flags)
1236 {
1237 return (fetchXGetHTTP(URL, NULL, flags));
1238 }
1239
1240 /*
1241 * Store a file by HTTP
1242 */
1243 fetchIO *
1244 /*ARGSUSED*/
fetchPutHTTP(struct url * URL __unused,const char * flags __unused)1245 fetchPutHTTP(struct url *URL __unused, const char *flags __unused)
1246 {
1247 fprintf(stderr, "fetchPutHTTP(): not implemented\n");
1248 return (NULL);
1249 }
1250
1251 /*
1252 * Get an HTTP document's metadata
1253 */
1254 int
fetchStatHTTP(struct url * URL,struct url_stat * us,const char * flags)1255 fetchStatHTTP(struct url *URL, struct url_stat *us, const char *flags)
1256 {
1257 fetchIO *f;
1258
1259 f = http_request(URL, "HEAD", us, http_get_proxy(URL, flags), flags);
1260 if (f == NULL)
1261 return (-1);
1262 fetchIO_close(f);
1263 return (0);
1264 }
1265
/*
 * States of the character-driven index parser (see parse_index()).
 * The enumerator order is kept as is, so the numeric values are stable.
 */
enum http_states {
	ST_NONE,	/* plain text, not in markup */
	ST_LT,		/* in tag: "<" seen */
	ST_LTA,		/* in tag: "<a" seen */
	ST_TAGA,	/* in a-tag: "<a " seen, between attributes */
	ST_H,		/* in a-tag: "<a h" seen */
	ST_R,		/* in a-tag: "<a hr" seen */
	ST_E,		/* in a-tag: "<a hre" seen */
	ST_F,		/* in a-tag: "<a href" seen */
	ST_HREF,	/* in a-tag: "<a href=" seen */
	ST_HREFQ,	/* inside the quoted href value */
	ST_TAG,		/* in a tag other than "<a": disregarded */
	ST_TAGAX,	/* in an unknown keyword inside an a-tag */
	ST_TAGAQ	/* inside a quoted value of another a-tag keyword */
};
1281
1282 struct index_parser {
1283 struct url_list *ue;
1284 struct url *url;
1285 enum http_states state;
1286 };
1287
1288 static ssize_t
parse_index(struct index_parser * parser,const char * buf,size_t len)1289 parse_index(struct index_parser *parser, const char *buf, size_t len)
1290 {
1291 char *end_attr, p = *buf;
1292
1293 switch (parser->state) {
1294 case ST_NONE:
1295 /* Plain text, not in markup */
1296 if (p == '<')
1297 parser->state = ST_LT;
1298 return 1;
1299 case ST_LT:
1300 /* In tag -- "<" already found */
1301 if (p == '>')
1302 parser->state = ST_NONE;
1303 else if (p == 'a' || p == 'A')
1304 parser->state = ST_LTA;
1305 else if (!isspace((unsigned char)p))
1306 parser->state = ST_TAG;
1307 return 1;
1308 case ST_LTA:
1309 /* In tag -- "<a" already found */
1310 if (p == '>')
1311 parser->state = ST_NONE;
1312 else if (p == '"')
1313 parser->state = ST_TAGAQ;
1314 else if (isspace((unsigned char)p))
1315 parser->state = ST_TAGA;
1316 else
1317 parser->state = ST_TAG;
1318 return 1;
1319 case ST_TAG:
1320 /* In tag, but not "<a" -- disregard */
1321 if (p == '>')
1322 parser->state = ST_NONE;
1323 return 1;
1324 case ST_TAGA:
1325 /* In a-tag -- "<a " already found */
1326 if (p == '>')
1327 parser->state = ST_NONE;
1328 else if (p == '"')
1329 parser->state = ST_TAGAQ;
1330 else if (p == 'h' || p == 'H')
1331 parser->state = ST_H;
1332 else if (!isspace((unsigned char)p))
1333 parser->state = ST_TAGAX;
1334 return 1;
1335 case ST_TAGAX:
1336 /* In unknown keyword in a-tag */
1337 if (p == '>')
1338 parser->state = ST_NONE;
1339 else if (p == '"')
1340 parser->state = ST_TAGAQ;
1341 else if (isspace((unsigned char)p))
1342 parser->state = ST_TAGA;
1343 return 1;
1344 case ST_TAGAQ:
1345 /* In a-tag, unknown argument for keys. */
1346 if (p == '>')
1347 parser->state = ST_NONE;
1348 else if (p == '"')
1349 parser->state = ST_TAGA;
1350 return 1;
1351 case ST_H:
1352 /* In a-tag -- "<a h" already found */
1353 if (p == '>')
1354 parser->state = ST_NONE;
1355 else if (p == '"')
1356 parser->state = ST_TAGAQ;
1357 else if (p == 'r' || p == 'R')
1358 parser->state = ST_R;
1359 else if (isspace((unsigned char)p))
1360 parser->state = ST_TAGA;
1361 else
1362 parser->state = ST_TAGAX;
1363 return 1;
1364 case ST_R:
1365 /* In a-tag -- "<a hr" already found */
1366 if (p == '>')
1367 parser->state = ST_NONE;
1368 else if (p == '"')
1369 parser->state = ST_TAGAQ;
1370 else if (p == 'e' || p == 'E')
1371 parser->state = ST_E;
1372 else if (isspace((unsigned char)p))
1373 parser->state = ST_TAGA;
1374 else
1375 parser->state = ST_TAGAX;
1376 return 1;
1377 case ST_E:
1378 /* In a-tag -- "<a hre" already found */
1379 if (p == '>')
1380 parser->state = ST_NONE;
1381 else if (p == '"')
1382 parser->state = ST_TAGAQ;
1383 else if (p == 'f' || p == 'F')
1384 parser->state = ST_F;
1385 else if (isspace((unsigned char)p))
1386 parser->state = ST_TAGA;
1387 else
1388 parser->state = ST_TAGAX;
1389 return 1;
1390 case ST_F:
1391 /* In a-tag -- "<a href" already found */
1392 if (p == '>')
1393 parser->state = ST_NONE;
1394 else if (p == '"')
1395 parser->state = ST_TAGAQ;
1396 else if (p == '=')
1397 parser->state = ST_HREF;
1398 else if (!isspace((unsigned char)p))
1399 parser->state = ST_TAGAX;
1400 return 1;
1401 case ST_HREF:
1402 /* In a-tag -- "<a href=" already found */
1403 if (p == '>')
1404 parser->state = ST_NONE;
1405 else if (p == '"')
1406 parser->state = ST_HREFQ;
1407 else if (!isspace((unsigned char)p))
1408 parser->state = ST_TAGA;
1409 return 1;
1410 case ST_HREFQ:
1411 /* In href of the a-tag */
1412 end_attr = memchr(buf, '"', len);
1413 if (end_attr == NULL)
1414 return 0;
1415 *end_attr = '\0';
1416 parser->state = ST_TAGA;
1417 if (fetch_add_entry(parser->ue, parser->url, buf, 1))
1418 return -1;
1419 return end_attr + 1 - buf;
1420 }
1421 /* NOTREACHED */
1422 abort();
1423 }
1424
1425 struct http_index_cache {
1426 struct http_index_cache *next;
1427 struct url *location;
1428 struct url_list ue;
1429 };
1430
1431 static struct http_index_cache *index_cache;
1432
1433 /*
1434 * List a directory
1435 */
1436 int
1437 /*ARGSUSED*/
fetchListHTTP(struct url_list * ue,struct url * url,const char * pattern __unused,const char * flags)1438 fetchListHTTP(struct url_list *ue, struct url *url, const char *pattern __unused, const char *flags)
1439 {
1440 fetchIO *f;
1441 char buf[2 * PATH_MAX];
1442 size_t buf_len, sum_processed;
1443 ssize_t read_len, processed;
1444 struct index_parser state;
1445 struct http_index_cache *cache = NULL;
1446 int do_cache, ret;
1447
1448 do_cache = CHECK_FLAG('c');
1449
1450 if (do_cache) {
1451 for (cache = index_cache; cache != NULL; cache = cache->next) {
1452 if (strcmp(cache->location->scheme, url->scheme))
1453 continue;
1454 if (strcmp(cache->location->user, url->user))
1455 continue;
1456 if (strcmp(cache->location->pwd, url->pwd))
1457 continue;
1458 if (strcmp(cache->location->host, url->host))
1459 continue;
1460 if (cache->location->port != url->port)
1461 continue;
1462 if (strcmp(cache->location->doc, url->doc))
1463 continue;
1464 return fetchAppendURLList(ue, &cache->ue);
1465 }
1466
1467 cache = malloc(sizeof(*cache));
1468 fetchInitURLList(&cache->ue);
1469 cache->location = fetchCopyURL(url);
1470 }
1471
1472 f = fetchGetHTTP(url, flags);
1473 if (f == NULL) {
1474 if (do_cache) {
1475 fetchFreeURLList(&cache->ue);
1476 fetchFreeURL(cache->location);
1477 free(cache);
1478 }
1479 return -1;
1480 }
1481
1482 state.url = url;
1483 state.state = ST_NONE;
1484 if (do_cache) {
1485 state.ue = &cache->ue;
1486 } else {
1487 state.ue = ue;
1488 }
1489
1490 buf_len = 0;
1491
1492 while ((read_len = fetchIO_read(f, buf + buf_len, sizeof(buf) - buf_len)) > 0) {
1493 buf_len += read_len;
1494 sum_processed = 0;
1495 do {
1496 processed = parse_index(&state, buf + sum_processed, buf_len);
1497 if (processed == -1)
1498 break;
1499 buf_len -= processed;
1500 sum_processed += processed;
1501 } while (processed != 0 && buf_len > 0);
1502 if (processed == -1) {
1503 read_len = -1;
1504 break;
1505 }
1506 memmove(buf, buf + sum_processed, buf_len);
1507 }
1508
1509 fetchIO_close(f);
1510
1511 ret = read_len < 0 ? -1 : 0;
1512
1513 if (do_cache) {
1514 if (ret == 0) {
1515 cache->next = index_cache;
1516 index_cache = cache;
1517 }
1518
1519 if (fetchAppendURLList(ue, &cache->ue))
1520 ret = -1;
1521 }
1522
1523 return ret;
1524 }
1525