1 /*
2  * nanohttp.c: minimalist HTTP GET implementation to fetch external subsets.
3  *             focuses on size, streamability, reentrancy and portability
4  *
5  * This is clearly not a general purpose HTTP implementation
6  * If you look for one, check:
7  *         http://www.w3.org/Library/
8  *
9  * See Copyright for the status of this software.
10  *
11  * daniel@veillard.com
12  */
13 
14 #define NEED_SOCKETS
15 #define IN_LIBXML
16 #include "libxml.h"
17 
18 #ifdef LIBXML_HTTP_ENABLED
19 #include <string.h>
20 
21 #ifdef HAVE_STDLIB_H
22 #include <stdlib.h>
23 #endif
24 #ifdef HAVE_UNISTD_H
25 #include <unistd.h>
26 #endif
27 #ifdef HAVE_SYS_TYPES_H
28 #include <sys/types.h>
29 #endif
30 #ifdef HAVE_SYS_SOCKET_H
31 #include <sys/socket.h>
32 #endif
33 #ifdef HAVE_NETINET_IN_H
34 #include <netinet/in.h>
35 #endif
36 #ifdef HAVE_ARPA_INET_H
37 #include <arpa/inet.h>
38 #endif
39 #ifdef HAVE_NETDB_H
40 #include <netdb.h>
41 #endif
42 #ifdef HAVE_RESOLV_H
43 #ifdef HAVE_ARPA_NAMESER_H
44 #include <arpa/nameser.h>
45 #endif
46 #include <resolv.h>
47 #endif
48 #ifdef HAVE_FCNTL_H
49 #include <fcntl.h>
50 #endif
51 #ifdef HAVE_ERRNO_H
52 #include <errno.h>
53 #endif
54 #ifdef HAVE_SYS_TIME_H
55 #include <sys/time.h>
56 #endif
57 #ifdef HAVE_SYS_SELECT_H
58 #include <sys/select.h>
59 #endif
60 #ifdef HAVE_STRINGS_H
61 #include <strings.h>
62 #endif
63 #ifdef SUPPORT_IP6
64 #include <resolv.h>
65 #endif
66 #ifdef HAVE_ZLIB_H
67 #include <zlib.h>
68 #endif
69 
70 
71 #ifdef VMS
72 #include <stropts>
73 #define XML_SOCKLEN_T unsigned int
74 #define SOCKET int
75 #endif
76 
77 #if defined(__MINGW32__) || defined(_WIN32_WCE)
78 #define _WINSOCKAPI_
79 #include <wsockcompat.h>
80 #include <winsock2.h>
81 #undef XML_SOCKLEN_T
82 #define XML_SOCKLEN_T unsigned int
83 #endif
84 
85 
86 #include <libxml/globals.h>
87 #include <libxml/xmlerror.h>
88 #include <libxml/xmlmemory.h>
89 #include <libxml/parser.h> /* for xmlStr(n)casecmp() */
90 #include <libxml/nanohttp.h>
91 #include <libxml/globals.h>
92 #include <libxml/uri.h>
93 
/**
 * A couple of portability macros.
 *
 * When the Winsock API is not in use, SOCKET is a plain int and (except
 * on BeOS) closesocket() is mapped onto close().
 */
#ifndef _WINSOCKAPI_
#ifndef __BEOS__
#define closesocket(s) close(s)
#endif
#define SOCKET int
#endif

/* BeOS: supply PF_INET from AF_INET when the headers do not define it. */
#ifdef __BEOS__
#ifndef PF_INET
#define PF_INET AF_INET
#endif
#endif

/* Fallbacks used when nothing above (or in the included headers) set them. */
#ifndef XML_SOCKLEN_T
#define XML_SOCKLEN_T unsigned int
#endif
#ifndef SOCKET
#define SOCKET int
#endif

/*
 * STANDALONE builds switch on DEBUG_HTTP and map the case-insensitive
 * xmlStr* comparisons onto the libc equivalents.
 * NOTE(review): xmlStrcasecmpi looks like a typo for xmlStrcasecmp -
 * confirm against the callers before renaming.
 */
#ifdef STANDALONE
#define DEBUG_HTTP
#define xmlStrncasecmp(a, b, n) strncasecmp((char *)a, (char *)b, n)
#define xmlStrcasecmpi(a, b) strcasecmp((char *)a, (char *)b)
#endif

/* Presumably the maximum number of redirections followed; the use of this
 * constant is outside the part of the file reviewed here - TODO confirm. */
#define XML_NANO_HTTP_MAX_REDIR	10

/* Bytes requested per recv() call; also the threshold used by
 * xmlNanoHTTPRecv() when compacting or growing the input buffer. */
#define XML_NANO_HTTP_CHUNK	4096

/* Values stored in xmlNanoHTTPCtxt.state; WRITE and READ are tested with
 * a bitwise AND before sending and receiving respectively. */
#define XML_NANO_HTTP_CLOSED	0
#define XML_NANO_HTTP_WRITE	1
#define XML_NANO_HTTP_READ	2
#define XML_NANO_HTTP_NONE	4
131 
132 typedef struct xmlNanoHTTPCtxt {
133     char *protocol;	/* the protocol name */
134     char *hostname;	/* the host name */
135     int port;		/* the port */
136     char *path;		/* the path within the URL */
137     char *query;	/* the query string */
138     SOCKET fd;		/* the file descriptor for the socket */
139     int state;		/* WRITE / READ / CLOSED */
140     char *out;		/* buffer sent (zero terminated) */
141     char *outptr;	/* index within the buffer sent */
142     char *in;		/* the receiving buffer */
143     char *content;	/* the start of the content */
144     char *inptr;	/* the next byte to read from network */
145     char *inrptr;	/* the next byte to give back to the client */
146     int inlen;		/* len of the input buffer */
147     int last;		/* return code for last operation */
148     int returnValue;	/* the protocol return value */
149     int ContentLength;  /* specified content length from HTTP header */
150     char *contentType;	/* the MIME type for the input */
151     char *location;	/* the new URL in case of redirect */
152     char *authHeader;	/* contents of {WWW,Proxy}-Authenticate header */
153     char *encoding;	/* encoding extracted from the contentType */
154     char *mimeType;	/* Mime-Type extracted from the contentType */
155 #ifdef HAVE_ZLIB_H
156     z_stream *strm;	/* Zlib stream object */
157     int usesGzip;	/* "Content-Encoding: gzip" was detected */
158 #endif
159 } xmlNanoHTTPCtxt, *xmlNanoHTTPCtxtPtr;
160 
/* State shared by every request issued through this module. */
static int initialized = 0;        /* non-zero once xmlNanoHTTPInit() completed */
static char *proxy = NULL;         /* the proxy name if any */
static int proxyPort = 0;          /* the proxy port if any */
static unsigned int timeout = 60;  /* the select() timeout in seconds */

/* Forward declaration so the function can be referenced before its body. */
static int xmlNanoHTTPFetchContent(void *ctx, char **ptr, int *len);
167 
168 /**
169  * xmlHTTPErrMemory:
170  * @extra:  extra informations
171  *
172  * Handle an out of memory condition
173  */
174 static void
xmlHTTPErrMemory(const char * extra)175 xmlHTTPErrMemory(const char *extra)
176 {
177     __xmlSimpleError(XML_FROM_HTTP, XML_ERR_NO_MEMORY, NULL, NULL, extra);
178 }
179 
/**
 * socket_errno:
 *
 * Portability helper: fetch the error code of the last socket operation.
 *
 * Returns WSAGetLastError() when built against Winsock, errno otherwise.
 */
static int
socket_errno(void)
{
#ifdef _WINSOCKAPI_
    return WSAGetLastError();
#else
    return errno;
#endif
}
190 
#ifdef SUPPORT_IP6
/**
 * have_ipv6:
 *
 * Probe for IPv6 support by trying to create an AF_INET6 stream socket.
 * The probe socket is released again right away, through closesocket()
 * so that the release is also correct when sockets are Winsock handles.
 *
 * Returns 1 if the socket could be created, 0 otherwise.
 */
static int
have_ipv6(void)
{
    int s;

    s = socket(AF_INET6, SOCK_STREAM, 0);
    if (s != -1) {
        closesocket(s);
        return (1);
    }
    return (0);
}
#endif
204 
205 /**
206  * xmlNanoHTTPInit:
207  *
208  * Initialize the HTTP protocol layer.
209  * Currently it just checks for proxy informations
210  */
211 
212 void
xmlNanoHTTPInit(void)213 xmlNanoHTTPInit(void) {
214     const char *env;
215 #ifdef _WINSOCKAPI_
216     WSADATA wsaData;
217 #endif
218 
219     if (initialized)
220 	return;
221 
222 #ifdef _WINSOCKAPI_
223     if (WSAStartup(MAKEWORD(1, 1), &wsaData) != 0)
224 	return;
225 #endif
226 
227     if (proxy == NULL) {
228 	proxyPort = 80;
229 	env = getenv("no_proxy");
230 	if (env && ((env[0] == '*') && (env[1] == 0)))
231 	    goto done;
232 	env = getenv("http_proxy");
233 	if (env != NULL) {
234 	    xmlNanoHTTPScanProxy(env);
235 	    goto done;
236 	}
237 	env = getenv("HTTP_PROXY");
238 	if (env != NULL) {
239 	    xmlNanoHTTPScanProxy(env);
240 	    goto done;
241 	}
242     }
243 done:
244     initialized = 1;
245 }
246 
247 /**
248  * xmlNanoHTTPCleanup:
249  *
250  * Cleanup the HTTP protocol layer.
251  */
252 
253 void
xmlNanoHTTPCleanup(void)254 xmlNanoHTTPCleanup(void) {
255     if (proxy != NULL) {
256 	xmlFree(proxy);
257 	proxy = NULL;
258     }
259 #ifdef _WINSOCKAPI_
260     if (initialized)
261 	WSACleanup();
262 #endif
263     initialized = 0;
264     return;
265 }
266 
267 /**
268  * xmlNanoHTTPScanURL:
269  * @ctxt:  an HTTP context
270  * @URL:  The URL used to initialize the context
271  *
272  * (Re)Initialize an HTTP context by parsing the URL and finding
273  * the protocol host port and path it indicates.
274  */
275 
276 static void
xmlNanoHTTPScanURL(xmlNanoHTTPCtxtPtr ctxt,const char * URL)277 xmlNanoHTTPScanURL(xmlNanoHTTPCtxtPtr ctxt, const char *URL) {
278     xmlURIPtr uri;
279     /*
280      * Clear any existing data from the context
281      */
282     if (ctxt->protocol != NULL) {
283         xmlFree(ctxt->protocol);
284 	ctxt->protocol = NULL;
285     }
286     if (ctxt->hostname != NULL) {
287         xmlFree(ctxt->hostname);
288 	ctxt->hostname = NULL;
289     }
290     if (ctxt->path != NULL) {
291         xmlFree(ctxt->path);
292 	ctxt->path = NULL;
293     }
294     if (ctxt->query != NULL) {
295         xmlFree(ctxt->query);
296 	ctxt->query = NULL;
297     }
298     if (URL == NULL) return;
299 
300     uri = xmlParseURIRaw(URL, 1);
301     if (uri == NULL)
302 	return;
303 
304     if ((uri->scheme == NULL) || (uri->server == NULL)) {
305 	xmlFreeURI(uri);
306 	return;
307     }
308 
309     ctxt->protocol = xmlMemStrdup(uri->scheme);
310     ctxt->hostname = xmlMemStrdup(uri->server);
311     if (uri->path != NULL)
312 	ctxt->path = xmlMemStrdup(uri->path);
313     else
314 	ctxt->path = xmlMemStrdup("/");
315     if (uri->query != NULL)
316 	ctxt->query = xmlMemStrdup(uri->query);
317     if (uri->port != 0)
318 	ctxt->port = uri->port;
319 
320     xmlFreeURI(uri);
321 }
322 
323 /**
324  * xmlNanoHTTPScanProxy:
325  * @URL:  The proxy URL used to initialize the proxy context
326  *
327  * (Re)Initialize the HTTP Proxy context by parsing the URL and finding
328  * the protocol host port it indicates.
329  * Should be like http://myproxy/ or http://myproxy:3128/
330  * A NULL URL cleans up proxy informations.
331  */
332 
333 void
xmlNanoHTTPScanProxy(const char * URL)334 xmlNanoHTTPScanProxy(const char *URL) {
335     xmlURIPtr uri;
336 
337     if (proxy != NULL) {
338         xmlFree(proxy);
339 	proxy = NULL;
340     }
341     proxyPort = 0;
342 
343 #ifdef DEBUG_HTTP
344     if (URL == NULL)
345 	xmlGenericError(xmlGenericErrorContext,
346 		"Removing HTTP proxy info\n");
347     else
348 	xmlGenericError(xmlGenericErrorContext,
349 		"Using HTTP proxy %s\n", URL);
350 #endif
351     if (URL == NULL) return;
352 
353     uri = xmlParseURIRaw(URL, 1);
354     if ((uri == NULL) || (uri->scheme == NULL) ||
355 	(strcmp(uri->scheme, "http")) || (uri->server == NULL)) {
356 	__xmlIOErr(XML_FROM_HTTP, XML_HTTP_URL_SYNTAX, "Syntax Error\n");
357 	if (uri != NULL)
358 	    xmlFreeURI(uri);
359 	return;
360     }
361 
362     proxy = xmlMemStrdup(uri->server);
363     if (uri->port != 0)
364 	proxyPort = uri->port;
365 
366     xmlFreeURI(uri);
367 }
368 
369 /**
370  * xmlNanoHTTPNewCtxt:
371  * @URL:  The URL used to initialize the context
372  *
373  * Allocate and initialize a new HTTP context.
374  *
375  * Returns an HTTP context or NULL in case of error.
376  */
377 
378 static xmlNanoHTTPCtxtPtr
xmlNanoHTTPNewCtxt(const char * URL)379 xmlNanoHTTPNewCtxt(const char *URL) {
380     xmlNanoHTTPCtxtPtr ret;
381 
382     ret = (xmlNanoHTTPCtxtPtr) xmlMalloc(sizeof(xmlNanoHTTPCtxt));
383     if (ret == NULL) {
384         xmlHTTPErrMemory("allocating context");
385         return(NULL);
386     }
387 
388     memset(ret, 0, sizeof(xmlNanoHTTPCtxt));
389     ret->port = 80;
390     ret->returnValue = 0;
391     ret->fd = -1;
392     ret->ContentLength = -1;
393 
394     xmlNanoHTTPScanURL(ret, URL);
395 
396     return(ret);
397 }
398 
399 /**
400  * xmlNanoHTTPFreeCtxt:
401  * @ctxt:  an HTTP context
402  *
403  * Frees the context after closing the connection.
404  */
405 
406 static void
xmlNanoHTTPFreeCtxt(xmlNanoHTTPCtxtPtr ctxt)407 xmlNanoHTTPFreeCtxt(xmlNanoHTTPCtxtPtr ctxt) {
408     if (ctxt == NULL) return;
409     if (ctxt->hostname != NULL) xmlFree(ctxt->hostname);
410     if (ctxt->protocol != NULL) xmlFree(ctxt->protocol);
411     if (ctxt->path != NULL) xmlFree(ctxt->path);
412     if (ctxt->query != NULL) xmlFree(ctxt->query);
413     if (ctxt->out != NULL) xmlFree(ctxt->out);
414     if (ctxt->in != NULL) xmlFree(ctxt->in);
415     if (ctxt->contentType != NULL) xmlFree(ctxt->contentType);
416     if (ctxt->encoding != NULL) xmlFree(ctxt->encoding);
417     if (ctxt->mimeType != NULL) xmlFree(ctxt->mimeType);
418     if (ctxt->location != NULL) xmlFree(ctxt->location);
419     if (ctxt->authHeader != NULL) xmlFree(ctxt->authHeader);
420 #ifdef HAVE_ZLIB_H
421     if (ctxt->strm != NULL) {
422 	inflateEnd(ctxt->strm);
423 	xmlFree(ctxt->strm);
424     }
425 #endif
426 
427     ctxt->state = XML_NANO_HTTP_NONE;
428     if (ctxt->fd >= 0) closesocket(ctxt->fd);
429     ctxt->fd = -1;
430     xmlFree(ctxt);
431 }
432 
433 /**
434  * xmlNanoHTTPSend:
435  * @ctxt:  an HTTP context
436  *
437  * Send the input needed to initiate the processing on the server side
438  * Returns number of bytes sent or -1 on error.
439  */
440 
441 static int
xmlNanoHTTPSend(xmlNanoHTTPCtxtPtr ctxt,const char * xmt_ptr,int outlen)442 xmlNanoHTTPSend(xmlNanoHTTPCtxtPtr ctxt, const char * xmt_ptr, int outlen) {
443 
444     int 	total_sent = 0;
445 
446     if ( (ctxt->state & XML_NANO_HTTP_WRITE) && (xmt_ptr != NULL ) ) {
447         while (total_sent < outlen) {
448             int nsent = send(ctxt->fd, xmt_ptr + total_sent,
449                                       outlen - total_sent, 0);
450             if (nsent>0)
451                 total_sent += nsent;
452 	    else if ( ( nsent == -1 ) &&
453 #if defined(EAGAIN) && EAGAIN != EWOULDBLOCK
454 	    	      ( socket_errno( ) != EAGAIN ) &&
455 #endif
456 		        ( socket_errno( ) != EWOULDBLOCK ) ) {
457 		__xmlIOErr(XML_FROM_HTTP, 0, "send failed\n");
458 		if ( total_sent == 0 )
459 		    total_sent = -1;
460 		break;
461 	    }
462 	    else {
463 	        /*
464 		**  No data sent
465 		**  Since non-blocking sockets are used, wait for
466 		**  socket to be writable or default timeout prior
467 		**  to retrying.
468 		*/
469 
470 		struct timeval	tv;
471 		fd_set		wfd;
472 
473 		tv.tv_sec = timeout;
474 		tv.tv_usec = 0;
475 		FD_ZERO( &wfd );
476 #ifdef _MSC_VER
477 #pragma warning(push)
478 #pragma warning(disable: 4018)
479 #endif
480 		FD_SET( ctxt->fd, &wfd );
481 #ifdef _MSC_VER
482 #pragma warning(pop)
483 #endif
484 		(void)select( ctxt->fd + 1, NULL, &wfd, NULL, &tv );
485 	    }
486 	}
487     }
488 
489     return total_sent;
490 }
491 
492 /**
493  * xmlNanoHTTPRecv:
494  * @ctxt:  an HTTP context
495  *
496  * Read information coming from the HTTP connection.
497  * This is a blocking call (but it blocks in select(), not read()).
498  *
499  * Returns the number of byte read or -1 in case of error.
500  */
501 
502 static int
xmlNanoHTTPRecv(xmlNanoHTTPCtxtPtr ctxt)503 xmlNanoHTTPRecv(xmlNanoHTTPCtxtPtr ctxt) {
504     fd_set rfd;
505     struct timeval tv;
506 
507 
508     while (ctxt->state & XML_NANO_HTTP_READ) {
509 	if (ctxt->in == NULL) {
510 	    ctxt->in = (char *) xmlMallocAtomic(65000 * sizeof(char));
511 	    if (ctxt->in == NULL) {
512 		xmlHTTPErrMemory("allocating input");
513 	        ctxt->last = -1;
514 		return(-1);
515 	    }
516 	    ctxt->inlen = 65000;
517 	    ctxt->inptr = ctxt->content = ctxt->inrptr = ctxt->in;
518 	}
519 	if (ctxt->inrptr > ctxt->in + XML_NANO_HTTP_CHUNK) {
520 	    int delta = ctxt->inrptr - ctxt->in;
521 	    int len = ctxt->inptr - ctxt->inrptr;
522 
523 	    memmove(ctxt->in, ctxt->inrptr, len);
524 	    ctxt->inrptr -= delta;
525 	    ctxt->content -= delta;
526 	    ctxt->inptr -= delta;
527 	}
528         if ((ctxt->in + ctxt->inlen) < (ctxt->inptr + XML_NANO_HTTP_CHUNK)) {
529 	    int d_inptr = ctxt->inptr - ctxt->in;
530 	    int d_content = ctxt->content - ctxt->in;
531 	    int d_inrptr = ctxt->inrptr - ctxt->in;
532 	    char *	tmp_ptr = ctxt->in;
533 
534 	    ctxt->inlen *= 2;
535             ctxt->in = (char *) xmlRealloc(tmp_ptr, ctxt->inlen);
536 	    if (ctxt->in == NULL) {
537 		xmlHTTPErrMemory("allocating input buffer");
538 		xmlFree( tmp_ptr );
539 	        ctxt->last = -1;
540 		return(-1);
541 	    }
542             ctxt->inptr = ctxt->in + d_inptr;
543             ctxt->content = ctxt->in + d_content;
544             ctxt->inrptr = ctxt->in + d_inrptr;
545 	}
546 	ctxt->last = recv(ctxt->fd, ctxt->inptr, XML_NANO_HTTP_CHUNK, 0);
547 	if (ctxt->last > 0) {
548 	    ctxt->inptr += ctxt->last;
549 	    return(ctxt->last);
550 	}
551 	if (ctxt->last == 0) {
552 	    return(0);
553 	}
554 	if (ctxt->last == -1) {
555 	    switch (socket_errno()) {
556 		case EINPROGRESS:
557 		case EWOULDBLOCK:
558 #if defined(EAGAIN) && EAGAIN != EWOULDBLOCK
559 		case EAGAIN:
560 #endif
561 		    break;
562 
563 		case ECONNRESET:
564 		case ESHUTDOWN:
565 		    return ( 0 );
566 
567 		default:
568 		    __xmlIOErr(XML_FROM_HTTP, 0, "recv failed\n");
569 		    return(-1);
570 	    }
571 	}
572 
573 	tv.tv_sec = timeout;
574 	tv.tv_usec = 0;
575 	FD_ZERO(&rfd);
576 #ifdef _MSC_VER
577 #pragma warning(push)
578 #pragma warning(disable: 4018)
579 #endif
580 	FD_SET(ctxt->fd, &rfd);
581 #ifdef _MSC_VER
582 #pragma warning(pop)
583 #endif
584 
585 	if ( (select(ctxt->fd+1, &rfd, NULL, NULL, &tv)<1)
586 #if defined(EINTR)
587 		&& (errno != EINTR)
588 #endif
589 	)
590 		return(0);
591     }
592     return(0);
593 }
594 
595 /**
596  * xmlNanoHTTPReadLine:
597  * @ctxt:  an HTTP context
598  *
599  * Read one line in the HTTP server output, usually for extracting
600  * the HTTP protocol informations from the answer header.
601  *
602  * Returns a newly allocated string with a copy of the line, or NULL
603  *         which indicate the end of the input.
604  */
605 
606 static char *
xmlNanoHTTPReadLine(xmlNanoHTTPCtxtPtr ctxt)607 xmlNanoHTTPReadLine(xmlNanoHTTPCtxtPtr ctxt) {
608     char buf[4096];
609     char *bp = buf;
610     int	rc;
611 
612     while (bp - buf < 4095) {
613 	if (ctxt->inrptr == ctxt->inptr) {
614 	    if ( (rc = xmlNanoHTTPRecv(ctxt)) == 0) {
615 		if (bp == buf)
616 		    return(NULL);
617 		else
618 		    *bp = 0;
619 		return(xmlMemStrdup(buf));
620 	    }
621 	    else if ( rc == -1 ) {
622 	        return ( NULL );
623 	    }
624 	}
625 	*bp = *ctxt->inrptr++;
626 	if (*bp == '\n') {
627 	    *bp = 0;
628 	    return(xmlMemStrdup(buf));
629 	}
630 	if (*bp != '\r')
631 	    bp++;
632     }
633     buf[4095] = 0;
634     return(xmlMemStrdup(buf));
635 }
636 
637 
638 /**
639  * xmlNanoHTTPScanAnswer:
640  * @ctxt:  an HTTP context
641  * @line:  an HTTP header line
642  *
643  * Try to extract useful informations from the server answer.
644  * We currently parse and process:
645  *  - The HTTP revision/ return code
646  *  - The Content-Type, Mime-Type and charset used
647  *  - The Location for redirect processing.
648  *
649  * Returns -1 in case of failure, the file descriptor number otherwise
650  */
651 
652 static void
xmlNanoHTTPScanAnswer(xmlNanoHTTPCtxtPtr ctxt,const char * line)653 xmlNanoHTTPScanAnswer(xmlNanoHTTPCtxtPtr ctxt, const char *line) {
654     const char *cur = line;
655 
656     if (line == NULL) return;
657 
658     if (!strncmp(line, "HTTP/", 5)) {
659         int version = 0;
660 	int ret = 0;
661 
662 	cur += 5;
663 	while ((*cur >= '0') && (*cur <= '9')) {
664 	    version *= 10;
665 	    version += *cur - '0';
666 	    cur++;
667 	}
668 	if (*cur == '.') {
669 	    cur++;
670 	    if ((*cur >= '0') && (*cur <= '9')) {
671 		version *= 10;
672 		version += *cur - '0';
673 		cur++;
674 	    }
675 	    while ((*cur >= '0') && (*cur <= '9'))
676 		cur++;
677 	} else
678 	    version *= 10;
679 	if ((*cur != ' ') && (*cur != '\t')) return;
680 	while ((*cur == ' ') || (*cur == '\t')) cur++;
681 	if ((*cur < '0') || (*cur > '9')) return;
682 	while ((*cur >= '0') && (*cur <= '9')) {
683 	    ret *= 10;
684 	    ret += *cur - '0';
685 	    cur++;
686 	}
687 	if ((*cur != 0) && (*cur != ' ') && (*cur != '\t')) return;
688 	ctxt->returnValue = ret;
689     } else if (!xmlStrncasecmp(BAD_CAST line, BAD_CAST"Content-Type:", 13)) {
690         const xmlChar *charset, *last, *mime;
691         cur += 13;
692 	while ((*cur == ' ') || (*cur == '\t')) cur++;
693 	if (ctxt->contentType != NULL)
694 	    xmlFree(ctxt->contentType);
695 	ctxt->contentType = xmlMemStrdup(cur);
696 	mime = (const xmlChar *) cur;
697 	last = mime;
698 	while ((*last != 0) && (*last != ' ') && (*last != '\t') &&
699 	       (*last != ';') && (*last != ','))
700 	    last++;
701 	if (ctxt->mimeType != NULL)
702 	    xmlFree(ctxt->mimeType);
703 	ctxt->mimeType = (char *) xmlStrndup(mime, last - mime);
704 	charset = xmlStrstr(BAD_CAST ctxt->contentType, BAD_CAST "charset=");
705 	if (charset != NULL) {
706 	    charset += 8;
707 	    last = charset;
708 	    while ((*last != 0) && (*last != ' ') && (*last != '\t') &&
709 	           (*last != ';') && (*last != ','))
710 		last++;
711 	    if (ctxt->encoding != NULL)
712 	        xmlFree(ctxt->encoding);
713 	    ctxt->encoding = (char *) xmlStrndup(charset, last - charset);
714 	}
715     } else if (!xmlStrncasecmp(BAD_CAST line, BAD_CAST"ContentType:", 12)) {
716         const xmlChar *charset, *last, *mime;
717         cur += 12;
718 	if (ctxt->contentType != NULL) return;
719 	while ((*cur == ' ') || (*cur == '\t')) cur++;
720 	ctxt->contentType = xmlMemStrdup(cur);
721 	mime = (const xmlChar *) cur;
722 	last = mime;
723 	while ((*last != 0) && (*last != ' ') && (*last != '\t') &&
724 	       (*last != ';') && (*last != ','))
725 	    last++;
726 	if (ctxt->mimeType != NULL)
727 	    xmlFree(ctxt->mimeType);
728 	ctxt->mimeType = (char *) xmlStrndup(mime, last - mime);
729 	charset = xmlStrstr(BAD_CAST ctxt->contentType, BAD_CAST "charset=");
730 	if (charset != NULL) {
731 	    charset += 8;
732 	    last = charset;
733 	    while ((*last != 0) && (*last != ' ') && (*last != '\t') &&
734 	           (*last != ';') && (*last != ','))
735 		last++;
736 	    if (ctxt->encoding != NULL)
737 	        xmlFree(ctxt->encoding);
738 	    ctxt->encoding = (char *) xmlStrndup(charset, last - charset);
739 	}
740     } else if (!xmlStrncasecmp(BAD_CAST line, BAD_CAST"Location:", 9)) {
741         cur += 9;
742 	while ((*cur == ' ') || (*cur == '\t')) cur++;
743 	if (ctxt->location != NULL)
744 	    xmlFree(ctxt->location);
745 	if (*cur == '/') {
746 	    xmlChar *tmp_http = xmlStrdup(BAD_CAST "http://");
747 	    xmlChar *tmp_loc =
748 	        xmlStrcat(tmp_http, (const xmlChar *) ctxt->hostname);
749 	    ctxt->location =
750 	        (char *) xmlStrcat (tmp_loc, (const xmlChar *) cur);
751 	} else {
752 	    ctxt->location = xmlMemStrdup(cur);
753 	}
754     } else if (!xmlStrncasecmp(BAD_CAST line, BAD_CAST"WWW-Authenticate:", 17)) {
755         cur += 17;
756 	while ((*cur == ' ') || (*cur == '\t')) cur++;
757 	if (ctxt->authHeader != NULL)
758 	    xmlFree(ctxt->authHeader);
759 	ctxt->authHeader = xmlMemStrdup(cur);
760     } else if (!xmlStrncasecmp(BAD_CAST line, BAD_CAST"Proxy-Authenticate:", 19)) {
761         cur += 19;
762 	while ((*cur == ' ') || (*cur == '\t')) cur++;
763 	if (ctxt->authHeader != NULL)
764 	    xmlFree(ctxt->authHeader);
765 	ctxt->authHeader = xmlMemStrdup(cur);
766 #ifdef HAVE_ZLIB_H
767     } else if ( !xmlStrncasecmp( BAD_CAST line, BAD_CAST"Content-Encoding:", 17) ) {
768 	cur += 17;
769 	while ((*cur == ' ') || (*cur == '\t')) cur++;
770 	if ( !xmlStrncasecmp( BAD_CAST cur, BAD_CAST"gzip", 4) ) {
771 	    ctxt->usesGzip = 1;
772 
773 	    ctxt->strm = xmlMalloc(sizeof(z_stream));
774 
775 	    if (ctxt->strm != NULL) {
776 		ctxt->strm->zalloc = Z_NULL;
777 		ctxt->strm->zfree = Z_NULL;
778 		ctxt->strm->opaque = Z_NULL;
779 		ctxt->strm->avail_in = 0;
780 		ctxt->strm->next_in = Z_NULL;
781 
782 		inflateInit2( ctxt->strm, 31 );
783 	    }
784 	}
785 #endif
786     } else if ( !xmlStrncasecmp( BAD_CAST line, BAD_CAST"Content-Length:", 15) ) {
787 	cur += 15;
788 	ctxt->ContentLength = strtol( cur, NULL, 10 );
789     }
790 }
791 
792 /**
793  * xmlNanoHTTPConnectAttempt:
794  * @addr:  a socket address structure
795  *
796  * Attempt a connection to the given IP:port endpoint. It forces
797  * non-blocking semantic on the socket, and allow 60 seconds for
798  * the host to answer.
799  *
800  * Returns -1 in case of failure, the file descriptor number otherwise
801  */
802 
803 static int
xmlNanoHTTPConnectAttempt(struct sockaddr * addr)804 xmlNanoHTTPConnectAttempt(struct sockaddr *addr)
805 {
806     fd_set wfd;
807 #ifdef _WINSOCKAPI_
808     fd_set xfd;
809 #endif
810     struct timeval tv;
811     int status;
812     int addrlen;
813     SOCKET s;
814 
815 #ifdef SUPPORT_IP6
816     if (addr->sa_family == AF_INET6) {
817 	s = socket (PF_INET6, SOCK_STREAM, IPPROTO_TCP);
818 	addrlen = sizeof (struct sockaddr_in6);
819     }
820     else
821 #endif
822     {
823 	s = socket (PF_INET, SOCK_STREAM, IPPROTO_TCP);
824 	addrlen = sizeof (struct sockaddr_in);
825     }
826     if (s==-1) {
827 #ifdef DEBUG_HTTP
828 	perror("socket");
829 #endif
830 	__xmlIOErr(XML_FROM_HTTP, 0, "socket failed\n");
831 	return(-1);
832     }
833 
834 #ifdef _WINSOCKAPI_
835     {
836 	u_long one = 1;
837 
838 	status = ioctlsocket(s, FIONBIO, &one) == SOCKET_ERROR ? -1 : 0;
839     }
840 #else /* _WINSOCKAPI_ */
841 #if defined(VMS)
842     {
843 	int enable = 1;
844 	status = ioctl(s, FIONBIO, &enable);
845     }
846 #else /* VMS */
847 #if defined(__BEOS__)
848 	{
849 		bool noblock = true;
850 		status = setsockopt(s, SOL_SOCKET, SO_NONBLOCK, &noblock, sizeof(noblock));
851 	}
852 #else /* __BEOS__ */
853     if ((status = fcntl(s, F_GETFL, 0)) != -1) {
854 #ifdef O_NONBLOCK
855 	status |= O_NONBLOCK;
856 #else /* O_NONBLOCK */
857 #ifdef F_NDELAY
858 	status |= F_NDELAY;
859 #endif /* F_NDELAY */
860 #endif /* !O_NONBLOCK */
861 	status = fcntl(s, F_SETFL, status);
862     }
863     if (status < 0) {
864 #ifdef DEBUG_HTTP
865 	perror("nonblocking");
866 #endif
867 	__xmlIOErr(XML_FROM_HTTP, 0, "error setting non-blocking IO\n");
868 	closesocket(s);
869 	return(-1);
870     }
871 #endif /* !__BEOS__ */
872 #endif /* !VMS */
873 #endif /* !_WINSOCKAPI_ */
874 
875     if (connect (s, addr, addrlen) == -1) {
876 	switch (socket_errno()) {
877 	    case EINPROGRESS:
878 	    case EWOULDBLOCK:
879 		break;
880 	    default:
881 		__xmlIOErr(XML_FROM_HTTP, 0, "error connecting to HTTP server");
882 		closesocket(s);
883 		return(-1);
884 	}
885     }
886 
887     tv.tv_sec = timeout;
888     tv.tv_usec = 0;
889 
890 #ifdef _MSC_VER
891 #pragma warning(push)
892 #pragma warning(disable: 4018)
893 #endif
894     FD_ZERO(&wfd);
895     FD_SET(s, &wfd);
896 
897 #ifdef _WINSOCKAPI_
898     FD_ZERO(&xfd);
899     FD_SET(s, &xfd);
900 
901     switch(select(s+1, NULL, &wfd, &xfd, &tv))
902 #else
903     switch(select(s+1, NULL, &wfd, NULL, &tv))
904 #endif
905 #ifdef _MSC_VER
906 #pragma warning(pop)
907 #endif
908     {
909 	case 0:
910 	    /* Time out */
911 	    __xmlIOErr(XML_FROM_HTTP, 0, "Connect attempt timed out");
912 	    closesocket(s);
913 	    return(-1);
914 	case -1:
915 	    /* Ermm.. ?? */
916 	    __xmlIOErr(XML_FROM_HTTP, 0, "Connect failed");
917 	    closesocket(s);
918 	    return(-1);
919     }
920 
921     if ( FD_ISSET(s, &wfd)
922 #ifdef _WINSOCKAPI_
923                            || FD_ISSET(s, &xfd)
924 #endif
925                                                 ) {
926 	XML_SOCKLEN_T len;
927 	len = sizeof(status);
928 #ifdef SO_ERROR
929 	if (getsockopt(s, SOL_SOCKET, SO_ERROR, (char*)&status, &len) < 0 ) {
930 	    /* Solaris error code */
931 	    __xmlIOErr(XML_FROM_HTTP, 0, "getsockopt failed\n");
932 	    return (-1);
933 	}
934 #endif
935 	if ( status ) {
936 	    __xmlIOErr(XML_FROM_HTTP, 0, "Error connecting to remote host");
937 	    closesocket(s);
938 	    errno = status;
939 	    return (-1);
940 	}
941     } else {
942 	/* pbm */
943 	__xmlIOErr(XML_FROM_HTTP, 0, "select failed\n");
944 	closesocket(s);
945 	return (-1);
946     }
947 
948     return(s);
949 }
950 
951 /**
952  * xmlNanoHTTPConnectHost:
953  * @host:  the host name
954  * @port:  the port number
955  *
956  * Attempt a connection to the given host:port endpoint. It tries
957  * the multiple IP provided by the DNS if available.
958  *
959  * Returns -1 in case of failure, the file descriptor number otherwise
960  */
961 
962 static int
xmlNanoHTTPConnectHost(const char * host,int port)963 xmlNanoHTTPConnectHost(const char *host, int port)
964 {
965     struct hostent *h;
966     struct sockaddr *addr = NULL;
967     struct in_addr ia;
968     struct sockaddr_in sockin;
969 
970 #ifdef SUPPORT_IP6
971     struct in6_addr ia6;
972     struct sockaddr_in6 sockin6;
973 #endif
974     int i;
975     int s;
976 
977     memset (&sockin, 0, sizeof(sockin));
978 #ifdef SUPPORT_IP6
979     memset (&sockin6, 0, sizeof(sockin6));
980 #endif
981 
982 #if !defined(HAVE_GETADDRINFO) && defined(SUPPORT_IP6) && defined(RES_USE_INET6)
983     if (have_ipv6 ())
984     {
985 	if (!(_res.options & RES_INIT))
986 	    res_init();
987 	_res.options |= RES_USE_INET6;
988     }
989 #endif
990 
991 #if defined(HAVE_GETADDRINFO) && defined(SUPPORT_IP6) && !defined(_WIN32)
992     if (have_ipv6 ())
993 #endif
994 #if defined(HAVE_GETADDRINFO) && (defined(SUPPORT_IP6) || defined(_WIN32))
995     {
996 	int status;
997 	struct addrinfo hints, *res, *result;
998 
999 	result = NULL;
1000 	memset (&hints, 0,sizeof(hints));
1001 	hints.ai_socktype = SOCK_STREAM;
1002 
1003 	status = getaddrinfo (host, NULL, &hints, &result);
1004 	if (status) {
1005 	    __xmlIOErr(XML_FROM_HTTP, 0, "getaddrinfo failed\n");
1006 	    return (-1);
1007 	}
1008 
1009 	for (res = result; res; res = res->ai_next) {
1010 	    if (res->ai_family == AF_INET) {
1011 		if (res->ai_addrlen > sizeof(sockin)) {
1012 		    __xmlIOErr(XML_FROM_HTTP, 0, "address size mismatch\n");
1013 		    freeaddrinfo (result);
1014 		    return (-1);
1015 		}
1016 		memcpy (&sockin, res->ai_addr, res->ai_addrlen);
1017 		sockin.sin_port = htons (port);
1018 		addr = (struct sockaddr *)&sockin;
1019 #ifdef SUPPORT_IP6
1020 	    } else if (have_ipv6 () && (res->ai_family == AF_INET6)) {
1021 		if (res->ai_addrlen > sizeof(sockin6)) {
1022 		    __xmlIOErr(XML_FROM_HTTP, 0, "address size mismatch\n");
1023 		    freeaddrinfo (result);
1024 		    return (-1);
1025 		}
1026 		memcpy (&sockin6, res->ai_addr, res->ai_addrlen);
1027 		sockin6.sin6_port = htons (port);
1028 		addr = (struct sockaddr *)&sockin6;
1029 #endif
1030 	    } else
1031 		continue;              /* for */
1032 
1033 	    s = xmlNanoHTTPConnectAttempt (addr);
1034 	    if (s != -1) {
1035 		freeaddrinfo (result);
1036 		return (s);
1037 	    }
1038 	}
1039 
1040 	if (result)
1041 	    freeaddrinfo (result);
1042     }
1043 #endif
1044 #if defined(HAVE_GETADDRINFO) && defined(SUPPORT_IP6) && !defined(_WIN32)
1045     else
1046 #endif
1047 #if !defined(HAVE_GETADDRINFO) || !defined(_WIN32)
1048     {
1049 	h = gethostbyname (host);
1050 	if (h == NULL) {
1051 
1052 /*
1053  * Okay, I got fed up by the non-portability of this error message
1054  * extraction code. it work on Linux, if it work on your platform
1055  * and one want to enable it, send me the defined(foobar) needed
1056  */
1057 #if defined(HAVE_NETDB_H) && defined(HOST_NOT_FOUND) && defined(linux)
1058 	    const char *h_err_txt = "";
1059 
1060 	    switch (h_errno) {
1061 		case HOST_NOT_FOUND:
1062 		    h_err_txt = "Authoritive host not found";
1063 		    break;
1064 
1065 		case TRY_AGAIN:
1066 		    h_err_txt =
1067 			"Non-authoritive host not found or server failure.";
1068 		    break;
1069 
1070 		case NO_RECOVERY:
1071 		    h_err_txt =
1072 			"Non-recoverable errors:  FORMERR, REFUSED, or NOTIMP.";
1073 		    break;
1074 
1075 		case NO_ADDRESS:
1076 		    h_err_txt =
1077 			"Valid name, no data record of requested type.";
1078 		    break;
1079 
1080 		default:
1081 		    h_err_txt = "No error text defined.";
1082 		    break;
1083 	    }
1084 	    __xmlIOErr(XML_FROM_HTTP, 0, h_err_txt);
1085 #else
1086 	    __xmlIOErr(XML_FROM_HTTP, 0, "Failed to resolve host");
1087 #endif
1088 	    return (-1);
1089 	}
1090 
1091 	for (i = 0; h->h_addr_list[i]; i++) {
1092 	    if (h->h_addrtype == AF_INET) {
1093 		/* A records (IPv4) */
1094 		if ((unsigned int) h->h_length > sizeof(ia)) {
1095 		    __xmlIOErr(XML_FROM_HTTP, 0, "address size mismatch\n");
1096 		    return (-1);
1097 		}
1098 		memcpy (&ia, h->h_addr_list[i], h->h_length);
1099 		sockin.sin_family = h->h_addrtype;
1100 		sockin.sin_addr = ia;
1101 		sockin.sin_port = (u_short)htons ((unsigned short)port);
1102 		addr = (struct sockaddr *) &sockin;
1103 #ifdef SUPPORT_IP6
1104 	    } else if (have_ipv6 () && (h->h_addrtype == AF_INET6)) {
1105 		/* AAAA records (IPv6) */
1106 		if ((unsigned int) h->h_length > sizeof(ia6)) {
1107 		    __xmlIOErr(XML_FROM_HTTP, 0, "address size mismatch\n");
1108 		    return (-1);
1109 		}
1110 		memcpy (&ia6, h->h_addr_list[i], h->h_length);
1111 		sockin6.sin6_family = h->h_addrtype;
1112 		sockin6.sin6_addr = ia6;
1113 		sockin6.sin6_port = htons (port);
1114 		addr = (struct sockaddr *) &sockin6;
1115 #endif
1116 	    } else
1117 		break;              /* for */
1118 
1119 	    s = xmlNanoHTTPConnectAttempt (addr);
1120 	    if (s != -1)
1121 		return (s);
1122 	}
1123     }
1124 #endif
1125 
1126 #ifdef DEBUG_HTTP
1127     xmlGenericError(xmlGenericErrorContext,
1128                     "xmlNanoHTTPConnectHost:  unable to connect to '%s'.\n",
1129                     host);
1130 #endif
1131     return (-1);
1132 }
1133 
1134 
/**
 * xmlNanoHTTPOpen:
 * @URL:  The URL to load
 * @contentType:  optional location receiving the Content-Type of the
 *                response; it is reset to NULL before the request is made
 *
 * Convenience wrapper around xmlNanoHTTPMethod(): the request is issued
 * with no explicit method (which selects the default one), no request
 * body and no extra headers.
 *
 * Returns NULL in case of failure, otherwise a request handler.
 *     The contentType, if provided must be freed by the caller
 */

void*
xmlNanoHTTPOpen(const char *URL, char **contentType) {
    void *handle;

    /* Make sure no stale Content-Type is sent or reported back. */
    if (contentType != NULL)
        *contentType = NULL;

    handle = xmlNanoHTTPMethod(URL, NULL, NULL, contentType, NULL, 0);
    return(handle);
}
1153 
/**
 * xmlNanoHTTPOpenRedir:
 * @URL:  The URL to load
 * @contentType:  optional location receiving the Content-Type of the
 *                response; it is reset to NULL before the request is made
 * @redir:  optional location receiving the redirected URL; it is reset
 *          to NULL before the request is made
 *
 * Convenience wrapper around xmlNanoHTTPMethodRedir(): the request is
 * issued with no explicit method (which selects the default one), no
 * request body and no extra headers.
 *
 * Returns NULL in case of failure, otherwise a request handler.
 *     The contentType, if provided must be freed by the caller
 */

void*
xmlNanoHTTPOpenRedir(const char *URL, char **contentType, char **redir) {
    void *handle;

    /* Clear both output slots before handing them to the worker. */
    if (contentType != NULL)
        *contentType = NULL;
    if (redir != NULL)
        *redir = NULL;

    handle = xmlNanoHTTPMethodRedir(URL, NULL, NULL, contentType, redir,
                                    NULL, 0);
    return(handle);
}
1174 
1175 /**
1176  * xmlNanoHTTPRead:
1177  * @ctx:  the HTTP context
1178  * @dest:  a buffer
1179  * @len:  the buffer length
1180  *
1181  * This function tries to read @len bytes from the existing HTTP connection
1182  * and saves them in @dest. This is a blocking call.
1183  *
1184  * Returns the number of byte read. 0 is an indication of an end of connection.
1185  *         -1 indicates a parameter error.
1186  */
1187 int
xmlNanoHTTPRead(void * ctx,void * dest,int len)1188 xmlNanoHTTPRead(void *ctx, void *dest, int len) {
1189     xmlNanoHTTPCtxtPtr ctxt = (xmlNanoHTTPCtxtPtr) ctx;
1190 #ifdef HAVE_ZLIB_H
1191     int bytes_read = 0;
1192     int orig_avail_in;
1193     int z_ret;
1194 #endif
1195 
1196     if (ctx == NULL) return(-1);
1197     if (dest == NULL) return(-1);
1198     if (len <= 0) return(0);
1199 
1200 #ifdef HAVE_ZLIB_H
1201     if (ctxt->usesGzip == 1) {
1202         if (ctxt->strm == NULL) return(0);
1203 
1204         ctxt->strm->next_out = dest;
1205         ctxt->strm->avail_out = len;
1206 	ctxt->strm->avail_in = ctxt->inptr - ctxt->inrptr;
1207 
1208         while (ctxt->strm->avail_out > 0 &&
1209 	       (ctxt->strm->avail_in > 0 || xmlNanoHTTPRecv(ctxt) > 0)) {
1210             orig_avail_in = ctxt->strm->avail_in =
1211 			    ctxt->inptr - ctxt->inrptr - bytes_read;
1212             ctxt->strm->next_in = BAD_CAST (ctxt->inrptr + bytes_read);
1213 
1214             z_ret = inflate(ctxt->strm, Z_NO_FLUSH);
1215             bytes_read += orig_avail_in - ctxt->strm->avail_in;
1216 
1217             if (z_ret != Z_OK) break;
1218 	}
1219 
1220         ctxt->inrptr += bytes_read;
1221         return(len - ctxt->strm->avail_out);
1222     }
1223 #endif
1224 
1225     while (ctxt->inptr - ctxt->inrptr < len) {
1226         if (xmlNanoHTTPRecv(ctxt) <= 0) break;
1227     }
1228     if (ctxt->inptr - ctxt->inrptr < len)
1229         len = ctxt->inptr - ctxt->inrptr;
1230     memcpy(dest, ctxt->inrptr, len);
1231     ctxt->inrptr += len;
1232     return(len);
1233 }
1234 
1235 /**
1236  * xmlNanoHTTPClose:
1237  * @ctx:  the HTTP context
1238  *
1239  * This function closes an HTTP context, it ends up the connection and
1240  * free all data related to it.
1241  */
1242 void
xmlNanoHTTPClose(void * ctx)1243 xmlNanoHTTPClose(void *ctx) {
1244     xmlNanoHTTPCtxtPtr ctxt = (xmlNanoHTTPCtxtPtr) ctx;
1245 
1246     if (ctx == NULL) return;
1247 
1248     xmlNanoHTTPFreeCtxt(ctxt);
1249 }
1250 
1251 /**
1252  * xmlNanoHTTPMethodRedir:
1253  * @URL:  The URL to load
1254  * @method:  the HTTP method to use
1255  * @input:  the input string if any
1256  * @contentType:  the Content-Type information IN and OUT
1257  * @redir:  the redirected URL OUT
1258  * @headers:  the extra headers
1259  * @ilen:  input length
1260  *
1261  * This function try to open a connection to the indicated resource
1262  * via HTTP using the given @method, adding the given extra headers
1263  * and the input buffer for the request content.
1264  *
1265  * Returns NULL in case of failure, otherwise a request handler.
1266  *     The contentType, or redir, if provided must be freed by the caller
1267  */
1268 
1269 void*
xmlNanoHTTPMethodRedir(const char * URL,const char * method,const char * input,char ** contentType,char ** redir,const char * headers,int ilen)1270 xmlNanoHTTPMethodRedir(const char *URL, const char *method, const char *input,
1271                   char **contentType, char **redir,
1272 		  const char *headers, int ilen ) {
1273     xmlNanoHTTPCtxtPtr ctxt;
1274     char *bp, *p;
1275     int blen, ret;
1276     int head;
1277     int nbRedirects = 0;
1278     char *redirURL = NULL;
1279 #ifdef DEBUG_HTTP
1280     int xmt_bytes;
1281 #endif
1282 
1283     if (URL == NULL) return(NULL);
1284     if (method == NULL) method = "GET";
1285     xmlNanoHTTPInit();
1286 
1287 retry:
1288     if (redirURL == NULL)
1289 	ctxt = xmlNanoHTTPNewCtxt(URL);
1290     else {
1291 	ctxt = xmlNanoHTTPNewCtxt(redirURL);
1292 	ctxt->location = xmlMemStrdup(redirURL);
1293     }
1294 
1295     if ( ctxt == NULL ) {
1296 	return ( NULL );
1297     }
1298 
1299     if ((ctxt->protocol == NULL) || (strcmp(ctxt->protocol, "http"))) {
1300 	__xmlIOErr(XML_FROM_HTTP, XML_HTTP_URL_SYNTAX, "Not a valid HTTP URI");
1301         xmlNanoHTTPFreeCtxt(ctxt);
1302 	if (redirURL != NULL) xmlFree(redirURL);
1303         return(NULL);
1304     }
1305     if (ctxt->hostname == NULL) {
1306 	__xmlIOErr(XML_FROM_HTTP, XML_HTTP_UNKNOWN_HOST,
1307 	           "Failed to identify host in URI");
1308         xmlNanoHTTPFreeCtxt(ctxt);
1309 	if (redirURL != NULL) xmlFree(redirURL);
1310         return(NULL);
1311     }
1312     if (proxy) {
1313 	blen = strlen(ctxt->hostname) * 2 + 16;
1314 	ret = xmlNanoHTTPConnectHost(proxy, proxyPort);
1315     }
1316     else {
1317 	blen = strlen(ctxt->hostname);
1318 	ret = xmlNanoHTTPConnectHost(ctxt->hostname, ctxt->port);
1319     }
1320     if (ret < 0) {
1321         xmlNanoHTTPFreeCtxt(ctxt);
1322 	if (redirURL != NULL) xmlFree(redirURL);
1323         return(NULL);
1324     }
1325     ctxt->fd = ret;
1326 
1327     if (input == NULL)
1328 	ilen = 0;
1329     else
1330 	blen += 36;
1331 
1332     if (headers != NULL)
1333 	blen += strlen(headers) + 2;
1334     if (contentType && *contentType)
1335 	/* reserve for string plus 'Content-Type: \r\n" */
1336 	blen += strlen(*contentType) + 16;
1337     if (ctxt->query != NULL)
1338 	/* 1 for '?' */
1339 	blen += strlen(ctxt->query) + 1;
1340     blen += strlen(method) + strlen(ctxt->path) + 24;
1341 #ifdef HAVE_ZLIB_H
1342     /* reserve for possible 'Accept-Encoding: gzip' string */
1343     blen += 23;
1344 #endif
1345     if (ctxt->port != 80) {
1346 	/* reserve space for ':xxxxx', incl. potential proxy */
1347 	if (proxy)
1348 	    blen += 12;
1349 	else
1350 	    blen += 6;
1351     }
1352     bp = (char*)xmlMallocAtomic(blen);
1353     if ( bp == NULL ) {
1354         xmlNanoHTTPFreeCtxt( ctxt );
1355 	xmlHTTPErrMemory("allocating header buffer");
1356 	return ( NULL );
1357     }
1358 
1359     p = bp;
1360 
1361     if (proxy) {
1362 	if (ctxt->port != 80) {
1363 	    p += snprintf( p, blen - (p - bp), "%s http://%s:%d%s",
1364 			method, ctxt->hostname,
1365 		 	ctxt->port, ctxt->path );
1366 	}
1367 	else
1368 	    p += snprintf( p, blen - (p - bp), "%s http://%s%s", method,
1369 	    		ctxt->hostname, ctxt->path);
1370     }
1371     else
1372 	p += snprintf( p, blen - (p - bp), "%s %s", method, ctxt->path);
1373 
1374     if (ctxt->query != NULL)
1375 	p += snprintf( p, blen - (p - bp), "?%s", ctxt->query);
1376 
1377     if (ctxt->port == 80) {
1378         p += snprintf( p, blen - (p - bp), " HTTP/1.0\r\nHost: %s\r\n",
1379 		    ctxt->hostname);
1380     } else {
1381         p += snprintf( p, blen - (p - bp), " HTTP/1.0\r\nHost: %s:%d\r\n",
1382 		    ctxt->hostname, ctxt->port);
1383     }
1384 
1385 #ifdef HAVE_ZLIB_H
1386     p += snprintf(p, blen - (p - bp), "Accept-Encoding: gzip\r\n");
1387 #endif
1388 
1389     if (contentType != NULL && *contentType)
1390 	p += snprintf(p, blen - (p - bp), "Content-Type: %s\r\n", *contentType);
1391 
1392     if (headers != NULL)
1393 	p += snprintf( p, blen - (p - bp), "%s", headers );
1394 
1395     if (input != NULL)
1396 	snprintf(p, blen - (p - bp), "Content-Length: %d\r\n\r\n", ilen );
1397     else
1398 	snprintf(p, blen - (p - bp), "\r\n");
1399 
1400 #ifdef DEBUG_HTTP
1401     xmlGenericError(xmlGenericErrorContext,
1402 	    "-> %s%s", proxy? "(Proxy) " : "", bp);
1403     if ((blen -= strlen(bp)+1) < 0)
1404 	xmlGenericError(xmlGenericErrorContext,
1405 		"ERROR: overflowed buffer by %d bytes\n", -blen);
1406 #endif
1407     ctxt->outptr = ctxt->out = bp;
1408     ctxt->state = XML_NANO_HTTP_WRITE;
1409     blen = strlen( ctxt->out );
1410 #ifdef DEBUG_HTTP
1411     xmt_bytes = xmlNanoHTTPSend(ctxt, ctxt->out, blen );
1412     if ( xmt_bytes != blen )
1413         xmlGenericError( xmlGenericErrorContext,
1414 			"xmlNanoHTTPMethodRedir:  Only %d of %d %s %s\n",
1415 			xmt_bytes, blen,
1416 			"bytes of HTTP headers sent to host",
1417 			ctxt->hostname );
1418 #else
1419     xmlNanoHTTPSend(ctxt, ctxt->out, blen );
1420 #endif
1421 
1422     if ( input != NULL ) {
1423 #ifdef DEBUG_HTTP
1424         xmt_bytes = xmlNanoHTTPSend( ctxt, input, ilen );
1425 
1426 	if ( xmt_bytes != ilen )
1427 	    xmlGenericError( xmlGenericErrorContext,
1428 	    		"xmlNanoHTTPMethodRedir:  Only %d of %d %s %s\n",
1429 			xmt_bytes, ilen,
1430 			"bytes of HTTP content sent to host",
1431 			ctxt->hostname );
1432 #else
1433 	xmlNanoHTTPSend( ctxt, input, ilen );
1434 #endif
1435     }
1436 
1437     ctxt->state = XML_NANO_HTTP_READ;
1438     head = 1;
1439 
1440     while ((p = xmlNanoHTTPReadLine(ctxt)) != NULL) {
1441         if (head && (*p == 0)) {
1442 	    head = 0;
1443 	    ctxt->content = ctxt->inrptr;
1444 	    xmlFree(p);
1445 	    break;
1446 	}
1447 	xmlNanoHTTPScanAnswer(ctxt, p);
1448 
1449 #ifdef DEBUG_HTTP
1450 	xmlGenericError(xmlGenericErrorContext, "<- %s\n", p);
1451 #endif
1452         xmlFree(p);
1453     }
1454 
1455     if ((ctxt->location != NULL) && (ctxt->returnValue >= 300) &&
1456         (ctxt->returnValue < 400)) {
1457 #ifdef DEBUG_HTTP
1458 	xmlGenericError(xmlGenericErrorContext,
1459 		"\nRedirect to: %s\n", ctxt->location);
1460 #endif
1461 	while ( xmlNanoHTTPRecv(ctxt) > 0 ) ;
1462         if (nbRedirects < XML_NANO_HTTP_MAX_REDIR) {
1463 	    nbRedirects++;
1464 	    if (redirURL != NULL)
1465 		xmlFree(redirURL);
1466 	    redirURL = xmlMemStrdup(ctxt->location);
1467 	    xmlNanoHTTPFreeCtxt(ctxt);
1468 	    goto retry;
1469 	}
1470 	xmlNanoHTTPFreeCtxt(ctxt);
1471 	if (redirURL != NULL) xmlFree(redirURL);
1472 #ifdef DEBUG_HTTP
1473 	xmlGenericError(xmlGenericErrorContext,
1474 		"xmlNanoHTTPMethodRedir: Too many redirects, aborting ...\n");
1475 #endif
1476 	return(NULL);
1477     }
1478 
1479     if (contentType != NULL) {
1480 	if (ctxt->contentType != NULL)
1481 	    *contentType = xmlMemStrdup(ctxt->contentType);
1482 	else
1483 	    *contentType = NULL;
1484     }
1485 
1486     if ((redir != NULL) && (redirURL != NULL)) {
1487 	*redir = redirURL;
1488     } else {
1489 	if (redirURL != NULL)
1490 	    xmlFree(redirURL);
1491 	if (redir != NULL)
1492 	    *redir = NULL;
1493     }
1494 
1495 #ifdef DEBUG_HTTP
1496     if (ctxt->contentType != NULL)
1497 	xmlGenericError(xmlGenericErrorContext,
1498 		"\nCode %d, content-type '%s'\n\n",
1499 	       ctxt->returnValue, ctxt->contentType);
1500     else
1501 	xmlGenericError(xmlGenericErrorContext,
1502 		"\nCode %d, no content-type\n\n",
1503 	       ctxt->returnValue);
1504 #endif
1505 
1506     return((void *) ctxt);
1507 }
1508 
/**
 * xmlNanoHTTPMethod:
 * @URL:  The URL to load
 * @method:  the HTTP method to use
 * @input:  the input string if any
 * @contentType:  the Content-Type information IN and OUT
 * @headers:  the extra headers
 * @ilen:  input length
 *
 * Same as xmlNanoHTTPMethodRedir() for callers that are not interested
 * in the redirected URL: every argument is forwarded unchanged and NULL
 * is passed for the redirection output.
 *
 * Returns NULL in case of failure, otherwise a request handler.
 *     The contentType, if provided must be freed by the caller
 */

void*
xmlNanoHTTPMethod(const char *URL, const char *method, const char *input,
                  char **contentType, const char *headers, int ilen) {
    void *handle;

    handle = xmlNanoHTTPMethodRedir(URL, method, input, contentType,
                                    NULL, headers, ilen);
    return(handle);
}
1532 
/**
 * xmlNanoHTTPFetch:
 * @URL:  The URL to load
 * @filename:  the filename where the content should be saved, "-"
 *             selects file descriptor 0 instead of opening a file
 * @contentType:  if available the Content-Type information will be
 *                returned at that location
 *
 * This function tries to fetch the indicated resource via HTTP GET
 * and save its content in the file.
 *
 * Returns -1 in case of failure (including a failed or incomplete write
 *     of the content), 0 in case of success. The contentType, if
 *     provided must be freed by the caller; it is released and reset to
 *     NULL here when the output file cannot be opened.
 */
int
xmlNanoHTTPFetch(const char *URL, const char *filename, char **contentType) {
    void *ctxt = NULL;
    char *buf = NULL;
    int fd;
    int len;
    int ret = 0;

    if (filename == NULL) return(-1);
    ctxt = xmlNanoHTTPOpen(URL, contentType);
    if (ctxt == NULL) return(-1);

    /*
     * NOTE(review): "-" maps to descriptor 0 (standard input), which is
     * then closed below; standard output was presumably intended.
     * NOTE(review): the file is opened without O_TRUNC, so a longer
     * pre-existing file keeps its tail. Both are kept as-is for
     * compatibility -- confirm before changing.
     */
    if (!strcmp(filename, "-"))
        fd = 0;
    else {
        fd = open(filename, O_CREAT | O_WRONLY, 00644);
        if (fd < 0) {
            xmlNanoHTTPClose(ctxt);
            if ((contentType != NULL) && (*contentType != NULL)) {
                xmlFree(*contentType);
                *contentType = NULL;
            }
            return(-1);
        }
    }

    xmlNanoHTTPFetchContent( ctxt, &buf, &len );

    /*
     * write() may store fewer bytes than requested, keep going until the
     * whole content is out and report a failure instead of ignoring it.
     */
    while ( len > 0 ) {
        int written = (int) write(fd, buf, len);

        if ( written <= 0 ) {
            ret = -1;
            break;
        }
        buf += written;
        len -= written;
    }

    xmlNanoHTTPClose(ctxt);
    close(fd);
    return(ret);
}
1580 
#ifdef LIBXML_OUTPUT_ENABLED
/**
 * xmlNanoHTTPSave:
 * @ctxt:  the HTTP context
 * @filename:  the filename where the content should be saved, "-"
 *             selects file descriptor 0 instead of opening a file
 *
 * This function saves the output of the HTTP transaction to a file.
 * It closes and frees the context at the end, also when the output
 * file cannot be opened.
 *
 * Returns -1 in case of failure (including a failed or incomplete write
 *     of the content), 0 in case of success.
 */
int
xmlNanoHTTPSave(void *ctxt, const char *filename) {
    char *buf = NULL;
    int fd;
    int len;
    int ret = 0;

    if ((ctxt == NULL) || (filename == NULL)) return(-1);

    /*
     * NOTE(review): "-" maps to descriptor 0 (standard input), which is
     * then closed below; standard output was presumably intended.
     * NOTE(review): the file is opened without O_TRUNC, so a longer
     * pre-existing file keeps its tail. Both are kept as-is for
     * compatibility -- confirm before changing.
     */
    if (!strcmp(filename, "-"))
        fd = 0;
    else {
        fd = open(filename, O_CREAT | O_WRONLY, 0666);
        if (fd < 0) {
            xmlNanoHTTPClose(ctxt);
            return(-1);
        }
    }

    xmlNanoHTTPFetchContent( ctxt, &buf, &len );

    /*
     * write() may store fewer bytes than requested, keep going until the
     * whole content is out and report a failure instead of ignoring it.
     */
    while ( len > 0 ) {
        int written = (int) write(fd, buf, len);

        if ( written <= 0 ) {
            ret = -1;
            break;
        }
        buf += written;
        len -= written;
    }

    xmlNanoHTTPClose(ctxt);
    close(fd);
    return(ret);
}
#endif /* LIBXML_OUTPUT_ENABLED */
1620 
1621 /**
1622  * xmlNanoHTTPReturnCode:
1623  * @ctx:  the HTTP context
1624  *
1625  * Get the latest HTTP return code received
1626  *
1627  * Returns the HTTP return code for the request.
1628  */
1629 int
xmlNanoHTTPReturnCode(void * ctx)1630 xmlNanoHTTPReturnCode(void *ctx) {
1631     xmlNanoHTTPCtxtPtr ctxt = (xmlNanoHTTPCtxtPtr) ctx;
1632 
1633     if (ctxt == NULL) return(-1);
1634 
1635     return(ctxt->returnValue);
1636 }
1637 
1638 /**
1639  * xmlNanoHTTPAuthHeader:
1640  * @ctx:  the HTTP context
1641  *
1642  * Get the authentication header of an HTTP context
1643  *
1644  * Returns the stashed value of the WWW-Authenticate or Proxy-Authenticate
1645  * header.
1646  */
1647 const char *
xmlNanoHTTPAuthHeader(void * ctx)1648 xmlNanoHTTPAuthHeader(void *ctx) {
1649     xmlNanoHTTPCtxtPtr ctxt = (xmlNanoHTTPCtxtPtr) ctx;
1650 
1651     if (ctxt == NULL) return(NULL);
1652 
1653     return(ctxt->authHeader);
1654 }
1655 
1656 /**
1657  * xmlNanoHTTPContentLength:
1658  * @ctx:  the HTTP context
1659  *
1660  * Provides the specified content length from the HTTP header.
1661  *
1662  * Return the specified content length from the HTTP header.  Note that
1663  * a value of -1 indicates that the content length element was not included in
1664  * the response header.
1665  */
1666 int
xmlNanoHTTPContentLength(void * ctx)1667 xmlNanoHTTPContentLength( void * ctx ) {
1668     xmlNanoHTTPCtxtPtr	ctxt = (xmlNanoHTTPCtxtPtr)ctx;
1669 
1670     return ( ( ctxt == NULL ) ? -1 : ctxt->ContentLength );
1671 }
1672 
1673 /**
1674  * xmlNanoHTTPRedir:
1675  * @ctx:  the HTTP context
1676  *
1677  * Provides the specified redirection URL if available from the HTTP header.
1678  *
1679  * Return the specified redirection URL or NULL if not redirected.
1680  */
1681 const char *
xmlNanoHTTPRedir(void * ctx)1682 xmlNanoHTTPRedir( void * ctx ) {
1683     xmlNanoHTTPCtxtPtr	ctxt = (xmlNanoHTTPCtxtPtr)ctx;
1684 
1685     return ( ( ctxt == NULL ) ? NULL : ctxt->location );
1686 }
1687 
1688 /**
1689  * xmlNanoHTTPEncoding:
1690  * @ctx:  the HTTP context
1691  *
1692  * Provides the specified encoding if specified in the HTTP headers.
1693  *
1694  * Return the specified encoding or NULL if not available
1695  */
1696 const char *
xmlNanoHTTPEncoding(void * ctx)1697 xmlNanoHTTPEncoding( void * ctx ) {
1698     xmlNanoHTTPCtxtPtr	ctxt = (xmlNanoHTTPCtxtPtr)ctx;
1699 
1700     return ( ( ctxt == NULL ) ? NULL : ctxt->encoding );
1701 }
1702 
1703 /**
1704  * xmlNanoHTTPMimeType:
1705  * @ctx:  the HTTP context
1706  *
1707  * Provides the specified Mime-Type if specified in the HTTP headers.
1708  *
1709  * Return the specified Mime-Type or NULL if not available
1710  */
1711 const char *
xmlNanoHTTPMimeType(void * ctx)1712 xmlNanoHTTPMimeType( void * ctx ) {
1713     xmlNanoHTTPCtxtPtr	ctxt = (xmlNanoHTTPCtxtPtr)ctx;
1714 
1715     return ( ( ctxt == NULL ) ? NULL : ctxt->mimeType );
1716 }
1717 
1718 /**
1719  * xmlNanoHTTPFetchContent:
1720  * @ctx:  the HTTP context
1721  * @ptr:  pointer to set to the content buffer.
1722  * @len:  integer pointer to hold the length of the content
1723  *
1724  * Check if all the content was read
1725  *
1726  * Returns 0 if all the content was read and available, returns
1727  * -1 if received content length was less than specified or an error
1728  * occurred.
1729  */
1730 static int
xmlNanoHTTPFetchContent(void * ctx,char ** ptr,int * len)1731 xmlNanoHTTPFetchContent( void * ctx, char ** ptr, int * len ) {
1732     xmlNanoHTTPCtxtPtr	ctxt = (xmlNanoHTTPCtxtPtr)ctx;
1733 
1734     int			rc = 0;
1735     int			cur_lgth;
1736     int			rcvd_lgth;
1737     int			dummy_int;
1738     char *		dummy_ptr = NULL;
1739 
1740     /*  Dummy up return input parameters if not provided  */
1741 
1742     if ( len == NULL )
1743         len = &dummy_int;
1744 
1745     if ( ptr == NULL )
1746         ptr = &dummy_ptr;
1747 
1748     /*  But can't work without the context pointer  */
1749 
1750     if ( ( ctxt == NULL ) || ( ctxt->content == NULL ) ) {
1751         *len = 0;
1752 	*ptr = NULL;
1753 	return ( -1 );
1754     }
1755 
1756     rcvd_lgth = ctxt->inptr - ctxt->content;
1757 
1758     while ( (cur_lgth = xmlNanoHTTPRecv( ctxt )) > 0 ) {
1759 
1760 	rcvd_lgth += cur_lgth;
1761 	if ( (ctxt->ContentLength > 0) && (rcvd_lgth >= ctxt->ContentLength) )
1762 	    break;
1763     }
1764 
1765     *ptr = ctxt->content;
1766     *len = rcvd_lgth;
1767 
1768     if ( ( ctxt->ContentLength > 0 ) && ( rcvd_lgth < ctxt->ContentLength ) )
1769         rc = -1;
1770     else if ( rcvd_lgth == 0 )
1771 	rc = -1;
1772 
1773     return ( rc );
1774 }
1775 
1776 #ifdef STANDALONE
main(int argc,char ** argv)1777 int main(int argc, char **argv) {
1778     char *contentType = NULL;
1779 
1780     if (argv[1] != NULL) {
1781 	if (argv[2] != NULL)
1782 	    xmlNanoHTTPFetch(argv[1], argv[2], &contentType);
1783         else
1784 	    xmlNanoHTTPFetch(argv[1], "-", &contentType);
1785 	if (contentType != NULL) xmlFree(contentType);
1786     } else {
1787         xmlGenericError(xmlGenericErrorContext,
1788 		"%s: minimal HTTP GET implementation\n", argv[0]);
1789         xmlGenericError(xmlGenericErrorContext,
1790 		"\tusage %s [ URL [ filename ] ]\n", argv[0]);
1791     }
1792     xmlNanoHTTPCleanup();
1793     xmlMemoryDump();
1794     return(0);
1795 }
1796 #endif /* STANDALONE */
1797 #else /* !LIBXML_HTTP_ENABLED */
1798 #ifdef STANDALONE
1799 #include <stdio.h>
main(int argc,char ** argv)1800 int main(int argc, char **argv) {
1801     xmlGenericError(xmlGenericErrorContext,
1802 	    "%s : HTTP support not compiled in\n", argv[0]);
1803     return(0);
1804 }
1805 #endif /* STANDALONE */
1806 #endif /* LIBXML_HTTP_ENABLED */
1807 #define bottom_nanohttp
1808 #include "elfgcchack.h"
1809