xref: /reactos/sdk/lib/3rdparty/libxml2/nanohttp.c (revision 911153da)
1 /*
2  * nanohttp.c: minimalist HTTP GET implementation to fetch external subsets.
3  *             focuses on size, streamability, reentrancy and portability
4  *
5  * This is clearly not a general purpose HTTP implementation
6  * If you look for one, check:
7  *         http://www.w3.org/Library/
8  *
9  * See Copyright for the status of this software.
10  *
11  * daniel@veillard.com
12  */
13 
14 #define IN_LIBXML
15 #include "libxml.h"
16 
17 #ifdef LIBXML_HTTP_ENABLED
18 #include <string.h>
19 #include <stdlib.h>
20 #include <errno.h>
21 
22 #ifdef HAVE_UNISTD_H
23 #include <unistd.h>
24 #endif
25 #ifdef HAVE_SYS_TYPES_H
26 #include <sys/types.h>
27 #endif
28 #ifdef HAVE_SYS_SOCKET_H
29 #include <sys/socket.h>
30 #endif
31 #ifdef HAVE_NETINET_IN_H
32 #include <netinet/in.h>
33 #endif
34 #ifdef HAVE_ARPA_INET_H
35 #include <arpa/inet.h>
36 #endif
37 #ifdef HAVE_NETDB_H
38 #include <netdb.h>
39 #endif
40 #ifdef HAVE_RESOLV_H
41 #ifdef HAVE_ARPA_NAMESER_H
42 #include <arpa/nameser.h>
43 #endif
44 #include <resolv.h>
45 #endif
46 #ifdef HAVE_FCNTL_H
47 #include <fcntl.h>
48 #endif
49 #ifdef HAVE_SYS_TIME_H
50 #include <sys/time.h>
51 #endif
52 #ifndef HAVE_POLL_H
53 #ifdef HAVE_SYS_SELECT_H
54 #include <sys/select.h>
55 #endif
56 #else
57 #include <poll.h>
58 #endif
59 #ifdef LIBXML_ZLIB_ENABLED
60 #include <zlib.h>
61 #endif
62 
63 
64 #ifdef VMS
65 #include <stropts>
66 #define XML_SOCKLEN_T unsigned int
67 #endif
68 
69 #if defined(_WIN32)
70 #include <wsockcompat.h>
71 #endif
72 
73 #include <libxml/globals.h>
74 #include <libxml/xmlerror.h>
75 #include <libxml/xmlmemory.h>
76 #include <libxml/parser.h> /* for xmlStr(n)casecmp() */
77 #include <libxml/nanohttp.h>
78 #include <libxml/globals.h>
79 #include <libxml/uri.h>
80 
81 /**
82  * A couple portability macros
83  */
84 #ifndef _WINSOCKAPI_
85 #if !defined(__BEOS__) || defined(__HAIKU__)
86 #define closesocket(s) close(s)
87 #endif
88 #define SOCKET int
89 #define INVALID_SOCKET (-1)
90 #endif
91 
92 #ifdef __BEOS__
93 #ifndef PF_INET
94 #define PF_INET AF_INET
95 #endif
96 #endif
97 
98 #ifndef XML_SOCKLEN_T
99 #define XML_SOCKLEN_T unsigned int
100 #endif
101 
102 #ifdef STANDALONE
103 #define DEBUG_HTTP
104 #define xmlStrncasecmp(a, b, n) strncasecmp((char *)a, (char *)b, n)
105 #define xmlStrcasecmpi(a, b) strcasecmp((char *)a, (char *)b)
106 #endif
107 
/* NOTE(review): presumably the cap on followed redirects; its user is not visible here */
#define XML_NANO_HTTP_MAX_REDIR 10

/* Size of a single recv() request and the head-room kept in the input buffer */
#define XML_NANO_HTTP_CHUNK 4096

/* Connection states; the context's state field is tested against these with '&' */
#define XML_NANO_HTTP_CLOSED 0
#define XML_NANO_HTTP_WRITE 1
#define XML_NANO_HTTP_READ 2
#define XML_NANO_HTTP_NONE 4
116 
117 typedef struct xmlNanoHTTPCtxt {
118     char *protocol;	/* the protocol name */
119     char *hostname;	/* the host name */
120     int port;		/* the port */
121     char *path;		/* the path within the URL */
122     char *query;	/* the query string */
123     SOCKET fd;		/* the file descriptor for the socket */
124     int state;		/* WRITE / READ / CLOSED */
125     char *out;		/* buffer sent (zero terminated) */
126     char *outptr;	/* index within the buffer sent */
127     char *in;		/* the receiving buffer */
128     char *content;	/* the start of the content */
129     char *inptr;	/* the next byte to read from network */
130     char *inrptr;	/* the next byte to give back to the client */
131     int inlen;		/* len of the input buffer */
132     int last;		/* return code for last operation */
133     int returnValue;	/* the protocol return value */
134     int version;        /* the protocol version */
135     int ContentLength;  /* specified content length from HTTP header */
136     char *contentType;	/* the MIME type for the input */
137     char *location;	/* the new URL in case of redirect */
138     char *authHeader;	/* contents of {WWW,Proxy}-Authenticate header */
139     char *encoding;	/* encoding extracted from the contentType */
140     char *mimeType;	/* Mime-Type extracted from the contentType */
141 #ifdef LIBXML_ZLIB_ENABLED
142     z_stream *strm;	/* Zlib stream object */
143     int usesGzip;	/* "Content-Encoding: gzip" was detected */
144 #endif
145 } xmlNanoHTTPCtxt, *xmlNanoHTTPCtxtPtr;
146 
/* Non-zero once xmlNanoHTTPInit() has run to completion */
static int initialized = 0;
/* The proxy name if any, NULL otherwise */
static char *proxy = NULL;
/* The proxy port if any */
static int proxyPort;
/* The select()/poll() timeout in seconds */
static unsigned int timeout = 60;

static int xmlNanoHTTPFetchContent(void *ctx, char **ptr, int *len);
153 
154 /**
155  * xmlHTTPErrMemory:
156  * @extra:  extra information
157  *
158  * Handle an out of memory condition
159  */
160 static void
xmlHTTPErrMemory(const char * extra)161 xmlHTTPErrMemory(const char *extra)
162 {
163     __xmlSimpleError(XML_FROM_HTTP, XML_ERR_NO_MEMORY, NULL, NULL, extra);
164 }
165 
/**
 * socket_errno:
 *
 * A portability function: fetch the error code of the last socket call.
 * With Winsock the WSAGetLastError() value is translated to the matching
 * errno constant for the codes this file tests; any other code is
 * returned untouched. Elsewhere this is simply errno.
 *
 * Returns the error code.
 */
static int socket_errno(void) {
#ifndef _WINSOCKAPI_
    return(errno);
#else
    int wsaErr = WSAGetLastError();

    if (wsaErr == WSAECONNRESET)
        return(ECONNRESET);
    if (wsaErr == WSAEINPROGRESS)
        return(EINPROGRESS);
    if (wsaErr == WSAEINTR)
        return(EINTR);
    if (wsaErr == WSAESHUTDOWN)
        return(ESHUTDOWN);
    if (wsaErr == WSAEWOULDBLOCK)
        return(EWOULDBLOCK);
    return(wsaErr);
#endif
}
190 
#ifdef SUPPORT_IP6
/**
 * have_ipv6:
 *
 * Probe for IPv6 support by trying to create an AF_INET6 stream socket.
 * The probe socket is released immediately.
 *
 * Returns 1 if the socket could be created, 0 otherwise.
 */
static
int have_ipv6(void) {
    SOCKET s;

    s = socket (AF_INET6, SOCK_STREAM, 0);
    if (s != INVALID_SOCKET) {
        /*
         * Release with closesocket(), like every other socket in this
         * file: a Winsock SOCKET must not be handed to close().
         */
        closesocket (s);
        return (1);
    }
    return (0);
}
#endif
204 
205 /**
206  * xmlNanoHTTPInit:
207  *
208  * Initialize the HTTP protocol layer.
209  * Currently it just checks for proxy information
210  */
211 
212 void
xmlNanoHTTPInit(void)213 xmlNanoHTTPInit(void) {
214     const char *env;
215 #ifdef _WINSOCKAPI_
216     WSADATA wsaData;
217 #endif
218 
219     if (initialized)
220 	return;
221 
222 #ifdef _WINSOCKAPI_
223     if (WSAStartup(MAKEWORD(1, 1), &wsaData) != 0)
224 	return;
225 #endif
226 
227     if (proxy == NULL) {
228 	proxyPort = 80;
229 	env = getenv("no_proxy");
230 	if (env && ((env[0] == '*') && (env[1] == 0)))
231 	    goto done;
232 	env = getenv("http_proxy");
233 	if (env != NULL) {
234 	    xmlNanoHTTPScanProxy(env);
235 	    goto done;
236 	}
237 	env = getenv("HTTP_PROXY");
238 	if (env != NULL) {
239 	    xmlNanoHTTPScanProxy(env);
240 	    goto done;
241 	}
242     }
243 done:
244     initialized = 1;
245 }
246 
247 /**
248  * xmlNanoHTTPCleanup:
249  *
250  * Cleanup the HTTP protocol layer.
251  */
252 
253 void
xmlNanoHTTPCleanup(void)254 xmlNanoHTTPCleanup(void) {
255     if (proxy != NULL) {
256 	xmlFree(proxy);
257 	proxy = NULL;
258     }
259 #ifdef _WINSOCKAPI_
260     if (initialized)
261 	WSACleanup();
262 #endif
263     initialized = 0;
264     return;
265 }
266 
267 /**
268  * xmlNanoHTTPScanURL:
269  * @ctxt:  an HTTP context
270  * @URL:  The URL used to initialize the context
271  *
272  * (Re)Initialize an HTTP context by parsing the URL and finding
273  * the protocol host port and path it indicates.
274  */
275 
276 static void
xmlNanoHTTPScanURL(xmlNanoHTTPCtxtPtr ctxt,const char * URL)277 xmlNanoHTTPScanURL(xmlNanoHTTPCtxtPtr ctxt, const char *URL) {
278     xmlURIPtr uri;
279     int len;
280 
281     /*
282      * Clear any existing data from the context
283      */
284     if (ctxt->protocol != NULL) {
285         xmlFree(ctxt->protocol);
286 	ctxt->protocol = NULL;
287     }
288     if (ctxt->hostname != NULL) {
289         xmlFree(ctxt->hostname);
290 	ctxt->hostname = NULL;
291     }
292     if (ctxt->path != NULL) {
293         xmlFree(ctxt->path);
294 	ctxt->path = NULL;
295     }
296     if (ctxt->query != NULL) {
297         xmlFree(ctxt->query);
298 	ctxt->query = NULL;
299     }
300     if (URL == NULL) return;
301 
302     uri = xmlParseURIRaw(URL, 1);
303     if (uri == NULL)
304 	return;
305 
306     if ((uri->scheme == NULL) || (uri->server == NULL)) {
307 	xmlFreeURI(uri);
308 	return;
309     }
310 
311     ctxt->protocol = xmlMemStrdup(uri->scheme);
312     /* special case of IPv6 addresses, the [] need to be removed */
313     if ((uri->server != NULL) && (*uri->server == '[')) {
314         len = strlen(uri->server);
315 	if ((len > 2) && (uri->server[len - 1] == ']')) {
316 	    ctxt->hostname = (char *) xmlCharStrndup(uri->server + 1, len -2);
317 	} else
318 	    ctxt->hostname = xmlMemStrdup(uri->server);
319     } else
320 	ctxt->hostname = xmlMemStrdup(uri->server);
321     if (uri->path != NULL)
322 	ctxt->path = xmlMemStrdup(uri->path);
323     else
324 	ctxt->path = xmlMemStrdup("/");
325     if (uri->query != NULL)
326 	ctxt->query = xmlMemStrdup(uri->query);
327     if (uri->port != 0)
328 	ctxt->port = uri->port;
329 
330     xmlFreeURI(uri);
331 }
332 
333 /**
334  * xmlNanoHTTPScanProxy:
335  * @URL:  The proxy URL used to initialize the proxy context
336  *
337  * (Re)Initialize the HTTP Proxy context by parsing the URL and finding
338  * the protocol host port it indicates.
339  * Should be like http://myproxy/ or http://myproxy:3128/
340  * A NULL URL cleans up proxy information.
341  */
342 
343 void
xmlNanoHTTPScanProxy(const char * URL)344 xmlNanoHTTPScanProxy(const char *URL) {
345     xmlURIPtr uri;
346 
347     if (proxy != NULL) {
348         xmlFree(proxy);
349 	proxy = NULL;
350     }
351     proxyPort = 0;
352 
353 #ifdef DEBUG_HTTP
354     if (URL == NULL)
355 	xmlGenericError(xmlGenericErrorContext,
356 		"Removing HTTP proxy info\n");
357     else
358 	xmlGenericError(xmlGenericErrorContext,
359 		"Using HTTP proxy %s\n", URL);
360 #endif
361     if (URL == NULL) return;
362 
363     uri = xmlParseURIRaw(URL, 1);
364     if ((uri == NULL) || (uri->scheme == NULL) ||
365 	(strcmp(uri->scheme, "http")) || (uri->server == NULL)) {
366 	__xmlIOErr(XML_FROM_HTTP, XML_HTTP_URL_SYNTAX, "Syntax Error\n");
367 	if (uri != NULL)
368 	    xmlFreeURI(uri);
369 	return;
370     }
371 
372     proxy = xmlMemStrdup(uri->server);
373     if (uri->port != 0)
374 	proxyPort = uri->port;
375 
376     xmlFreeURI(uri);
377 }
378 
379 /**
380  * xmlNanoHTTPNewCtxt:
381  * @URL:  The URL used to initialize the context
382  *
383  * Allocate and initialize a new HTTP context.
384  *
385  * Returns an HTTP context or NULL in case of error.
386  */
387 
388 static xmlNanoHTTPCtxtPtr
xmlNanoHTTPNewCtxt(const char * URL)389 xmlNanoHTTPNewCtxt(const char *URL) {
390     xmlNanoHTTPCtxtPtr ret;
391 
392     ret = (xmlNanoHTTPCtxtPtr) xmlMalloc(sizeof(xmlNanoHTTPCtxt));
393     if (ret == NULL) {
394         xmlHTTPErrMemory("allocating context");
395         return(NULL);
396     }
397 
398     memset(ret, 0, sizeof(xmlNanoHTTPCtxt));
399     ret->port = 80;
400     ret->returnValue = 0;
401     ret->fd = INVALID_SOCKET;
402     ret->ContentLength = -1;
403 
404     xmlNanoHTTPScanURL(ret, URL);
405 
406     return(ret);
407 }
408 
409 /**
410  * xmlNanoHTTPFreeCtxt:
411  * @ctxt:  an HTTP context
412  *
413  * Frees the context after closing the connection.
414  */
415 
416 static void
xmlNanoHTTPFreeCtxt(xmlNanoHTTPCtxtPtr ctxt)417 xmlNanoHTTPFreeCtxt(xmlNanoHTTPCtxtPtr ctxt) {
418     if (ctxt == NULL) return;
419     if (ctxt->hostname != NULL) xmlFree(ctxt->hostname);
420     if (ctxt->protocol != NULL) xmlFree(ctxt->protocol);
421     if (ctxt->path != NULL) xmlFree(ctxt->path);
422     if (ctxt->query != NULL) xmlFree(ctxt->query);
423     if (ctxt->out != NULL) xmlFree(ctxt->out);
424     if (ctxt->in != NULL) xmlFree(ctxt->in);
425     if (ctxt->contentType != NULL) xmlFree(ctxt->contentType);
426     if (ctxt->encoding != NULL) xmlFree(ctxt->encoding);
427     if (ctxt->mimeType != NULL) xmlFree(ctxt->mimeType);
428     if (ctxt->location != NULL) xmlFree(ctxt->location);
429     if (ctxt->authHeader != NULL) xmlFree(ctxt->authHeader);
430 #ifdef LIBXML_ZLIB_ENABLED
431     if (ctxt->strm != NULL) {
432 	inflateEnd(ctxt->strm);
433 	xmlFree(ctxt->strm);
434     }
435 #endif
436 
437     ctxt->state = XML_NANO_HTTP_NONE;
438     if (ctxt->fd != INVALID_SOCKET) closesocket(ctxt->fd);
439     ctxt->fd = INVALID_SOCKET;
440     xmlFree(ctxt);
441 }
442 
443 /**
444  * xmlNanoHTTPSend:
445  * @ctxt:  an HTTP context
446  *
447  * Send the input needed to initiate the processing on the server side
448  * Returns number of bytes sent or -1 on error.
449  */
450 
451 static int
xmlNanoHTTPSend(xmlNanoHTTPCtxtPtr ctxt,const char * xmt_ptr,int outlen)452 xmlNanoHTTPSend(xmlNanoHTTPCtxtPtr ctxt, const char *xmt_ptr, int outlen)
453 {
454     int total_sent = 0;
455 #ifdef HAVE_POLL_H
456     struct pollfd p;
457 #else
458     struct timeval tv;
459     fd_set wfd;
460 #endif
461 
462     if ((ctxt->state & XML_NANO_HTTP_WRITE) && (xmt_ptr != NULL)) {
463         while (total_sent < outlen) {
464             int nsent = send(ctxt->fd, SEND_ARG2_CAST (xmt_ptr + total_sent),
465                              outlen - total_sent, 0);
466 
467             if (nsent > 0)
468                 total_sent += nsent;
469             else if ((nsent == -1) &&
470 #if defined(EAGAIN) && EAGAIN != EWOULDBLOCK
471                      (socket_errno() != EAGAIN) &&
472 #endif
473                      (socket_errno() != EWOULDBLOCK)) {
474                 __xmlIOErr(XML_FROM_HTTP, 0, "send failed\n");
475                 if (total_sent == 0)
476                     total_sent = -1;
477                 break;
478             } else {
479                 /*
480                  * No data sent
481                  * Since non-blocking sockets are used, wait for
482                  * socket to be writable or default timeout prior
483                  * to retrying.
484                  */
485 #ifndef HAVE_POLL_H
486 #ifndef _WINSOCKAPI_
487                 if (ctxt->fd > FD_SETSIZE)
488                     return -1;
489 #endif
490 
491                 tv.tv_sec = timeout;
492                 tv.tv_usec = 0;
493                 FD_ZERO(&wfd);
494 #ifdef _MSC_VER
495 #pragma warning(push)
496 #pragma warning(disable: 4018)
497 #endif
498                 FD_SET(ctxt->fd, &wfd);
499 #ifdef _MSC_VER
500 #pragma warning(pop)
501 #endif
502                 (void) select(ctxt->fd + 1, NULL, &wfd, NULL, &tv);
503 #else
504                 p.fd = ctxt->fd;
505                 p.events = POLLOUT;
506                 (void) poll(&p, 1, timeout * 1000);
507 #endif /* !HAVE_POLL_H */
508             }
509         }
510     }
511 
512     return total_sent;
513 }
514 
515 /**
516  * xmlNanoHTTPRecv:
517  * @ctxt:  an HTTP context
518  *
519  * Read information coming from the HTTP connection.
520  * This is a blocking call (but it blocks in select(), not read()).
521  *
522  * Returns the number of byte read or -1 in case of error.
523  */
524 
525 static int
xmlNanoHTTPRecv(xmlNanoHTTPCtxtPtr ctxt)526 xmlNanoHTTPRecv(xmlNanoHTTPCtxtPtr ctxt)
527 {
528 #ifdef HAVE_POLL_H
529     struct pollfd p;
530 #else
531     fd_set rfd;
532     struct timeval tv;
533 #endif
534 
535 
536     while (ctxt->state & XML_NANO_HTTP_READ) {
537         if (ctxt->in == NULL) {
538             ctxt->in = (char *) xmlMallocAtomic(65000 * sizeof(char));
539             if (ctxt->in == NULL) {
540                 xmlHTTPErrMemory("allocating input");
541                 ctxt->last = -1;
542                 return (-1);
543             }
544             ctxt->inlen = 65000;
545             ctxt->inptr = ctxt->content = ctxt->inrptr = ctxt->in;
546         }
547         if (ctxt->inrptr > ctxt->in + XML_NANO_HTTP_CHUNK) {
548             int delta = ctxt->inrptr - ctxt->in;
549             int len = ctxt->inptr - ctxt->inrptr;
550 
551             memmove(ctxt->in, ctxt->inrptr, len);
552             ctxt->inrptr -= delta;
553             ctxt->content -= delta;
554             ctxt->inptr -= delta;
555         }
556         if ((ctxt->in + ctxt->inlen) < (ctxt->inptr + XML_NANO_HTTP_CHUNK)) {
557             int d_inptr = ctxt->inptr - ctxt->in;
558             int d_content = ctxt->content - ctxt->in;
559             int d_inrptr = ctxt->inrptr - ctxt->in;
560             char *tmp_ptr = ctxt->in;
561 
562             ctxt->inlen *= 2;
563             ctxt->in = (char *) xmlRealloc(tmp_ptr, ctxt->inlen);
564             if (ctxt->in == NULL) {
565                 xmlHTTPErrMemory("allocating input buffer");
566                 xmlFree(tmp_ptr);
567                 ctxt->last = -1;
568                 return (-1);
569             }
570             ctxt->inptr = ctxt->in + d_inptr;
571             ctxt->content = ctxt->in + d_content;
572             ctxt->inrptr = ctxt->in + d_inrptr;
573         }
574         ctxt->last = recv(ctxt->fd, ctxt->inptr, XML_NANO_HTTP_CHUNK, 0);
575         if (ctxt->last > 0) {
576             ctxt->inptr += ctxt->last;
577             return (ctxt->last);
578         }
579         if (ctxt->last == 0) {
580             return (0);
581         }
582         if (ctxt->last == -1) {
583             switch (socket_errno()) {
584                 case EINPROGRESS:
585                 case EWOULDBLOCK:
586 #if defined(EAGAIN) && EAGAIN != EWOULDBLOCK
587                 case EAGAIN:
588 #endif
589                     break;
590 
591                 case ECONNRESET:
592                 case ESHUTDOWN:
593                     return (0);
594 
595                 default:
596                     __xmlIOErr(XML_FROM_HTTP, 0, "recv failed\n");
597                     return (-1);
598             }
599         }
600 #ifdef HAVE_POLL_H
601         p.fd = ctxt->fd;
602         p.events = POLLIN;
603         if ((poll(&p, 1, timeout * 1000) < 1)
604 #if defined(EINTR)
605             && (errno != EINTR)
606 #endif
607             )
608             return (0);
609 #else /* !HAVE_POLL_H */
610 #ifndef _WINSOCKAPI_
611         if (ctxt->fd > FD_SETSIZE)
612             return 0;
613 #endif
614 
615         tv.tv_sec = timeout;
616         tv.tv_usec = 0;
617         FD_ZERO(&rfd);
618 
619 #ifdef _MSC_VER
620 #pragma warning(push)
621 #pragma warning(disable: 4018)
622 #endif
623 
624         FD_SET(ctxt->fd, &rfd);
625 
626 #ifdef _MSC_VER
627 #pragma warning(pop)
628 #endif
629 
630         if ((select(ctxt->fd + 1, &rfd, NULL, NULL, &tv) < 1)
631 #if defined(EINTR)
632             && (socket_errno() != EINTR)
633 #endif
634             )
635             return (0);
636 #endif /* !HAVE_POLL_H */
637     }
638     return (0);
639 }
640 
641 /**
642  * xmlNanoHTTPReadLine:
643  * @ctxt:  an HTTP context
644  *
645  * Read one line in the HTTP server output, usually for extracting
646  * the HTTP protocol information from the answer header.
647  *
648  * Returns a newly allocated string with a copy of the line, or NULL
649  *         which indicate the end of the input.
650  */
651 
652 static char *
xmlNanoHTTPReadLine(xmlNanoHTTPCtxtPtr ctxt)653 xmlNanoHTTPReadLine(xmlNanoHTTPCtxtPtr ctxt) {
654     char buf[4096];
655     char *bp = buf;
656     int	rc;
657 
658     while (bp - buf < 4095) {
659 	if (ctxt->inrptr == ctxt->inptr) {
660 	    if ( (rc = xmlNanoHTTPRecv(ctxt)) == 0) {
661 		if (bp == buf)
662 		    return(NULL);
663 		else
664 		    *bp = 0;
665 		return(xmlMemStrdup(buf));
666 	    }
667 	    else if ( rc == -1 ) {
668 	        return ( NULL );
669 	    }
670 	}
671 	*bp = *ctxt->inrptr++;
672 	if (*bp == '\n') {
673 	    *bp = 0;
674 	    return(xmlMemStrdup(buf));
675 	}
676 	if (*bp != '\r')
677 	    bp++;
678     }
679     buf[4095] = 0;
680     return(xmlMemStrdup(buf));
681 }
682 
683 
684 /**
685  * xmlNanoHTTPScanAnswer:
686  * @ctxt:  an HTTP context
687  * @line:  an HTTP header line
688  *
689  * Try to extract useful information from the server answer.
690  * We currently parse and process:
691  *  - The HTTP revision/ return code
692  *  - The Content-Type, Mime-Type and charset used
693  *  - The Location for redirect processing.
694  *
695  * Returns -1 in case of failure, the file descriptor number otherwise
696  */
697 
698 static void
xmlNanoHTTPScanAnswer(xmlNanoHTTPCtxtPtr ctxt,const char * line)699 xmlNanoHTTPScanAnswer(xmlNanoHTTPCtxtPtr ctxt, const char *line) {
700     const char *cur = line;
701 
702     if (line == NULL) return;
703 
704     if (!strncmp(line, "HTTP/", 5)) {
705         int version = 0;
706 	int ret = 0;
707 
708 	cur += 5;
709 	while ((*cur >= '0') && (*cur <= '9')) {
710 	    version *= 10;
711 	    version += *cur - '0';
712 	    cur++;
713 	}
714 	if (*cur == '.') {
715 	    cur++;
716 	    if ((*cur >= '0') && (*cur <= '9')) {
717 		version *= 10;
718 		version += *cur - '0';
719 		cur++;
720 	    }
721 	    while ((*cur >= '0') && (*cur <= '9'))
722 		cur++;
723 	} else
724 	    version *= 10;
725 	if ((*cur != ' ') && (*cur != '\t')) return;
726 	while ((*cur == ' ') || (*cur == '\t')) cur++;
727 	if ((*cur < '0') || (*cur > '9')) return;
728 	while ((*cur >= '0') && (*cur <= '9')) {
729 	    ret *= 10;
730 	    ret += *cur - '0';
731 	    cur++;
732 	}
733 	if ((*cur != 0) && (*cur != ' ') && (*cur != '\t')) return;
734 	ctxt->returnValue = ret;
735         ctxt->version = version;
736     } else if (!xmlStrncasecmp(BAD_CAST line, BAD_CAST"Content-Type:", 13)) {
737         const xmlChar *charset, *last, *mime;
738         cur += 13;
739 	while ((*cur == ' ') || (*cur == '\t')) cur++;
740 	if (ctxt->contentType != NULL)
741 	    xmlFree(ctxt->contentType);
742 	ctxt->contentType = xmlMemStrdup(cur);
743 	mime = (const xmlChar *) cur;
744 	last = mime;
745 	while ((*last != 0) && (*last != ' ') && (*last != '\t') &&
746 	       (*last != ';') && (*last != ','))
747 	    last++;
748 	if (ctxt->mimeType != NULL)
749 	    xmlFree(ctxt->mimeType);
750 	ctxt->mimeType = (char *) xmlStrndup(mime, last - mime);
751 	charset = xmlStrstr(BAD_CAST ctxt->contentType, BAD_CAST "charset=");
752 	if (charset != NULL) {
753 	    charset += 8;
754 	    last = charset;
755 	    while ((*last != 0) && (*last != ' ') && (*last != '\t') &&
756 	           (*last != ';') && (*last != ','))
757 		last++;
758 	    if (ctxt->encoding != NULL)
759 	        xmlFree(ctxt->encoding);
760 	    ctxt->encoding = (char *) xmlStrndup(charset, last - charset);
761 	}
762     } else if (!xmlStrncasecmp(BAD_CAST line, BAD_CAST"ContentType:", 12)) {
763         const xmlChar *charset, *last, *mime;
764         cur += 12;
765 	if (ctxt->contentType != NULL) return;
766 	while ((*cur == ' ') || (*cur == '\t')) cur++;
767 	ctxt->contentType = xmlMemStrdup(cur);
768 	mime = (const xmlChar *) cur;
769 	last = mime;
770 	while ((*last != 0) && (*last != ' ') && (*last != '\t') &&
771 	       (*last != ';') && (*last != ','))
772 	    last++;
773 	if (ctxt->mimeType != NULL)
774 	    xmlFree(ctxt->mimeType);
775 	ctxt->mimeType = (char *) xmlStrndup(mime, last - mime);
776 	charset = xmlStrstr(BAD_CAST ctxt->contentType, BAD_CAST "charset=");
777 	if (charset != NULL) {
778 	    charset += 8;
779 	    last = charset;
780 	    while ((*last != 0) && (*last != ' ') && (*last != '\t') &&
781 	           (*last != ';') && (*last != ','))
782 		last++;
783 	    if (ctxt->encoding != NULL)
784 	        xmlFree(ctxt->encoding);
785 	    ctxt->encoding = (char *) xmlStrndup(charset, last - charset);
786 	}
787     } else if (!xmlStrncasecmp(BAD_CAST line, BAD_CAST"Location:", 9)) {
788         cur += 9;
789 	while ((*cur == ' ') || (*cur == '\t')) cur++;
790 	if (ctxt->location != NULL)
791 	    xmlFree(ctxt->location);
792 	if (*cur == '/') {
793 	    xmlChar *tmp_http = xmlStrdup(BAD_CAST "http://");
794 	    xmlChar *tmp_loc =
795 	        xmlStrcat(tmp_http, (const xmlChar *) ctxt->hostname);
796 	    ctxt->location =
797 	        (char *) xmlStrcat (tmp_loc, (const xmlChar *) cur);
798 	} else {
799 	    ctxt->location = xmlMemStrdup(cur);
800 	}
801     } else if (!xmlStrncasecmp(BAD_CAST line, BAD_CAST"WWW-Authenticate:", 17)) {
802         cur += 17;
803 	while ((*cur == ' ') || (*cur == '\t')) cur++;
804 	if (ctxt->authHeader != NULL)
805 	    xmlFree(ctxt->authHeader);
806 	ctxt->authHeader = xmlMemStrdup(cur);
807     } else if (!xmlStrncasecmp(BAD_CAST line, BAD_CAST"Proxy-Authenticate:", 19)) {
808         cur += 19;
809 	while ((*cur == ' ') || (*cur == '\t')) cur++;
810 	if (ctxt->authHeader != NULL)
811 	    xmlFree(ctxt->authHeader);
812 	ctxt->authHeader = xmlMemStrdup(cur);
813 #ifdef LIBXML_ZLIB_ENABLED
814     } else if ( !xmlStrncasecmp( BAD_CAST line, BAD_CAST"Content-Encoding:", 17) ) {
815 	cur += 17;
816 	while ((*cur == ' ') || (*cur == '\t')) cur++;
817 	if ( !xmlStrncasecmp( BAD_CAST cur, BAD_CAST"gzip", 4) ) {
818 	    ctxt->usesGzip = 1;
819 
820 	    ctxt->strm = xmlMalloc(sizeof(z_stream));
821 
822 	    if (ctxt->strm != NULL) {
823 		ctxt->strm->zalloc = Z_NULL;
824 		ctxt->strm->zfree = Z_NULL;
825 		ctxt->strm->opaque = Z_NULL;
826 		ctxt->strm->avail_in = 0;
827 		ctxt->strm->next_in = Z_NULL;
828 
829 		inflateInit2( ctxt->strm, 31 );
830 	    }
831 	}
832 #endif
833     } else if ( !xmlStrncasecmp( BAD_CAST line, BAD_CAST"Content-Length:", 15) ) {
834 	cur += 15;
835 	ctxt->ContentLength = strtol( cur, NULL, 10 );
836     }
837 }
838 
839 /**
840  * xmlNanoHTTPConnectAttempt:
841  * @addr:  a socket address structure
842  *
843  * Attempt a connection to the given IP:port endpoint. It forces
844  * non-blocking semantic on the socket, and allow 60 seconds for
845  * the host to answer.
846  *
847  * Returns -1 in case of failure, the file descriptor number otherwise
848  */
849 
850 static SOCKET
xmlNanoHTTPConnectAttempt(struct sockaddr * addr)851 xmlNanoHTTPConnectAttempt(struct sockaddr *addr)
852 {
853 #ifndef HAVE_POLL_H
854     fd_set wfd;
855 #ifdef _WINSOCKAPI_
856     fd_set xfd;
857 #endif
858     struct timeval tv;
859 #else /* !HAVE_POLL_H */
860     struct pollfd p;
861 #endif /* !HAVE_POLL_H */
862     int status;
863 
864     int addrlen;
865 
866     SOCKET s;
867 
868 #ifdef SUPPORT_IP6
869     if (addr->sa_family == AF_INET6) {
870         s = socket(PF_INET6, SOCK_STREAM, IPPROTO_TCP);
871         addrlen = sizeof(struct sockaddr_in6);
872     } else
873 #endif
874     {
875         s = socket(PF_INET, SOCK_STREAM, IPPROTO_TCP);
876         addrlen = sizeof(struct sockaddr_in);
877     }
878     if (s == INVALID_SOCKET) {
879 #ifdef DEBUG_HTTP
880         perror("socket");
881 #endif
882         __xmlIOErr(XML_FROM_HTTP, 0, "socket failed\n");
883         return INVALID_SOCKET;
884     }
885 #ifdef _WINSOCKAPI_
886     {
887         u_long one = 1;
888 
889         status = ioctlsocket(s, FIONBIO, &one) == SOCKET_ERROR ? -1 : 0;
890     }
891 #else /* _WINSOCKAPI_ */
892 #if defined(VMS)
893     {
894         int enable = 1;
895 
896         status = ioctl(s, FIONBIO, &enable);
897     }
898 #else /* VMS */
899 #if defined(__BEOS__) && !defined(__HAIKU__)
900     {
901         bool noblock = true;
902 
903         status =
904             setsockopt(s, SOL_SOCKET, SO_NONBLOCK, &noblock,
905                        sizeof(noblock));
906     }
907 #else /* __BEOS__ */
908     if ((status = fcntl(s, F_GETFL, 0)) != -1) {
909 #ifdef O_NONBLOCK
910         status |= O_NONBLOCK;
911 #else /* O_NONBLOCK */
912 #ifdef F_NDELAY
913         status |= F_NDELAY;
914 #endif /* F_NDELAY */
915 #endif /* !O_NONBLOCK */
916         status = fcntl(s, F_SETFL, status);
917     }
918     if (status < 0) {
919 #ifdef DEBUG_HTTP
920         perror("nonblocking");
921 #endif
922         __xmlIOErr(XML_FROM_HTTP, 0, "error setting non-blocking IO\n");
923         closesocket(s);
924         return INVALID_SOCKET;
925     }
926 #endif /* !__BEOS__ */
927 #endif /* !VMS */
928 #endif /* !_WINSOCKAPI_ */
929 
930     if (connect(s, addr, addrlen) == -1) {
931         switch (socket_errno()) {
932             case EINPROGRESS:
933             case EWOULDBLOCK:
934                 break;
935             default:
936                 __xmlIOErr(XML_FROM_HTTP, 0,
937                            "error connecting to HTTP server");
938                 closesocket(s);
939                 return INVALID_SOCKET;
940         }
941     }
942 #ifndef HAVE_POLL_H
943     tv.tv_sec = timeout;
944     tv.tv_usec = 0;
945 
946 #ifdef _MSC_VER
947 #pragma warning(push)
948 #pragma warning(disable: 4018)
949 #endif
950 #ifndef _WINSOCKAPI_
951     if (s > FD_SETSIZE)
952         return INVALID_SOCKET;
953 #endif
954     FD_ZERO(&wfd);
955     FD_SET(s, &wfd);
956 
957 #ifdef _WINSOCKAPI_
958     FD_ZERO(&xfd);
959     FD_SET(s, &xfd);
960 
961     switch (select(s + 1, NULL, &wfd, &xfd, &tv))
962 #else
963     switch (select(s + 1, NULL, &wfd, NULL, &tv))
964 #endif
965 #ifdef _MSC_VER
966 #pragma warning(pop)
967 #endif
968 
969 #else /* !HAVE_POLL_H */
970     p.fd = s;
971     p.events = POLLOUT;
972     switch (poll(&p, 1, timeout * 1000))
973 #endif /* !HAVE_POLL_H */
974 
975     {
976         case 0:
977             /* Time out */
978             __xmlIOErr(XML_FROM_HTTP, 0, "Connect attempt timed out");
979             closesocket(s);
980             return INVALID_SOCKET;
981         case -1:
982             /* Ermm.. ?? */
983             __xmlIOErr(XML_FROM_HTTP, 0, "Connect failed");
984             closesocket(s);
985             return INVALID_SOCKET;
986     }
987 
988 #ifndef HAVE_POLL_H
989     if (FD_ISSET(s, &wfd)
990 #ifdef _WINSOCKAPI_
991         || FD_ISSET(s, &xfd)
992 #endif
993         )
994 #else /* !HAVE_POLL_H */
995     if (p.revents == POLLOUT)
996 #endif /* !HAVE_POLL_H */
997     {
998         XML_SOCKLEN_T len;
999 
1000         len = sizeof(status);
1001 #ifdef SO_ERROR
1002         if (getsockopt(s, SOL_SOCKET, SO_ERROR, (char *) &status, &len) <
1003             0) {
1004             /* Solaris error code */
1005             __xmlIOErr(XML_FROM_HTTP, 0, "getsockopt failed\n");
1006             closesocket(s);
1007             return INVALID_SOCKET;
1008         }
1009 #endif
1010         if (status) {
1011             __xmlIOErr(XML_FROM_HTTP, 0,
1012                        "Error connecting to remote host");
1013             closesocket(s);
1014             errno = status;
1015             return INVALID_SOCKET;
1016         }
1017     } else {
1018         /* pbm */
1019         __xmlIOErr(XML_FROM_HTTP, 0, "select failed\n");
1020         closesocket(s);
1021         return INVALID_SOCKET;
1022     }
1023 
1024     return (s);
1025 }
1026 
/**
 * xmlNanoHTTPConnectHost:
 * @host:  the host name
 * @port:  the port number
 *
 * Attempt a connection to the given host:port endpoint. The host name is
 * resolved (with getaddrinfo() and/or gethostbyname(), depending on the
 * build configuration) and every usable address returned by the resolver
 * is handed to xmlNanoHTTPConnectAttempt() in turn until one succeeds.
 *
 * Returns INVALID_SOCKET in case of failure, the connected socket otherwise
 */

static SOCKET
xmlNanoHTTPConnectHost(const char *host, int port)
{
    struct sockaddr *addr = NULL;
    struct sockaddr_in sockin;

#ifdef SUPPORT_IP6
    struct in6_addr ia6;
    struct sockaddr_in6 sockin6;
#endif
    SOCKET s;

    memset (&sockin, 0, sizeof(sockin));
#ifdef SUPPORT_IP6
    memset (&sockin6, 0, sizeof(sockin6));
#endif

    /*
     * Without getaddrinfo(): set the RES_USE_INET6 resolver option when
     * have_ipv6() reports IPv6 support, initialising the resolver first
     * if RES_INIT is not set yet.
     */
#if !defined(HAVE_GETADDRINFO) && defined(SUPPORT_IP6) && defined(RES_USE_INET6)
    if (have_ipv6 ())
    {
	if (!(_res.options & RES_INIT))
	    res_init();
	_res.options |= RES_USE_INET6;
    }
#endif

    /*
     * getaddrinfo() based resolution. On non-Windows IPv6 builds this block
     * is the body of "if (have_ipv6 ())" and the gethostbyname() block
     * further down is its "else"; on Windows it is compiled unconditionally.
     */
#if defined(HAVE_GETADDRINFO) && defined(SUPPORT_IP6) && !defined(_WIN32)
    if (have_ipv6 ())
#endif
#if defined(HAVE_GETADDRINFO) && (defined(SUPPORT_IP6) || defined(_WIN32))
    {
	int status;
	struct addrinfo hints, *res, *result;

	result = NULL;
	memset (&hints, 0,sizeof(hints));
	hints.ai_socktype = SOCK_STREAM;

	/* No service name is passed: the port is patched in by hand below */
	status = getaddrinfo (host, NULL, &hints, &result);
	if (status) {
	    __xmlIOErr(XML_FROM_HTTP, 0, "getaddrinfo failed\n");
	    return INVALID_SOCKET;
	}

	/* Try each returned address until one connection attempt succeeds */
	for (res = result; res; res = res->ai_next) {
	    if (res->ai_family == AF_INET) {
		/* Refuse addresses that would not fit in the local copy */
		if ((size_t)res->ai_addrlen > sizeof(sockin)) {
		    __xmlIOErr(XML_FROM_HTTP, 0, "address size mismatch\n");
		    freeaddrinfo (result);
		    return INVALID_SOCKET;
		}
		memcpy (&sockin, res->ai_addr, res->ai_addrlen);
		sockin.sin_port = htons (port);
		addr = (struct sockaddr *)&sockin;
#ifdef SUPPORT_IP6
	    } else if (have_ipv6 () && (res->ai_family == AF_INET6)) {
		if ((size_t)res->ai_addrlen > sizeof(sockin6)) {
		    __xmlIOErr(XML_FROM_HTTP, 0, "address size mismatch\n");
		    freeaddrinfo (result);
		    return INVALID_SOCKET;
		}
		memcpy (&sockin6, res->ai_addr, res->ai_addrlen);
		sockin6.sin6_port = htons (port);
		addr = (struct sockaddr *)&sockin6;
#endif
	    } else
		/* Unsupported address family: skip this entry */
		continue;              /* for */

	    s = xmlNanoHTTPConnectAttempt (addr);
	    if (s != INVALID_SOCKET) {
		freeaddrinfo (result);
		return (s);
	    }
	}

	/* Every candidate failed: release the resolver result */
	if (result)
	    freeaddrinfo (result);
    }
#endif
#if defined(HAVE_GETADDRINFO) && defined(SUPPORT_IP6) && !defined(_WIN32)
    else
#endif
    /*
     * gethostbyname() based resolution, compiled whenever getaddrinfo()
     * is unavailable or the target is not Windows.
     */
#if !defined(HAVE_GETADDRINFO) || !defined(_WIN32)
    {
        struct hostent *h;
        struct in_addr ia;
        int i;

	h = gethostbyname (GETHOSTBYNAME_ARG_CAST host);
	if (h == NULL) {

/*
 * Okay, I got fed up by the non-portability of this error message
 * extraction code. it work on Linux, if it work on your platform
 * and one want to enable it, send me the defined(foobar) needed
 */
#if defined(HAVE_NETDB_H) && defined(HOST_NOT_FOUND) && defined(__linux__)
	    const char *h_err_txt = "";

	    /* Map the resolver error code to a human readable message */
	    switch (h_errno) {
		case HOST_NOT_FOUND:
		    h_err_txt = "Authoritative host not found";
		    break;

		case TRY_AGAIN:
		    h_err_txt =
			"Non-authoritative host not found or server failure.";
		    break;

		case NO_RECOVERY:
		    h_err_txt =
			"Non-recoverable errors:  FORMERR, REFUSED, or NOTIMP.";
		    break;

#ifdef NO_ADDRESS
		case NO_ADDRESS:
		    h_err_txt =
			"Valid name, no data record of requested type.";
		    break;
#endif

		default:
		    h_err_txt = "No error text defined.";
		    break;
	    }
	    __xmlIOErr(XML_FROM_HTTP, 0, h_err_txt);
#else
	    __xmlIOErr(XML_FROM_HTTP, 0, "Failed to resolve host");
#endif
	    return INVALID_SOCKET;
	}

	/* Try each address of the host entry until one attempt succeeds */
	for (i = 0; h->h_addr_list[i]; i++) {
	    if (h->h_addrtype == AF_INET) {
		/* A records (IPv4) */
		if ((unsigned int) h->h_length > sizeof(ia)) {
		    __xmlIOErr(XML_FROM_HTTP, 0, "address size mismatch\n");
		    return INVALID_SOCKET;
		}
		memcpy (&ia, h->h_addr_list[i], h->h_length);
		sockin.sin_family = h->h_addrtype;
		sockin.sin_addr = ia;
		sockin.sin_port = (unsigned short)htons ((unsigned short)port);
		addr = (struct sockaddr *) &sockin;
#ifdef SUPPORT_IP6
	    } else if (have_ipv6 () && (h->h_addrtype == AF_INET6)) {
		/* AAAA records (IPv6) */
		if ((unsigned int) h->h_length > sizeof(ia6)) {
		    __xmlIOErr(XML_FROM_HTTP, 0, "address size mismatch\n");
		    return INVALID_SOCKET;
		}
		memcpy (&ia6, h->h_addr_list[i], h->h_length);
		sockin6.sin6_family = h->h_addrtype;
		sockin6.sin6_addr = ia6;
		sockin6.sin6_port = htons (port);
		addr = (struct sockaddr *) &sockin6;
#endif
	    } else
		/* The address type is per host entry: nothing else to try */
		break;              /* for */

	    s = xmlNanoHTTPConnectAttempt (addr);
	    if (s != INVALID_SOCKET)
		return (s);
	}
    }
#endif

    /* Reached only when no address could be connected to */
#ifdef DEBUG_HTTP
    xmlGenericError(xmlGenericErrorContext,
                    "xmlNanoHTTPConnectHost:  unable to connect to '%s'.\n",
                    host);
#endif
    return INVALID_SOCKET;
}
1212 
1213 
/**
 * xmlNanoHTTPOpen:
 * @URL:  The URL to load
 * @contentType:  if not NULL, location reset to NULL on entry and then
 *                passed on to xmlNanoHTTPMethod() to receive the
 *                Content-Type information
 *
 * Convenience wrapper around xmlNanoHTTPMethod() which opens a connection
 * to the indicated resource with no explicit method, no request body and
 * no extra headers (i.e. a plain HTTP GET).
 *
 * Returns NULL in case of failure, otherwise a request handler.
 *     The contentType, if provided must be freed by the caller
 */

void*
xmlNanoHTTPOpen(const char *URL, char **contentType) {
    void *handle;

    /* Never hand back a stale pointer to the caller */
    if (contentType != NULL)
        *contentType = NULL;

    handle = xmlNanoHTTPMethod(URL, NULL, NULL, contentType, NULL, 0);
    return(handle);
}
1232 
/**
 * xmlNanoHTTPOpenRedir:
 * @URL:  The URL to load
 * @contentType:  if not NULL, location reset to NULL on entry and then
 *                passed on to receive the Content-Type information
 * @redir:  if not NULL, location reset to NULL on entry and then passed
 *          on to receive the redirected URL
 *
 * Convenience wrapper around xmlNanoHTTPMethodRedir() which opens a
 * connection to the indicated resource with no explicit method, no
 * request body and no extra headers (i.e. a plain HTTP GET).
 *
 * Returns NULL in case of failure, otherwise a request handler.
 *     The contentType, if provided must be freed by the caller
 */

void*
xmlNanoHTTPOpenRedir(const char *URL, char **contentType, char **redir) {
    void *handle;

    /* Clear both output locations before delegating */
    if (contentType != NULL)
        *contentType = NULL;
    if (redir != NULL)
        *redir = NULL;

    handle = xmlNanoHTTPMethodRedir(URL, NULL, NULL, contentType, redir,
                                    NULL, 0);
    return(handle);
}
1253 
1254 /**
1255  * xmlNanoHTTPRead:
1256  * @ctx:  the HTTP context
1257  * @dest:  a buffer
1258  * @len:  the buffer length
1259  *
1260  * This function tries to read @len bytes from the existing HTTP connection
1261  * and saves them in @dest. This is a blocking call.
1262  *
1263  * Returns the number of byte read. 0 is an indication of an end of connection.
1264  *         -1 indicates a parameter error.
1265  */
1266 int
xmlNanoHTTPRead(void * ctx,void * dest,int len)1267 xmlNanoHTTPRead(void *ctx, void *dest, int len) {
1268     xmlNanoHTTPCtxtPtr ctxt = (xmlNanoHTTPCtxtPtr) ctx;
1269 #ifdef LIBXML_ZLIB_ENABLED
1270     int bytes_read = 0;
1271     int orig_avail_in;
1272     int z_ret;
1273 #endif
1274 
1275     if (ctx == NULL) return(-1);
1276     if (dest == NULL) return(-1);
1277     if (len <= 0) return(0);
1278 
1279 #ifdef LIBXML_ZLIB_ENABLED
1280     if (ctxt->usesGzip == 1) {
1281         if (ctxt->strm == NULL) return(0);
1282 
1283         ctxt->strm->next_out = dest;
1284         ctxt->strm->avail_out = len;
1285 	ctxt->strm->avail_in = ctxt->inptr - ctxt->inrptr;
1286 
1287         while (ctxt->strm->avail_out > 0 &&
1288 	       (ctxt->strm->avail_in > 0 || xmlNanoHTTPRecv(ctxt) > 0)) {
1289             orig_avail_in = ctxt->strm->avail_in =
1290 			    ctxt->inptr - ctxt->inrptr - bytes_read;
1291             ctxt->strm->next_in = BAD_CAST (ctxt->inrptr + bytes_read);
1292 
1293             z_ret = inflate(ctxt->strm, Z_NO_FLUSH);
1294             bytes_read += orig_avail_in - ctxt->strm->avail_in;
1295 
1296             if (z_ret != Z_OK) break;
1297 	}
1298 
1299         ctxt->inrptr += bytes_read;
1300         return(len - ctxt->strm->avail_out);
1301     }
1302 #endif
1303 
1304     while (ctxt->inptr - ctxt->inrptr < len) {
1305         if (xmlNanoHTTPRecv(ctxt) <= 0) break;
1306     }
1307     if (ctxt->inptr - ctxt->inrptr < len)
1308         len = ctxt->inptr - ctxt->inrptr;
1309     memcpy(dest, ctxt->inrptr, len);
1310     ctxt->inrptr += len;
1311     return(len);
1312 }
1313 
1314 /**
1315  * xmlNanoHTTPClose:
1316  * @ctx:  the HTTP context
1317  *
1318  * This function closes an HTTP context, it ends up the connection and
1319  * free all data related to it.
1320  */
1321 void
xmlNanoHTTPClose(void * ctx)1322 xmlNanoHTTPClose(void *ctx) {
1323     xmlNanoHTTPCtxtPtr ctxt = (xmlNanoHTTPCtxtPtr) ctx;
1324 
1325     if (ctx == NULL) return;
1326 
1327     xmlNanoHTTPFreeCtxt(ctxt);
1328 }
1329 
1330 /**
1331  * xmlNanoHTTPMethodRedir:
1332  * @URL:  The URL to load
1333  * @method:  the HTTP method to use
1334  * @input:  the input string if any
1335  * @contentType:  the Content-Type information IN and OUT
1336  * @redir:  the redirected URL OUT
1337  * @headers:  the extra headers
1338  * @ilen:  input length
1339  *
1340  * This function try to open a connection to the indicated resource
1341  * via HTTP using the given @method, adding the given extra headers
1342  * and the input buffer for the request content.
1343  *
1344  * Returns NULL in case of failure, otherwise a request handler.
1345  *     The contentType, or redir, if provided must be freed by the caller
1346  */
1347 
1348 void*
xmlNanoHTTPMethodRedir(const char * URL,const char * method,const char * input,char ** contentType,char ** redir,const char * headers,int ilen)1349 xmlNanoHTTPMethodRedir(const char *URL, const char *method, const char *input,
1350                   char **contentType, char **redir,
1351 		  const char *headers, int ilen ) {
1352     xmlNanoHTTPCtxtPtr ctxt;
1353     char *bp, *p;
1354     int blen;
1355     SOCKET ret;
1356     int nbRedirects = 0;
1357     char *redirURL = NULL;
1358 #ifdef DEBUG_HTTP
1359     int xmt_bytes;
1360 #endif
1361 
1362     if (URL == NULL) return(NULL);
1363     if (method == NULL) method = "GET";
1364     xmlNanoHTTPInit();
1365 
1366 retry:
1367     if (redirURL == NULL) {
1368 	ctxt = xmlNanoHTTPNewCtxt(URL);
1369 	if (ctxt == NULL)
1370 	    return(NULL);
1371     } else {
1372 	ctxt = xmlNanoHTTPNewCtxt(redirURL);
1373 	if (ctxt == NULL)
1374 	    return(NULL);
1375 	ctxt->location = xmlMemStrdup(redirURL);
1376     }
1377 
1378     if ((ctxt->protocol == NULL) || (strcmp(ctxt->protocol, "http"))) {
1379 	__xmlIOErr(XML_FROM_HTTP, XML_HTTP_URL_SYNTAX, "Not a valid HTTP URI");
1380         xmlNanoHTTPFreeCtxt(ctxt);
1381 	if (redirURL != NULL) xmlFree(redirURL);
1382         return(NULL);
1383     }
1384     if (ctxt->hostname == NULL) {
1385 	__xmlIOErr(XML_FROM_HTTP, XML_HTTP_UNKNOWN_HOST,
1386 	           "Failed to identify host in URI");
1387         xmlNanoHTTPFreeCtxt(ctxt);
1388 	if (redirURL != NULL) xmlFree(redirURL);
1389         return(NULL);
1390     }
1391     if (proxy) {
1392 	blen = strlen(ctxt->hostname) * 2 + 16;
1393 	ret = xmlNanoHTTPConnectHost(proxy, proxyPort);
1394     }
1395     else {
1396 	blen = strlen(ctxt->hostname);
1397 	ret = xmlNanoHTTPConnectHost(ctxt->hostname, ctxt->port);
1398     }
1399     if (ret == INVALID_SOCKET) {
1400         xmlNanoHTTPFreeCtxt(ctxt);
1401 	if (redirURL != NULL) xmlFree(redirURL);
1402         return(NULL);
1403     }
1404     ctxt->fd = ret;
1405 
1406     if (input == NULL)
1407 	ilen = 0;
1408     else
1409 	blen += 36;
1410 
1411     if (headers != NULL)
1412 	blen += strlen(headers) + 2;
1413     if (contentType && *contentType)
1414 	/* reserve for string plus 'Content-Type: \r\n" */
1415 	blen += strlen(*contentType) + 16;
1416     if (ctxt->query != NULL)
1417 	/* 1 for '?' */
1418 	blen += strlen(ctxt->query) + 1;
1419     blen += strlen(method) + strlen(ctxt->path) + 24;
1420 #ifdef LIBXML_ZLIB_ENABLED
1421     /* reserve for possible 'Accept-Encoding: gzip' string */
1422     blen += 23;
1423 #endif
1424     if (ctxt->port != 80) {
1425 	/* reserve space for ':xxxxx', incl. potential proxy */
1426 	if (proxy)
1427 	    blen += 17;
1428 	else
1429 	    blen += 11;
1430     }
1431     bp = (char*)xmlMallocAtomic(blen);
1432     if ( bp == NULL ) {
1433         xmlNanoHTTPFreeCtxt( ctxt );
1434 	xmlHTTPErrMemory("allocating header buffer");
1435 	return ( NULL );
1436     }
1437 
1438     p = bp;
1439 
1440     if (proxy) {
1441 	if (ctxt->port != 80) {
1442 	    p += snprintf( p, blen - (p - bp), "%s http://%s:%d%s",
1443 			method, ctxt->hostname,
1444 			ctxt->port, ctxt->path );
1445 	}
1446 	else
1447 	    p += snprintf( p, blen - (p - bp), "%s http://%s%s", method,
1448 			ctxt->hostname, ctxt->path);
1449     }
1450     else
1451 	p += snprintf( p, blen - (p - bp), "%s %s", method, ctxt->path);
1452 
1453     if (ctxt->query != NULL)
1454 	p += snprintf( p, blen - (p - bp), "?%s", ctxt->query);
1455 
1456     if (ctxt->port == 80) {
1457         p += snprintf( p, blen - (p - bp), " HTTP/1.0\r\nHost: %s\r\n",
1458 		    ctxt->hostname);
1459     } else {
1460         p += snprintf( p, blen - (p - bp), " HTTP/1.0\r\nHost: %s:%d\r\n",
1461 		    ctxt->hostname, ctxt->port);
1462     }
1463 
1464 #ifdef LIBXML_ZLIB_ENABLED
1465     p += snprintf(p, blen - (p - bp), "Accept-Encoding: gzip\r\n");
1466 #endif
1467 
1468     if (contentType != NULL && *contentType)
1469 	p += snprintf(p, blen - (p - bp), "Content-Type: %s\r\n", *contentType);
1470 
1471     if (headers != NULL)
1472 	p += snprintf( p, blen - (p - bp), "%s", headers );
1473 
1474     if (input != NULL)
1475 	snprintf(p, blen - (p - bp), "Content-Length: %d\r\n\r\n", ilen );
1476     else
1477 	snprintf(p, blen - (p - bp), "\r\n");
1478 
1479 #ifdef DEBUG_HTTP
1480     xmlGenericError(xmlGenericErrorContext,
1481 	    "-> %s%s", proxy? "(Proxy) " : "", bp);
1482     if ((blen -= strlen(bp)+1) < 0)
1483 	xmlGenericError(xmlGenericErrorContext,
1484 		"ERROR: overflowed buffer by %d bytes\n", -blen);
1485 #endif
1486     ctxt->outptr = ctxt->out = bp;
1487     ctxt->state = XML_NANO_HTTP_WRITE;
1488     blen = strlen( ctxt->out );
1489 #ifdef DEBUG_HTTP
1490     xmt_bytes = xmlNanoHTTPSend(ctxt, ctxt->out, blen );
1491     if ( xmt_bytes != blen )
1492         xmlGenericError( xmlGenericErrorContext,
1493 			"xmlNanoHTTPMethodRedir:  Only %d of %d %s %s\n",
1494 			xmt_bytes, blen,
1495 			"bytes of HTTP headers sent to host",
1496 			ctxt->hostname );
1497 #else
1498     xmlNanoHTTPSend(ctxt, ctxt->out, blen );
1499 #endif
1500 
1501     if ( input != NULL ) {
1502 #ifdef DEBUG_HTTP
1503         xmt_bytes = xmlNanoHTTPSend( ctxt, input, ilen );
1504 
1505 	if ( xmt_bytes != ilen )
1506 	    xmlGenericError( xmlGenericErrorContext,
1507 			"xmlNanoHTTPMethodRedir:  Only %d of %d %s %s\n",
1508 			xmt_bytes, ilen,
1509 			"bytes of HTTP content sent to host",
1510 			ctxt->hostname );
1511 #else
1512 	xmlNanoHTTPSend( ctxt, input, ilen );
1513 #endif
1514     }
1515 
1516     ctxt->state = XML_NANO_HTTP_READ;
1517 
1518     while ((p = xmlNanoHTTPReadLine(ctxt)) != NULL) {
1519         if (*p == 0) {
1520 	    ctxt->content = ctxt->inrptr;
1521 	    xmlFree(p);
1522 	    break;
1523 	}
1524 	xmlNanoHTTPScanAnswer(ctxt, p);
1525 
1526 #ifdef DEBUG_HTTP
1527 	xmlGenericError(xmlGenericErrorContext, "<- %s\n", p);
1528 #endif
1529         xmlFree(p);
1530     }
1531 
1532     if ((ctxt->location != NULL) && (ctxt->returnValue >= 300) &&
1533         (ctxt->returnValue < 400)) {
1534 #ifdef DEBUG_HTTP
1535 	xmlGenericError(xmlGenericErrorContext,
1536 		"\nRedirect to: %s\n", ctxt->location);
1537 #endif
1538 	while ( xmlNanoHTTPRecv(ctxt) > 0 )
1539             ;
1540         if (nbRedirects < XML_NANO_HTTP_MAX_REDIR) {
1541 	    nbRedirects++;
1542 	    if (redirURL != NULL)
1543 		xmlFree(redirURL);
1544 	    redirURL = xmlMemStrdup(ctxt->location);
1545 	    xmlNanoHTTPFreeCtxt(ctxt);
1546 	    goto retry;
1547 	}
1548 	xmlNanoHTTPFreeCtxt(ctxt);
1549 	if (redirURL != NULL) xmlFree(redirURL);
1550 #ifdef DEBUG_HTTP
1551 	xmlGenericError(xmlGenericErrorContext,
1552 		"xmlNanoHTTPMethodRedir: Too many redirects, aborting ...\n");
1553 #endif
1554 	return(NULL);
1555     }
1556 
1557     if (contentType != NULL) {
1558 	if (ctxt->contentType != NULL)
1559 	    *contentType = xmlMemStrdup(ctxt->contentType);
1560 	else
1561 	    *contentType = NULL;
1562     }
1563 
1564     if ((redir != NULL) && (redirURL != NULL)) {
1565 	*redir = redirURL;
1566     } else {
1567 	if (redirURL != NULL)
1568 	    xmlFree(redirURL);
1569 	if (redir != NULL)
1570 	    *redir = NULL;
1571     }
1572 
1573 #ifdef DEBUG_HTTP
1574     if (ctxt->contentType != NULL)
1575 	xmlGenericError(xmlGenericErrorContext,
1576 		"\nCode %d, content-type '%s'\n\n",
1577 	       ctxt->returnValue, ctxt->contentType);
1578     else
1579 	xmlGenericError(xmlGenericErrorContext,
1580 		"\nCode %d, no content-type\n\n",
1581 	       ctxt->returnValue);
1582 #endif
1583 
1584     return((void *) ctxt);
1585 }
1586 
/**
 * xmlNanoHTTPMethod:
 * @URL:  The URL to load
 * @method:  the HTTP method to use
 * @input:  the input string if any
 * @contentType:  the Content-Type information IN and OUT
 * @headers:  the extra headers
 * @ilen:  input length
 *
 * Same as xmlNanoHTTPMethodRedir() for callers which are not interested
 * in the redirected URL: all the arguments are forwarded unchanged and
 * NULL is passed as the redirection output location.
 *
 * Returns NULL in case of failure, otherwise a request handler.
 *     The contentType, if provided must be freed by the caller
 */

void*
xmlNanoHTTPMethod(const char *URL, const char *method, const char *input,
                  char **contentType, const char *headers, int ilen) {
    void *handle;

    handle = xmlNanoHTTPMethodRedir(URL, method, input, contentType,
                                    NULL, headers, ilen);
    return(handle);
}
1610 
/**
 * xmlNanoHTTPFetch:
 * @URL:  The URL to load
 * @filename:  the filename where the content should be saved, "-" selects
 *             the standard output
 * @contentType:  if available the Content-Type information will be
 *                returned at that location
 *
 * This function tries to fetch the indicated resource via HTTP GET
 * and saves its content in the file. An existing file is truncated
 * before the content is written.
 *
 * Returns -1 in case of failure, 0 in case of success. The contentType,
 *     if provided must be freed by the caller
 */
int
xmlNanoHTTPFetch(const char *URL, const char *filename, char **contentType) {
    void *ctxt = NULL;
    char *buf = NULL;
    int fd;
    int len;
    int ret = 0;
    int useStdout;

    if (filename == NULL) return(-1);
    ctxt = xmlNanoHTTPOpen(URL, contentType);
    if (ctxt == NULL) return(-1);

    useStdout = (strcmp(filename, "-") == 0);
    if (useStdout) {
        /* descriptor 1 is the standard output (0 would be the input) */
        fd = 1;
    } else {
        /* O_TRUNC: do not keep the tail of a previous, longer file */
        fd = open(filename, O_CREAT | O_WRONLY | O_TRUNC, 00644);
        if (fd < 0) {
            xmlNanoHTTPClose(ctxt);
            if ((contentType != NULL) && (*contentType != NULL)) {
                xmlFree(*contentType);
                *contentType = NULL;
            }
            return(-1);
        }
    }

    /*
     * buf is handed out by the context, so everything has to be written
     * before the context is closed.
     */
    xmlNanoHTTPFetchContent( ctxt, &buf, &len );
    while (len > 0) {
        /* write() may accept only a part of the data: loop until done */
        int written = (int) write(fd, buf, len);

        if (written < 0) {
            if (errno == EINTR)
                continue;
            ret = -1;
            break;
        }
        if (written == 0) {
            /* no progress at all: give up rather than spin forever */
            ret = -1;
            break;
        }
        buf += written;
        len -= written;
    }

    xmlNanoHTTPClose(ctxt);
    /* the standard output belongs to the process, leave it open */
    if (!useStdout)
        close(fd);
    return(ret);
}
1661 
1662 #ifdef LIBXML_OUTPUT_ENABLED
/**
 * xmlNanoHTTPSave:
 * @ctxt:  the HTTP context
 * @filename:  the filename where the content should be saved, "-" selects
 *             the standard output
 *
 * This function saves the output of the HTTP transaction to a file.
 * An existing file is truncated before the content is written.
 * It closes and frees the context at the end, including when the output
 * file cannot be opened; the context is left untouched only when one of
 * the arguments is NULL.
 *
 * Returns -1 in case of failure, 0 in case of success.
 */
int
xmlNanoHTTPSave(void *ctxt, const char *filename) {
    char *buf = NULL;
    int fd;
    int len;
    int ret = 0;
    int useStdout;

    if ((ctxt == NULL) || (filename == NULL)) return(-1);

    useStdout = (strcmp(filename, "-") == 0);
    if (useStdout) {
        /* descriptor 1 is the standard output (0 would be the input) */
        fd = 1;
    } else {
        /* O_TRUNC: do not keep the tail of a previous, longer file */
        fd = open(filename, O_CREAT | O_WRONLY | O_TRUNC, 0666);
        if (fd < 0) {
            xmlNanoHTTPClose(ctxt);
            return(-1);
        }
    }

    /*
     * buf is handed out by the context, so everything has to be written
     * before the context is closed.
     */
    xmlNanoHTTPFetchContent( ctxt, &buf, &len );
    while (len > 0) {
        /* write() may accept only a part of the data: loop until done */
        int written = (int) write(fd, buf, len);

        if (written < 0) {
            if (errno == EINTR)
                continue;
            ret = -1;
            break;
        }
        if (written == 0) {
            /* no progress at all: give up rather than spin forever */
            ret = -1;
            break;
        }
        buf += written;
        len -= written;
    }

    xmlNanoHTTPClose(ctxt);
    /* the standard output belongs to the process, leave it open */
    if (!useStdout)
        close(fd);
    return(ret);
}
1703 #endif /* LIBXML_OUTPUT_ENABLED */
1704 
1705 /**
1706  * xmlNanoHTTPReturnCode:
1707  * @ctx:  the HTTP context
1708  *
1709  * Get the latest HTTP return code received
1710  *
1711  * Returns the HTTP return code for the request.
1712  */
1713 int
xmlNanoHTTPReturnCode(void * ctx)1714 xmlNanoHTTPReturnCode(void *ctx) {
1715     xmlNanoHTTPCtxtPtr ctxt = (xmlNanoHTTPCtxtPtr) ctx;
1716 
1717     if (ctxt == NULL) return(-1);
1718 
1719     return(ctxt->returnValue);
1720 }
1721 
1722 /**
1723  * xmlNanoHTTPAuthHeader:
1724  * @ctx:  the HTTP context
1725  *
1726  * Get the authentication header of an HTTP context
1727  *
1728  * Returns the stashed value of the WWW-Authenticate or Proxy-Authenticate
1729  * header.
1730  */
1731 const char *
xmlNanoHTTPAuthHeader(void * ctx)1732 xmlNanoHTTPAuthHeader(void *ctx) {
1733     xmlNanoHTTPCtxtPtr ctxt = (xmlNanoHTTPCtxtPtr) ctx;
1734 
1735     if (ctxt == NULL) return(NULL);
1736 
1737     return(ctxt->authHeader);
1738 }
1739 
1740 /**
1741  * xmlNanoHTTPContentLength:
1742  * @ctx:  the HTTP context
1743  *
1744  * Provides the specified content length from the HTTP header.
1745  *
1746  * Return the specified content length from the HTTP header.  Note that
1747  * a value of -1 indicates that the content length element was not included in
1748  * the response header.
1749  */
1750 int
xmlNanoHTTPContentLength(void * ctx)1751 xmlNanoHTTPContentLength( void * ctx ) {
1752     xmlNanoHTTPCtxtPtr	ctxt = (xmlNanoHTTPCtxtPtr)ctx;
1753 
1754     return ( ( ctxt == NULL ) ? -1 : ctxt->ContentLength );
1755 }
1756 
1757 /**
1758  * xmlNanoHTTPRedir:
1759  * @ctx:  the HTTP context
1760  *
1761  * Provides the specified redirection URL if available from the HTTP header.
1762  *
1763  * Return the specified redirection URL or NULL if not redirected.
1764  */
1765 const char *
xmlNanoHTTPRedir(void * ctx)1766 xmlNanoHTTPRedir( void * ctx ) {
1767     xmlNanoHTTPCtxtPtr	ctxt = (xmlNanoHTTPCtxtPtr)ctx;
1768 
1769     return ( ( ctxt == NULL ) ? NULL : ctxt->location );
1770 }
1771 
1772 /**
1773  * xmlNanoHTTPEncoding:
1774  * @ctx:  the HTTP context
1775  *
1776  * Provides the specified encoding if specified in the HTTP headers.
1777  *
1778  * Return the specified encoding or NULL if not available
1779  */
1780 const char *
xmlNanoHTTPEncoding(void * ctx)1781 xmlNanoHTTPEncoding( void * ctx ) {
1782     xmlNanoHTTPCtxtPtr	ctxt = (xmlNanoHTTPCtxtPtr)ctx;
1783 
1784     return ( ( ctxt == NULL ) ? NULL : ctxt->encoding );
1785 }
1786 
1787 /**
1788  * xmlNanoHTTPMimeType:
1789  * @ctx:  the HTTP context
1790  *
1791  * Provides the specified Mime-Type if specified in the HTTP headers.
1792  *
1793  * Return the specified Mime-Type or NULL if not available
1794  */
1795 const char *
xmlNanoHTTPMimeType(void * ctx)1796 xmlNanoHTTPMimeType( void * ctx ) {
1797     xmlNanoHTTPCtxtPtr	ctxt = (xmlNanoHTTPCtxtPtr)ctx;
1798 
1799     return ( ( ctxt == NULL ) ? NULL : ctxt->mimeType );
1800 }
1801 
1802 /**
1803  * xmlNanoHTTPFetchContent:
1804  * @ctx:  the HTTP context
1805  * @ptr:  pointer to set to the content buffer.
1806  * @len:  integer pointer to hold the length of the content
1807  *
1808  * Check if all the content was read
1809  *
1810  * Returns 0 if all the content was read and available, returns
1811  * -1 if received content length was less than specified or an error
1812  * occurred.
1813  */
1814 static int
xmlNanoHTTPFetchContent(void * ctx,char ** ptr,int * len)1815 xmlNanoHTTPFetchContent( void * ctx, char ** ptr, int * len ) {
1816     xmlNanoHTTPCtxtPtr	ctxt = (xmlNanoHTTPCtxtPtr)ctx;
1817 
1818     int			rc = 0;
1819     int			cur_lgth;
1820     int			rcvd_lgth;
1821     int			dummy_int;
1822     char *		dummy_ptr = NULL;
1823 
1824     /*  Dummy up return input parameters if not provided  */
1825 
1826     if ( len == NULL )
1827         len = &dummy_int;
1828 
1829     if ( ptr == NULL )
1830         ptr = &dummy_ptr;
1831 
1832     /*  But can't work without the context pointer  */
1833 
1834     if ( ( ctxt == NULL ) || ( ctxt->content == NULL ) ) {
1835         *len = 0;
1836 	*ptr = NULL;
1837 	return ( -1 );
1838     }
1839 
1840     rcvd_lgth = ctxt->inptr - ctxt->content;
1841 
1842     while ( (cur_lgth = xmlNanoHTTPRecv( ctxt )) > 0 ) {
1843 
1844 	rcvd_lgth += cur_lgth;
1845 	if ( (ctxt->ContentLength > 0) && (rcvd_lgth >= ctxt->ContentLength) )
1846 	    break;
1847     }
1848 
1849     *ptr = ctxt->content;
1850     *len = rcvd_lgth;
1851 
1852     if ( ( ctxt->ContentLength > 0 ) && ( rcvd_lgth < ctxt->ContentLength ) )
1853         rc = -1;
1854     else if ( rcvd_lgth == 0 )
1855 	rc = -1;
1856 
1857     return ( rc );
1858 }
1859 
1860 #ifdef STANDALONE
main(int argc,char ** argv)1861 int main(int argc, char **argv) {
1862     char *contentType = NULL;
1863 
1864     if (argv[1] != NULL) {
1865 	if (argv[2] != NULL)
1866 	    xmlNanoHTTPFetch(argv[1], argv[2], &contentType);
1867         else
1868 	    xmlNanoHTTPFetch(argv[1], "-", &contentType);
1869 	if (contentType != NULL) xmlFree(contentType);
1870     } else {
1871         xmlGenericError(xmlGenericErrorContext,
1872 		"%s: minimal HTTP GET implementation\n", argv[0]);
1873         xmlGenericError(xmlGenericErrorContext,
1874 		"\tusage %s [ URL [ filename ] ]\n", argv[0]);
1875     }
1876     xmlNanoHTTPCleanup();
1877     xmlMemoryDump();
1878     return(0);
1879 }
1880 #endif /* STANDALONE */
1881 #else /* !LIBXML_HTTP_ENABLED */
1882 #ifdef STANDALONE
1883 #include <stdio.h>
main(int argc,char ** argv)1884 int main(int argc, char **argv) {
1885     xmlGenericError(xmlGenericErrorContext,
1886 	    "%s : HTTP support not compiled in\n", argv[0]);
1887     return(0);
1888 }
1889 #endif /* STANDALONE */
1890 #endif /* LIBXML_HTTP_ENABLED */
1891