1 /* 2 * nanohttp.c: minimalist HTTP GET implementation to fetch external subsets. 3 * focuses on size, streamability, reentrancy and portability 4 * 5 * This is clearly not a general purpose HTTP implementation 6 * If you look for one, check: 7 * http://www.w3.org/Library/ 8 * 9 * See Copyright for the status of this software. 10 * 11 * daniel@veillard.com 12 */ 13 14 #define IN_LIBXML 15 #include "libxml.h" 16 17 #ifdef LIBXML_HTTP_ENABLED 18 #include <string.h> 19 #include <stdlib.h> 20 #include <errno.h> 21 22 #ifdef HAVE_UNISTD_H 23 #include <unistd.h> 24 #endif 25 #ifdef HAVE_SYS_TYPES_H 26 #include <sys/types.h> 27 #endif 28 #ifdef HAVE_SYS_SOCKET_H 29 #include <sys/socket.h> 30 #endif 31 #ifdef HAVE_NETINET_IN_H 32 #include <netinet/in.h> 33 #endif 34 #ifdef HAVE_ARPA_INET_H 35 #include <arpa/inet.h> 36 #endif 37 #ifdef HAVE_NETDB_H 38 #include <netdb.h> 39 #endif 40 #ifdef HAVE_RESOLV_H 41 #ifdef HAVE_ARPA_NAMESER_H 42 #include <arpa/nameser.h> 43 #endif 44 #include <resolv.h> 45 #endif 46 #ifdef HAVE_FCNTL_H 47 #include <fcntl.h> 48 #endif 49 #ifdef HAVE_SYS_TIME_H 50 #include <sys/time.h> 51 #endif 52 #ifndef HAVE_POLL_H 53 #ifdef HAVE_SYS_SELECT_H 54 #include <sys/select.h> 55 #endif 56 #else 57 #include <poll.h> 58 #endif 59 #ifdef LIBXML_ZLIB_ENABLED 60 #include <zlib.h> 61 #endif 62 63 64 #ifdef VMS 65 #include <stropts> 66 #define XML_SOCKLEN_T unsigned int 67 #endif 68 69 #if defined(_WIN32) 70 #include <wsockcompat.h> 71 #endif 72 73 #include <libxml/globals.h> 74 #include <libxml/xmlerror.h> 75 #include <libxml/xmlmemory.h> 76 #include <libxml/parser.h> /* for xmlStr(n)casecmp() */ 77 #include <libxml/nanohttp.h> 78 #include <libxml/globals.h> 79 #include <libxml/uri.h> 80 81 /** 82 * A couple portability macros 83 */ 84 #ifndef _WINSOCKAPI_ 85 #if !defined(__BEOS__) || defined(__HAIKU__) 86 #define closesocket(s) close(s) 87 #endif 88 #define SOCKET int 89 #define INVALID_SOCKET (-1) 90 #endif 91 92 #ifdef __BEOS__ 93 #ifndef PF_INET 94 #define 
PF_INET AF_INET 95 #endif 96 #endif 97 98 #ifndef XML_SOCKLEN_T 99 #define XML_SOCKLEN_T unsigned int 100 #endif 101 102 #ifdef STANDALONE 103 #define DEBUG_HTTP 104 #define xmlStrncasecmp(a, b, n) strncasecmp((char *)a, (char *)b, n) 105 #define xmlStrcasecmpi(a, b) strcasecmp((char *)a, (char *)b) 106 #endif 107 108 #define XML_NANO_HTTP_MAX_REDIR 10 109 110 #define XML_NANO_HTTP_CHUNK 4096 111 112 #define XML_NANO_HTTP_CLOSED 0 113 #define XML_NANO_HTTP_WRITE 1 114 #define XML_NANO_HTTP_READ 2 115 #define XML_NANO_HTTP_NONE 4 116 117 typedef struct xmlNanoHTTPCtxt { 118 char *protocol; /* the protocol name */ 119 char *hostname; /* the host name */ 120 int port; /* the port */ 121 char *path; /* the path within the URL */ 122 char *query; /* the query string */ 123 SOCKET fd; /* the file descriptor for the socket */ 124 int state; /* WRITE / READ / CLOSED */ 125 char *out; /* buffer sent (zero terminated) */ 126 char *outptr; /* index within the buffer sent */ 127 char *in; /* the receiving buffer */ 128 char *content; /* the start of the content */ 129 char *inptr; /* the next byte to read from network */ 130 char *inrptr; /* the next byte to give back to the client */ 131 int inlen; /* len of the input buffer */ 132 int last; /* return code for last operation */ 133 int returnValue; /* the protocol return value */ 134 int version; /* the protocol version */ 135 int ContentLength; /* specified content length from HTTP header */ 136 char *contentType; /* the MIME type for the input */ 137 char *location; /* the new URL in case of redirect */ 138 char *authHeader; /* contents of {WWW,Proxy}-Authenticate header */ 139 char *encoding; /* encoding extracted from the contentType */ 140 char *mimeType; /* Mime-Type extracted from the contentType */ 141 #ifdef LIBXML_ZLIB_ENABLED 142 z_stream *strm; /* Zlib stream object */ 143 int usesGzip; /* "Content-Encoding: gzip" was detected */ 144 #endif 145 } xmlNanoHTTPCtxt, *xmlNanoHTTPCtxtPtr; 146 147 static int initialized 
= 0; 148 static char *proxy = NULL; /* the proxy name if any */ 149 static int proxyPort; /* the proxy port if any */ 150 static unsigned int timeout = 60;/* the select() timeout in seconds */ 151 152 static int xmlNanoHTTPFetchContent( void * ctx, char ** ptr, int * len ); 153 154 /** 155 * xmlHTTPErrMemory: 156 * @extra: extra information 157 * 158 * Handle an out of memory condition 159 */ 160 static void 161 xmlHTTPErrMemory(const char *extra) 162 { 163 __xmlSimpleError(XML_FROM_HTTP, XML_ERR_NO_MEMORY, NULL, NULL, extra); 164 } 165 166 /** 167 * A portability function 168 */ 169 static int socket_errno(void) { 170 #ifdef _WINSOCKAPI_ 171 int err = WSAGetLastError(); 172 switch(err) { 173 case WSAECONNRESET: 174 return(ECONNRESET); 175 case WSAEINPROGRESS: 176 return(EINPROGRESS); 177 case WSAEINTR: 178 return(EINTR); 179 case WSAESHUTDOWN: 180 return(ESHUTDOWN); 181 case WSAEWOULDBLOCK: 182 return(EWOULDBLOCK); 183 default: 184 return(err); 185 } 186 #else 187 return(errno); 188 #endif 189 } 190 191 #ifdef SUPPORT_IP6 192 static 193 int have_ipv6(void) { 194 SOCKET s; 195 196 s = socket (AF_INET6, SOCK_STREAM, 0); 197 if (s != INVALID_SOCKET) { 198 close (s); 199 return (1); 200 } 201 return (0); 202 } 203 #endif 204 205 /** 206 * xmlNanoHTTPInit: 207 * 208 * Initialize the HTTP protocol layer. 
209 * Currently it just checks for proxy information 210 */ 211 212 void 213 xmlNanoHTTPInit(void) { 214 const char *env; 215 #ifdef _WINSOCKAPI_ 216 WSADATA wsaData; 217 #endif 218 219 if (initialized) 220 return; 221 222 #ifdef _WINSOCKAPI_ 223 if (WSAStartup(MAKEWORD(1, 1), &wsaData) != 0) 224 return; 225 #endif 226 227 if (proxy == NULL) { 228 proxyPort = 80; 229 env = getenv("no_proxy"); 230 if (env && ((env[0] == '*') && (env[1] == 0))) 231 goto done; 232 env = getenv("http_proxy"); 233 if (env != NULL) { 234 xmlNanoHTTPScanProxy(env); 235 goto done; 236 } 237 env = getenv("HTTP_PROXY"); 238 if (env != NULL) { 239 xmlNanoHTTPScanProxy(env); 240 goto done; 241 } 242 } 243 done: 244 initialized = 1; 245 } 246 247 /** 248 * xmlNanoHTTPCleanup: 249 * 250 * Cleanup the HTTP protocol layer. 251 */ 252 253 void 254 xmlNanoHTTPCleanup(void) { 255 if (proxy != NULL) { 256 xmlFree(proxy); 257 proxy = NULL; 258 } 259 #ifdef _WINSOCKAPI_ 260 if (initialized) 261 WSACleanup(); 262 #endif 263 initialized = 0; 264 return; 265 } 266 267 /** 268 * xmlNanoHTTPScanURL: 269 * @ctxt: an HTTP context 270 * @URL: The URL used to initialize the context 271 * 272 * (Re)Initialize an HTTP context by parsing the URL and finding 273 * the protocol host port and path it indicates. 
274 */ 275 276 static void 277 xmlNanoHTTPScanURL(xmlNanoHTTPCtxtPtr ctxt, const char *URL) { 278 xmlURIPtr uri; 279 int len; 280 281 /* 282 * Clear any existing data from the context 283 */ 284 if (ctxt->protocol != NULL) { 285 xmlFree(ctxt->protocol); 286 ctxt->protocol = NULL; 287 } 288 if (ctxt->hostname != NULL) { 289 xmlFree(ctxt->hostname); 290 ctxt->hostname = NULL; 291 } 292 if (ctxt->path != NULL) { 293 xmlFree(ctxt->path); 294 ctxt->path = NULL; 295 } 296 if (ctxt->query != NULL) { 297 xmlFree(ctxt->query); 298 ctxt->query = NULL; 299 } 300 if (URL == NULL) return; 301 302 uri = xmlParseURIRaw(URL, 1); 303 if (uri == NULL) 304 return; 305 306 if ((uri->scheme == NULL) || (uri->server == NULL)) { 307 xmlFreeURI(uri); 308 return; 309 } 310 311 ctxt->protocol = xmlMemStrdup(uri->scheme); 312 /* special case of IPv6 addresses, the [] need to be removed */ 313 if ((uri->server != NULL) && (*uri->server == '[')) { 314 len = strlen(uri->server); 315 if ((len > 2) && (uri->server[len - 1] == ']')) { 316 ctxt->hostname = (char *) xmlCharStrndup(uri->server + 1, len -2); 317 } else 318 ctxt->hostname = xmlMemStrdup(uri->server); 319 } else 320 ctxt->hostname = xmlMemStrdup(uri->server); 321 if (uri->path != NULL) 322 ctxt->path = xmlMemStrdup(uri->path); 323 else 324 ctxt->path = xmlMemStrdup("/"); 325 if (uri->query != NULL) 326 ctxt->query = xmlMemStrdup(uri->query); 327 if (uri->port != 0) 328 ctxt->port = uri->port; 329 330 xmlFreeURI(uri); 331 } 332 333 /** 334 * xmlNanoHTTPScanProxy: 335 * @URL: The proxy URL used to initialize the proxy context 336 * 337 * (Re)Initialize the HTTP Proxy context by parsing the URL and finding 338 * the protocol host port it indicates. 339 * Should be like http://myproxy/ or http://myproxy:3128/ 340 * A NULL URL cleans up proxy information. 
341 */ 342 343 void 344 xmlNanoHTTPScanProxy(const char *URL) { 345 xmlURIPtr uri; 346 347 if (proxy != NULL) { 348 xmlFree(proxy); 349 proxy = NULL; 350 } 351 proxyPort = 0; 352 353 #ifdef DEBUG_HTTP 354 if (URL == NULL) 355 xmlGenericError(xmlGenericErrorContext, 356 "Removing HTTP proxy info\n"); 357 else 358 xmlGenericError(xmlGenericErrorContext, 359 "Using HTTP proxy %s\n", URL); 360 #endif 361 if (URL == NULL) return; 362 363 uri = xmlParseURIRaw(URL, 1); 364 if ((uri == NULL) || (uri->scheme == NULL) || 365 (strcmp(uri->scheme, "http")) || (uri->server == NULL)) { 366 __xmlIOErr(XML_FROM_HTTP, XML_HTTP_URL_SYNTAX, "Syntax Error\n"); 367 if (uri != NULL) 368 xmlFreeURI(uri); 369 return; 370 } 371 372 proxy = xmlMemStrdup(uri->server); 373 if (uri->port != 0) 374 proxyPort = uri->port; 375 376 xmlFreeURI(uri); 377 } 378 379 /** 380 * xmlNanoHTTPNewCtxt: 381 * @URL: The URL used to initialize the context 382 * 383 * Allocate and initialize a new HTTP context. 384 * 385 * Returns an HTTP context or NULL in case of error. 386 */ 387 388 static xmlNanoHTTPCtxtPtr 389 xmlNanoHTTPNewCtxt(const char *URL) { 390 xmlNanoHTTPCtxtPtr ret; 391 392 ret = (xmlNanoHTTPCtxtPtr) xmlMalloc(sizeof(xmlNanoHTTPCtxt)); 393 if (ret == NULL) { 394 xmlHTTPErrMemory("allocating context"); 395 return(NULL); 396 } 397 398 memset(ret, 0, sizeof(xmlNanoHTTPCtxt)); 399 ret->port = 80; 400 ret->returnValue = 0; 401 ret->fd = INVALID_SOCKET; 402 ret->ContentLength = -1; 403 404 xmlNanoHTTPScanURL(ret, URL); 405 406 return(ret); 407 } 408 409 /** 410 * xmlNanoHTTPFreeCtxt: 411 * @ctxt: an HTTP context 412 * 413 * Frees the context after closing the connection. 
414 */ 415 416 static void 417 xmlNanoHTTPFreeCtxt(xmlNanoHTTPCtxtPtr ctxt) { 418 if (ctxt == NULL) return; 419 if (ctxt->hostname != NULL) xmlFree(ctxt->hostname); 420 if (ctxt->protocol != NULL) xmlFree(ctxt->protocol); 421 if (ctxt->path != NULL) xmlFree(ctxt->path); 422 if (ctxt->query != NULL) xmlFree(ctxt->query); 423 if (ctxt->out != NULL) xmlFree(ctxt->out); 424 if (ctxt->in != NULL) xmlFree(ctxt->in); 425 if (ctxt->contentType != NULL) xmlFree(ctxt->contentType); 426 if (ctxt->encoding != NULL) xmlFree(ctxt->encoding); 427 if (ctxt->mimeType != NULL) xmlFree(ctxt->mimeType); 428 if (ctxt->location != NULL) xmlFree(ctxt->location); 429 if (ctxt->authHeader != NULL) xmlFree(ctxt->authHeader); 430 #ifdef LIBXML_ZLIB_ENABLED 431 if (ctxt->strm != NULL) { 432 inflateEnd(ctxt->strm); 433 xmlFree(ctxt->strm); 434 } 435 #endif 436 437 ctxt->state = XML_NANO_HTTP_NONE; 438 if (ctxt->fd != INVALID_SOCKET) closesocket(ctxt->fd); 439 ctxt->fd = INVALID_SOCKET; 440 xmlFree(ctxt); 441 } 442 443 /** 444 * xmlNanoHTTPSend: 445 * @ctxt: an HTTP context 446 * 447 * Send the input needed to initiate the processing on the server side 448 * Returns number of bytes sent or -1 on error. 
449 */ 450 451 static int 452 xmlNanoHTTPSend(xmlNanoHTTPCtxtPtr ctxt, const char *xmt_ptr, int outlen) 453 { 454 int total_sent = 0; 455 #ifdef HAVE_POLL_H 456 struct pollfd p; 457 #else 458 struct timeval tv; 459 fd_set wfd; 460 #endif 461 462 if ((ctxt->state & XML_NANO_HTTP_WRITE) && (xmt_ptr != NULL)) { 463 while (total_sent < outlen) { 464 int nsent = send(ctxt->fd, SEND_ARG2_CAST (xmt_ptr + total_sent), 465 outlen - total_sent, 0); 466 467 if (nsent > 0) 468 total_sent += nsent; 469 else if ((nsent == -1) && 470 #if defined(EAGAIN) && EAGAIN != EWOULDBLOCK 471 (socket_errno() != EAGAIN) && 472 #endif 473 (socket_errno() != EWOULDBLOCK)) { 474 __xmlIOErr(XML_FROM_HTTP, 0, "send failed\n"); 475 if (total_sent == 0) 476 total_sent = -1; 477 break; 478 } else { 479 /* 480 * No data sent 481 * Since non-blocking sockets are used, wait for 482 * socket to be writable or default timeout prior 483 * to retrying. 484 */ 485 #ifndef HAVE_POLL_H 486 #ifndef _WINSOCKAPI_ 487 if (ctxt->fd > FD_SETSIZE) 488 return -1; 489 #endif 490 491 tv.tv_sec = timeout; 492 tv.tv_usec = 0; 493 FD_ZERO(&wfd); 494 #ifdef _MSC_VER 495 #pragma warning(push) 496 #pragma warning(disable: 4018) 497 #endif 498 FD_SET(ctxt->fd, &wfd); 499 #ifdef _MSC_VER 500 #pragma warning(pop) 501 #endif 502 (void) select(ctxt->fd + 1, NULL, &wfd, NULL, &tv); 503 #else 504 p.fd = ctxt->fd; 505 p.events = POLLOUT; 506 (void) poll(&p, 1, timeout * 1000); 507 #endif /* !HAVE_POLL_H */ 508 } 509 } 510 } 511 512 return total_sent; 513 } 514 515 /** 516 * xmlNanoHTTPRecv: 517 * @ctxt: an HTTP context 518 * 519 * Read information coming from the HTTP connection. 520 * This is a blocking call (but it blocks in select(), not read()). 521 * 522 * Returns the number of byte read or -1 in case of error. 
523 */ 524 525 static int 526 xmlNanoHTTPRecv(xmlNanoHTTPCtxtPtr ctxt) 527 { 528 #ifdef HAVE_POLL_H 529 struct pollfd p; 530 #else 531 fd_set rfd; 532 struct timeval tv; 533 #endif 534 535 536 while (ctxt->state & XML_NANO_HTTP_READ) { 537 if (ctxt->in == NULL) { 538 ctxt->in = (char *) xmlMallocAtomic(65000 * sizeof(char)); 539 if (ctxt->in == NULL) { 540 xmlHTTPErrMemory("allocating input"); 541 ctxt->last = -1; 542 return (-1); 543 } 544 ctxt->inlen = 65000; 545 ctxt->inptr = ctxt->content = ctxt->inrptr = ctxt->in; 546 } 547 if (ctxt->inrptr > ctxt->in + XML_NANO_HTTP_CHUNK) { 548 int delta = ctxt->inrptr - ctxt->in; 549 int len = ctxt->inptr - ctxt->inrptr; 550 551 memmove(ctxt->in, ctxt->inrptr, len); 552 ctxt->inrptr -= delta; 553 ctxt->content -= delta; 554 ctxt->inptr -= delta; 555 } 556 if ((ctxt->in + ctxt->inlen) < (ctxt->inptr + XML_NANO_HTTP_CHUNK)) { 557 int d_inptr = ctxt->inptr - ctxt->in; 558 int d_content = ctxt->content - ctxt->in; 559 int d_inrptr = ctxt->inrptr - ctxt->in; 560 char *tmp_ptr = ctxt->in; 561 562 ctxt->inlen *= 2; 563 ctxt->in = (char *) xmlRealloc(tmp_ptr, ctxt->inlen); 564 if (ctxt->in == NULL) { 565 xmlHTTPErrMemory("allocating input buffer"); 566 xmlFree(tmp_ptr); 567 ctxt->last = -1; 568 return (-1); 569 } 570 ctxt->inptr = ctxt->in + d_inptr; 571 ctxt->content = ctxt->in + d_content; 572 ctxt->inrptr = ctxt->in + d_inrptr; 573 } 574 ctxt->last = recv(ctxt->fd, ctxt->inptr, XML_NANO_HTTP_CHUNK, 0); 575 if (ctxt->last > 0) { 576 ctxt->inptr += ctxt->last; 577 return (ctxt->last); 578 } 579 if (ctxt->last == 0) { 580 return (0); 581 } 582 if (ctxt->last == -1) { 583 switch (socket_errno()) { 584 case EINPROGRESS: 585 case EWOULDBLOCK: 586 #if defined(EAGAIN) && EAGAIN != EWOULDBLOCK 587 case EAGAIN: 588 #endif 589 break; 590 591 case ECONNRESET: 592 case ESHUTDOWN: 593 return (0); 594 595 default: 596 __xmlIOErr(XML_FROM_HTTP, 0, "recv failed\n"); 597 return (-1); 598 } 599 } 600 #ifdef HAVE_POLL_H 601 p.fd = ctxt->fd; 602 
p.events = POLLIN; 603 if ((poll(&p, 1, timeout * 1000) < 1) 604 #if defined(EINTR) 605 && (errno != EINTR) 606 #endif 607 ) 608 return (0); 609 #else /* !HAVE_POLL_H */ 610 #ifndef _WINSOCKAPI_ 611 if (ctxt->fd > FD_SETSIZE) 612 return 0; 613 #endif 614 615 tv.tv_sec = timeout; 616 tv.tv_usec = 0; 617 FD_ZERO(&rfd); 618 619 #ifdef _MSC_VER 620 #pragma warning(push) 621 #pragma warning(disable: 4018) 622 #endif 623 624 FD_SET(ctxt->fd, &rfd); 625 626 #ifdef _MSC_VER 627 #pragma warning(pop) 628 #endif 629 630 if ((select(ctxt->fd + 1, &rfd, NULL, NULL, &tv) < 1) 631 #if defined(EINTR) 632 && (socket_errno() != EINTR) 633 #endif 634 ) 635 return (0); 636 #endif /* !HAVE_POLL_H */ 637 } 638 return (0); 639 } 640 641 /** 642 * xmlNanoHTTPReadLine: 643 * @ctxt: an HTTP context 644 * 645 * Read one line in the HTTP server output, usually for extracting 646 * the HTTP protocol information from the answer header. 647 * 648 * Returns a newly allocated string with a copy of the line, or NULL 649 * which indicate the end of the input. 650 */ 651 652 static char * 653 xmlNanoHTTPReadLine(xmlNanoHTTPCtxtPtr ctxt) { 654 char buf[4096]; 655 char *bp = buf; 656 int rc; 657 658 while (bp - buf < 4095) { 659 if (ctxt->inrptr == ctxt->inptr) { 660 if ( (rc = xmlNanoHTTPRecv(ctxt)) == 0) { 661 if (bp == buf) 662 return(NULL); 663 else 664 *bp = 0; 665 return(xmlMemStrdup(buf)); 666 } 667 else if ( rc == -1 ) { 668 return ( NULL ); 669 } 670 } 671 *bp = *ctxt->inrptr++; 672 if (*bp == '\n') { 673 *bp = 0; 674 return(xmlMemStrdup(buf)); 675 } 676 if (*bp != '\r') 677 bp++; 678 } 679 buf[4095] = 0; 680 return(xmlMemStrdup(buf)); 681 } 682 683 684 /** 685 * xmlNanoHTTPScanAnswer: 686 * @ctxt: an HTTP context 687 * @line: an HTTP header line 688 * 689 * Try to extract useful information from the server answer. 690 * We currently parse and process: 691 * - The HTTP revision/ return code 692 * - The Content-Type, Mime-Type and charset used 693 * - The Location for redirect processing. 
694 * 695 * Returns -1 in case of failure, the file descriptor number otherwise 696 */ 697 698 static void 699 xmlNanoHTTPScanAnswer(xmlNanoHTTPCtxtPtr ctxt, const char *line) { 700 const char *cur = line; 701 702 if (line == NULL) return; 703 704 if (!strncmp(line, "HTTP/", 5)) { 705 int version = 0; 706 int ret = 0; 707 708 cur += 5; 709 while ((*cur >= '0') && (*cur <= '9')) { 710 version *= 10; 711 version += *cur - '0'; 712 cur++; 713 } 714 if (*cur == '.') { 715 cur++; 716 if ((*cur >= '0') && (*cur <= '9')) { 717 version *= 10; 718 version += *cur - '0'; 719 cur++; 720 } 721 while ((*cur >= '0') && (*cur <= '9')) 722 cur++; 723 } else 724 version *= 10; 725 if ((*cur != ' ') && (*cur != '\t')) return; 726 while ((*cur == ' ') || (*cur == '\t')) cur++; 727 if ((*cur < '0') || (*cur > '9')) return; 728 while ((*cur >= '0') && (*cur <= '9')) { 729 ret *= 10; 730 ret += *cur - '0'; 731 cur++; 732 } 733 if ((*cur != 0) && (*cur != ' ') && (*cur != '\t')) return; 734 ctxt->returnValue = ret; 735 ctxt->version = version; 736 } else if (!xmlStrncasecmp(BAD_CAST line, BAD_CAST"Content-Type:", 13)) { 737 const xmlChar *charset, *last, *mime; 738 cur += 13; 739 while ((*cur == ' ') || (*cur == '\t')) cur++; 740 if (ctxt->contentType != NULL) 741 xmlFree(ctxt->contentType); 742 ctxt->contentType = xmlMemStrdup(cur); 743 mime = (const xmlChar *) cur; 744 last = mime; 745 while ((*last != 0) && (*last != ' ') && (*last != '\t') && 746 (*last != ';') && (*last != ',')) 747 last++; 748 if (ctxt->mimeType != NULL) 749 xmlFree(ctxt->mimeType); 750 ctxt->mimeType = (char *) xmlStrndup(mime, last - mime); 751 charset = xmlStrstr(BAD_CAST ctxt->contentType, BAD_CAST "charset="); 752 if (charset != NULL) { 753 charset += 8; 754 last = charset; 755 while ((*last != 0) && (*last != ' ') && (*last != '\t') && 756 (*last != ';') && (*last != ',')) 757 last++; 758 if (ctxt->encoding != NULL) 759 xmlFree(ctxt->encoding); 760 ctxt->encoding = (char *) xmlStrndup(charset, last - 
charset); 761 } 762 } else if (!xmlStrncasecmp(BAD_CAST line, BAD_CAST"ContentType:", 12)) { 763 const xmlChar *charset, *last, *mime; 764 cur += 12; 765 if (ctxt->contentType != NULL) return; 766 while ((*cur == ' ') || (*cur == '\t')) cur++; 767 ctxt->contentType = xmlMemStrdup(cur); 768 mime = (const xmlChar *) cur; 769 last = mime; 770 while ((*last != 0) && (*last != ' ') && (*last != '\t') && 771 (*last != ';') && (*last != ',')) 772 last++; 773 if (ctxt->mimeType != NULL) 774 xmlFree(ctxt->mimeType); 775 ctxt->mimeType = (char *) xmlStrndup(mime, last - mime); 776 charset = xmlStrstr(BAD_CAST ctxt->contentType, BAD_CAST "charset="); 777 if (charset != NULL) { 778 charset += 8; 779 last = charset; 780 while ((*last != 0) && (*last != ' ') && (*last != '\t') && 781 (*last != ';') && (*last != ',')) 782 last++; 783 if (ctxt->encoding != NULL) 784 xmlFree(ctxt->encoding); 785 ctxt->encoding = (char *) xmlStrndup(charset, last - charset); 786 } 787 } else if (!xmlStrncasecmp(BAD_CAST line, BAD_CAST"Location:", 9)) { 788 cur += 9; 789 while ((*cur == ' ') || (*cur == '\t')) cur++; 790 if (ctxt->location != NULL) 791 xmlFree(ctxt->location); 792 if (*cur == '/') { 793 xmlChar *tmp_http = xmlStrdup(BAD_CAST "http://"); 794 xmlChar *tmp_loc = 795 xmlStrcat(tmp_http, (const xmlChar *) ctxt->hostname); 796 ctxt->location = 797 (char *) xmlStrcat (tmp_loc, (const xmlChar *) cur); 798 } else { 799 ctxt->location = xmlMemStrdup(cur); 800 } 801 } else if (!xmlStrncasecmp(BAD_CAST line, BAD_CAST"WWW-Authenticate:", 17)) { 802 cur += 17; 803 while ((*cur == ' ') || (*cur == '\t')) cur++; 804 if (ctxt->authHeader != NULL) 805 xmlFree(ctxt->authHeader); 806 ctxt->authHeader = xmlMemStrdup(cur); 807 } else if (!xmlStrncasecmp(BAD_CAST line, BAD_CAST"Proxy-Authenticate:", 19)) { 808 cur += 19; 809 while ((*cur == ' ') || (*cur == '\t')) cur++; 810 if (ctxt->authHeader != NULL) 811 xmlFree(ctxt->authHeader); 812 ctxt->authHeader = xmlMemStrdup(cur); 813 #ifdef LIBXML_ZLIB_ENABLED 
814 } else if ( !xmlStrncasecmp( BAD_CAST line, BAD_CAST"Content-Encoding:", 17) ) { 815 cur += 17; 816 while ((*cur == ' ') || (*cur == '\t')) cur++; 817 if ( !xmlStrncasecmp( BAD_CAST cur, BAD_CAST"gzip", 4) ) { 818 ctxt->usesGzip = 1; 819 820 ctxt->strm = xmlMalloc(sizeof(z_stream)); 821 822 if (ctxt->strm != NULL) { 823 ctxt->strm->zalloc = Z_NULL; 824 ctxt->strm->zfree = Z_NULL; 825 ctxt->strm->opaque = Z_NULL; 826 ctxt->strm->avail_in = 0; 827 ctxt->strm->next_in = Z_NULL; 828 829 inflateInit2( ctxt->strm, 31 ); 830 } 831 } 832 #endif 833 } else if ( !xmlStrncasecmp( BAD_CAST line, BAD_CAST"Content-Length:", 15) ) { 834 cur += 15; 835 ctxt->ContentLength = strtol( cur, NULL, 10 ); 836 } 837 } 838 839 /** 840 * xmlNanoHTTPConnectAttempt: 841 * @addr: a socket address structure 842 * 843 * Attempt a connection to the given IP:port endpoint. It forces 844 * non-blocking semantic on the socket, and allow 60 seconds for 845 * the host to answer. 846 * 847 * Returns -1 in case of failure, the file descriptor number otherwise 848 */ 849 850 static SOCKET 851 xmlNanoHTTPConnectAttempt(struct sockaddr *addr) 852 { 853 #ifndef HAVE_POLL_H 854 fd_set wfd; 855 #ifdef _WINSOCKAPI_ 856 fd_set xfd; 857 #endif 858 struct timeval tv; 859 #else /* !HAVE_POLL_H */ 860 struct pollfd p; 861 #endif /* !HAVE_POLL_H */ 862 int status; 863 864 int addrlen; 865 866 SOCKET s; 867 868 #ifdef SUPPORT_IP6 869 if (addr->sa_family == AF_INET6) { 870 s = socket(PF_INET6, SOCK_STREAM, IPPROTO_TCP); 871 addrlen = sizeof(struct sockaddr_in6); 872 } else 873 #endif 874 { 875 s = socket(PF_INET, SOCK_STREAM, IPPROTO_TCP); 876 addrlen = sizeof(struct sockaddr_in); 877 } 878 if (s == INVALID_SOCKET) { 879 #ifdef DEBUG_HTTP 880 perror("socket"); 881 #endif 882 __xmlIOErr(XML_FROM_HTTP, 0, "socket failed\n"); 883 return INVALID_SOCKET; 884 } 885 #ifdef _WINSOCKAPI_ 886 { 887 u_long one = 1; 888 889 status = ioctlsocket(s, FIONBIO, &one) == SOCKET_ERROR ? 
-1 : 0; 890 } 891 #else /* _WINSOCKAPI_ */ 892 #if defined(VMS) 893 { 894 int enable = 1; 895 896 status = ioctl(s, FIONBIO, &enable); 897 } 898 #else /* VMS */ 899 #if defined(__BEOS__) && !defined(__HAIKU__) 900 { 901 bool noblock = true; 902 903 status = 904 setsockopt(s, SOL_SOCKET, SO_NONBLOCK, &noblock, 905 sizeof(noblock)); 906 } 907 #else /* __BEOS__ */ 908 if ((status = fcntl(s, F_GETFL, 0)) != -1) { 909 #ifdef O_NONBLOCK 910 status |= O_NONBLOCK; 911 #else /* O_NONBLOCK */ 912 #ifdef F_NDELAY 913 status |= F_NDELAY; 914 #endif /* F_NDELAY */ 915 #endif /* !O_NONBLOCK */ 916 status = fcntl(s, F_SETFL, status); 917 } 918 if (status < 0) { 919 #ifdef DEBUG_HTTP 920 perror("nonblocking"); 921 #endif 922 __xmlIOErr(XML_FROM_HTTP, 0, "error setting non-blocking IO\n"); 923 closesocket(s); 924 return INVALID_SOCKET; 925 } 926 #endif /* !__BEOS__ */ 927 #endif /* !VMS */ 928 #endif /* !_WINSOCKAPI_ */ 929 930 if (connect(s, addr, addrlen) == -1) { 931 switch (socket_errno()) { 932 case EINPROGRESS: 933 case EWOULDBLOCK: 934 break; 935 default: 936 __xmlIOErr(XML_FROM_HTTP, 0, 937 "error connecting to HTTP server"); 938 closesocket(s); 939 return INVALID_SOCKET; 940 } 941 } 942 #ifndef HAVE_POLL_H 943 tv.tv_sec = timeout; 944 tv.tv_usec = 0; 945 946 #ifdef _MSC_VER 947 #pragma warning(push) 948 #pragma warning(disable: 4018) 949 #endif 950 #ifndef _WINSOCKAPI_ 951 if (s > FD_SETSIZE) 952 return INVALID_SOCKET; 953 #endif 954 FD_ZERO(&wfd); 955 FD_SET(s, &wfd); 956 957 #ifdef _WINSOCKAPI_ 958 FD_ZERO(&xfd); 959 FD_SET(s, &xfd); 960 961 switch (select(s + 1, NULL, &wfd, &xfd, &tv)) 962 #else 963 switch (select(s + 1, NULL, &wfd, NULL, &tv)) 964 #endif 965 #ifdef _MSC_VER 966 #pragma warning(pop) 967 #endif 968 969 #else /* !HAVE_POLL_H */ 970 p.fd = s; 971 p.events = POLLOUT; 972 switch (poll(&p, 1, timeout * 1000)) 973 #endif /* !HAVE_POLL_H */ 974 975 { 976 case 0: 977 /* Time out */ 978 __xmlIOErr(XML_FROM_HTTP, 0, "Connect attempt timed out"); 979 
closesocket(s); 980 return INVALID_SOCKET; 981 case -1: 982 /* Ermm.. ?? */ 983 __xmlIOErr(XML_FROM_HTTP, 0, "Connect failed"); 984 closesocket(s); 985 return INVALID_SOCKET; 986 } 987 988 #ifndef HAVE_POLL_H 989 if (FD_ISSET(s, &wfd) 990 #ifdef _WINSOCKAPI_ 991 || FD_ISSET(s, &xfd) 992 #endif 993 ) 994 #else /* !HAVE_POLL_H */ 995 if (p.revents == POLLOUT) 996 #endif /* !HAVE_POLL_H */ 997 { 998 XML_SOCKLEN_T len; 999 1000 len = sizeof(status); 1001 #ifdef SO_ERROR 1002 if (getsockopt(s, SOL_SOCKET, SO_ERROR, (char *) &status, &len) < 1003 0) { 1004 /* Solaris error code */ 1005 __xmlIOErr(XML_FROM_HTTP, 0, "getsockopt failed\n"); 1006 closesocket(s); 1007 return INVALID_SOCKET; 1008 } 1009 #endif 1010 if (status) { 1011 __xmlIOErr(XML_FROM_HTTP, 0, 1012 "Error connecting to remote host"); 1013 closesocket(s); 1014 errno = status; 1015 return INVALID_SOCKET; 1016 } 1017 } else { 1018 /* pbm */ 1019 __xmlIOErr(XML_FROM_HTTP, 0, "select failed\n"); 1020 closesocket(s); 1021 return INVALID_SOCKET; 1022 } 1023 1024 return (s); 1025 } 1026 1027 /** 1028 * xmlNanoHTTPConnectHost: 1029 * @host: the host name 1030 * @port: the port number 1031 * 1032 * Attempt a connection to the given host:port endpoint. It tries 1033 * the multiple IP provided by the DNS if available. 
 *
 * Depending on the build configuration the name is resolved with
 * getaddrinfo() and/or gethostbyname(); each resolved address is
 * passed to xmlNanoHTTPConnectAttempt() until one succeeds.
 *
 * Returns -1 in case of failure, the file descriptor number otherwise
 */

static SOCKET
xmlNanoHTTPConnectHost(const char *host, int port)
{
    struct sockaddr *addr = NULL;
    struct sockaddr_in sockin;

#ifdef SUPPORT_IP6
    struct in6_addr ia6;
    struct sockaddr_in6 sockin6;
#endif
    SOCKET s;

    memset (&sockin, 0, sizeof(sockin));
#ifdef SUPPORT_IP6
    memset (&sockin6, 0, sizeof(sockin6));
#endif

#if !defined(HAVE_GETADDRINFO) && defined(SUPPORT_IP6) && defined(RES_USE_INET6)
    /* no getaddrinfo(): set RES_USE_INET6 on the resolver when IPv6 works */
    if (have_ipv6 ())
    {
        if (!(_res.options & RES_INIT))
            res_init();
        _res.options |= RES_USE_INET6;
    }
#endif

    /*
     * The next two conditionals assemble, at preprocessing time, either
     * "if (have_ipv6()) { getaddrinfo path } else { gethostbyname path }"
     * or just one of the two braced blocks.
     */
#if defined(HAVE_GETADDRINFO) && defined(SUPPORT_IP6) && !defined(_WIN32)
    if (have_ipv6 ())
#endif
#if defined(HAVE_GETADDRINFO) && (defined(SUPPORT_IP6) || defined(_WIN32))
    {
        int status;
        struct addrinfo hints, *res, *result;

        result = NULL;
        memset (&hints, 0,sizeof(hints));
        hints.ai_socktype = SOCK_STREAM;

        status = getaddrinfo (host, NULL, &hints, &result);
        if (status) {
            __xmlIOErr(XML_FROM_HTTP, 0, "getaddrinfo failed\n");
            return INVALID_SOCKET;
        }

        /* try every returned address until one connects */
        for (res = result; res; res = res->ai_next) {
            if (res->ai_family == AF_INET) {
                if ((size_t)res->ai_addrlen > sizeof(sockin)) {
                    __xmlIOErr(XML_FROM_HTTP, 0, "address size mismatch\n");
                    freeaddrinfo (result);
                    return INVALID_SOCKET;
                }
                memcpy (&sockin, res->ai_addr, res->ai_addrlen);
                /* the lookup was done without a service: set the port here */
                sockin.sin_port = htons (port);
                addr = (struct sockaddr *)&sockin;
#ifdef SUPPORT_IP6
            } else if (have_ipv6 () && (res->ai_family == AF_INET6)) {
                if ((size_t)res->ai_addrlen > sizeof(sockin6)) {
                    __xmlIOErr(XML_FROM_HTTP, 0, "address size mismatch\n");
                    freeaddrinfo (result);
                    return INVALID_SOCKET;
                }
                memcpy (&sockin6, res->ai_addr, res->ai_addrlen);
                sockin6.sin6_port = htons (port);
                addr = (struct sockaddr *)&sockin6;
#endif
            } else
                continue;              /* for */

            s = xmlNanoHTTPConnectAttempt (addr);
            if (s != INVALID_SOCKET) {
                freeaddrinfo (result);
                return (s);
            }
        }

        if (result)
            freeaddrinfo (result);
    }
#endif
#if defined(HAVE_GETADDRINFO) && defined(SUPPORT_IP6) && !defined(_WIN32)
    else
#endif
#if !defined(HAVE_GETADDRINFO) || !defined(_WIN32)
    {
        struct hostent *h;
        struct in_addr ia;
        int i;

        h = gethostbyname (GETHOSTBYNAME_ARG_CAST host);
        if (h == NULL) {

/*
 * Okay, I got fed up by the non-portability of this error message
 * extraction code. it work on Linux, if it work on your platform
 * and one want to enable it, send me the defined(foobar) needed
 */
#if defined(HAVE_NETDB_H) && defined(HOST_NOT_FOUND) && defined(__linux__)
            const char *h_err_txt = "";

            switch (h_errno) {
                case HOST_NOT_FOUND:
                    h_err_txt = "Authoritative host not found";
                    break;

                case TRY_AGAIN:
                    h_err_txt =
                        "Non-authoritative host not found or server failure.";
                    break;

                case NO_RECOVERY:
                    h_err_txt =
                        "Non-recoverable errors:  FORMERR, REFUSED, or NOTIMP.";
                    break;

#ifdef NO_ADDRESS
                case NO_ADDRESS:
                    h_err_txt =
                        "Valid name, no data record of requested type.";
                    break;
#endif

                default:
                    h_err_txt = "No error text defined.";
                    break;
            }
            __xmlIOErr(XML_FROM_HTTP, 0, h_err_txt);
#else
            __xmlIOErr(XML_FROM_HTTP, 0, "Failed to resolve host");
#endif
            return INVALID_SOCKET;
        }

        /* try every address of the host entry until one connects */
        for (i = 0; h->h_addr_list[i]; i++) {
            if (h->h_addrtype == AF_INET) {
                /* A records (IPv4) */
                if ((unsigned int) h->h_length > sizeof(ia)) {
                    __xmlIOErr(XML_FROM_HTTP, 0, "address size mismatch\n");
                    return INVALID_SOCKET;
                }
                memcpy (&ia, h->h_addr_list[i], h->h_length);
                sockin.sin_family = h->h_addrtype;
                sockin.sin_addr = ia;
                sockin.sin_port = (unsigned short)htons ((unsigned short)port);
                addr = (struct sockaddr *) &sockin;
#ifdef SUPPORT_IP6
            } else if (have_ipv6 () && (h->h_addrtype == AF_INET6)) {
                /* AAAA records (IPv6) */
                if ((unsigned int) h->h_length > sizeof(ia6)) {
                    __xmlIOErr(XML_FROM_HTTP, 0, "address size mismatch\n");
                    return INVALID_SOCKET;
                }
                memcpy (&ia6, h->h_addr_list[i], h->h_length);
                sockin6.sin6_family = h->h_addrtype;
                sockin6.sin6_addr = ia6;
                sockin6.sin6_port = htons (port);
                addr = (struct sockaddr *) &sockin6;
#endif
            } else
                break;              /* for */

            s = xmlNanoHTTPConnectAttempt (addr);
            if (s != INVALID_SOCKET)
                return (s);
        }
    }
#endif

    /* reached when no resolved address accepted the connection */
#ifdef DEBUG_HTTP
    xmlGenericError(xmlGenericErrorContext,
                    "xmlNanoHTTPConnectHost:  unable to connect to '%s'.\n",
                    host);
#endif
    return INVALID_SOCKET;
}


/**
 * xmlNanoHTTPOpen:
 * @URL:  The URL to load
 * @contentType:  if available the Content-Type information will be
 *                returned at that location
 *
 * This function try to open a connection to the indicated resource
 * via HTTP GET.
 *
 * Returns NULL in case of failure, otherwise a request handler.
1224 * The contentType, if provided must be freed by the caller 1225 */ 1226 1227 void* 1228 xmlNanoHTTPOpen(const char *URL, char **contentType) { 1229 if (contentType != NULL) *contentType = NULL; 1230 return(xmlNanoHTTPMethod(URL, NULL, NULL, contentType, NULL, 0)); 1231 } 1232 1233 /** 1234 * xmlNanoHTTPOpenRedir: 1235 * @URL: The URL to load 1236 * @contentType: if available the Content-Type information will be 1237 * returned at that location 1238 * @redir: if available the redirected URL will be returned 1239 * 1240 * This function try to open a connection to the indicated resource 1241 * via HTTP GET. 1242 * 1243 * Returns NULL in case of failure, otherwise a request handler. 1244 * The contentType, if provided must be freed by the caller 1245 */ 1246 1247 void* 1248 xmlNanoHTTPOpenRedir(const char *URL, char **contentType, char **redir) { 1249 if (contentType != NULL) *contentType = NULL; 1250 if (redir != NULL) *redir = NULL; 1251 return(xmlNanoHTTPMethodRedir(URL, NULL, NULL, contentType, redir, NULL,0)); 1252 } 1253 1254 /** 1255 * xmlNanoHTTPRead: 1256 * @ctx: the HTTP context 1257 * @dest: a buffer 1258 * @len: the buffer length 1259 * 1260 * This function tries to read @len bytes from the existing HTTP connection 1261 * and saves them in @dest. This is a blocking call. 1262 * 1263 * Returns the number of byte read. 0 is an indication of an end of connection. 1264 * -1 indicates a parameter error. 
1265 */ 1266 int 1267 xmlNanoHTTPRead(void *ctx, void *dest, int len) { 1268 xmlNanoHTTPCtxtPtr ctxt = (xmlNanoHTTPCtxtPtr) ctx; 1269 #ifdef LIBXML_ZLIB_ENABLED 1270 int bytes_read = 0; 1271 int orig_avail_in; 1272 int z_ret; 1273 #endif 1274 1275 if (ctx == NULL) return(-1); 1276 if (dest == NULL) return(-1); 1277 if (len <= 0) return(0); 1278 1279 #ifdef LIBXML_ZLIB_ENABLED 1280 if (ctxt->usesGzip == 1) { 1281 if (ctxt->strm == NULL) return(0); 1282 1283 ctxt->strm->next_out = dest; 1284 ctxt->strm->avail_out = len; 1285 ctxt->strm->avail_in = ctxt->inptr - ctxt->inrptr; 1286 1287 while (ctxt->strm->avail_out > 0 && 1288 (ctxt->strm->avail_in > 0 || xmlNanoHTTPRecv(ctxt) > 0)) { 1289 orig_avail_in = ctxt->strm->avail_in = 1290 ctxt->inptr - ctxt->inrptr - bytes_read; 1291 ctxt->strm->next_in = BAD_CAST (ctxt->inrptr + bytes_read); 1292 1293 z_ret = inflate(ctxt->strm, Z_NO_FLUSH); 1294 bytes_read += orig_avail_in - ctxt->strm->avail_in; 1295 1296 if (z_ret != Z_OK) break; 1297 } 1298 1299 ctxt->inrptr += bytes_read; 1300 return(len - ctxt->strm->avail_out); 1301 } 1302 #endif 1303 1304 while (ctxt->inptr - ctxt->inrptr < len) { 1305 if (xmlNanoHTTPRecv(ctxt) <= 0) break; 1306 } 1307 if (ctxt->inptr - ctxt->inrptr < len) 1308 len = ctxt->inptr - ctxt->inrptr; 1309 memcpy(dest, ctxt->inrptr, len); 1310 ctxt->inrptr += len; 1311 return(len); 1312 } 1313 1314 /** 1315 * xmlNanoHTTPClose: 1316 * @ctx: the HTTP context 1317 * 1318 * This function closes an HTTP context, it ends up the connection and 1319 * free all data related to it. 
1320 */ 1321 void 1322 xmlNanoHTTPClose(void *ctx) { 1323 xmlNanoHTTPCtxtPtr ctxt = (xmlNanoHTTPCtxtPtr) ctx; 1324 1325 if (ctx == NULL) return; 1326 1327 xmlNanoHTTPFreeCtxt(ctxt); 1328 } 1329 1330 /** 1331 * xmlNanoHTTPMethodRedir: 1332 * @URL: The URL to load 1333 * @method: the HTTP method to use 1334 * @input: the input string if any 1335 * @contentType: the Content-Type information IN and OUT 1336 * @redir: the redirected URL OUT 1337 * @headers: the extra headers 1338 * @ilen: input length 1339 * 1340 * This function try to open a connection to the indicated resource 1341 * via HTTP using the given @method, adding the given extra headers 1342 * and the input buffer for the request content. 1343 * 1344 * Returns NULL in case of failure, otherwise a request handler. 1345 * The contentType, or redir, if provided must be freed by the caller 1346 */ 1347 1348 void* 1349 xmlNanoHTTPMethodRedir(const char *URL, const char *method, const char *input, 1350 char **contentType, char **redir, 1351 const char *headers, int ilen ) { 1352 xmlNanoHTTPCtxtPtr ctxt; 1353 char *bp, *p; 1354 int blen; 1355 SOCKET ret; 1356 int nbRedirects = 0; 1357 char *redirURL = NULL; 1358 #ifdef DEBUG_HTTP 1359 int xmt_bytes; 1360 #endif 1361 1362 if (URL == NULL) return(NULL); 1363 if (method == NULL) method = "GET"; 1364 xmlNanoHTTPInit(); 1365 1366 retry: 1367 if (redirURL == NULL) { 1368 ctxt = xmlNanoHTTPNewCtxt(URL); 1369 if (ctxt == NULL) 1370 return(NULL); 1371 } else { 1372 ctxt = xmlNanoHTTPNewCtxt(redirURL); 1373 if (ctxt == NULL) 1374 return(NULL); 1375 ctxt->location = xmlMemStrdup(redirURL); 1376 } 1377 1378 if ((ctxt->protocol == NULL) || (strcmp(ctxt->protocol, "http"))) { 1379 __xmlIOErr(XML_FROM_HTTP, XML_HTTP_URL_SYNTAX, "Not a valid HTTP URI"); 1380 xmlNanoHTTPFreeCtxt(ctxt); 1381 if (redirURL != NULL) xmlFree(redirURL); 1382 return(NULL); 1383 } 1384 if (ctxt->hostname == NULL) { 1385 __xmlIOErr(XML_FROM_HTTP, XML_HTTP_UNKNOWN_HOST, 1386 "Failed to identify host in 
URI"); 1387 xmlNanoHTTPFreeCtxt(ctxt); 1388 if (redirURL != NULL) xmlFree(redirURL); 1389 return(NULL); 1390 } 1391 if (proxy) { 1392 blen = strlen(ctxt->hostname) * 2 + 16; 1393 ret = xmlNanoHTTPConnectHost(proxy, proxyPort); 1394 } 1395 else { 1396 blen = strlen(ctxt->hostname); 1397 ret = xmlNanoHTTPConnectHost(ctxt->hostname, ctxt->port); 1398 } 1399 if (ret == INVALID_SOCKET) { 1400 xmlNanoHTTPFreeCtxt(ctxt); 1401 if (redirURL != NULL) xmlFree(redirURL); 1402 return(NULL); 1403 } 1404 ctxt->fd = ret; 1405 1406 if (input == NULL) 1407 ilen = 0; 1408 else 1409 blen += 36; 1410 1411 if (headers != NULL) 1412 blen += strlen(headers) + 2; 1413 if (contentType && *contentType) 1414 /* reserve for string plus 'Content-Type: \r\n" */ 1415 blen += strlen(*contentType) + 16; 1416 if (ctxt->query != NULL) 1417 /* 1 for '?' */ 1418 blen += strlen(ctxt->query) + 1; 1419 blen += strlen(method) + strlen(ctxt->path) + 24; 1420 #ifdef LIBXML_ZLIB_ENABLED 1421 /* reserve for possible 'Accept-Encoding: gzip' string */ 1422 blen += 23; 1423 #endif 1424 if (ctxt->port != 80) { 1425 /* reserve space for ':xxxxx', incl. 
potential proxy */ 1426 if (proxy) 1427 blen += 17; 1428 else 1429 blen += 11; 1430 } 1431 bp = (char*)xmlMallocAtomic(blen); 1432 if ( bp == NULL ) { 1433 xmlNanoHTTPFreeCtxt( ctxt ); 1434 xmlHTTPErrMemory("allocating header buffer"); 1435 return ( NULL ); 1436 } 1437 1438 p = bp; 1439 1440 if (proxy) { 1441 if (ctxt->port != 80) { 1442 p += snprintf( p, blen - (p - bp), "%s http://%s:%d%s", 1443 method, ctxt->hostname, 1444 ctxt->port, ctxt->path ); 1445 } 1446 else 1447 p += snprintf( p, blen - (p - bp), "%s http://%s%s", method, 1448 ctxt->hostname, ctxt->path); 1449 } 1450 else 1451 p += snprintf( p, blen - (p - bp), "%s %s", method, ctxt->path); 1452 1453 if (ctxt->query != NULL) 1454 p += snprintf( p, blen - (p - bp), "?%s", ctxt->query); 1455 1456 if (ctxt->port == 80) { 1457 p += snprintf( p, blen - (p - bp), " HTTP/1.0\r\nHost: %s\r\n", 1458 ctxt->hostname); 1459 } else { 1460 p += snprintf( p, blen - (p - bp), " HTTP/1.0\r\nHost: %s:%d\r\n", 1461 ctxt->hostname, ctxt->port); 1462 } 1463 1464 #ifdef LIBXML_ZLIB_ENABLED 1465 p += snprintf(p, blen - (p - bp), "Accept-Encoding: gzip\r\n"); 1466 #endif 1467 1468 if (contentType != NULL && *contentType) 1469 p += snprintf(p, blen - (p - bp), "Content-Type: %s\r\n", *contentType); 1470 1471 if (headers != NULL) 1472 p += snprintf( p, blen - (p - bp), "%s", headers ); 1473 1474 if (input != NULL) 1475 snprintf(p, blen - (p - bp), "Content-Length: %d\r\n\r\n", ilen ); 1476 else 1477 snprintf(p, blen - (p - bp), "\r\n"); 1478 1479 #ifdef DEBUG_HTTP 1480 xmlGenericError(xmlGenericErrorContext, 1481 "-> %s%s", proxy? 
"(Proxy) " : "", bp); 1482 if ((blen -= strlen(bp)+1) < 0) 1483 xmlGenericError(xmlGenericErrorContext, 1484 "ERROR: overflowed buffer by %d bytes\n", -blen); 1485 #endif 1486 ctxt->outptr = ctxt->out = bp; 1487 ctxt->state = XML_NANO_HTTP_WRITE; 1488 blen = strlen( ctxt->out ); 1489 #ifdef DEBUG_HTTP 1490 xmt_bytes = xmlNanoHTTPSend(ctxt, ctxt->out, blen ); 1491 if ( xmt_bytes != blen ) 1492 xmlGenericError( xmlGenericErrorContext, 1493 "xmlNanoHTTPMethodRedir: Only %d of %d %s %s\n", 1494 xmt_bytes, blen, 1495 "bytes of HTTP headers sent to host", 1496 ctxt->hostname ); 1497 #else 1498 xmlNanoHTTPSend(ctxt, ctxt->out, blen ); 1499 #endif 1500 1501 if ( input != NULL ) { 1502 #ifdef DEBUG_HTTP 1503 xmt_bytes = xmlNanoHTTPSend( ctxt, input, ilen ); 1504 1505 if ( xmt_bytes != ilen ) 1506 xmlGenericError( xmlGenericErrorContext, 1507 "xmlNanoHTTPMethodRedir: Only %d of %d %s %s\n", 1508 xmt_bytes, ilen, 1509 "bytes of HTTP content sent to host", 1510 ctxt->hostname ); 1511 #else 1512 xmlNanoHTTPSend( ctxt, input, ilen ); 1513 #endif 1514 } 1515 1516 ctxt->state = XML_NANO_HTTP_READ; 1517 1518 while ((p = xmlNanoHTTPReadLine(ctxt)) != NULL) { 1519 if (*p == 0) { 1520 ctxt->content = ctxt->inrptr; 1521 xmlFree(p); 1522 break; 1523 } 1524 xmlNanoHTTPScanAnswer(ctxt, p); 1525 1526 #ifdef DEBUG_HTTP 1527 xmlGenericError(xmlGenericErrorContext, "<- %s\n", p); 1528 #endif 1529 xmlFree(p); 1530 } 1531 1532 if ((ctxt->location != NULL) && (ctxt->returnValue >= 300) && 1533 (ctxt->returnValue < 400)) { 1534 #ifdef DEBUG_HTTP 1535 xmlGenericError(xmlGenericErrorContext, 1536 "\nRedirect to: %s\n", ctxt->location); 1537 #endif 1538 while ( xmlNanoHTTPRecv(ctxt) > 0 ) 1539 ; 1540 if (nbRedirects < XML_NANO_HTTP_MAX_REDIR) { 1541 nbRedirects++; 1542 if (redirURL != NULL) 1543 xmlFree(redirURL); 1544 redirURL = xmlMemStrdup(ctxt->location); 1545 xmlNanoHTTPFreeCtxt(ctxt); 1546 goto retry; 1547 } 1548 xmlNanoHTTPFreeCtxt(ctxt); 1549 if (redirURL != NULL) xmlFree(redirURL); 1550 
#ifdef DEBUG_HTTP 1551 xmlGenericError(xmlGenericErrorContext, 1552 "xmlNanoHTTPMethodRedir: Too many redirects, aborting ...\n"); 1553 #endif 1554 return(NULL); 1555 } 1556 1557 if (contentType != NULL) { 1558 if (ctxt->contentType != NULL) 1559 *contentType = xmlMemStrdup(ctxt->contentType); 1560 else 1561 *contentType = NULL; 1562 } 1563 1564 if ((redir != NULL) && (redirURL != NULL)) { 1565 *redir = redirURL; 1566 } else { 1567 if (redirURL != NULL) 1568 xmlFree(redirURL); 1569 if (redir != NULL) 1570 *redir = NULL; 1571 } 1572 1573 #ifdef DEBUG_HTTP 1574 if (ctxt->contentType != NULL) 1575 xmlGenericError(xmlGenericErrorContext, 1576 "\nCode %d, content-type '%s'\n\n", 1577 ctxt->returnValue, ctxt->contentType); 1578 else 1579 xmlGenericError(xmlGenericErrorContext, 1580 "\nCode %d, no content-type\n\n", 1581 ctxt->returnValue); 1582 #endif 1583 1584 return((void *) ctxt); 1585 } 1586 1587 /** 1588 * xmlNanoHTTPMethod: 1589 * @URL: The URL to load 1590 * @method: the HTTP method to use 1591 * @input: the input string if any 1592 * @contentType: the Content-Type information IN and OUT 1593 * @headers: the extra headers 1594 * @ilen: input length 1595 * 1596 * This function try to open a connection to the indicated resource 1597 * via HTTP using the given @method, adding the given extra headers 1598 * and the input buffer for the request content. 1599 * 1600 * Returns NULL in case of failure, otherwise a request handler. 
1601 * The contentType, if provided must be freed by the caller 1602 */ 1603 1604 void* 1605 xmlNanoHTTPMethod(const char *URL, const char *method, const char *input, 1606 char **contentType, const char *headers, int ilen) { 1607 return(xmlNanoHTTPMethodRedir(URL, method, input, contentType, 1608 NULL, headers, ilen)); 1609 } 1610 1611 /** 1612 * xmlNanoHTTPFetch: 1613 * @URL: The URL to load 1614 * @filename: the filename where the content should be saved 1615 * @contentType: if available the Content-Type information will be 1616 * returned at that location 1617 * 1618 * This function try to fetch the indicated resource via HTTP GET 1619 * and save it's content in the file. 1620 * 1621 * Returns -1 in case of failure, 0 in case of success. The contentType, 1622 * if provided must be freed by the caller 1623 */ 1624 int 1625 xmlNanoHTTPFetch(const char *URL, const char *filename, char **contentType) { 1626 void *ctxt = NULL; 1627 char *buf = NULL; 1628 int fd; 1629 int len; 1630 int ret = 0; 1631 1632 if (filename == NULL) return(-1); 1633 ctxt = xmlNanoHTTPOpen(URL, contentType); 1634 if (ctxt == NULL) return(-1); 1635 1636 if (!strcmp(filename, "-")) 1637 fd = 0; 1638 else { 1639 fd = open(filename, O_CREAT | O_WRONLY, 00644); 1640 if (fd < 0) { 1641 xmlNanoHTTPClose(ctxt); 1642 if ((contentType != NULL) && (*contentType != NULL)) { 1643 xmlFree(*contentType); 1644 *contentType = NULL; 1645 } 1646 return(-1); 1647 } 1648 } 1649 1650 xmlNanoHTTPFetchContent( ctxt, &buf, &len ); 1651 if ( len > 0 ) { 1652 if (write(fd, buf, len) == -1) { 1653 ret = -1; 1654 } 1655 } 1656 1657 xmlNanoHTTPClose(ctxt); 1658 close(fd); 1659 return(ret); 1660 } 1661 1662 #ifdef LIBXML_OUTPUT_ENABLED 1663 /** 1664 * xmlNanoHTTPSave: 1665 * @ctxt: the HTTP context 1666 * @filename: the filename where the content should be saved 1667 * 1668 * This function saves the output of the HTTP transaction to a file 1669 * It closes and free the context at the end 1670 * 1671 * Returns -1 in case of 
failure, 0 in case of success. 1672 */ 1673 int 1674 xmlNanoHTTPSave(void *ctxt, const char *filename) { 1675 char *buf = NULL; 1676 int fd; 1677 int len; 1678 int ret = 0; 1679 1680 if ((ctxt == NULL) || (filename == NULL)) return(-1); 1681 1682 if (!strcmp(filename, "-")) 1683 fd = 0; 1684 else { 1685 fd = open(filename, O_CREAT | O_WRONLY, 0666); 1686 if (fd < 0) { 1687 xmlNanoHTTPClose(ctxt); 1688 return(-1); 1689 } 1690 } 1691 1692 xmlNanoHTTPFetchContent( ctxt, &buf, &len ); 1693 if ( len > 0 ) { 1694 if (write(fd, buf, len) == -1) { 1695 ret = -1; 1696 } 1697 } 1698 1699 xmlNanoHTTPClose(ctxt); 1700 close(fd); 1701 return(ret); 1702 } 1703 #endif /* LIBXML_OUTPUT_ENABLED */ 1704 1705 /** 1706 * xmlNanoHTTPReturnCode: 1707 * @ctx: the HTTP context 1708 * 1709 * Get the latest HTTP return code received 1710 * 1711 * Returns the HTTP return code for the request. 1712 */ 1713 int 1714 xmlNanoHTTPReturnCode(void *ctx) { 1715 xmlNanoHTTPCtxtPtr ctxt = (xmlNanoHTTPCtxtPtr) ctx; 1716 1717 if (ctxt == NULL) return(-1); 1718 1719 return(ctxt->returnValue); 1720 } 1721 1722 /** 1723 * xmlNanoHTTPAuthHeader: 1724 * @ctx: the HTTP context 1725 * 1726 * Get the authentication header of an HTTP context 1727 * 1728 * Returns the stashed value of the WWW-Authenticate or Proxy-Authenticate 1729 * header. 1730 */ 1731 const char * 1732 xmlNanoHTTPAuthHeader(void *ctx) { 1733 xmlNanoHTTPCtxtPtr ctxt = (xmlNanoHTTPCtxtPtr) ctx; 1734 1735 if (ctxt == NULL) return(NULL); 1736 1737 return(ctxt->authHeader); 1738 } 1739 1740 /** 1741 * xmlNanoHTTPContentLength: 1742 * @ctx: the HTTP context 1743 * 1744 * Provides the specified content length from the HTTP header. 1745 * 1746 * Return the specified content length from the HTTP header. Note that 1747 * a value of -1 indicates that the content length element was not included in 1748 * the response header. 
1749 */ 1750 int 1751 xmlNanoHTTPContentLength( void * ctx ) { 1752 xmlNanoHTTPCtxtPtr ctxt = (xmlNanoHTTPCtxtPtr)ctx; 1753 1754 return ( ( ctxt == NULL ) ? -1 : ctxt->ContentLength ); 1755 } 1756 1757 /** 1758 * xmlNanoHTTPRedir: 1759 * @ctx: the HTTP context 1760 * 1761 * Provides the specified redirection URL if available from the HTTP header. 1762 * 1763 * Return the specified redirection URL or NULL if not redirected. 1764 */ 1765 const char * 1766 xmlNanoHTTPRedir( void * ctx ) { 1767 xmlNanoHTTPCtxtPtr ctxt = (xmlNanoHTTPCtxtPtr)ctx; 1768 1769 return ( ( ctxt == NULL ) ? NULL : ctxt->location ); 1770 } 1771 1772 /** 1773 * xmlNanoHTTPEncoding: 1774 * @ctx: the HTTP context 1775 * 1776 * Provides the specified encoding if specified in the HTTP headers. 1777 * 1778 * Return the specified encoding or NULL if not available 1779 */ 1780 const char * 1781 xmlNanoHTTPEncoding( void * ctx ) { 1782 xmlNanoHTTPCtxtPtr ctxt = (xmlNanoHTTPCtxtPtr)ctx; 1783 1784 return ( ( ctxt == NULL ) ? NULL : ctxt->encoding ); 1785 } 1786 1787 /** 1788 * xmlNanoHTTPMimeType: 1789 * @ctx: the HTTP context 1790 * 1791 * Provides the specified Mime-Type if specified in the HTTP headers. 1792 * 1793 * Return the specified Mime-Type or NULL if not available 1794 */ 1795 const char * 1796 xmlNanoHTTPMimeType( void * ctx ) { 1797 xmlNanoHTTPCtxtPtr ctxt = (xmlNanoHTTPCtxtPtr)ctx; 1798 1799 return ( ( ctxt == NULL ) ? NULL : ctxt->mimeType ); 1800 } 1801 1802 /** 1803 * xmlNanoHTTPFetchContent: 1804 * @ctx: the HTTP context 1805 * @ptr: pointer to set to the content buffer. 1806 * @len: integer pointer to hold the length of the content 1807 * 1808 * Check if all the content was read 1809 * 1810 * Returns 0 if all the content was read and available, returns 1811 * -1 if received content length was less than specified or an error 1812 * occurred. 
1813 */ 1814 static int 1815 xmlNanoHTTPFetchContent( void * ctx, char ** ptr, int * len ) { 1816 xmlNanoHTTPCtxtPtr ctxt = (xmlNanoHTTPCtxtPtr)ctx; 1817 1818 int rc = 0; 1819 int cur_lgth; 1820 int rcvd_lgth; 1821 int dummy_int; 1822 char * dummy_ptr = NULL; 1823 1824 /* Dummy up return input parameters if not provided */ 1825 1826 if ( len == NULL ) 1827 len = &dummy_int; 1828 1829 if ( ptr == NULL ) 1830 ptr = &dummy_ptr; 1831 1832 /* But can't work without the context pointer */ 1833 1834 if ( ( ctxt == NULL ) || ( ctxt->content == NULL ) ) { 1835 *len = 0; 1836 *ptr = NULL; 1837 return ( -1 ); 1838 } 1839 1840 rcvd_lgth = ctxt->inptr - ctxt->content; 1841 1842 while ( (cur_lgth = xmlNanoHTTPRecv( ctxt )) > 0 ) { 1843 1844 rcvd_lgth += cur_lgth; 1845 if ( (ctxt->ContentLength > 0) && (rcvd_lgth >= ctxt->ContentLength) ) 1846 break; 1847 } 1848 1849 *ptr = ctxt->content; 1850 *len = rcvd_lgth; 1851 1852 if ( ( ctxt->ContentLength > 0 ) && ( rcvd_lgth < ctxt->ContentLength ) ) 1853 rc = -1; 1854 else if ( rcvd_lgth == 0 ) 1855 rc = -1; 1856 1857 return ( rc ); 1858 } 1859 1860 #ifdef STANDALONE 1861 int main(int argc, char **argv) { 1862 char *contentType = NULL; 1863 1864 if (argv[1] != NULL) { 1865 if (argv[2] != NULL) 1866 xmlNanoHTTPFetch(argv[1], argv[2], &contentType); 1867 else 1868 xmlNanoHTTPFetch(argv[1], "-", &contentType); 1869 if (contentType != NULL) xmlFree(contentType); 1870 } else { 1871 xmlGenericError(xmlGenericErrorContext, 1872 "%s: minimal HTTP GET implementation\n", argv[0]); 1873 xmlGenericError(xmlGenericErrorContext, 1874 "\tusage %s [ URL [ filename ] ]\n", argv[0]); 1875 } 1876 xmlNanoHTTPCleanup(); 1877 xmlMemoryDump(); 1878 return(0); 1879 } 1880 #endif /* STANDALONE */ 1881 #else /* !LIBXML_HTTP_ENABLED */ 1882 #ifdef STANDALONE 1883 #include <stdio.h> 1884 int main(int argc, char **argv) { 1885 xmlGenericError(xmlGenericErrorContext, 1886 "%s : HTTP support not compiled in\n", argv[0]); 1887 return(0); 1888 } 1889 #endif /* 
STANDALONE */ 1890 #endif /* LIBXML_HTTP_ENABLED */ 1891