1 #ifndef lint
2 static char rcsId[]="$Header$";
3 #endif
4 /*****
5 * HTTP.c : A first attempt at a simple HTTP library.
6 *
7 * This file Version $Revision$
8 *
9 * Creation date: Tue Oct 21 01:41:31 GMT+0100 1997
10 * Last modification: $Date$
11 * By: $Author$
12 * Current State: $State$
13 *
14 * Author: Richard Offer
15 *
16 * Copyright (C) 1994-1997 by Richard Offer <offer@sgi.com>
17 * All Rights Reserved
18 *
19 * This library is free software; you can redistribute it and/or
20 * modify it under the terms of the GNU Library General Public
21 * License as published by the Free Software Foundation; either
22 * version 2 of the License, or (at your option) any later version.
23 *
24 * This library is distributed in the hope that it will be useful,
25 * but WITHOUT ANY WARRANTY; without even the implied warranty of
26 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
27 * Library General Public License for more details.
28 *
29 * You should have received a copy of the GNU Library General Public
30 * License along with this library; if not, write to the Free
31 * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
32 *
33 * Note from the Author:
34 *
* A first attempt at a simple HTTP library, mainly used as a test harness for
* the forms work in XmHTML. It does lots of bad things and isn't a complete
* implementation. I didn't use the W3C libwww because it's too big and doesn't
* seem to work for POSTs --- rmo
39 *
40 * The code is based on a quick read of the HTTP 1.0 rfc, with ideas for
41 * implementation taken from the Chimera Browser.
42 *
43 *****/
44 /*****
45 * ChangeLog
46 * $Log$
47 * Revision 1.1 2011/06/30 16:10:37 rwcox
48 * Cadd
49 *
50 * Revision 1.1 1997/10/23 00:28:23 newt
51 * Initial Revision
52 *
53 *****/
54 #ifdef HAVE_CONFIG_H
55 #include "config.h"
56 #endif
57
58 #include <stdio.h>
59 #include <stdlib.h>
60 #include <string.h>
61 #ifdef HAVE_STRINGS_H
62 #include <strings.h>
63 #endif
64 #include <unistd.h>
65 #include <ctype.h>
66 #include <errno.h>
67 #include <sys/time.h>
68 #ifdef HAVE_SYS_SELECT_H
69 #include <sys/select.h> /* select() */
70 #endif
71 #include <sys/types.h>
72 #include <sys/socket.h>
73 #include <netdb.h>
74 #include <netinet/in.h>
75 #include <arpa/inet.h>
76
77 #ifndef SO_RCVTIMEO
78 #include <setjmp.h>
79 #include <signal.h>
80 #endif
81
82 #include <http/HTTPP.h>
83
84 #ifdef DMALLOC
85 #include <dmalloc.h>
86 #endif /* DMALLOC */
87
88 #ifdef NEED_SOCKS
89 /* This is _very_ firewall specific, this works for me (after much trial and
90 * error --- offer */
91 #include "socks.h"
92
93 #define connect Rconnect
94 #endif /* NEED_SOCKS */
95
96 /*** External Function Prototype Declarations ***/
97
98 /*** Public Variable Declarations ***/
99 #ifdef DEBUG
100 int http_debug = 0;
101 #endif
102
103 /*** Private Datatype Declarations ****/
104
105 /*** Private Function Prototype Declarations ****/
106
107 /* delete a no longer required response */
108 static void deleteResponse(HTTPResponse * res);
109
110 /* create a new response */
111 static HTTPResponse *newResponse(char *buf);
112
113 /*****
114 * hexify src and append to dest. Return value points to the next available
115 * position in dest.
116 *****/
117 static char *appendHex(char *dest, char *src);
118
119 /* convert all name-value pairs to a valid QUERY_STRING format */
120 static char *encodeFormData(HTTPNamedValues * formdata);
121
122 #ifndef SO_RCVTIMEO
123 static void connectTimeout(int signal);
124 #endif
125
126 /*** Private Variable Declarations ***/
127 #ifndef SO_RCVTIMEO
128 static jmp_buf http_setjmp_buffer;
129 #endif
130
131 #ifndef SO_RCVTIMEO
132 static void
connectTimeout(int signal)133 connectTimeout(int signal)
134 {
135 if(signal == SIGALRM)
136 longjmp(http_setjmp_buffer, 1);
137 }
138 #endif
139
140 /* This is the main routine for sending a request and getting a response,
141 * everything else in this file is waffle */
142 void
loadHTTPURL(void * unused,HTTPRequest * request,HTTPCookieRequest * cookieReq)143 loadHTTPURL(void *unused, HTTPRequest * request, HTTPCookieRequest *cookieReq)
144 {
145 struct hostent *server;
146 struct sockaddr_in name;
147 int sock;
148 char *scheme = NULL, *username = NULL, *password = NULL;
149 char *hostname = NULL, *filename = NULL;
150 int port;
151 char *buf = NULL;
152 size_t offset = 0, bufsize = 0;
153 HTTPResponse *res;
154 ssize_t val;
155 fd_set rfds;
156 struct timeval tv;
157 int retval, retry_count, nreads;
158 char *cookie = NULL ;
159
160 /* see if we have an URI */
161 if(request->url == NULL)
162 {
163 request->ret = HTTPBadURL;
164 return;
165 }
166
167 /* verify request type */
168 if(request->type != HTTPLoadToString &&
169 request->type != HTTPLoadToFile)
170 {
171 request->ret = HTTPBadLoadType;
172 return;
173 }
174
175 /* resolve the url */
176 parseURL(request->url, PARSE_URL, &scheme, &username, &password,
177 &hostname, &port, &filename);
178
179 re_issue_request:
180 /* check protocol */
181 if(scheme == NULL || strncasecmp(scheme, "http", 4))
182 {
183 /* free off the output from parseURL() */
184 freeURL(PARSE_URL, scheme, username, password, hostname, port,
185 filename);
186 request->ret = HTTPBadProtocol;
187 return;
188 }
189
190 #ifdef DEBUG
191 if(http_debug)
192 fprintf(stderr, "Lookin up host %s...\n", hostname);
193 #endif
194
195 /* see if we can resolve the host */
196 if((server = gethostbyname(hostname)) == NULL)
197 {
198 freeURL(PARSE_URL, scheme, username, password, hostname, port,
199 filename);
200 request->ret = HTTPBadHost;
201 return;
202 }
203
204 /* we've got the host, open a socket */
205 if((sock = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP)) < 0)
206 {
207 freeURL(PARSE_URL, scheme, username, password, hostname, port,
208 filename);
209 request->ret = HTTPNoSocket;
210 return;
211 }
212 #ifdef DEBUG
213 if(http_debug)
214 fprintf(stderr, "Found, connecting to %s (port %i)\n", hostname, port);
215 #endif
216
217 name.sin_family = AF_INET;
218 name.sin_port = htons(port);
219
220 #ifdef linux
221 memcpy(&name.sin_addr, server->h_addr, server->h_length);
222 #else
223 memcpy(&name.sin_addr.s_addr, server->h_addr, server->h_length);
224 #endif
225
226 /*****
227 * Wouldn't the world be easy if each system knew SO_RCVTIMEO.
228 * But this is not the case on at least linux, so we use a brute force
229 * approach: alarm.
230 *
231 * Just in case your system can enable timeouts on sockets, here's a piece
232 * of code that should work.
233 *****/
234 #ifdef SO_RCVTIMEO
235 /* set socket timeout */
236 tv.tv_sec = request->timeout;
237 tv.tv_usec = 0;
238 if(setsockopt(sock, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(struct timeval)))
239 {
240 freeURL(PARSE_URL, scheme, username, password, hostname, port,
241 filename);
242 request->ret = HTTPNoSocket;
243 close(sock);
244 return;
245 }
246 #else
247 if(setjmp(http_setjmp_buffer))
248 {
249 freeURL(PARSE_URL, scheme, username, password, hostname, port,
250 filename);
251 #ifdef DEBUG
252 if(http_debug)
253 fprintf(stderr, "connect() timed out\n");
254 #endif
255 request->ret = HTTPConnectTimeout;
256 signal(SIGALRM, SIG_DFL);
257 close(sock);
258 return;
259 }
260 signal(SIGALRM, connectTimeout);
261 alarm((long)request->timeout);
262 #endif
263
264 if(connect(sock, (struct sockaddr*)&name, sizeof(name)) < 0)
265 {
266 freeURL(PARSE_URL, scheme, username, password, hostname, port,
267 filename);
268
269 if(errno == EWOULDBLOCK)
270 {
271 #ifdef DEBUG
272 if(http_debug)
273 fprintf(stderr, "connect() timed out\n");
274 #endif
275 request->ret = HTTPConnectTimeout;
276 }
277 else
278 request->ret = HTTPNoConnection;
279 close(sock);
280 return;
281 }
282 #ifndef SO_RCVTIMEO
283 /* remove connection timeout */
284 signal(SIGALRM, SIG_DFL);
285 alarm(0L);
286 #endif
287
288 #ifdef DEBUG
289 if(http_debug)
290 fprintf(stderr, "sending request (%i)\n", request->method);
291 #endif
292
293 if( cookieReq != NULL && cookieReq->cookieList != NULL )
294 cookie = makeCookie(cookieReq->cookieList);
295 #ifdef DEBUG
296 if( http_debug )
297 if( cookie )
298 fprintf(stderr,"The server wants a cookie '%s'\n",cookie);
299 #endif /* _DEBUG */
300
301 switch(request->method)
302 {
303 case HTTPGET:
304 {
305 char *formStr = NULL, *reqStr = NULL;
306
307 if(request->form_data)
308 {
309 formStr = encodeFormData((HTTPNamedValues*)request->form_data);
310 }
311 reqStr = (char*)malloc(strlen(GET_METHOD) + strlen(filename) +
312 (formStr ? strlen(formStr) + 1 : 0) +
313 strlen(HTTPVERSIONHDR) + strlen(USER_AGENT) +
314 (cookie ? strlen(cookie) + 1 : 0) +
315 strlen(NEWLINE) + 3);
316 sprintf(reqStr,
317 "%s%s%s%s%s%s%s%s",
318 GET_METHOD,
319 filename,
320 (formStr ? "?" : ""), /* cgi stuff requires a ? */
321 (formStr ? formStr : ""),
322 HTTPVERSIONHDR,
323 USER_AGENT,
324 ( cookie ? cookie : "" ),
325 NEWLINE);
326 val = write(sock, reqStr, strlen(reqStr) + 1);
327 free(reqStr);
328 if(formStr)
329 free(formStr);
330
331 }
332 break;
333
334 case HTTPPOST:
335 {
336 char *formStr = NULL, *fullReqStr;
337
338 char *reqStr = (char *) malloc(strlen(POST_METHOD) +
339 strlen(filename) + strlen(HTTPVERSIONHDR) +
340 (cookie ? strlen(cookie) + 1 : 0) +
341 strlen(USER_AGENT) + strlen(NEWLINE) + 2);
342 sprintf(reqStr, "%s%s%s%s%s",
343 POST_METHOD,
344 filename,
345 HTTPVERSIONHDR,
346 USER_AGENT,
347 ( cookie ? cookie : "" ) );
348
349 if(request->form_data)
350 {
351 formStr = encodeFormData((HTTPNamedValues*)request->form_data);
352 }
353 fullReqStr = calloc(strlen(reqStr) + strlen(CONTENT_LEN) +
354 strlen(CONTENT_TYPE) + MAX_FORM_LEN +
355 (formStr ? strlen(formStr) : 0 )+ 10 /* safety */,
356 sizeof(char));
357 sprintf(fullReqStr,
358 "%s%s%d%s%s%s%s%s%s",
359 reqStr,
360 CONTENT_LEN,
361 (int)(formStr ? strlen(formStr) : 0),
362 NEWLINE,
363 CONTENT_TYPE,
364 NEWLINE,
365 NEWLINE,
366 formStr,
367 NEWLINE);
368 val = write(sock, fullReqStr, strlen(fullReqStr) + 1);
369
370 free(reqStr);
371 if(formStr)
372 free(formStr);
373 free(fullReqStr);
374 }
375 break;
376
377 case HTTPHEAD:
378 /* not sure about cookies for HEAD, supported ? */
379 {
380 char *reqStr = NULL;
381
382 reqStr = (char*)malloc(strlen(HEAD_METHOD) + strlen(filename) +
383 strlen(HTTPVERSIONHDR) + strlen(USER_AGENT) +
384 strlen(NEWLINE) + 3);
385 sprintf(reqStr,
386 "%s%s%s%s%s",
387 HEAD_METHOD,
388 filename,
389 HTTPVERSIONHDR,
390 USER_AGENT,
391 NEWLINE);
392 val = write(sock, reqStr, strlen(reqStr) + 1);
393 free(reqStr);
394 }
395 break;
396
397 default:
398 /* free off the output from parseURL() */
399 freeURL(PARSE_URL, scheme, username, password, hostname, port,
400 filename);
401 close(sock);
402 request->ret = HTTPMethodUnsupported;
403 return;
404 }
405
406 /* read output from remote HTTP server */
407 offset = 0;
408 val = 0;
409 bufsize = CHUNKSIZE;
410 buf = calloc(bufsize, sizeof(char));
411 nreads = 0;
412 retry_count = 0;
413
414 #ifdef DEBUG
415 if(http_debug)
416 fprintf(stderr, "awaiting input\n");
417 #endif
418
419 /* watch socket to see when it has input */
420 while(1)
421 {
422 /* no of bytes read from socket */
423 val = 0;
424
425 FD_ZERO(&rfds);
426 FD_SET(sock, &rfds);
427
428 /* wait up to the given no of seconds */
429 tv.tv_sec = request->timeout;
430 tv.tv_usec = 0;
431
432 retval = select(sock+1, &rfds, NULL, NULL, &tv);
433
434 if(retval)
435 {
436 #ifdef DEBUG
437 if(http_debug)
438 fprintf(stderr, "reading socket.\n");
439 #endif
440 val = read(sock, buf + offset, bufsize - offset);
441 if(val <= 0) /* error or end of input */
442 break;
443
444 #ifdef DEBUG
445 if(http_debug)
446 fprintf(stderr, "read %i bytes, offset: %i)\n", val,
447 offset);
448 #endif
449 /* keep room for at least CHUNKSIZE bytes */
450 if(bufsize - (offset + val) < CHUNKSIZE)
451 {
452 bufsize += CHUNKSIZE;
453 buf = realloc(buf, bufsize);
454 }
455 offset += val;
456 buf[offset] = '\0'; /* NULL terminate */
457 }
458 else
459 {
460 #ifdef DEBUG
461 if(http_debug)
462 fprintf(stderr, "timed out after %i seconds.\n",
463 request->timeout);
464 #endif
465 /*
466 * abort if we're timed out, have reached the maximum retry
467 * times and no input was received.
468 */
469 if(retry_count == request->retry && offset == 0)
470 {
471 close(sock);
472 free(buf);
473 request->ret = HTTPTimeout;
474 return;
475 }
476 /* break out when we have an offset and this read timed out */
477 else if(offset && val <= 0)
478 break;
479 else
480 {
481 /* read timed out before any input was received */
482 retry_count++;
483 }
484 #ifdef DEBUG
485 if(http_debug)
486 fprintf(stderr, "retrying for the %ith time.\n", retry_count);
487 #endif
488 }
489 }
490
491 /* now parse the read message for headers */
492 res = newResponse(buf);
493 free(buf);
494
495 /* set appropriate return code */
496 if(val < 0)
497 request->ret = HTTPPartialContent;
498 else
499 request->ret = res->status_code;
500
501 #if defined(PRINT_HDRS) && defined(DEBUG)
502 {
503 int i;
504 for(i = 0; i < res->num_headers; i++)
505 {
506 printf("hdr %s = %s\n", res->headers[i].name,
507 res->headers[i].value);
508 }
509 }
510 #endif
511
512 /* valid return code? */
513 if(request->ret > 199 && request->ret < 299)
514 {
515 /* get or post include data, which head does not contain */
516 if(request->method != HTTPHEAD)
517 {
518 int i;
519
520 /* parse the headers for any cookies */
521 for (i = 0; i < res->num_headers; i++ )
522 {
523 if(!strcasecmp(res->headers[i].name, "Set-Cookie"))
524 setCookie(cookieReq,SetCookie,res->headers[i].value,
525 hostname);
526 else if(!strcasecmp(res->headers[i].name, "Set-Cookie2"))
527 setCookie(cookieReq, SetCookie2, res->headers[i].value,
528 hostname);
529 }
530
531 /* store data in string (most likely this was a cgi request) */
532 if(request->type == HTTPLoadToString)
533 {
534 size_t len = (res->data ? strlen((char *) res->data) : 0);
535 for (i = 0; i < res->num_headers; i++)
536 {
537 if(!strcasecmp(res->headers[i].name, "Content-length"))
538 len = atoi(res->headers[i].value);
539 }
540 request->out_data = calloc(len + 1, sizeof(char));
541 memcpy((void *) request->out_data, res->data, len);
542 request->out_data[len] = '\0';
543 request->length = len;
544 }
545 else if(request->type == HTTPLoadToFile)
546 {
547 /* this was a request for a remote file. Save it */
548 FILE *fp;
549
550 if((fp = fopen((char *) request->in_data, "w")) == NULL)
551 {
552 request->ret = HTTPCannotCreateFile;
553 }
554 else
555 {
556 int i;
557 size_t len = (res->data ? strlen((char *) res->data) : 0);
558 size_t written;
559 for (i = 0; i < res->num_headers; i++)
560 {
561 if(!strcasecmp(res->headers[i].name, "Content-length"))
562 len = atoi(res->headers[i].value);
563 }
564 /* flush data */
565 written = fwrite(res->data, sizeof(char), len, fp);
566 fflush(fp);
567 fclose(fp);
568 }
569 }
570 }
571 else
572 {
573 /* store data in string (most likely this was a cgi request) */
574 if(request->type == HTTPLoadToString)
575 {
576 /*****
577 * Transfer header array from the result structure to the
578 * request.
579 *****/
580 request->headers = res->headers;
581 request->num_headers = res->num_headers;
582 res->headers = NULL;
583 res->num_headers = 0;
584 }
585 else if(request->type == HTTPLoadToFile)
586 {
587 /* this was a request for a remote file. Save it */
588 FILE *fp;
589
590 if((fp = fopen((char *) request->in_data, "w")) == NULL)
591 {
592 request->ret = HTTPCannotCreateFile;
593 }
594 else
595 {
596 int i;
597 for (i = 0; i < res->num_headers; i++)
598 {
599 fprintf(fp, "%s = %s\n", res->headers[i].name,
600 res->headers[i].value);
601 }
602 /* flush data */
603 fflush(fp);
604 fclose(fp);
605 }
606 }
607 }
608 }
609 else
610 {
611 /*****
612 * if the URL has moved (_or_ the user left off a trailing '/' from a
613 * directory request), then look in the Location: header for the
614 * correct URL and re-issue the request --- offer dec 97
615 *****/
616 if(request->ret == 301 || request->ret == 302 )
617 {
618 int i;
619 for(i=0; i< res->num_headers; i++)
620 {
621 if(!strcasecmp(res->headers[i].name, "location"))
622 {
623 freeURL(PARSE_URL, scheme, username, password, hostname,
624 port, filename);
625
626 parseURL(res->headers[i].value, PARSE_URL, &scheme,
627 &username, &password, &hostname, &port, &filename);
628 free(request->url);
629 /*****
630 * Update the URL that was requested to point to the
631 * correct one
632 *****/
633 request->url = NewString(res->headers[i].value);
634 goto re_issue_request;
635
636 }
637 }
638 }
639 }
640 deleteResponse(res);
641
642 /* free off the output from parseURL() */
643 freeURL(PARSE_URL, scheme, username, password, hostname, port, filename);
644
645 /* all done */
646 close(sock);
647 }
648
649 void
deleteHTTPRequest(HTTPRequest * req)650 deleteHTTPRequest(HTTPRequest * req)
651 {
652 int i;
653
654 if(req->in_data)
655 free(req->in_data);
656
657 if(req->form_data)
658 {
659 i = 0;
660 while (req->form_data[i].name != NULL)
661 {
662 if(req->form_data[i].name)
663 free(req->form_data[i].name);
664 if(req->form_data[i].value)
665 free(req->form_data[i].value);
666 i++;
667 }
668 free(req->form_data);
669 }
670
671 for(i = 0; i < req->num_headers; i++)
672 {
673 if(req->headers[i].name)
674 free(req->headers[i].name);
675
676 if(req->headers[i].value)
677 free(req->headers[i].value);
678 }
679 if(req->headers)
680 free(req->headers);
681
682 if(req->out_data)
683 free(req->out_data);
684 if(req->url)
685 free(req->url);
686
687 free(req);
688
689 }
690
691 HTTPRequest *
newHTTPRequest(void)692 newHTTPRequest(void)
693 {
694 HTTPRequest *new_r = (HTTPRequest *) calloc(1, sizeof(HTTPRequest));
695
696 new_r->type = HTTPLoadToString;
697 new_r->in_data = NULL;
698 new_r->form_data = NULL;
699 new_r->out_data = NULL;
700 new_r->method = HTTPGET;
701 new_r->url = NULL;
702 new_r->ret = HTTPInvalid;
703 new_r->timeout = DEFAULT_TIMEOUT;
704 new_r->retry = DEFAULT_RETRY;
705 new_r->headers = NULL;
706 new_r->num_headers = 0;
707
708 return(new_r);
709 }
710
711 static void
deleteResponse(HTTPResponse * res)712 deleteResponse(HTTPResponse * res)
713 {
714 int i;
715
716 if(res->data)
717 free(res->data);
718
719 for(i = 0; i < res->num_headers; i++)
720 {
721 if(res->headers[i].name)
722 free(res->headers[i].name);
723
724 if(res->headers[i].value)
725 free(res->headers[i].value);
726 }
727 if(res->headers)
728 free(res->headers);
729
730 free(res);
731
732 }
733
/* numeric value of a character already known to be a hexadecimal digit */
static int
unescapeHexValue(int c)
{
	if(c >= '0' && c <= '9')
		return(c - '0');
	return(toupper(c) - 'A' + 10);
}

/*****
* Name:			HTTPUnescapeResponse
* Return Type:	void
* Description:	unescape HTTP escaped chars: every %XX sequence (XX being two
*				hexadecimal digits, either case) is replaced by the byte it
*				encodes. Replacement is done inline.
* In:
*	buf:		NUL-terminated text to unescape; NULL is ignored.
* Returns:
*	nothing, buf is modified in place and never grows.
* Note:
*	a '%' that is not followed by two hexadecimal digits is copied
*	unchanged, so a stray '%' near the end of the text can no longer make
*	the scan run past the terminator.
*****/
void
HTTPUnescapeResponse(char *buf)
{
	size_t x, y;

	if(buf == NULL)
		return;

	for(x = 0, y = 0; buf[y] != '\0'; ++x, ++y)
	{
		/*
		* isxdigit() rejects the terminator, so buf[y + 2] is only looked
		* at when buf[y + 1] is a real character.
		*/
		if(buf[y] == '%' &&
			isxdigit((unsigned char)buf[y + 1]) &&
			isxdigit((unsigned char)buf[y + 2]))
		{
			buf[x] = (char)((unescapeHexValue((unsigned char)buf[y + 1]) << 4) |
				unescapeHexValue((unsigned char)buf[y + 2]));
			y += 2;
		}
		else
			buf[x] = buf[y];
	}
	buf[x] = '\0';
}
758
759 static HTTPResponse *
newResponse(char * buf)760 newResponse(char *buf)
761 {
762
763 HTTPResponse *res = (HTTPResponse *) calloc(1, sizeof(HTTPResponse));
764 int ver, code;
765 int i, start;
766 int SOL;
767 size_t len = 0;
768 char *EOL;
769
770 if(strncasecmp(buf, "HTTP", 4))
771 {
772 res->http_version = HTTP_VERSION_09;
773 res->headers = NULL;
774 res->num_headers = 0;
775 res->status_code = HTTPInvalid;
776 res->data = (unsigned char *) NewString(buf);
777
778 return res;
779 }
780 sscanf(buf, "HTTP/1.%d %d", &ver, &code);
781
782 EOL = strstr(buf, "\r\n");
783 start = EOL - buf + 2;
784 #ifdef DEBUG
785 if(http_debug)
786 fprintf(stderr, "\nHTTP 1.%d return code = %d\n", ver, code);
787 #endif
788
789 if(ver == 0)
790 res->http_version = HTTP_VERSION_10;
791 else
792 res->http_version = HTTP_VERSION_11;
793
794 res->status_code = (HTTPRequestReturn) code;
795
796 for (i = start, SOL = start; i < strlen(buf); i++)
797 {
798 if(buf[i] == '\r' || buf[i] == '\n')
799 {
800 if(buf[i] == '\r' && buf[i + 1] && buf[i + 1] == '\n')
801 {
802 char *colon = strchr(&buf[SOL], ':');
803
804 if(colon == NULL)
805 break;
806
807 if(res->headers == NULL)
808 res->headers =
809 (HTTPNamedValues *) malloc(sizeof(HTTPNamedValues));
810 else
811 res->headers = realloc((void *)res->headers,
812 sizeof(HTTPNamedValues) * (res->num_headers + 1));
813
814 res->headers[res->num_headers].name = NewNString(&buf[SOL],
815 colon - &buf[SOL]);
816 res->headers[res->num_headers].value = NewNString(colon + 2,
817 &buf[i] - colon - 2);
818 if(!strcasecmp(res->headers[res->num_headers].name,
819 "Content-length"))
820 len = atoi(res->headers[res->num_headers].value);
821
822 res->num_headers++;
823
824 if(buf[i + 2] && buf[i + 2] == '\r' &&
825 buf[i + 3] && buf[i + 3] == '\n')
826 {
827 if(len == 0)
828 len = strlen(&buf[i + 4]);
829
830 res->data = calloc(len + 1, sizeof(char));
831 memcpy((void *) res->data, &buf[i + 4], len);
832 res->data[len] = '\0';
833
834 goto finish;
835 }
836 i++;
837 }
838 SOL = i + 1;
839 }
840 }
841 finish:
842
843 return(res);
844 }
845
/*****
* Fast lookup table to determine which characters should be left alone and
* which should be encoded. Indexed by (character code - 32), so it covers
* codes 32 up to and including 127; a 1 means the character is copied as is.
* '+' is not allowed through: appendHex() uses it to encode a space, so a
* literal plus sign has to be sent as %2B to survive decoding.
* const qualifier should put it in the text segment
*****/
static const unsigned char allow[97] =
{/* 0 1 2 3 4 5 6 7 8 9 A B C D E F */
	0,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,	/* 2x  space!"#$%&'()*+,-./ */
	1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,	/* 3x  0123456789:;<=>?     */
	1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,	/* 4x  @ABCDEFGHIJKLMNO     */
	1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,1,	/* 5x  PQRSTUVWXYZ[\]^_     */
	0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,	/* 6x  `abcdefghijklmno     */
	1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0		/* 7x  pqrstuvwxyz{|}~ DEL  */
};

static const char *hex = "0123456789ABCDEF";

/*****
* Name:			appendHex
* Return Type:	char*
* Description:	appends src to dest, translating certain chars to their
*				hexadecimal representation as we do: a space becomes '+',
*				characters marked in the allow table are copied, everything
*				else becomes %XX (uppercase hex digits).
* In:
*	dest:		destination buffer. This buffer must be large enough to
*				contain the expanded source text (at most three bytes per
*				source byte);
*	src:		NUL-terminated text to be appended;
* Returns:
*	a ptr pointing to the next available position in dest. No terminating
*	NUL is written.
* Note:
*	added 97/10/21 by kdh and based on HTEscape() from libwww
*****/
static char*
appendHex(char *dest, char *src)
{
	register char *ptr, *chPtr;

	for(ptr = dest, chPtr = src; *chPtr != '\0'; chPtr++)
	{
		/* no negative values */
		int c = (int)((unsigned char)(*chPtr));

		if(c == ' ')		/* bloody exception */
			*ptr++ = '+';
		else if(c >= 32 && c <= 127 && allow[c - 32])
			*ptr++ = *chPtr;	/* acceptable char */
		else
		{
			*ptr++ = '%';	/* hex is following */
			*ptr++ = hex[c >> 4];
			*ptr++ = hex[c & 15];
		}
	}
	return(ptr);
}
899
900 /*****
901 * Name: encodeFormData
902 * Return Type: char*
903 * Description: creates a fully valid QUERY_STRING from the given name/value
904 * pairs.
905 * In:
906 * formdata: array of name/value pairs from a form submit. Encoding
907 * terminates when a NULL name has been detected.
908 * Returns:
909 * an allocated and hex-encoded QUERY_STRING.
910 * Note:
911 * - this function is based on the corresponding one from Chimera (rmo)
912 * - 97/10/21, heavily modified by kdh
913 *****/
914 static char*
encodeFormData(HTTPNamedValues * formdata)915 encodeFormData(HTTPNamedValues * formdata)
916 {
917 char *data, *chPtr;
918 int nvalues, i, len = 0;
919
920 /*****
921 * First count how many bytes we have to allocate. Each entry gets two
922 * additional bytes: the equal sign and a spacer. Each entry is also
923 * multiplied by three to allow full expansion.
924 * Count no of entries as well.
925 *****/
926 for(i = 0; formdata[i].name != NULL; i++)
927 {
928 if(formdata[i].name)
929 {
930 len += strlen(formdata[i].name) * 3;
931 if(formdata[i].value)
932 len += strlen(formdata[i].value) * 3;
933 len += 2; /* equal sign and spacer */
934 }
935 }
936 nvalues = i;
937 /* allocate & reset query string */
938 data = (char*)calloc(len + 1, sizeof(char));
939
940 /*****
941 * Now compose query string: append & convert to hex at the same time.
942 * We can safely do this as we've already allocated room for hexadecimal
943 * expansion of the *entire* query string.
944 * Room for optimisation: appendHex could be done inline
945 *****/
946 chPtr = data;
947 for(i = 0; i < nvalues; i++)
948 {
949 if(formdata[i].name)
950 {
951 chPtr = appendHex(chPtr, formdata[i].name);
952 *chPtr++ = '=';
953 if(formdata[i].value)
954 chPtr = appendHex(chPtr, formdata[i].value);
955 *chPtr++ = '&'; /* spacer */
956 }
957 }
958 /* mask off last & */
959 data[strlen(data)-1] = '\0';
960
961 #ifdef DEBUG
962 if(http_debug)
963 {
964 fprintf(stderr, "encodeFormData, computed string length: %i, "
965 "used: %i\n", len+1, strlen(data));
966 fprintf(stderr, "return value: %s\n", data);
967 }
968 #endif
969
970 /*****
971 * Could resize data to fit exactly the no of bytes used, but I wonder
972 * if it's worth it as this data has a pretty short lifetime --- kdh
973 *****/
974
975 return(data);
976 }
977
978 /*
979 * stolen from the chimera browser --- rmo.
980 *
981 */
982 #define isspace8(a) ((a) < 33 && (a) > 0)
983
984 void
parseURL(char * url,long parse,char ** scheme,char ** username,char ** password,char ** hostname,int * port,char ** filename)985 parseURL(char *url, long parse, char **scheme, char **username,
986 char **password, char **hostname, int *port, char **filename)
987 {
988 char *start;
989 char *colon, *slash, *fslash;
990 char *at; /* username/password @ */
991 char *ucolon; /* username colon */
992 char *pcolon; /* port number colon */
993 struct _part {
994 int start;
995 int len;
996 } sp, up, pwp, hp, pp, fp;
997
998 sp.start = 0;
999 sp.len = 0;
1000 up.start = 0;
1001 up.len = 0;
1002 pwp.start = 0;
1003 pwp.len = 0;
1004 hp.start = 0;
1005 hp.len = 0;
1006 pp.start = 0;
1007 pp.len = 0;
1008 fp.start = 0;
1009 fp.len = 0;
1010
1011 if(url == NULL)
1012 return;
1013
1014 /* skip leading white-space (if any) */
1015 for (start = url; isspace8(*start); start++);
1016
1017 /* Look for indication of a scheme. */
1018 colon = strchr(start, ':');
1019
1020 /*
1021 * Search for characters that indicate the beginning of the
1022 * path/params/query/fragment part.
1023 */
1024 slash = strchr(start, '/');
1025 if(slash == NULL)
1026 slash = strchr(start, ';');
1027 if(slash == NULL)
1028 slash = strchr(start, '?');
1029 if(slash == NULL)
1030 slash = strchr(start, '#');
1031
1032 /*
1033 * Check to see if there is a scheme. There is a scheme only if
1034 * all other separators appear after the colon.
1035 */
1036 if(colon != NULL && (slash == NULL || colon < slash))
1037 {
1038 sp.start = 0;
1039 sp.len = colon - start;
1040 }
1041 /*
1042 * If there is a slash then sort out the hostname and filename.
1043 * If there is no slash then there is no hostname but there is a
1044 * filename.
1045 */
1046 if(slash != NULL)
1047 {
1048 /* Check for leading //. If its there then there is a host string. */
1049 if((*(slash + 1) == '/') && ((colon == NULL && slash == start) ||
1050 (colon != NULL && slash == colon + 1)))
1051 {
1052 /* Check for filename at end of host string */
1053 slash += 2;
1054 if((fslash = strchr(slash, '/')) != NULL)
1055 {
1056 hp.start = slash - start;;
1057 hp.len = fslash - slash;
1058 fp.start = fslash - start;
1059 fp.len = strlen(fslash);
1060 }
1061 else
1062 { /* there is no filename */
1063 hp.start = slash - start;
1064 hp.len = strlen(slash);
1065 }
1066 }
1067 else
1068 {
1069 /*
1070 * the rest is a filename because there is no // or it appears
1071 * after other characters
1072 */
1073 if(colon != NULL && colon < slash)
1074 {
1075 fp.start = colon + 1 - start;
1076 fp.len = strlen(colon + 1);
1077 }
1078 else
1079 {
1080 fp.start = slash - start;
1081 fp.len = strlen(slash);
1082 }
1083 }
1084 }
1085 else
1086 {
1087 /* No slashes at all so the rest must be a filename */
1088 if(colon == NULL)
1089 {
1090 fp.start = 0;
1091 fp.len = strlen(start);
1092 }
1093 else
1094 {
1095 fp.start = colon - start + 1;
1096 fp.len = strlen(colon + 1);
1097 }
1098 }
1099
1100 /*
1101 * If there is a host string then divide it into
1102 * username:password@hostname:port as needed.
1103 */
1104 if(hp.len != 0)
1105 {
1106 /* Look for username:password. */
1107 if((at = strchr(&url[hp.start], '@')) != NULL)
1108 {
1109
1110 up.start = hp.start;
1111 up.len = at - start - hp.start;
1112
1113 hp.start = at + 1 - start;
1114
1115 if((ucolon = strchr(&url[up.start], ':')) != NULL)
1116 {
1117 if(ucolon - start < hp.start)
1118 {
1119 pwp.start = ucolon + 1 - start;
1120 pwp.len = hp.start - pwp.start;
1121 }
1122 }
1123 }
1124 /* Grab the port. */
1125 if((pcolon = strchr(&url[hp.start], ':')) != NULL &&
1126 pcolon < ( &url[hp.start + hp.len]) )
1127 {
1128 pp.start = pcolon + 1 - start;
1129 pp.len = fp.start - pp.start;
1130 hp.len -= pp.len + 1;
1131 }
1132 }
1133
1134 /* now have all the fragments, make them into strings */
1135
1136 if(parse & PARSE_SCHEME)
1137 {
1138 if(sp.len > 0)
1139 *scheme = NewNString(&url[sp.start], sp.len);
1140 else
1141 *scheme = NULL;
1142 }
1143 if(parse & PARSE_USER)
1144 {
1145 if(up.len > 0)
1146 *username = NewNString(&url[up.start], up.len);
1147 else
1148 *username = NULL;
1149 }
1150 if(parse & PARSE_PASSWORD)
1151 {
1152
1153 if(pwp.len > 0)
1154 *password = NewNString(&url[pwp.start], pwp.len);
1155 else
1156 *password = NULL;
1157 }
1158 if(parse & PARSE_HOSTNAME)
1159 {
1160 if(hp.len > 0)
1161 *hostname = NewNString(&url[hp.start], hp.len);
1162 else
1163 *hostname = NULL;
1164 }
1165 if(parse & PARSE_PORT)
1166 {
1167 if(pp.len > 0)
1168 {
1169 char *tmp = NewNString(&url[pp.start], pp.len);
1170 *port = atoi(tmp);
1171 free(tmp);
1172 }
1173 else
1174 *port = 80;
1175 }
1176 if(parse & PARSE_FILENAME)
1177 {
1178 if(fp.len > 0)
1179 *filename = NewString(&url[fp.start]);
1180 else
1181 *filename = NewString("/");
1182 }
1183 return;
1184 }
1185
/*
* Tells whether an URL is absolute: returns 1 when its first four characters
* are "http" (in any case), 0 otherwise.
* This is brain dead, needs to be expanded to cover non http schemes -- rmo
*/
int
HTTPAbsoluteURL(char *url)
{
	return(strncasecmp(url, "http", 4) == 0 ? 1 : 0);
}
1196
1197 /* This is a very flakey routine and it needs a lot of work, it doesn't
1198 do compression of full paths, but it proved adequet for simple testing */
1199
1200 char *
HTTPFindAbsoluteURL(char * url,char * baseUrl)1201 HTTPFindAbsoluteURL(char *url, char *baseUrl)
1202 {
1203 char new_url[1024];
1204 char *tmpP;
1205
1206 char *u_scheme, *u_username, *u_password, *u_hostname, *u_filename;
1207 char *b_scheme, *b_username, *b_password, *b_hostname, *b_filename;
1208 int u_port, b_port;
1209
1210 if(baseUrl == NULL || *baseUrl == '\0')
1211 return (NewString(url));
1212
1213 parseURL(url, PARSE_URL, &u_scheme, &u_username, &u_password,
1214 &u_hostname, &u_port, &u_filename);
1215
1216 parseURL(baseUrl, PARSE_URL, &b_scheme, &b_username, &b_password,
1217 &b_hostname, &b_port, &b_filename);
1218
1219 if(u_scheme)
1220 sprintf(new_url, "%s://", u_scheme);
1221 else
1222 sprintf(new_url, "%s://", b_scheme);
1223
1224 if(u_hostname)
1225 strcat(new_url, u_hostname);
1226 else if(b_hostname)
1227 strcat(new_url, b_hostname);
1228 else
1229 strcat(new_url, "localhost");
1230
1231 if(u_filename && u_filename[0] == '/')
1232 {
1233 strcat(new_url, u_filename);
1234 }
1235 else if(u_filename && u_filename[0] == '~')
1236 {
1237 strcat(new_url, u_filename);
1238 strcat(new_url, "/");
1239 }
1240 else
1241 {
1242 if(b_filename == NULL || b_filename[0] != '/')
1243 printf("still to do\n");
1244 else
1245 {
1246 strcat(new_url, b_filename);
1247 tmpP = strrchr(new_url, '/');
1248 if(*tmpP++)
1249 {
1250 *tmpP = '\0';
1251 strcat(tmpP, u_filename);
1252 }
1253 else
1254 strcat(new_url, u_filename);
1255 }
1256 }
1257 freeURL(PARSE_URL, u_scheme, u_username, u_password, u_hostname,
1258 u_port, u_filename);
1259
1260 freeURL(PARSE_URL, b_scheme, b_username, b_password, b_hostname,
1261 b_port, b_filename);
1262
1263 return (NewString(new_url));
1264 }
1265
1266 void
freeURL(long parse,char * scheme,char * username,char * password,char * hostname,int port,char * filename)1267 freeURL(long parse, char *scheme, char *username, char *password,
1268 char *hostname, int port, char *filename)
1269 {
1270
1271 if((parse & PARSE_SCHEME) && scheme)
1272 free(scheme);
1273
1274 if((parse & PARSE_USER) && username)
1275 free(username);
1276
1277 if((parse & PARSE_PASSWORD) && password)
1278 free(password);
1279
1280 if((parse & PARSE_HOSTNAME) && hostname)
1281 free(hostname);
1282
1283 if((parse & PARSE_FILENAME) && filename)
1284 free(filename);
1285 }
1286
1287 void
HTTPError(char * msg,HTTPRequestReturn error)1288 HTTPError(char *msg, HTTPRequestReturn error)
1289 {
1290 fprintf(stderr, "%s: %s.\n", msg, HTTPErrorString(error));
1291 }
1292
1293 const char*
HTTPErrorString(HTTPRequestReturn error)1294 HTTPErrorString(HTTPRequestReturn error)
1295 {
1296 switch(error)
1297 {
1298 /* 0 and up (client messages) */
1299 case HTTPInvalid:
1300 return("Invalid request (client failure)");
1301 case HTTPBadProtocol:
1302 return("Invalid protocol requested (client failure)");
1303 case HTTPBadHost:
1304 return("Invalid hostname (client failure)");
1305 case HTTPBadURL:
1306 return("Invalid URL (client failure)");
1307 case HTTPBadLoadType:
1308 return("Invalid load type (client failure)");
1309 case HTTPMethodUnsupported:
1310 return("Unsupported method (client failure)");
1311 case HTTPNoSocket:
1312 return("Could not open socket (client failure)");
1313 case HTTPNoConnection:
1314 return("Not connected (client failure)");
1315 case HTTPBadHttp10:
1316 return("Invalid HTTP/1.0 request (client failure)");
1317 case HTTPCannotCreateFile:
1318 return("Could not create file (client failure)");
1319 case HTTPConnectTimeout:
1320 return("Could not connect: timed out (client failure)");
1321 case HTTPTimeout:
1322 return("Connection timed out");
1323
1324 /* 100 and up (informative messages) */
1325 case HTTPContinue:
1326 return("Continue");
1327 case HTTPSwitchProtocols:
1328 return("Bad protocol, switch required");
1329
1330 /* 200 and up (request succeeded) */
1331 case HTTPSuccess:
1332 return("No error");
1333 case HTTPCreated:
1334 return("Document created");
1335 case HTTPAccepted:
1336 return("Request accepted");
1337 case HTTPNonAuthoritativeInfo:
1338 return("Non-authoritative information");
1339 case HTTPNoContent:
1340 return("Document is empty");
1341 case HTTPResetContent:
1342 return("Content has been reset");
1343 case HTTPPartialContent:
1344 return("Partial content");
1345
1346 /* 300 and up (non-fatal errors, retry possible) */
1347 case HTTPMultipleChoices:
1348 return("Request not unique, multiple choices possible");
1349 case HTTPPermMoved:
1350 return("Document has been permanently removed");
1351 case HTTPTempMoved:
1352 return("Document has been temporarely moved");
1353 case HTTPSeeOther:
1354 return("Site has move");
1355 case HTTPNotModified:
1356 return("Document not modified since last access");
1357 case HTTPUseProxy:
1358 return("Document only accessible through proxy");
1359
1360 /* 400 and up (fatal request errors) */
1361 case HTTPBadRequest:
1362 return("Invalid HTTP request");
1363 case HTTPUnauthorised:
1364 return("Client not authorized");
1365 case HTTPPaymentReq:
1366 return("Payment required");
1367 case HTTPForbidden:
1368 return("Access forbidden");
1369 case HTTPNotFound:
1370 return("Document not found");
1371 case HTTPMethodNotAllowed:
1372 return("Access method not allowed");
1373 case HTTPNotAcceptable:
1374 return("Unacceptable request");
1375 case HTTPProxyAuthReq:
1376 return("Proxy authorization required");
1377 case HTTPRequestTimeOut:
1378 return("Timed out");
1379 case HTTPConflict:
1380 return("Conflict of interest");
1381 case HTTPGone:
1382 return("Document has moved");
1383 case HTTPLengthReq:
1384 return("Invalid request length");
1385 case HTTPPreCondFailed:
1386 return("Condition failed");
1387 case HTTPReqEntityTooBig:
1388 return("Request entity too large");
1389 case HTTPURITooBig:
1390 return("URI specification too big");
1391 case HTTPUnsupportedMediaType:
1392 return("Unsupported media type");
1393
1394 /* 500 and up (server errors) */
1395 case HTTPInternalServerError:
1396 return("Internal server error");
1397 case HTTPNotImplemented:
1398 return("Method not implemented");
1399 case HTTPBadGateway:
1400 return("Invalid gateway");
1401 case HTTPServiceUnavailable:
1402 return("Service unavailable");
1403 case HTTPGatewayTimeOut:
1404 return("Gateway timed out");
1405 case HTTPHTTPVersionNotSupported:
1406 return("Unsupported HTPP version");
1407
1408 default:
1409 return("unknown error");
1410 }
1411 }
1412