1 #ifndef lint
2 static char rcsId[]="$Header$";
3 #endif
4 /*****
5 * HTTP.c : A first attempt at a simple HTTP library.
6 *
7 * This file Version	$Revision$
8 *
9 * Creation date:		Tue Oct 21 01:41:31 GMT+0100 1997
10 * Last modification: 	$Date$
11 * By:					$Author$
12 * Current State:		$State$
13 *
14 * Author:				Richard Offer
15 *
16 * Copyright (C) 1994-1997 by Richard Offer <offer@sgi.com>
17 * All Rights Reserved
18 *
19 * This library is free software; you can redistribute it and/or
20 * modify it under the terms of the GNU Library General Public
21 * License as published by the Free Software Foundation; either
22 * version 2 of the License, or (at your option) any later version.
23 *
24 * This library is distributed in the hope that it will be useful,
25 * but WITHOUT ANY WARRANTY; without even the implied warranty of
26 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
27 * Library General Public License for more details.
28 *
29 * You should have received a copy of the GNU Library General Public
30 * License along with this library; if not, write to the Free
31 * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
32 *
33 * Note from the Author:
34 *
*	A first attempt at a simple HTTP library, mainly used as a test harness for
*	the forms work in XmHTML. It does lots of bad things and isn't a complete
*	implementation. I didn't use the W3C libwww because it's too big and doesn't
*	seem to work for POSTs --- rmo
39 *
40 *	The code is based on a quick read of the HTTP 1.0 rfc, with ideas for
41 *	implementation taken from the Chimera Browser.
42 *
43 *****/
44 /*****
45 * ChangeLog
46 * $Log$
47 * Revision 1.1  2011/06/30 16:10:37  rwcox
48 * Cadd
49 *
50 * Revision 1.1  1997/10/23 00:28:23  newt
51 * Initial Revision
52 *
53 *****/
54 #ifdef HAVE_CONFIG_H
55 #include "config.h"
56 #endif
57 
58 #include <stdio.h>
59 #include <stdlib.h>
60 #include <string.h>
61 #ifdef HAVE_STRINGS_H
62 #include <strings.h>
63 #endif
64 #include <unistd.h>
65 #include <ctype.h>
66 #include <errno.h>
67 #include <sys/time.h>
68 #ifdef HAVE_SYS_SELECT_H
69 #include <sys/select.h>		/* select() */
70 #endif
71 #include <sys/types.h>
72 #include <sys/socket.h>
73 #include <netdb.h>
74 #include <netinet/in.h>
75 #include <arpa/inet.h>
76 
77 #ifndef SO_RCVTIMEO
78 #include <setjmp.h>
79 #include <signal.h>
80 #endif
81 
82 #include <http/HTTPP.h>
83 
84 #ifdef DMALLOC
85 #include <dmalloc.h>
86 #endif /* DMALLOC */
87 
88 #ifdef NEED_SOCKS
89 /* This is _very_ firewall specific, this works for me (after much trial and
90  * error --- offer */
91 #include "socks.h"
92 
93 #define connect Rconnect
94 #endif /* NEED_SOCKS */
95 
96 /*** External Function Prototype Declarations ***/
97 
98 /*** Public Variable Declarations ***/
99 #ifdef DEBUG
100 int http_debug = 0;
101 #endif
102 
103 /*** Private Datatype Declarations ****/
104 
105 /*** Private Function Prototype Declarations ****/
106 
107 /* delete a no longer required response */
108 static void deleteResponse(HTTPResponse * res);
109 
110 /* create a new response */
111 static HTTPResponse *newResponse(char *buf);
112 
113 /*****
114 * hexify src and append to dest. Return value points to the next available
115 * position in dest.
116 *****/
117 static char *appendHex(char *dest, char *src);
118 
119 /* convert all name-value pairs to a valid QUERY_STRING format */
120 static char *encodeFormData(HTTPNamedValues * formdata);
121 
122 #ifndef SO_RCVTIMEO
123 static void connectTimeout(int signal);
124 #endif
125 
126 /*** Private Variable Declarations ***/
127 #ifndef SO_RCVTIMEO
128 static jmp_buf http_setjmp_buffer;
129 #endif
130 
131 #ifndef SO_RCVTIMEO
132 static void
connectTimeout(int signal)133 connectTimeout(int signal)
134 {
135 	if(signal == SIGALRM)
136 		longjmp(http_setjmp_buffer, 1);
137 }
138 #endif
139 
140 /* This is the main routine for sending a request and getting a response,
141  * everything else in this file is waffle */
142 void
loadHTTPURL(void * unused,HTTPRequest * request,HTTPCookieRequest * cookieReq)143 loadHTTPURL(void *unused, HTTPRequest * request, HTTPCookieRequest *cookieReq)
144 {
145 	struct hostent *server;
146 	struct sockaddr_in name;
147 	int sock;
148 	char *scheme = NULL, *username = NULL, *password = NULL;
149 	char *hostname = NULL, *filename = NULL;
150 	int port;
151 	char *buf = NULL;
152 	size_t offset = 0, bufsize = 0;
153 	HTTPResponse *res;
154 	ssize_t val;
155 	fd_set rfds;
156 	struct timeval tv;
157 	int retval, retry_count, nreads;
158 	char	*cookie = NULL ;
159 
160 	/* see if we have an URI */
161 	if(request->url == NULL)
162 	{
163 		request->ret = HTTPBadURL;
164 		return;
165 	}
166 
167 	/* verify request type */
168 	if(request->type != HTTPLoadToString &&
169 		request->type != HTTPLoadToFile)
170 	{
171 		request->ret = HTTPBadLoadType;
172 		return;
173 	}
174 
175 	/* resolve the url */
176 	parseURL(request->url, PARSE_URL, &scheme, &username, &password,
177 		&hostname, &port, &filename);
178 
179 re_issue_request:
180 	/* check protocol */
181 	if(scheme == NULL || strncasecmp(scheme, "http", 4))
182 	{
183 		/* free off the output from parseURL() */
184 		freeURL(PARSE_URL, scheme, username, password, hostname, port,
185 			filename);
186 		request->ret = HTTPBadProtocol;
187 		return;
188 	}
189 
190 #ifdef DEBUG
191 	if(http_debug)
192 		fprintf(stderr, "Lookin up host %s...\n", hostname);
193 #endif
194 
195 	/* see if we can resolve the host */
196 	if((server = gethostbyname(hostname)) == NULL)
197 	{
198 		freeURL(PARSE_URL, scheme, username, password, hostname, port,
199 			filename);
200 		request->ret = HTTPBadHost;
201 		return;
202 	}
203 
204 	/* we've got the host, open a socket */
205 	if((sock = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP)) < 0)
206 	{
207 		freeURL(PARSE_URL, scheme, username, password, hostname, port,
208 			filename);
209 		request->ret = HTTPNoSocket;
210 		return;
211 	}
212 #ifdef DEBUG
213 	if(http_debug)
214 		fprintf(stderr, "Found, connecting to %s (port %i)\n", hostname, port);
215 #endif
216 
217 	name.sin_family = AF_INET;
218 	name.sin_port = htons(port);
219 
220 #ifdef linux
221 	memcpy(&name.sin_addr, server->h_addr, server->h_length);
222 #else
223 	memcpy(&name.sin_addr.s_addr, server->h_addr, server->h_length);
224 #endif
225 
226 	/*****
227 	* Wouldn't the world be easy if each system knew SO_RCVTIMEO.
228 	* But this is not the case on at least linux, so we use a brute force
229 	* approach: alarm.
230 	*
231 	* Just in case your system can enable timeouts on sockets, here's a piece
232 	* of code that should work.
233 	*****/
234 #ifdef SO_RCVTIMEO
235 	/* set socket timeout */
236 	tv.tv_sec = request->timeout;
237 	tv.tv_usec = 0;
238 	if(setsockopt(sock, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(struct timeval)))
239 	{
240 		freeURL(PARSE_URL, scheme, username, password, hostname, port,
241 			filename);
242 		request->ret = HTTPNoSocket;
243 		close(sock);
244 		return;
245 	}
246 #else
247 	if(setjmp(http_setjmp_buffer))
248 	{
249 		freeURL(PARSE_URL, scheme, username, password, hostname, port,
250 			filename);
251 #ifdef DEBUG
252 		if(http_debug)
253 			fprintf(stderr, "connect() timed out\n");
254 #endif
255 		request->ret = HTTPConnectTimeout;
256 		signal(SIGALRM, SIG_DFL);
257 		close(sock);
258 		return;
259 	}
260 	signal(SIGALRM, connectTimeout);
261 	alarm((long)request->timeout);
262 #endif
263 
264 	if(connect(sock, (struct sockaddr*)&name, sizeof(name)) < 0)
265 	{
266 		freeURL(PARSE_URL, scheme, username, password, hostname, port,
267 			filename);
268 
269 		if(errno == EWOULDBLOCK)
270 		{
271 #ifdef DEBUG
272 			if(http_debug)
273 				fprintf(stderr, "connect() timed out\n");
274 #endif
275 			request->ret = HTTPConnectTimeout;
276 		}
277 		else
278 			request->ret = HTTPNoConnection;
279 		close(sock);
280 		return;
281 	}
282 #ifndef SO_RCVTIMEO
283 	/* remove connection timeout */
284 	signal(SIGALRM, SIG_DFL);
285 	alarm(0L);
286 #endif
287 
288 #ifdef DEBUG
289 	if(http_debug)
290 		fprintf(stderr, "sending request (%i)\n", request->method);
291 #endif
292 
293 	if( cookieReq != NULL && cookieReq->cookieList != NULL )
294 		cookie = makeCookie(cookieReq->cookieList);
295 #ifdef DEBUG
296 	if( http_debug )
297 		if( cookie )
298 			fprintf(stderr,"The server wants a cookie '%s'\n",cookie);
299 #endif /* _DEBUG */
300 
301 	switch(request->method)
302 	{
303 		case HTTPGET:
304 		{
305 			char *formStr = NULL, *reqStr = NULL;
306 
307 			if(request->form_data)
308 			{
309 				formStr = encodeFormData((HTTPNamedValues*)request->form_data);
310 			}
311 			reqStr = (char*)malloc(strlen(GET_METHOD) + strlen(filename) +
312 						(formStr ? strlen(formStr) + 1 : 0) +
313 						strlen(HTTPVERSIONHDR) + strlen(USER_AGENT) +
314 						(cookie ? strlen(cookie) + 1 : 0) +
315 						strlen(NEWLINE) + 3);
316 			sprintf(reqStr,
317 					"%s%s%s%s%s%s%s%s",
318 					GET_METHOD,
319 					filename,
320 					(formStr ? "?" : ""),	/* cgi stuff requires a ? */
321 					(formStr ? formStr : ""),
322 					HTTPVERSIONHDR,
323 					USER_AGENT,
324 					( cookie ? cookie : "" ),
325 					NEWLINE);
326 			val = write(sock, reqStr, strlen(reqStr) + 1);
327 			free(reqStr);
328 			if(formStr)
329 				free(formStr);
330 
331 		}
332 		break;
333 
334 		case HTTPPOST:
335 		{
336 			char *formStr = NULL, *fullReqStr;
337 
338 			char *reqStr = (char *) malloc(strlen(POST_METHOD) +
339 							strlen(filename) + strlen(HTTPVERSIONHDR) +
340 							(cookie ? strlen(cookie) + 1 : 0) +
341 							strlen(USER_AGENT) + strlen(NEWLINE) + 2);
342 			sprintf(reqStr, "%s%s%s%s%s",
343 					POST_METHOD,
344 					filename,
345 					HTTPVERSIONHDR,
346 					USER_AGENT,
347 					( cookie ? cookie : "" ) );
348 
349 			if(request->form_data)
350 			{
351 				formStr = encodeFormData((HTTPNamedValues*)request->form_data);
352 			}
353 			fullReqStr = calloc(strlen(reqStr) + strlen(CONTENT_LEN) +
354 							strlen(CONTENT_TYPE) + MAX_FORM_LEN +
355 							(formStr ? strlen(formStr) : 0 )+ 10 /* safety */,
356 							sizeof(char));
357 			sprintf(fullReqStr,
358 					"%s%s%d%s%s%s%s%s%s",
359 					reqStr,
360 					CONTENT_LEN,
361 					(int)(formStr ? strlen(formStr) : 0),
362 					NEWLINE,
363 					CONTENT_TYPE,
364 					NEWLINE,
365 					NEWLINE,
366 					formStr,
367 					NEWLINE);
368 			val = write(sock, fullReqStr, strlen(fullReqStr) + 1);
369 
370 			free(reqStr);
371 			if(formStr)
372 				free(formStr);
373 			free(fullReqStr);
374 		}
375 		break;
376 
377 		case HTTPHEAD:
378 		/* not sure about cookies for HEAD, supported ? */
379 		{
380 			char *reqStr = NULL;
381 
382 			reqStr = (char*)malloc(strlen(HEAD_METHOD) + strlen(filename) +
383 						strlen(HTTPVERSIONHDR) + strlen(USER_AGENT) +
384 						strlen(NEWLINE) + 3);
385 			sprintf(reqStr,
386 					"%s%s%s%s%s",
387 					HEAD_METHOD,
388 					filename,
389 					HTTPVERSIONHDR,
390 					USER_AGENT,
391 					NEWLINE);
392 			val = write(sock, reqStr, strlen(reqStr) + 1);
393 			free(reqStr);
394 		}
395 		break;
396 
397 		default:
398 			/* free off the output from parseURL() */
399 			freeURL(PARSE_URL, scheme, username, password, hostname, port,
400 				filename);
401 			close(sock);
402 			request->ret = HTTPMethodUnsupported;
403 			return;
404 	}
405 
406 	/* read output from remote HTTP server */
407 	offset = 0;
408 	val = 0;
409 	bufsize = CHUNKSIZE;
410 	buf = calloc(bufsize, sizeof(char));
411 	nreads = 0;
412 	retry_count = 0;
413 
414 #ifdef DEBUG
415 	if(http_debug)
416 		fprintf(stderr, "awaiting input\n");
417 #endif
418 
419 	/* watch socket to see when it has input */
420 	while(1)
421 	{
422 		/* no of bytes read from socket */
423 		val = 0;
424 
425 		FD_ZERO(&rfds);
426 		FD_SET(sock, &rfds);
427 
428 		/* wait up to the given no of seconds */
429 		tv.tv_sec = request->timeout;
430 		tv.tv_usec = 0;
431 
432 		retval = select(sock+1, &rfds, NULL, NULL, &tv);
433 
434 		if(retval)
435 		{
436 #ifdef DEBUG
437 			if(http_debug)
438 				fprintf(stderr, "reading socket.\n");
439 #endif
440 			val = read(sock, buf + offset, bufsize - offset);
441 			if(val <= 0)	/* error or end of input */
442 				break;
443 
444 #ifdef DEBUG
445 			if(http_debug)
446 				fprintf(stderr, "read %i bytes, offset: %i)\n", val,
447 					offset);
448 #endif
449 			/* keep room for at least CHUNKSIZE bytes */
450 			if(bufsize - (offset + val) < CHUNKSIZE)
451 			{
452 				bufsize += CHUNKSIZE;
453 				buf = realloc(buf, bufsize);
454 			}
455 			offset += val;
456 			buf[offset] = '\0';		/* NULL terminate */
457 		}
458 		else
459 		{
460 #ifdef DEBUG
461 			if(http_debug)
462 				fprintf(stderr, "timed out after %i seconds.\n",
463 					request->timeout);
464 #endif
465 			/*
466 			* abort if we're timed out, have reached the maximum retry
467 			* times and no input was received.
468 			*/
469 			if(retry_count == request->retry && offset == 0)
470 			{
471 				close(sock);
472 				free(buf);
473 				request->ret = HTTPTimeout;
474 				return;
475 			}
476 			/* break out when we have an offset and this read timed out */
477 			else if(offset && val <= 0)
478 				break;
479 			else
480 			{
481 				/* read timed out before any input was received */
482 				retry_count++;
483 			}
484 #ifdef DEBUG
485 			if(http_debug)
486 				fprintf(stderr, "retrying for the %ith time.\n", retry_count);
487 #endif
488 		}
489 	}
490 
491 	/* now parse the read message for headers */
492 	res = newResponse(buf);
493 	free(buf);
494 
495 	/* set appropriate return code */
496 	if(val < 0)
497 		request->ret = HTTPPartialContent;
498 	else
499 		request->ret = res->status_code;
500 
501 #if defined(PRINT_HDRS) && defined(DEBUG)
502 	{
503 		int i;
504 		for(i = 0; i < res->num_headers; i++)
505 		{
506 			printf("hdr %s = %s\n", res->headers[i].name,
507 				res->headers[i].value);
508 		}
509 	}
510 #endif
511 
512 	/* valid return code? */
513 	if(request->ret > 199 && request->ret < 299)
514 	{
515 		/* get or post include data, which head does not contain */
516 		if(request->method != HTTPHEAD)
517 		{
518 			int i;
519 
520 			/* parse the headers for any cookies */
521 			for (i = 0; i < res->num_headers; i++ )
522 			{
523 				if(!strcasecmp(res->headers[i].name, "Set-Cookie"))
524 					setCookie(cookieReq,SetCookie,res->headers[i].value,
525 						hostname);
526 				else if(!strcasecmp(res->headers[i].name, "Set-Cookie2"))
527 					setCookie(cookieReq, SetCookie2, res->headers[i].value,
528 						hostname);
529 			}
530 
531 			/* store data in string (most likely this was a cgi request) */
532 			if(request->type == HTTPLoadToString)
533 			{
534 				size_t len = (res->data ? strlen((char *) res->data) : 0);
535 				for (i = 0; i < res->num_headers; i++)
536 				{
537 					if(!strcasecmp(res->headers[i].name, "Content-length"))
538 						len = atoi(res->headers[i].value);
539 				}
540 				request->out_data = calloc(len + 1, sizeof(char));
541 				memcpy((void *) request->out_data, res->data, len);
542 				request->out_data[len] = '\0';
543 				request->length = len;
544 			}
545 			else if(request->type == HTTPLoadToFile)
546 			{
547 				/* this was a request for a remote file. Save it */
548 				FILE *fp;
549 
550 				if((fp = fopen((char *) request->in_data, "w")) == NULL)
551 				{
552 					request->ret = HTTPCannotCreateFile;
553 				}
554 				else
555 				{
556 					int i;
557 					size_t len = (res->data ? strlen((char *) res->data) : 0);
558 					size_t written;
559 					for (i = 0; i < res->num_headers; i++)
560 					{
561 						if(!strcasecmp(res->headers[i].name, "Content-length"))
562 							len = atoi(res->headers[i].value);
563 					}
564 					/* flush data */
565 					written = fwrite(res->data, sizeof(char), len, fp);
566 					fflush(fp);
567 					fclose(fp);
568 				}
569 			}
570 		}
571 		else
572 		{
573 			/* store data in string (most likely this was a cgi request) */
574 			if(request->type == HTTPLoadToString)
575 			{
576 				/*****
577 				* Transfer header array from the result structure to the
578 				* request.
579 				*****/
580 				request->headers = res->headers;
581 				request->num_headers = res->num_headers;
582 				res->headers = NULL;
583 				res->num_headers = 0;
584 			}
585 			else if(request->type == HTTPLoadToFile)
586 			{
587 				/* this was a request for a remote file. Save it */
588 				FILE *fp;
589 
590 				if((fp = fopen((char *) request->in_data, "w")) == NULL)
591 				{
592 					request->ret = HTTPCannotCreateFile;
593 				}
594 				else
595 				{
596 					int i;
597 					for (i = 0; i < res->num_headers; i++)
598 					{
599 						fprintf(fp, "%s = %s\n", res->headers[i].name,
600 							res->headers[i].value);
601 					}
602 					/* flush data */
603 					fflush(fp);
604 					fclose(fp);
605 				}
606 			}
607 		}
608 	}
609 	else
610 	{
611 		/*****
612 		* if the URL has moved (_or_ the user left off a trailing '/' from a
613 		* directory request), then look in the Location: header for the
614 		* correct URL and re-issue the request	--- offer dec 97
615 		*****/
616 		if(request->ret == 301 || request->ret == 302 )
617 		{
618 			int i;
619 			for(i=0; i<  res->num_headers; i++)
620 			{
621 				if(!strcasecmp(res->headers[i].name, "location"))
622 				{
623 					freeURL(PARSE_URL, scheme, username, password, hostname,
624 						port, filename);
625 
626 					parseURL(res->headers[i].value, PARSE_URL, &scheme,
627 						&username, &password, &hostname, &port, &filename);
628 					free(request->url);
629 					/*****
630 					* Update the URL that was requested to point to the
631 					* correct one
632 					*****/
633 					request->url = NewString(res->headers[i].value);
634 					goto re_issue_request;
635 
636 				}
637 			}
638 		}
639 	}
640 	deleteResponse(res);
641 
642 	/* free off the output from parseURL() */
643 	freeURL(PARSE_URL, scheme, username, password, hostname, port, filename);
644 
645 	/* all done */
646 	close(sock);
647 }
648 
649 void
deleteHTTPRequest(HTTPRequest * req)650 deleteHTTPRequest(HTTPRequest * req)
651 {
652 	int i;
653 
654 	if(req->in_data)
655 		free(req->in_data);
656 
657 	if(req->form_data)
658 	{
659 		i = 0;
660 		while (req->form_data[i].name != NULL)
661 		{
662 			if(req->form_data[i].name)
663 				free(req->form_data[i].name);
664 			if(req->form_data[i].value)
665 				free(req->form_data[i].value);
666 			i++;
667 		}
668 		free(req->form_data);
669 	}
670 
671 	for(i = 0; i < req->num_headers; i++)
672 	{
673 		if(req->headers[i].name)
674 			free(req->headers[i].name);
675 
676 		if(req->headers[i].value)
677 			free(req->headers[i].value);
678 	}
679 	if(req->headers)
680 		free(req->headers);
681 
682 	if(req->out_data)
683 		free(req->out_data);
684 	if(req->url)
685 		free(req->url);
686 
687 	free(req);
688 
689 }
690 
691 HTTPRequest *
newHTTPRequest(void)692 newHTTPRequest(void)
693 {
694 	HTTPRequest *new_r = (HTTPRequest *) calloc(1, sizeof(HTTPRequest));
695 
696 	new_r->type = HTTPLoadToString;
697 	new_r->in_data = NULL;
698 	new_r->form_data = NULL;
699 	new_r->out_data = NULL;
700 	new_r->method = HTTPGET;
701 	new_r->url = NULL;
702 	new_r->ret = HTTPInvalid;
703 	new_r->timeout = DEFAULT_TIMEOUT;
704 	new_r->retry   = DEFAULT_RETRY;
705 	new_r->headers = NULL;
706 	new_r->num_headers = 0;
707 
708 	return(new_r);
709 }
710 
711 static void
deleteResponse(HTTPResponse * res)712 deleteResponse(HTTPResponse * res)
713 {
714 	int i;
715 
716 	if(res->data)
717 		free(res->data);
718 
719 	for(i = 0; i < res->num_headers; i++)
720 	{
721 		if(res->headers[i].name)
722 			free(res->headers[i].name);
723 
724 		if(res->headers[i].value)
725 			free(res->headers[i].value);
726 	}
727 	if(res->headers)
728 		free(res->headers);
729 
730 	free(res);
731 
732 }
733 
/* numeric value of a hexadecimal digit; c must satisfy isxdigit() */
static int
unescapeHexValue(int c)
{
	if(c >= '0' && c <= '9')
		return(c - '0');
	/* clearing bit 0x20 folds 'a'..'f' onto 'A'..'F' */
	return((c & 0xdf) - 'A' + 10);
}

/*****
* Name:			HTTPUnescapeResponse
* Return Type: 	void
* Description: 	unescape HTTP escaped chars (%XX, upper or lower case hex).
*				Replacement is done inline; the result is never longer than
*				the input.
* In:
*	buf:		NUL terminated text to unescape, may be NULL.
* Returns:
*	nothing.
* Note:
*	a '%' that is not followed by two hexadecimal digits (including a '%' at
*	the very end of the text) is copied unchanged, so the scan can never run
*	past the terminating NUL.
*****/
void
HTTPUnescapeResponse(char *buf)
{
	char *in, *out;

	if(buf == NULL)
		return;

	for(in = out = buf; *in != '\0'; in++)
	{
		/* isxdigit() fails on the NUL, so in[2] is only read when valid */
		if(in[0] == '%' && isxdigit((unsigned char)in[1]) &&
			isxdigit((unsigned char)in[2]))
		{
			*out++ = (char)((unescapeHexValue(in[1]) << 4) |
						unescapeHexValue(in[2]));
			in += 2;
		}
		else
			*out++ = *in;
	}
	*out = '\0';
}
758 
759 static HTTPResponse *
newResponse(char * buf)760 newResponse(char *buf)
761 {
762 
763 	HTTPResponse *res = (HTTPResponse *) calloc(1, sizeof(HTTPResponse));
764 	int ver, code;
765 	int i, start;
766 	int SOL;
767 	size_t len = 0;
768 	char *EOL;
769 
770 	if(strncasecmp(buf, "HTTP", 4))
771 	{
772 		res->http_version = HTTP_VERSION_09;
773 		res->headers = NULL;
774 		res->num_headers = 0;
775 		res->status_code = HTTPInvalid;
776 		res->data = (unsigned char *) NewString(buf);
777 
778 		return res;
779 	}
780 	sscanf(buf, "HTTP/1.%d %d", &ver, &code);
781 
782 	EOL = strstr(buf, "\r\n");
783 	start = EOL - buf + 2;
784 #ifdef DEBUG
785 	if(http_debug)
786 		fprintf(stderr, "\nHTTP 1.%d return code = %d\n", ver, code);
787 #endif
788 
789 	if(ver == 0)
790 		res->http_version = HTTP_VERSION_10;
791 	else
792 		res->http_version = HTTP_VERSION_11;
793 
794 	res->status_code = (HTTPRequestReturn) code;
795 
796 	for (i = start, SOL = start; i < strlen(buf); i++)
797 	{
798 		if(buf[i] == '\r' || buf[i] == '\n')
799 		{
800 			if(buf[i] == '\r' && buf[i + 1] && buf[i + 1] == '\n')
801 			{
802 				char *colon = strchr(&buf[SOL], ':');
803 
804 				if(colon == NULL)
805 					break;
806 
807 				if(res->headers == NULL)
808 					res->headers =
809 						(HTTPNamedValues *) malloc(sizeof(HTTPNamedValues));
810 				else
811 					res->headers = realloc((void *)res->headers,
812 						sizeof(HTTPNamedValues) * (res->num_headers + 1));
813 
814 				res->headers[res->num_headers].name = NewNString(&buf[SOL],
815 						colon - &buf[SOL]);
816 				res->headers[res->num_headers].value = NewNString(colon + 2,
817 						&buf[i] - colon - 2);
818 				if(!strcasecmp(res->headers[res->num_headers].name,
819 					"Content-length"))
820 					len = atoi(res->headers[res->num_headers].value);
821 
822 				res->num_headers++;
823 
824 				if(buf[i + 2] && buf[i + 2] == '\r' &&
825 					buf[i + 3] && buf[i + 3] == '\n')
826 				{
827 					if(len == 0)
828 						len = strlen(&buf[i + 4]);
829 
830 					res->data = calloc(len + 1, sizeof(char));
831 					memcpy((void *) res->data, &buf[i + 4], len);
832 					res->data[len] = '\0';
833 
834 					goto finish;
835 				}
836 				i++;
837 			}
838 			SOL = i + 1;
839 		}
840 	}
841   finish:
842 
843 	return(res);
844 }
845 
/*****
* Fast lookup table to determine which characters should be left alone and
* which should be encoded. Much faster than the Chimera implementation -- kdh
* const qualifier should put it in the text segment
*
* The table is indexed by (character - 32). The plus sign is NOT allowed:
* appendHex() uses '+' to represent a space, so a literal plus has to be sent
* as %2B or the receiver would decode it as a space.
*****/
static const unsigned char allow[97] =
{/* 0 1 2 3 4 5 6 7 8 9 A B C D E F */
	0,0,0,0,0,0,0,0,0,0,1,0,0,1,1,1,	/* 2x   !"#$%&'()*+,-./  */
	1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,	/* 3x  0123456789:;<=>?  */
	1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,	/* 4x  @ABCDEFGHIJKLMNO  */
	1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,1,	/* 5X  PQRSTUVWXYZ[\]^_  */
	0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,	/* 6x  `abcdefghijklmno  */
	1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0 	/* 7X  pqrstuvwxyz{\}~  DEL */
};

static const char *hex = "0123456789ABCDEF";

/*****
* Name:			appendHex
* Return Type: 	char*
* Description: 	appends src to dest, translating certain chars to their
*				hexadecimal representation as we do: a space becomes '+',
*				characters flagged in the allow table are copied, everything
*				else becomes %XX;
* In:
*	dest:		destination buffer. This buffer must be large enough to contain
*				the expanded source text (at most three times its length);
*	src:		text to be appended;
* Returns:
*	a ptr pointing to the next available position in dest. No terminating
*	NUL is written.
* Note:
*	added 97/10/21 by kdh and based on HTEscape() from libwww
*****/
static char*
appendHex(char *dest, char *src)
{
	register char *ptr, *chPtr;

	for(ptr = dest, chPtr = src; *chPtr!= '\0'; chPtr++)
	{
		/* no negative values */
		int c = (int)((unsigned char)(*chPtr));
		if(*chPtr == ' ')	/* bloody exception */
			*ptr++ = '+';
		else if(c >= 32 && c <= 127 && allow[c-32])
			*ptr++ = *chPtr; /* acceptable char */
		else
		{
			*ptr++ = '%';	/* hex is following */
			*ptr++ = hex[c >> 4];
			*ptr++ = hex[c & 15];
		}
	}
	return(ptr);
}
899 
900 /*****
901 * Name: 		encodeFormData
902 * Return Type: 	char*
903 * Description: 	creates a fully valid QUERY_STRING from the given name/value
904 *				pairs.
905 * In:
906 *	formdata:	array of name/value pairs from a form submit. Encoding
907 *				terminates when a NULL name has been detected.
908 * Returns:
909 *	an allocated and hex-encoded QUERY_STRING.
910 * Note:
911 *	- this function is based on the corresponding one from Chimera (rmo)
912 *	- 97/10/21, heavily modified by kdh
913 *****/
914 static char*
encodeFormData(HTTPNamedValues * formdata)915 encodeFormData(HTTPNamedValues * formdata)
916 {
917 	char *data, *chPtr;
918 	int nvalues, i, len = 0;
919 
920 	/*****
921 	* First count how many bytes we have to allocate. Each entry gets two
922 	* additional bytes: the equal sign and a spacer. Each entry is also
923 	* multiplied by three to allow full expansion.
924 	* Count no of entries as well.
925 	*****/
926 	for(i = 0; formdata[i].name != NULL; i++)
927 	{
928 		if(formdata[i].name)
929 		{
930 			len += strlen(formdata[i].name) * 3;
931 			if(formdata[i].value)
932 				len += strlen(formdata[i].value) * 3;
933 			len += 2;	/* equal sign and spacer */
934 		}
935 	}
936 	nvalues = i;
937 	/* allocate & reset query string */
938 	data = (char*)calloc(len + 1, sizeof(char));
939 
940 	/*****
941 	* Now compose query string: append & convert to hex at the same time.
942 	* We can safely do this as we've already allocated room for hexadecimal
943 	* expansion of the *entire* query string.
944 	* Room for optimisation: appendHex could be done inline
945 	*****/
946 	chPtr = data;
947 	for(i = 0; i < nvalues; i++)
948 	{
949 		if(formdata[i].name)
950 		{
951 			chPtr = appendHex(chPtr, formdata[i].name);
952 			*chPtr++ = '=';
953 			if(formdata[i].value)
954 				chPtr = appendHex(chPtr, formdata[i].value);
955 			*chPtr++ = '&';	/* spacer */
956 		}
957 	}
958 	/* mask off last & */
959 	data[strlen(data)-1] = '\0';
960 
961 #ifdef DEBUG
962 	if(http_debug)
963 	{
964 		fprintf(stderr, "encodeFormData, computed string length: %i, "
965 			"used: %i\n", len+1, strlen(data));
966 		fprintf(stderr, "return value: %s\n", data);
967 	}
968 #endif
969 
970 	/*****
971 	* Could resize data to fit exactly the no of bytes used, but I wonder
972 	* if it's worth it as this data has a pretty short lifetime --- kdh
973 	*****/
974 
975 	return(data);
976 }
977 
978 /*
979  * stolen from the chimera browser --- rmo.
980  *
981  */
982 #define isspace8(a) ((a) < 33 && (a) > 0)
983 
984 void
parseURL(char * url,long parse,char ** scheme,char ** username,char ** password,char ** hostname,int * port,char ** filename)985 parseURL(char *url, long parse, char **scheme, char **username,
986 	char **password, char **hostname, int *port, char **filename)
987 {
988 	char *start;
989 	char *colon, *slash, *fslash;
990 	char *at;					/* username/password @ */
991 	char *ucolon;				/* username colon */
992 	char *pcolon;				/* port number colon */
993 	struct _part {
994 		int start;
995 		int len;
996 	} sp, up, pwp, hp, pp, fp;
997 
998 	sp.start = 0;
999 	sp.len = 0;
1000 	up.start = 0;
1001 	up.len = 0;
1002 	pwp.start = 0;
1003 	pwp.len = 0;
1004 	hp.start = 0;
1005 	hp.len = 0;
1006 	pp.start = 0;
1007 	pp.len = 0;
1008 	fp.start = 0;
1009 	fp.len = 0;
1010 
1011 	if(url == NULL)
1012 		return;
1013 
1014 	/* skip leading white-space (if any) */
1015 	for (start = url; isspace8(*start); start++);
1016 
1017 	/* Look for indication of a scheme. */
1018 	colon = strchr(start, ':');
1019 
1020 	/*
1021 	 * Search for characters that indicate the beginning of the
1022 	 * path/params/query/fragment part.
1023 	 */
1024 	slash = strchr(start, '/');
1025 	if(slash == NULL)
1026 		slash = strchr(start, ';');
1027 	if(slash == NULL)
1028 		slash = strchr(start, '?');
1029 	if(slash == NULL)
1030 		slash = strchr(start, '#');
1031 
1032 	/*
1033 	 * Check to see if there is a scheme.  There is a scheme only if
1034 	 * all other separators appear after the colon.
1035 	 */
1036 	if(colon != NULL && (slash == NULL || colon < slash))
1037 	{
1038 		sp.start = 0;
1039 		sp.len = colon - start;
1040 	}
1041 	/*
1042 	 * If there is a slash then sort out the hostname and filename.
1043 	 * If there is no slash then there is no hostname but there is a
1044 	 * filename.
1045 	 */
1046 	if(slash != NULL)
1047 	{
1048 		/* Check for leading //. If its there then there is a host string. */
1049 		if((*(slash + 1) == '/') && ((colon == NULL && slash == start) ||
1050 								(colon != NULL && slash == colon + 1)))
1051 		{
1052 			/* Check for filename at end of host string */
1053 			slash += 2;
1054 			if((fslash = strchr(slash, '/')) != NULL)
1055 			{
1056 				hp.start = slash - start;;
1057 				hp.len = fslash - slash;
1058 				fp.start = fslash - start;
1059 				fp.len = strlen(fslash);
1060 			}
1061 			else
1062 			{	/* there is no filename */
1063 				hp.start = slash - start;
1064 				hp.len = strlen(slash);
1065 			}
1066 		}
1067 		else
1068 		{
1069 			/*
1070 			 * the rest is a filename because there is no // or it appears
1071 			 * after other characters
1072 			 */
1073 			if(colon != NULL && colon < slash)
1074 			{
1075 				fp.start = colon + 1 - start;
1076 				fp.len = strlen(colon + 1);
1077 			}
1078 			else
1079 			{
1080 				fp.start = slash - start;
1081 				fp.len = strlen(slash);
1082 			}
1083 		}
1084 	}
1085 	else
1086 	{
1087 		/* No slashes at all so the rest must be a filename */
1088 		if(colon == NULL)
1089 		{
1090 			fp.start = 0;
1091 			fp.len = strlen(start);
1092 		}
1093 		else
1094 		{
1095 			fp.start = colon - start + 1;
1096 			fp.len = strlen(colon + 1);
1097 		}
1098 	}
1099 
1100 	/*
1101 	 * If there is a host string then divide it into
1102 	 * username:password@hostname:port as needed.
1103 	 */
1104 	if(hp.len != 0)
1105 	{
1106 		/* Look for username:password. */
1107 		if((at = strchr(&url[hp.start], '@')) != NULL)
1108 		{
1109 
1110 			up.start = hp.start;
1111 			up.len = at - start - hp.start;
1112 
1113 			hp.start = at + 1 - start;
1114 
1115 			if((ucolon = strchr(&url[up.start], ':')) != NULL)
1116 			{
1117 				if(ucolon - start < hp.start)
1118 				{
1119 					pwp.start = ucolon + 1 - start;
1120 					pwp.len = hp.start - pwp.start;
1121 				}
1122 			}
1123 		}
1124 		/* Grab the port. */
1125 		if((pcolon = strchr(&url[hp.start], ':')) != NULL &&
1126 			pcolon < ( &url[hp.start + hp.len]) )
1127 		{
1128 			pp.start = pcolon + 1 - start;
1129 			pp.len = fp.start - pp.start;
1130 			hp.len -= pp.len + 1;
1131 		}
1132 	}
1133 
1134 	/* now have all the fragments, make them into strings */
1135 
1136 	if(parse & PARSE_SCHEME)
1137 	{
1138 		if(sp.len > 0)
1139 			*scheme = NewNString(&url[sp.start], sp.len);
1140 		else
1141 			*scheme = NULL;
1142 	}
1143 	if(parse & PARSE_USER)
1144 	{
1145 		if(up.len > 0)
1146 			*username = NewNString(&url[up.start], up.len);
1147 		else
1148 			*username = NULL;
1149 	}
1150 	if(parse & PARSE_PASSWORD)
1151 	{
1152 
1153 		if(pwp.len > 0)
1154 			*password = NewNString(&url[pwp.start], pwp.len);
1155 		else
1156 			*password = NULL;
1157 	}
1158 	if(parse & PARSE_HOSTNAME)
1159 	{
1160 		if(hp.len > 0)
1161 			*hostname = NewNString(&url[hp.start], hp.len);
1162 		else
1163 			*hostname = NULL;
1164 	}
1165 	if(parse & PARSE_PORT)
1166 	{
1167 		if(pp.len > 0)
1168 		{
1169 			char *tmp = NewNString(&url[pp.start], pp.len);
1170 			*port = atoi(tmp);
1171 			free(tmp);
1172 		}
1173 		else
1174 			*port = 80;
1175 	}
1176 	if(parse & PARSE_FILENAME)
1177 	{
1178 		if(fp.len > 0)
1179 			*filename = NewString(&url[fp.start]);
1180 		else
1181 			*filename = NewString("/");
1182 	}
1183 	return;
1184 }
1185 
/*
 * Tell whether url is an absolute http or https URL.
 *
 * Returns 1 when url starts, case-insensitively, with "http:" or "https:"
 * and 0 otherwise (a NULL or empty url included).
 *
 * Requiring the ':' scheme delimiter keeps relative paths that merely
 * begin with the letters "http" (e.g. "httpd/index.html") from being
 * mistaken for absolute URLs.
 *
 * Still needs to be expanded to cover non http schemes -- rmo
 */
int
HTTPAbsoluteURL(char *url)
{
	const char *rest;

	if(url == NULL || strncasecmp(url, "http", 4) != 0)
		return(0);

	/* step over the optional 's' of "https" */
	rest = url + 4;
	if(*rest == 's' || *rest == 'S')
		rest++;

	/* only a scheme delimiter makes this an absolute URL */
	if(*rest == ':')
		return(1);
	else
		return(0);
}
1196 
1197 /* This is a very flakey routine and it needs a lot of work, it doesn't
1198    do compression of full paths, but it proved adequet for simple testing */
1199 
1200 char *
HTTPFindAbsoluteURL(char * url,char * baseUrl)1201 HTTPFindAbsoluteURL(char *url, char *baseUrl)
1202 {
1203 	char new_url[1024];
1204 	char *tmpP;
1205 
1206 	char *u_scheme, *u_username, *u_password, *u_hostname, *u_filename;
1207 	char *b_scheme, *b_username, *b_password, *b_hostname, *b_filename;
1208 	int u_port, b_port;
1209 
1210 	if(baseUrl == NULL || *baseUrl == '\0')
1211 		return (NewString(url));
1212 
1213 	parseURL(url, PARSE_URL, &u_scheme, &u_username, &u_password,
1214 		&u_hostname, &u_port, &u_filename);
1215 
1216 	parseURL(baseUrl, PARSE_URL, &b_scheme, &b_username, &b_password,
1217 		&b_hostname, &b_port, &b_filename);
1218 
1219 	if(u_scheme)
1220 		sprintf(new_url, "%s://", u_scheme);
1221 	else
1222 		sprintf(new_url, "%s://", b_scheme);
1223 
1224 	if(u_hostname)
1225 		strcat(new_url, u_hostname);
1226 	else if(b_hostname)
1227 		strcat(new_url, b_hostname);
1228 	else
1229 		strcat(new_url, "localhost");
1230 
1231 	if(u_filename && u_filename[0] == '/')
1232 	{
1233 		strcat(new_url, u_filename);
1234 	}
1235 	else if(u_filename && u_filename[0] == '~')
1236 	{
1237 		strcat(new_url, u_filename);
1238 		strcat(new_url, "/");
1239 	}
1240 	else
1241 	{
1242 		if(b_filename == NULL || b_filename[0] != '/')
1243 			printf("still to do\n");
1244 		else
1245 		{
1246 			strcat(new_url, b_filename);
1247 			tmpP = strrchr(new_url, '/');
1248 			if(*tmpP++)
1249 			{
1250 				*tmpP = '\0';
1251 				strcat(tmpP, u_filename);
1252 			}
1253 			else
1254 				strcat(new_url, u_filename);
1255 		}
1256 	}
1257 	freeURL(PARSE_URL, u_scheme, u_username, u_password, u_hostname,
1258 		u_port, u_filename);
1259 
1260 	freeURL(PARSE_URL, b_scheme, b_username, b_password, b_hostname,
1261 		b_port, b_filename);
1262 
1263 	return (NewString(new_url));
1264 }
1265 
1266 void
freeURL(long parse,char * scheme,char * username,char * password,char * hostname,int port,char * filename)1267 freeURL(long parse, char *scheme, char *username, char *password,
1268 	char *hostname, int port, char *filename)
1269 {
1270 
1271 	if((parse & PARSE_SCHEME) && scheme)
1272 		free(scheme);
1273 
1274 	if((parse & PARSE_USER) && username)
1275 		free(username);
1276 
1277 	if((parse & PARSE_PASSWORD) && password)
1278 		free(password);
1279 
1280 	if((parse & PARSE_HOSTNAME) && hostname)
1281 		free(hostname);
1282 
1283 	if((parse & PARSE_FILENAME) && filename)
1284 		free(filename);
1285 }
1286 
1287 void
HTTPError(char * msg,HTTPRequestReturn error)1288 HTTPError(char *msg, HTTPRequestReturn error)
1289 {
1290 	fprintf(stderr, "%s: %s.\n", msg, HTTPErrorString(error));
1291 }
1292 
1293 const char*
HTTPErrorString(HTTPRequestReturn error)1294 HTTPErrorString(HTTPRequestReturn error)
1295 {
1296 	switch(error)
1297 	{
1298 		/* 0 and up (client messages) */
1299 		case HTTPInvalid:
1300 			return("Invalid request (client failure)");
1301 		case HTTPBadProtocol:
1302 			return("Invalid protocol requested (client failure)");
1303 		case HTTPBadHost:
1304 			return("Invalid hostname (client failure)");
1305 		case HTTPBadURL:
1306 			return("Invalid URL (client failure)");
1307 		case HTTPBadLoadType:
1308 			return("Invalid load type (client failure)");
1309 		case HTTPMethodUnsupported:
1310 			return("Unsupported method (client failure)");
1311 		case HTTPNoSocket:
1312 			return("Could not open socket (client failure)");
1313 		case HTTPNoConnection:
1314 			return("Not connected (client failure)");
1315 		case HTTPBadHttp10:
1316 			return("Invalid HTTP/1.0 request (client failure)");
1317 		case HTTPCannotCreateFile:
1318 			return("Could not create file (client failure)");
1319 		case HTTPConnectTimeout:
1320 			return("Could not connect: timed out (client failure)");
1321 		case HTTPTimeout:
1322 			return("Connection timed out");
1323 
1324 		/* 100 and up (informative messages) */
1325 		case HTTPContinue:
1326 			return("Continue");
1327 		case HTTPSwitchProtocols:
1328 			return("Bad protocol, switch required");
1329 
1330 		/* 200 and up (request succeeded) */
1331 		case HTTPSuccess:
1332 			return("No error");
1333 		case HTTPCreated:
1334 			return("Document created");
1335 		case HTTPAccepted:
1336 			return("Request accepted");
1337 		case HTTPNonAuthoritativeInfo:
1338 			return("Non-authoritative information");
1339 		case HTTPNoContent:
1340 			return("Document is empty");
1341 		case HTTPResetContent:
1342 			return("Content has been reset");
1343 		case HTTPPartialContent:
1344 			return("Partial content");
1345 
1346 		/* 300 and up (non-fatal errors, retry possible) */
1347 		case HTTPMultipleChoices:
1348 			return("Request not unique, multiple choices possible");
1349 		case HTTPPermMoved:
1350 			return("Document has been permanently removed");
1351 		case HTTPTempMoved:
1352 			return("Document has been temporarely moved");
1353 		case HTTPSeeOther:
1354 			return("Site has move");
1355 		case HTTPNotModified:
1356 			return("Document not modified since last access");
1357 		case HTTPUseProxy:
1358 			return("Document only accessible through proxy");
1359 
1360 		/* 400 and up (fatal request errors) */
1361 		case HTTPBadRequest:
1362 			return("Invalid HTTP request");
1363 		case HTTPUnauthorised:
1364 			return("Client not authorized");
1365 		case HTTPPaymentReq:
1366 			return("Payment required");
1367 		case HTTPForbidden:
1368 			return("Access forbidden");
1369 		case HTTPNotFound:
1370 			return("Document not found");
1371 		case HTTPMethodNotAllowed:
1372 			return("Access method not allowed");
1373 		case HTTPNotAcceptable:
1374 			return("Unacceptable request");
1375 		case HTTPProxyAuthReq:
1376 			return("Proxy authorization required");
1377 		case HTTPRequestTimeOut:
1378 			return("Timed out");
1379 		case HTTPConflict:
1380 			return("Conflict of interest");
1381 		case HTTPGone:
1382 			return("Document has moved");
1383 		case HTTPLengthReq:
1384 			return("Invalid request length");
1385 		case HTTPPreCondFailed:
1386 			return("Condition failed");
1387 		case HTTPReqEntityTooBig:
1388 			return("Request entity too large");
1389 		case HTTPURITooBig:
1390 			return("URI specification too big");
1391 		case HTTPUnsupportedMediaType:
1392 			return("Unsupported media type");
1393 
1394 		/* 500 and up (server errors) */
1395 		case HTTPInternalServerError:
1396 			return("Internal server error");
1397 		case HTTPNotImplemented:
1398 			return("Method not implemented");
1399 		case HTTPBadGateway:
1400 			return("Invalid gateway");
1401 		case HTTPServiceUnavailable:
1402 			return("Service unavailable");
1403 		case HTTPGatewayTimeOut:
1404 			return("Gateway timed out");
1405 		case HTTPHTTPVersionNotSupported:
1406 			return("Unsupported HTPP version");
1407 
1408 		default:
1409 			return("unknown error");
1410 	}
1411 }
1412