1 /*
2  * http_fetcher.c - HTTP handling functions
3  *
4  * HTTP Fetcher Copyright (C) 2001, 2003, 2004 Lyle Hanson
5  * (lhanson@users.sourceforge.net)
6  *
7  * This library is free software; you can redistribute it and/or modify it under
8  * the terms of the GNU Library General Public License as published by the
9  * Free Software Foundation; either version 2 of the License, or (at your
10  * option) any later version.
11  *
12  * This library is distributed in the hope that it will be useful, but WITHOUT
13  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
15  * License for more details.
16  *
17  * See LICENSE file for details
18  *
19  * Modified by Yann Orlarey, Grame to be used within Faust (2013/01/23)
20  *
21  */
22 
23 #include <cstdlib>
24 #include <stdlib.h>
25 #include <stdio.h>
26 #include <string.h>
27 #include <ctype.h>
28 #include <errno.h>
29 #include <sys/types.h>
30 #ifndef _WIN32
31 #include <strings.h>
32 #include <netdb.h>
33 #include <unistd.h>
34 #include <netinet/in.h>
35 #include <sys/socket.h>
36 #include <sys/time.h>
37 #define herror perror
38 #else
39 #include <winsock2.h>
40 #define close closesocket
41 #define write(s, buf, len) send(s, buf, (int)(len), 0)
42 #define read(s, buf, len) recv(s, buf, (int)(len), 0)
43 #define rindex strchr
44 #define herror perror
45 
46 #pragma warning(disable: 4996)
47 
48 #endif
49 #include "compatibility.hh"
50 #include "sourcefetcher.hh"
51 
52 #define VERSION "0.2"
53 
/*
 * Globals.  The non-static ones are visible to other translation units; the
 * static ones hold the error state reported by http_perror() and
 * http_strerror().
 */
int timeout = DEFAULT_READ_TIMEOUT;             /* Seconds select() waits for data; a negative value waits forever */
char* userAgent = NULL;                         /* Custom User-Agent value; NULL means the default agent string is sent */
char* referer = NULL;                           /* Referer value; only sent when non-NULL and hideReferer is 0 */
int hideUserAgent = 0;                          /* Non-zero: send no User-Agent header at all */
int hideReferer = 1;                            /* Non-zero: send no Referer header (the default) */
static int followRedirects = DEFAULT_REDIRECTS;	/* # of redirects to follow; negative means no limit */
extern const char* http_errlist[];              /* Array of HTTP Fetcher error messages */
extern char convertedError[128];                /* Buffer used when error messages contain %d */
static int errorSource = 0;                     /* Where the last error came from: ERRNO, H_ERRNO or FETCHER_ERROR */
static int http_errno = 0;                      /* Index into http_errlist[] when errorSource is FETCHER_ERROR */
static int errorInt = 0;                        /* When the error message has a %d in it, this variable is inserted */
/*
 * Error messages, indexed by http_errno.  Each entry is annotated with the
 * HF_* code it belongs to, so the order here presumably has to match the
 * order in which those codes are declared (not visible in this file).  A
 * "%d" in a message is replaced with errorInt when the message is reported.
 */
const char* http_errlist[] =
{
	"Success",                                          /* HF_SUCCESS		*/
	"Internal Error. What the hell?!",                  /* HF_METAERROR		*/
	"Got NULL url",                                     /* HF_NULLURL		*/
	"Timed out, no metadata for %d seconds",            /* HF_HEADTIMEOUT 	*/
	"Timed out, no data for %d seconds",                /* HF_DATATIMEOUT	*/
	"Couldn't find return code in HTTP response",       /* HF_FRETURNCODE	*/
	"Couldn't convert return code in HTTP response",	/* HF_CRETURNCODE	*/
	"Request returned a status code of %d",             /* HF_STATUSCODE	*/
	"Couldn't convert Content-Length to integer",       /* HF_CONTENTLEN	*/
	"Network error, description unavailable",           /* HF_HERROR		*/
	"Status code of %d but no Location: field",         /* HF_CANTREDIRECT  */
	"Followed the maximum number of redirects (%d)"     /* HF_MAXREDIRECTS  */
};
82 
/*
 * Scratch buffer for http_strerror().  When a message from http_errlist[]
 * contains a "%d", the message is copied here with the "%d" replaced by the
 * value of errorInt, and a pointer to this buffer is returned instead of a
 * pointer into http_errlist[].
 */
char convertedError[128];
88 
89 /*
90  * Actually downloads the page, registering a hit (donation) If the fileBuf
91  * passed in is NULL, the url is downloaded and then freed; otherwise the
92  * necessary space is allocated for fileBuf. Returns size of download on
93  * success, -1 on error is set.
94  */
http_fetch(const char * url_tmp,char ** fileBuf)95 int http_fetch(const char* url_tmp, char** fileBuf)
96 {
97 	fd_set rfds;
98 	struct timeval tv;
99 	char headerBuf [HEADER_BUF_SIZE];
100 	char* tmp, *url, *pageBuf, *requestBuf = NULL, *host, *charIndex;
101 	int sock, bytesRead = 0, contentLength = -1, bufsize = REQUEST_BUF_SIZE;
102 	int i, ret = -1, tempSize, selectRet, found = 0,	/* For redirects */
103         redirectsFollowed = 0;
104 
105 	if (url_tmp == NULL) {
106 		errorSource = FETCHER_ERROR;
107 		http_errno = HF_NULLURL;
108 		return -1;
109 	}
110 	/*
111 	 * Copy the url passed in into a buffer we can work with, change,
112 	 * etc.
113 	 */
114 	url = (char*)malloc(strlen(url_tmp) + 1);
115 	if (url == NULL) {
116 		errorSource = ERRNO;
117 		return -1;
118 	}
119 	strncpy(url, url_tmp, strlen(url_tmp) + 1);
120 
121 	/*
122 	 * This loop allows us to follow redirects if need be.  An
123 	 * afterthought, added to provide this basic functionality.  Will
124 	 * hopefully be designed better in 2.x.x ;)
125 	 */
126 	/*
127 	 * while(!found && (followRedirects < 0 || redirectsFollowed <
128 	 * followRedirects))
129 	 */
130 	do {
131 		/* Seek to the file path portion of the url */
132 		charIndex = strstr(url, "://");
133 		if (charIndex != NULL) {
134 			/* url contains a protocol field */
135 			charIndex += strlen("://");
136 			host = charIndex;
137 			charIndex = strchr(charIndex, '/');
138 		} else {
139 			host = (char* )url;
140 			charIndex = strchr(url, '/');
141 		}
142 
143 		/* Compose a request string */
144 		requestBuf = (char*)malloc(bufsize);
145 		if (requestBuf == NULL) {
146 			free(url);
147 			errorSource = ERRNO;
148 			return -1;
149 		}
150 		requestBuf[0] = 0;
151 
152 		if (charIndex == NULL) {
153 			/*
154 			 * The url has no '/' in it, assume the user is
155 			 * making a root-level request
156 			 */
157 			tempSize = (int)strlen("GET /") + (int)strlen(HTTP_VERSION) + 2;
158 			if (_checkBufSize(&requestBuf, &bufsize, tempSize) ||
159 			    snprintf(requestBuf, bufsize, "GET / %s\r\n", HTTP_VERSION) < 0) {
160 				free(url);
161 				free(requestBuf);
162 				errorSource = ERRNO;
163 				return -1;
164 			}
165 		} else {
166 			tempSize = (int)strlen("GET ") + (int)strlen(charIndex) +
167 				(int)strlen(HTTP_VERSION) + 4;
168 			/* + 4 is for ' ', '\r', '\n', and NULL */
169 			if (_checkBufSize(&requestBuf, &bufsize, tempSize) ||
170 			    snprintf(requestBuf, bufsize, "GET %s %s\r\n",
171 				     charIndex, HTTP_VERSION) < 0) {
172 				free(url);
173 				free(requestBuf);
174 				errorSource = ERRNO;
175 				return -1;
176 			}
177 		}
178 
179 		/* Null out the end of the hostname if need be */
180 		if (charIndex != NULL)
181 			*charIndex = 0;
182 
183 		/*
184 		 * Use Host: even though 1.0 doesn't specify it.  Some
185 		 * servers won't play nice if we don't send Host, and it
186 		 * shouldn't hurt anything
187 		 */
188 		ret = (int)bufsize - (int)strlen(requestBuf);	/* Space left in buffer */
189 		tempSize = (int)strlen("Host: ") + (int)strlen(host) + 3;
190 		/* +3 for "\r\n\0" */
191 		if (_checkBufSize(&requestBuf, &bufsize, tempSize + 128)) {
192 			free(url);
193 			free(requestBuf);
194 			errorSource = ERRNO;
195 			return -1;
196 		}
197 		strcat(requestBuf, "Host: ");
198 		strcat(requestBuf, host);
199 		strcat(requestBuf, "\r\n");
200 
201 		if (!hideReferer && referer != NULL) {	/* NO default referer */
202 			tempSize = (int)strlen("Referer: ") + (int)strlen(referer) + 3;
203 			/* + 3 is for '\r', '\n', and NULL */
204 			if (_checkBufSize(&requestBuf, &bufsize, tempSize)) {
205 				free(url);
206 				free(requestBuf);
207 				errorSource = ERRNO;
208 				return -1;
209 			}
210 			strcat(requestBuf, "Referer: ");
211 			strcat(requestBuf, referer);
212 			strcat(requestBuf, "\r\n");
213 		}
214 		if (!hideUserAgent && userAgent == NULL) {
215 			tempSize = (int)strlen("User-Agent: ") +
216 				(int)strlen(DEFAULT_USER_AGENT) + (int)strlen(VERSION) + 4;
217 			/* + 4 is for '\', '\r', '\n', and NULL */
218 			if (_checkBufSize(&requestBuf, &bufsize, tempSize)) {
219 				free(url);
220 				free(requestBuf);
221 				errorSource = ERRNO;
222 				return -1;
223 			}
224 			strcat(requestBuf, "User-Agent: ");
225 			strcat(requestBuf, DEFAULT_USER_AGENT);
226 			strcat(requestBuf, "/");
227 			strcat(requestBuf, VERSION);
228 			strcat(requestBuf, "\r\n");
229 		} else if (!hideUserAgent) {
230 			tempSize = (int)strlen("User-Agent: ") + (int)strlen(userAgent) + 3;
231 			/* + 3 is for '\r', '\n', and NULL */
232 			if (_checkBufSize(&requestBuf, &bufsize, tempSize)) {
233 				free(url);
234 				free(requestBuf);
235 				errorSource = ERRNO;
236 				return -1;
237 			}
238 			strcat(requestBuf, "User-Agent: ");
239 			strcat(requestBuf, userAgent);
240 			strcat(requestBuf, "\r\n");
241 		}
242 		tempSize = (int)strlen("Connection: Close\r\n\r\n");
243 		if (_checkBufSize(&requestBuf, &bufsize, tempSize)) {
244 			free(url);
245 			free(requestBuf);
246 			errorSource = ERRNO;
247 			return -1;
248 		}
249 		strcat(requestBuf, "Connection: Close\r\n\r\n");
250 
251 		/* Now free any excess memory allocated to the buffer */
252 		tmp = (char*)realloc(requestBuf, strlen(requestBuf) + 1);
253 		if (tmp == NULL) {
254 			free(url);
255 			free(requestBuf);
256 			errorSource = ERRNO;
257 			return -1;
258 		}
259 		requestBuf = tmp;
260 
261 		sock = makeSocket(host);	/* errorSource set within
262                                     * makeSocket */
263 		if (sock == -1) {
264 			free(url);
265 			free(requestBuf);
266 			return -1;
267 		}
268 		free(url);
269 		url = NULL;
270 
271 		if (write(sock, requestBuf, strlen(requestBuf)) == -1) {
272 			close(sock);
273 			free(requestBuf);
274 			errorSource = ERRNO;
275 			return -1;
276 		}
277 		free(requestBuf);
278 		requestBuf = NULL;
279 
280 		/* Grab enough of the response to get the metadata */
281 		ret = _http_read_header(sock, headerBuf);	/* errorSource set
282                                                     * within */
283 		if (ret < 0) {
284 			close(sock);
285 			return -1;
286 		}
287 		/* Get the return code */
288 		charIndex = strstr(headerBuf, "HTTP/");
289 		if (charIndex == NULL) {
290 			close(sock);
291 			errorSource = FETCHER_ERROR;
292 			http_errno = HF_FRETURNCODE;
293 			return -1;
294 		}
295 		while (*charIndex != ' ')
296 			charIndex++;
297 		charIndex++;
298 
299 		ret = sscanf(charIndex, "%d", &i);
300 		if (ret != 1) {
301 			close(sock);
302 			errorSource = FETCHER_ERROR;
303 			http_errno = HF_CRETURNCODE;
304 			return -1;
305 		}
306 		if (i < 200 || i > 307) {
307 			close(sock);
308 			errorInt = i;	/* Status code, to be inserted in
309                              * error string */
310 			errorSource = FETCHER_ERROR;
311 			http_errno = HF_STATUSCODE;
312 			return -1;
313 		}
314 		/*
315 		 * If a redirect, repeat operation until final URL is found
316 		 * or we redirect followRedirects times.  Note the case
317 		 * sensitive "Location", should probably be made more robust
318 		 * in the future (without relying on the non-standard
319 		 * strcasecmp()). This bit mostly by Dean Wilder, tweaked by
320 		 * me
321 		 */
322 		if (i >= 300) {
323 			redirectsFollowed++;
324 
325 			/*
326 			 * Pick up redirect URL, allocate new url, and repeat
327 			 * process
328 			 */
329 			charIndex = strstr(headerBuf, "Location:");
330 			if (!charIndex) {
331 				close(sock);
332 				errorInt = i;	/* Status code, to be
333                                  * inserted in error string */
334 				errorSource = FETCHER_ERROR;
335 				http_errno = HF_CANTREDIRECT;
336 				return -1;
337 			}
338 			charIndex += strlen("Location:");
339 			/* Skip any whitespace... */
340 			while (*charIndex != '\0' && isspace(*charIndex))
341 				charIndex++;
342 			if (*charIndex == '\0') {
343 				close(sock);
344 				errorInt = i;	/* Status code, to be
345                                  * inserted in error string */
346 				errorSource = FETCHER_ERROR;
347 				http_errno = HF_CANTREDIRECT;
348 				return -1;
349 			}
350 			i = (int)strcspn(charIndex, " \r\n");
351 			if (i > 0) {
352 				url = (char*)malloc(i + 1);
353 				strncpy(url, charIndex, i);
354 				url[i] = '\0';
355 			} else
356 				/*
357 				 * Found 'Location:' but contains no URL!
358 				 * We'll handle it as 'found', hopefully the
359 				 * resulting document will give the user a
360 				 * hint as to what happened.
361 				 */
362 				found = 1;
363 		} else {
364 			found = 1;
365 		}
366 	} while (!found && (followRedirects < 0 || redirectsFollowed <= followRedirects));
367 
368 	if (url) {		/* Redirection code may malloc this, then
369                      * exceed followRedirects */
370 		free(url);
371 		url = NULL;
372 	}
373 	if (redirectsFollowed >= followRedirects && !found) {
374 		close(sock);
375 		errorInt = followRedirects;	/* To be inserted in error
376                                      * string */
377 		errorSource = FETCHER_ERROR;
378 		http_errno = HF_MAXREDIRECTS;
379 		return -1;
380 	}
381 	/*
382 	 * Parse out about how big the data segment is. Note that under
383 	 * current HTTP standards (1.1 and prior), the Content-Length field
384 	 * is not guaranteed to be accurate or even present. I just use it
385 	 * here so I can allocate a ballpark amount of memory.
386 	 *
387 	 * Note that some servers use different capitalization
388 	 */
389 	charIndex = strstr(headerBuf, "Content-Length:");
390 	if (charIndex == NULL)
391 		charIndex = strstr(headerBuf, "Content-length:");
392 
393 	if (charIndex != NULL) {
394 		ret = sscanf(charIndex + strlen("content-length: "), "%d",
395 			     &contentLength);
396 		if (ret < 1) {
397 			close(sock);
398 			errorSource = FETCHER_ERROR;
399 			http_errno = HF_CONTENTLEN;
400 			return -1;
401 		}
402 	}
403 	/* Allocate enough memory to hold the page */
404 	if (contentLength == -1)
405 		contentLength = DEFAULT_PAGE_BUF_SIZE;
406 
407 	pageBuf = (char*)malloc(contentLength);
408 	if (pageBuf == NULL) {
409 		close(sock);
410 		errorSource = ERRNO;
411 		return -1;
412 	}
413 	/* Begin reading the body of the file */
414 	while (ret > 0) {
415 		FD_ZERO(&rfds);
416 		FD_SET(sock, &rfds);
417 		tv.tv_sec = timeout;
418 		tv.tv_usec = 0;
419 
420 		if (timeout >= 0)
421 			selectRet = select(sock + 1, &rfds, NULL, NULL, &tv);
422 		else		/* No timeout, can block indefinately */
423 			selectRet = select(sock + 1, &rfds, NULL, NULL, NULL);
424 
425 		if (selectRet == 0) {
426 			errorSource = FETCHER_ERROR;
427 			http_errno = HF_DATATIMEOUT;
428 			errorInt = timeout;
429 			close(sock);
430 			free(pageBuf);
431 			return -1;
432 		} else if (selectRet == -1) {
433 			close(sock);
434 			free(pageBuf);
435 			errorSource = ERRNO;
436 			return -1;
437 		}
438 		ret = int(read(sock, pageBuf + bytesRead, contentLength));
439 		if (ret == -1) {
440 			close(sock);
441 			free(pageBuf);
442 			errorSource = ERRNO;
443 			return -1;
444 		}
445 		bytesRead += ret;
446 
447 		if (ret > 0) {
448 			/*
449 			 * To be tolerant of inaccurate Content-Length
450 			 * fields, we'll allocate another read-sized chunk to
451 			 * make sure we have enough room.
452 			 */
453 			tmp = (char*)realloc(pageBuf, bytesRead + contentLength);
454 			if (tmp == NULL) {
455 				close(sock);
456 				free(pageBuf);
457 				errorSource = ERRNO;
458 				return -1;
459 			}
460 			pageBuf = tmp;
461 		}
462 	}
463 
464 	/*
465 	 * The download buffer is too large.  Trim off the safety padding.
466 	 * Note that we add one NULL byte to the end of the data, as it may
467 	 * not already be NULL terminated and we can't be sure what type of
468 	 * data it is or what the caller will do with it.
469 	 */
470 	tmp = (char*)realloc(pageBuf, bytesRead + 1);
471 	/*
472 	 * tmp shouldn't be null, since we're _shrinking_ the buffer, and if
473 	 * it DID fail, we could go on with the too-large buffer, but
474 	 * something would DEFINATELY be wrong, so we'll just give an error
475 	 * message
476 	 */
477 	if (tmp == NULL) {
478 		close(sock);
479 		free(pageBuf);
480 		errorSource = ERRNO;
481 		return -1;
482 	}
483 	pageBuf = tmp;
484 	pageBuf[bytesRead] = '\0';	/* NULL terminate the data */
485 
486 	if (fileBuf == NULL)        /* They just wanted us to "hit" the url */
487 		free(pageBuf);
488 	else
489 		*fileBuf = pageBuf;
490 
491 	close(sock);
492 	return bytesRead;
493 }
494 
495 /*
496  * Changes the User Agent.  Returns 0 on success, -1 on error.
497  */
http_setUserAgent(const char * newAgent)498 int http_setUserAgent(const char* newAgent)
499 {
500 	static int freeOldAgent = 0;	/* Indicates previous
501                                      * malloc's */
502 	char* tmp;
503 
504 	if (newAgent == NULL) {
505 		if (freeOldAgent)
506 			free(userAgent);
507 		userAgent = NULL;
508 		hideUserAgent = 1;
509 	} else {
510 		tmp = (char*)malloc(strlen(newAgent) + 1);
511 		if (tmp == NULL) {
512 			errorSource = ERRNO;
513 			return -1;
514 		}
515 		if (freeOldAgent)
516 			free(userAgent);
517 		userAgent = tmp;
518 		strcpy(userAgent, newAgent);
519 		freeOldAgent = 1;
520 		hideUserAgent = 0;
521 	}
522 
523 	return 0;
524 }
525 
526 /*
527  * Changes the Referer.  Returns 0 on success, -1 on error
528  */
http_setReferer(const char * newReferer)529 int http_setReferer(const char* newReferer)
530 {
531 	static int freeOldReferer = 0;	/* Indicated previous
532                                     * malloc's */
533 	char* tmp;
534 
535 	if (newReferer == NULL) {
536 		if (freeOldReferer)
537 			free(referer);
538 		referer = NULL;
539 		hideReferer = 1;
540 	} else {
541 		tmp = (char*)malloc(strlen(newReferer) + 1);
542 		if (tmp == NULL) {
543 			errorSource = ERRNO;
544 			return -1;
545 		}
546 		if (freeOldReferer)
547 			free(referer);
548 		referer = tmp;
549 		strcpy(referer, newReferer);
550 		freeOldReferer = 1;
551 		hideReferer = 0;
552 	}
553 
554 	return 0;
555 }
556 
557 /*
558  * Changes the amount of time that HTTP Fetcher will wait for data before
559  * timing out on reads
560  */
http_setTimeout(int seconds)561 void http_setTimeout(int seconds)
562 {
563 	timeout = seconds;
564 }
565 
566 /*
567  * Changes the number of HTTP redirects HTTP Fetcher will automatically
568  * follow.  If a request returns a status code of 3XX and contains a
569  * "Location:" field, the library will transparently follow up to the
570  * specified number of redirects.  With this implementation (which is just a
571  * stopgap, really) the caller won't be aware of any redirection and will
572  * assume the returned document came from the original URL. To disable
573  * redirects, pass a 0.  To follow unlimited redirects (probably unwise),
574  * pass a negative value.  The default is to follow 3 redirects.
575  */
http_setRedirects(int redirects)576 void http_setRedirects(int redirects)
577 {
578 	followRedirects = redirects;
579 }
580 
581 /*
582  * Puts the filename portion of the url into 'filename'. Returns: 0 on
583  * success 1 when url contains no end filename (i.e., 'www.foo.com/'), and
584  * **filename should not be assumed to be valid -1 on error
585  */
http_parseFilename(const char * url,char ** filename)586 int http_parseFilename(const char* url, char** filename)
587 {
588 	char* ptr;
589 
590 	if (url == NULL) {
591 		errorSource = FETCHER_ERROR;
592 		http_errno = HF_NULLURL;
593 		return -1;
594 	}
595 	ptr = (char*)strrchr(url, '/');
596 	if (ptr == NULL)
597 		/* Root level request, apparently */
598 		return 1;
599 
600 	ptr++;
601 	if (*ptr == '\0')
602 		return 1;
603 
604 	*filename = (char*)malloc(strlen(ptr) + 1);
605 	if (*filename == NULL) {
606 		errorSource = ERRNO;
607 		return -1;
608 	}
609 	strcpy(*filename, ptr);
610 
611 	return 0;
612 }
613 
614 /*
615  * Depending on the source of error, calls either perror() or prints an HTTP
616  * Fetcher error message to stdout
617  */
http_perror(const char * string)618 void http_perror(const char* string)
619 {
620 	if (errorSource == ERRNO)
621 		perror(string);
622 	else if (errorSource == H_ERRNO)
623 		herror(string);
624 	else if (errorSource == FETCHER_ERROR) {
625 		const char* stringIndex;
626 
627 		if (strstr(http_errlist[http_errno], "%d") == NULL) {
628 			fputs(string, stderr);
629 			fputs(": ", stderr);
630 			fputs(http_errlist[http_errno], stderr);
631 			fputs("\n", stderr);
632 		} else {
633 			/*
634 			 * The error string has a %d in it, we need to insert
635 			 * errorInt
636 			 */
637 			stringIndex = http_errlist[http_errno];
638 			while (*stringIndex != '%') {	/* Print up to the %d */
639 				fputc(*stringIndex, stderr);
640 				stringIndex++;
641 			}
642 			fprintf(stderr, "%d", errorInt);	/* Print the number */
643 			stringIndex += 2;	/* Skip past the %d */
644 			while (*stringIndex != 0) {	/* Print up to the end
645 							 * NULL */
646 				fputc(*stringIndex, stderr);
647 				stringIndex++;
648 			}
649 			fputs("\n", stderr);
650 		}
651 	}
652 }
653 
654 /*
655  * Returns a pointer to the current error description message. The message
656  * pointed to is only good until the next call to http_strerror(), so if you
657  * need to hold on to the message for a while you should make a copy of it
658  */
http_strerror()659 const char* http_strerror()
660 {
661 	if (errorSource == ERRNO)
662 		return strerror(errno);
663 	else if (errorSource == H_ERRNO)
664 #ifdef HAVE_HSTRERROR
665 		return hstrerror(h_errno);
666 #else
667 		return http_errlist[HF_HERROR];
668 #endif
669 	else if (errorSource == FETCHER_ERROR) {
670 		if (strstr(http_errlist[http_errno], "%d") == NULL)
671 			return http_errlist[http_errno];
672 		else {
673 			/*
674 			 * The error string has a %d in it, we need to insert
675 			 * errorInt. convertedError[128] has been declared
676 			 * for that purpose
677 			 */
678 			char* stringIndex, *originalError;
679 
680 			originalError = (char*)http_errlist[http_errno];
681 			convertedError[0] = 0;	/* Start off with NULL */
682 			stringIndex = strstr(originalError, "%d");
683 			strncat(convertedError, originalError,	/* Copy up to %d */
684             labs(long(stringIndex - originalError)));
685 			sprintf(&convertedError[strlen(convertedError)], "%d", errorInt);
686 			stringIndex += 2;	/* Skip past the %d */
687 			strcat(convertedError, stringIndex);
688 
689 			return convertedError;
690 		}
691 	}
692 	return http_errlist[HF_METAERROR];	/* Should NEVER happen */
693 }
694 
695 /*
696  * Reads the metadata of an HTTP response. Perhaps a little inefficient, as
697  * it reads 1 byte at a time, but I don't think it's that much of a loss
698  * (most headers aren't HUGE). Returns: # of bytes read on success, or -1 on
699  * error
700  */
_http_read_header(int sock,char * headerPtr)701 int _http_read_header(int sock, char* headerPtr)
702 {
703 	fd_set rfds;
704 	struct timeval tv;
705 	int	bytesRead = 0, newlines = 0, ret, selectRet;
706 
707 	while (newlines != 2 && bytesRead != HEADER_BUF_SIZE) {
708 		FD_ZERO(&rfds);
709 		FD_SET(sock, &rfds);
710 		tv.tv_sec = timeout;
711 		tv.tv_usec = 0;
712 
713 		if (timeout >= 0)
714 			selectRet = select(sock + 1, &rfds, NULL, NULL, &tv);
715 		else		/* No timeout, can block indefinately */
716 			selectRet = select(sock + 1, &rfds, NULL, NULL, NULL);
717 
718 		if (selectRet == 0) {
719 			errorSource = FETCHER_ERROR;
720 			http_errno = HF_HEADTIMEOUT;
721 			errorInt = timeout;
722 			return -1;
723 		} else if (selectRet == -1) {
724 			errorSource = ERRNO;
725 			return -1;
726 		}
727 		ret = int(read(sock, headerPtr, 1));
728 		if (ret == -1) {
729 			errorSource = ERRNO;
730 			return -1;
731 		}
732 		bytesRead++;
733 
734 		if (*headerPtr == '\r') {	/* Ignore CR */
735 			/*
736 			 * Basically do nothing special, just don't set
737 			 * newlines to 0
738 			 */
739 			headerPtr++;
740 			continue;
741 		} else if (*headerPtr == '\n')	/* LF is the separator */
742 			newlines++;
743 		else
744 			newlines = 0;
745 
746 		headerPtr++;
747 	}
748 
749 	headerPtr -= 3;		/* Snip the trailing LF's */
750 	*headerPtr = '\0';
751 	return bytesRead;
752 }
753 
754 /*
755  * Opens a TCP socket and returns the descriptor Returns: socket descriptor,
756  * or -1 on error
757  */
makeSocket(char * host)758 int makeSocket(char* host)
759 {
760 	int	sock;               /* Socket descriptor */
761 	struct sockaddr_in sa;	/* Socket address */
762 	struct hostent* hp;     /* Host entity */
763 	int	ret;
764 	int	port;
765 	char* p;
766 
767 	/* Check for port number specified in URL */
768 	p = strchr(host, ':');
769 	if (p) {
770         port = std::atoi(p + 1);
771 		*p = '\0';
772 	} else
773 		port = PORT_NUMBER;
774 
775 	hp = gethostbyname(host);
776 	if (hp == NULL) {
777 		errorSource = H_ERRNO;
778 		return -1;
779 	}
780 	/* Copy host address from hostent to (server) socket address */
781 	memcpy((char*)&sa.sin_addr, (char*)hp->h_addr, hp->h_length);
782 	sa.sin_family = hp->h_addrtype;	/* Set service sin_family to PF_INET */
783 	sa.sin_port = htons(port);	/* Put portnum into sockaddr */
784 
785 	sock = (int)socket(hp->h_addrtype, SOCK_STREAM, 0);
786 	if (sock == -1) {
787 		errorSource = ERRNO;
788 		return -1;
789 	}
790 	ret = connect(sock, (struct sockaddr*)&sa, sizeof(sa));
791 	if (ret == -1) {
792 		errorSource = ERRNO;
793 		return -1;
794 	}
795 	return sock;
796 }
797 
/*
 * Makes sure the NUL-terminated string in *buf can take 'more' additional
 * characters.
 *
 * buf:     address of the malloc'ed buffer; updated if the buffer moves.
 * bufsize: address of the buffer's allocated size; increased by more + 1
 *          when the buffer is grown.
 * more:    number of characters about to be appended.
 *
 * Returns 0 if there is (now) enough room, or -1 if the buffer could not be
 * grown, in which case *buf and *bufsize are unchanged.
 */
int _checkBufSize(char** buf, int* bufsize, int more)
{
	/* Bytes already taken: the string plus its terminating NUL */
	int used = (int)(strlen(*buf) + 1);
	int grownSize;
	char* resized;

	if ((int)*bufsize - used > more)
		return 0;	/* Enough spare room already */

	grownSize = *bufsize + more + 1;
	resized = (char*)realloc(*buf, grownSize);
	if (resized == NULL)
		return -1;

	*buf = resized;
	*bufsize = grownSize;
	return 0;
}
817