xref: /netbsd/external/bsd/ipf/dist/lib/load_http.c (revision f52ace7a)
1 /*	$NetBSD: load_http.c,v 1.3 2012/07/22 14:27:36 darrenr Exp $	*/
2 
3 /*
4  * Copyright (C) 2012 by Darren Reed.
5  *
6  * See the IPFILTER.LICENCE file for details on licencing.
7  *
8  * Id: load_http.c,v 1.1.1.2 2012/07/22 13:44:39 darrenr
9  */
10 
11 #include "ipf.h"
12 #include <ctype.h>
13 
14 /*
15  * Because the URL can be included twice into the buffer, once as the
16  * full path for the "GET" and once as the "Host:", the buffer it is
17  * put in needs to be larger than 512*2 to make room for the supporting
18  * text. Why not just use snprintf and truncate? The warning about the
19  * URL being too long tells you something is wrong and does not fetch
20  * any data - just truncating the URL (with snprintf, etc) and sending
21  * that to the server is allowing an unknown and unintended action
22  * to happen.
23  */
24 #define	MAX_URL_LEN	512	/* longest URL, in bytes, that load_http() accepts */
25 #define	LOAD_BUFSIZE	(MAX_URL_LEN * 2 + 128)	/* request/response buffer: URL twice plus fixed text */
26 
27 /*
28  * Format expected is one addres per line, at the start of each line.
29  */
30 alist_t *
load_http(char * url)31 load_http(char *url)
32 {
33 	int fd, len, left, port, endhdr, removed, linenum = 0;
34 	char *s, *t, *u, buffer[LOAD_BUFSIZE], *myurl;
35 	alist_t *a, *rtop, *rbot;
36 	int rem;
37 
38 	/*
39 	 * More than this would just be absurd.
40 	 */
41 	if (strlen(url) > MAX_URL_LEN) {
42 		fprintf(stderr, "load_http has a URL > %d bytes?!\n",
43 			MAX_URL_LEN);
44 		return NULL;
45 	}
46 
47 	fd = -1;
48 	rtop = NULL;
49 	rbot = NULL;
50 
51 	myurl = strdup(url);
52 	if (myurl == NULL)
53 		goto done;
54 
55 	rem = sizeof(buffer);
56 	left = snprintf(buffer, rem, "GET %s HTTP/1.0\r\n", url);
57 	if (left < 0 || left > rem)
58 		goto done;
59 	rem -= left;
60 
61 	s = myurl + 7;			/* http:// */
62 	t = strchr(s, '/');
63 	if (t == NULL) {
64 		fprintf(stderr, "load_http has a malformed URL '%s'\n", url);
65 		goto done;
66 	}
67 	*t++ = '\0';
68 
69 	/*
70 	 * 10 is the length of 'Host: \r\n\r\n' below.
71 	 */
72 	if (strlen(s) + strlen(buffer) + 10 > sizeof(buffer)) {
73 		fprintf(stderr, "load_http has a malformed URL '%s'\n", url);
74 		free(myurl);
75 		return NULL;
76 	}
77 
78 	u = strchr(s, '@');
79 	if (u != NULL)
80 		s = u + 1;		/* AUTH */
81 
82 	left = snprintf(buffer + left, rem, "Host: %s\r\n\r\n", s);
83 	if (left < 0 || left > rem)
84 		goto done;
85 	rem -= left;
86 
87 	u = strchr(s, ':');
88 	if (u != NULL) {
89 		*u++ = '\0';
90 		port = atoi(u);
91 		if (port < 0 || port > 65535)
92 			goto done;
93 	} else {
94 		port = 80;
95 	}
96 
97 
98 	fd = connecttcp(s, port);
99 	if (fd == -1)
100 		goto done;
101 
102 	len = strlen(buffer);
103 	if (write(fd, buffer, len) != len)
104 		goto done;
105 
106 	s = buffer;
107 	endhdr = 0;
108 	left = sizeof(buffer) - 1;
109 
110 	while ((len = read(fd, s, left)) > 0) {
111 		s[len] = '\0';
112 		left -= len;
113 		s += len;
114 
115 		if (endhdr >= 0) {
116 			if (endhdr == 0) {
117 				t = strchr(buffer, ' ');
118 				if (t == NULL)
119 					continue;
120 				t++;
121 				if (*t != '2')
122 					break;
123 			}
124 
125 			u = buffer;
126 			while ((t = strchr(u, '\r')) != NULL) {
127 				if (t == u) {
128 					if (*(t + 1) == '\n') {
129 						u = t + 2;
130 						endhdr = -1;
131 						break;
132 					} else
133 						t++;
134 				} else if (*(t + 1) == '\n') {
135 					endhdr++;
136 					u = t + 2;
137 				} else
138 					u = t + 1;
139 			}
140 			if (endhdr >= 0)
141 				continue;
142 			removed = (u - buffer) + 1;
143 			memmove(buffer, u, (sizeof(buffer) - left) - removed);
144 			s -= removed;
145 			left += removed;
146 		}
147 
148 		do {
149 			t = strchr(buffer, '\n');
150 			if (t == NULL)
151 				break;
152 
153 			linenum++;
154 			*t = '\0';
155 
156 			for (u = buffer; isdigit((unsigned char)*u) ||
157 			    (*u == '.'); u++)
158 				continue;
159 			if (*u == '/') {
160 				char *slash;
161 
162 				slash = u;
163 				u++;
164 				while (isdigit((unsigned char)*u))
165 					u++;
166 				if (!isspace((unsigned char)*u) && *u)
167 					u = slash;
168 			}
169 
170 			/*
171 			 * Remove comment and continue to the next line if
172 			 * the comment is at the start of the line.
173 			 */
174 			u = strchr(buffer, '#');
175 			if (u != NULL) {
176 				*u = '\0';
177 				if (u == buffer)
178 					continue;
179 			}
180 
181 			/*
182 			 * Trim off tailing white spaces, will include \r
183 			 */
184 			for (u = t - 1; (u >= buffer) && ISSPACE(*u); u--)
185 				*u = '\0';
186 
187 			a = alist_new(AF_UNSPEC, buffer);
188 			if (a != NULL) {
189 				if (rbot != NULL)
190 					rbot->al_next = a;
191 				else
192 					rtop = a;
193 				rbot = a;
194 			} else {
195 				fprintf(stderr,
196 					"%s:%d unrecognised content:%s\n",
197 					url, linenum, buffer);
198 			}
199 
200 			t++;
201 			removed = t - buffer;
202 			memmove(buffer, t, sizeof(buffer) - left - removed);
203 			s -= removed;
204 			left += removed;
205 
206 		} while (1);
207 	}
208 
209 done:
210 	if (myurl != NULL)
211 		free(myurl);
212 	if (fd != -1)
213 		close(fd);
214 	return rtop;
215 }
216