1
2 /***************************************************************************/
3
4 /*
5 * Portions Copyright (c) 1999 GMRS Software GmbH
6 * Carl-von-Linde-Str. 38, D-85716 Unterschleissheim, http://www.gmrs.de
7 * All rights reserved.
8 *
9 * Author: Arno Unkrig <arno@unkrig.de>
10 */
11
12 /* This program is free software; you can redistribute it and/or modify
13 * it under the terms of the GNU General Public License as published by
14 * the Free Software Foundation; either version 2 of the License, or
15 * (at your option) any later version.
16 *
17 * This program is distributed in the hope that it will be useful,
18 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 * GNU General Public License in the file COPYING for more details.
21 */
22
23 /***************************************************************************/
24
25 /*
26 * Changes to version 1.2.2 were made by Martin Bayer <mbayer@zedat.fu-berlin.de>
27 * Dates and reasons of modifications:
28 * Thu Oct 4 21:49:09 CEST 2001: ported to g++ 3.0
29 * Sun Apr 7 11:59:03 CEST 2002: Handle URLs with missing node
 * Mon Jul 22 13:53:02 CEST 2002: Finally made reading from STDIN work.
31 */
32
33 /***************************************************************************/
34
35
36 #include <stdlib.h>
37 #include <stdio.h>
38 #include <string.h>
39 #include <ctype.h>
40 #include <errno.h>
41 #include <sys/types.h>
42 #include <sys/socket.h>
43 #include <fcntl.h>
44 #include <unistd.h>
45 #ifdef SYS_POLL_MISSING /* { */
/*
 * Fallback declaration of "struct pollfd", used only when SYS_POLL_MISSING
 * is defined (i.e. <sys/poll.h> is not included). One element describes one
 * file descriptor handed to "poll()".
 */
struct pollfd {
    int   fd;      // File descriptor to examine
    short events;  // Events the caller asks for
    short revents; // Events reported back by "poll()"
};
51 extern "C" int poll(struct pollfd *ufds, unsigned int nfds, int timeout);
52 #define POLLIN 0x0001 /* There is data to read */
53 #define POLLPRI 0x0002 /* There is urgent data to read */
54 #define POLLOUT 0x0004 /* Writing now will not block */
55 #define POLLERR 0x0008 /* Error condition */
56 #define POLLHUP 0x0010 /* Hung up */
57 #define POLLNVAL 0x0020 /* Invalid request: fd not open */
58 #else /* } { */
59 #include <sys/poll.h>
60 #endif /* } */
61 #include <netinet/in.h>
62 #include <arpa/inet.h>
63 #include <netdb.h>
64 #include <signal.h>
65
66 #include "urlistream.h"
67
68 using std::ios;
69
70 /* ------------------------------------------------------------------------- */
71
72 /*
73 * Compile with
74 *
75 * CC -DTESTING urlistream.C -o urlcat
76 */
77
78 /* ------------------------------------------------------------------------- */
79
80 void
open(const char * url,int timeout)81 urlistream::open(
82 const char *url,
83 int timeout /* = default_timeout */ // Milliseconds
84 )
85 {
86 if (is_open()) close();
87
88 /*
89 * "http:" <address> URL?
90 */
91 if (!memcmp(url, "http:", 5)) {
92 http_open(url + 5, timeout);
93 return;
94 }
95
96 /*
97 * "file:" <file-name> URL?
98 */
99 if (!memcmp(url, "file:", 5)) {
100 file_open(url + 5);
101 return;
102 }
103
104 /*
105 * is the URL a bare file name?
106 */
107 if (strchr(url, ':') == NULL) {
108 file_open(url);
109 return;
110 }
111
112 open_error_ = "Unknown protocol (only \"file:\" and \"http:\" allowed)";
113 }
114
115 void
open(const string & url,int timeout)116 urlistream::open(
117 const string &url,
118 int timeout /* = default_timeout */ // Milliseconds
119 )
120 {
121 open(url.c_str(), timeout);
122 }
123
124 /* ------------------------------------------------------------------------- */
125
126 void
http_open(const char * address,int timeout)127 urlistream::http_open(
128 const char *address, // The URL portion after "http:"
129 int timeout // Milliseconds
130 )
131 {
132
133 /*
134 * Break up the HTTP address:
135 *
136 * "//" <host> [ ":" <port> ] <node>
137 *
138 * A missing node is interpreted as node "/" - Arno
139 */
140 char host_name[100];
141 char port_name[100];
142 char node_name[1000];
143 {
144 const char *p = address;
145 char *q;
146
147 if (*p++ != '/' || *p++ != '/') {
148 open_error_ = "HTTP address does not begin with \"//\"";
149 return;
150 }
151
152 for (q = host_name; *p && *p != ':' && *p != '/'; ++p) {
153 if (q < host_name + sizeof(host_name) - 1) *q++ = *p;
154 }
155 *q = '\0';
156
157 if (*p == ':') {
158 ++p;
159 for (q = port_name; *p && *p != '/'; ++p) {
160 if (q < port_name + sizeof(port_name) - 1) *q++ = *p;
161 }
162 *q = '\0';
163 } else {
164 strcpy(port_name, "80");
165 }
166
167 for (q = node_name; *p && *p != '#'; ++p) {
168 if (q < node_name + sizeof(node_name) - 1) *q++ = *p;
169 }
170 *q = '\0';
171 if (!node_name[0]) {
172 strcpy(node_name, "/");
173 }
174 }
175
176 struct sockaddr_in soc_address;
177 soc_address.sin_family = AF_INET;
178
179 /*
180 * Parse the host name.
181 */
182 {
183 const char *p;
184 int dot_count = 0;
185 for (p = host_name; *p; ++p) {
186 if (*p == '.') { ++dot_count; } else if (!isdigit(*p)) break;
187 }
188 if (*p == '\0' && dot_count == 3) {
189 soc_address.sin_addr.s_addr = inet_addr(host_name);
190 } else {
191 struct hostent *h = gethostbyname(host_name);
192 if (
193 h == 0 ||
194 h->h_addrtype != AF_INET ||
195 h->h_length != sizeof(struct in_addr)
196 ) {
197 open_error_ = "Could not resolve host name";
198 return;
199 }
200 soc_address.sin_addr = *(struct in_addr *) h->h_addr;
201 }
202 }
203
204 /*
205 * Parse the port name.
206 */
207 if (isdigit(port_name[0])) {
208 soc_address.sin_port = htons(atoi(port_name));
209 } else {
210 struct servent *s = getservbyname(port_name, 0);
211 soc_address.sin_port = htons(s ? s->s_port : 80);
212 }
213
214 /*
215 * Strip the "#anchor" suffix from the node name.
216 */
217 { char *p = strchr(node_name, '#'); if (p) *p = '\0'; }
218
219 /*
220 * On-the-fly definition of "FileHandle" which closes a UNIX file descriptor
221 * on destruction.
222 */
223 class FileHandle {
224 public:
225 FileHandle() : fd(-1) {}
226 ~FileHandle() { if (fd != -1) ::close(fd); }
227 operator int() { return fd; }
228 int operator=(int x) { return (fd = x); }
229 private:
230 int fd;
231 } fd;
232
233 fd = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);
234 if (fd == -1) {
235 open_error_ = strerror(errno);
236 return;
237 }
238
239 /*
240 * Make the socket non-blocking, so the "connect()" can be canceled. This
241 * means that when we issue the "connect()" we should NOT have to wait for
242 * the accept on the other end.
243 */
244 if (fcntl(fd, F_SETFL, O_NONBLOCK) == -1) {
245 open_error_ = strerror(errno);
246 return;
247 }
248
249 /*
250 * Issue the "connect()". Since the server can't do an instantaneous
251 * "accept()" and we are non-blocking, this will almost certainly return
252 * a negative status.
253 */
254 if (connect(
255 fd,
256 (struct sockaddr *) &soc_address, sizeof(soc_address)
257 ) == -1) {
258 if (errno != EINPROGRESS && errno != EAGAIN) {
259 open_error_ = strerror(errno);
260 return;
261 }
262
263 for (;;) {
264 struct pollfd p;
265 int res;
266
267 p.fd = fd;
268 p.events = POLLOUT;
269 res = poll(&p, 1, timeout);
270
271 /*
272 * Interrupted "poll()"?
273 */
274 if (res == -1 && errno == EINTR) continue;
275
276 /*
277 * Check for errors.
278 */
279 if (res == -1 && errno != EALREADY) {
280 open_error_ = strerror(errno);
281 return;
282 }
283
284 /*
285 * Check for timeout.
286 */
287 if (res == 0) {
288 open_error_ = "\"connect()\" timed out";
289 return;
290 }
291
292 /*
293 * Extra check here for connection success, if we try to
294 * connect again, and get EISCONN, it means we have a
295 * successful connection.
296 *
297 * Notice: On SINIX 5.43 B2000, the "poll()" returns "1" when the
298 * timeout occurs (!?). When we call "poll()", we encounter a SIGPIPE
299 * (!?). If we ignore it, "poll()" returns EINVAL (22) (!?).
300 */
301 void (*sigpipe_handler)(int) = signal(SIGPIPE, SIG_IGN); /* { */
302 res = connect(fd, (struct sockaddr *) &soc_address, sizeof(soc_address));
303 (void) signal(SIGPIPE, sigpipe_handler); /* } */
304
305 if (res == -1 && errno == EISCONN) break;
306 if (res == -1 && errno != EALREADY) {
307 open_error_ = strerror(errno);
308 return;
309 }
310 }
311 }
312
313 /*
314 * Make the socket blocking again on good "connect()".
315 */
316 if (fcntl(fd, F_SETFL, 0) == -1) {
317 open_error_ = strerror(errno);
318 return;
319 }
320
321 /*
322 * Issue the HTTP request.
323 *
324 * Notice: "GET xyz" means "return the document without a header".
325 */
326 char command[4 + (sizeof(node_name) - 1) + 4 + 1];
327 sprintf(command, "GET %s\r\n\r\n", node_name);
328 ssize_t command_length = strlen(command);
329
330 if (::write(fd, command, command_length) != command_length) {
331 open_error_ = "Error sending HTTP GET request";
332 return;
333 }
334
335 /*
336 * Attach the file descriptor to the ifstream.
337 */
338 fd_ = fd;
339
340 fd = -1; // ...so that it is not implicitly "::close()"'d.
341 }
342
343 /* ------------------------------------------------------------------------- */
344
345 void
file_open(const char * file_name)346 urlistream::file_open(const char *file_name)
347 {
348 fd_ = !strcmp(file_name, "-") ? ::dup(0) : ::open(file_name, O_RDONLY);
349 open_error_ = strerror(errno);
350 }
351
352 /* ------------------------------------------------------------------------- */
353
354 const char *
open_error()355 urlistream::open_error() const
356 {
357 return open_error_ ? open_error_ : "No error";
358 }
359
360 /* ------------------------------------------------------------------------- */
361
362 int
get()363 urlistream::get()
364 {
365 char ch;
366 int ret = ::read(fd_, &ch, 1);
367 return (ret > 0 ? ch : -1);
368 }
369
370 #ifdef TESTING /* { */
371
372 int
main(int argc,char ** argv)373 main(int argc, char **argv)
374 {
375 if (argc < 2) {
376 cerr << "Usage: urlcat <url> [ ... ]" << endl;
377 exit(1);
378 }
379
380 for (int i = 1; i < argc; ++i) {
381 urlistream uis(argv[i]);
382 if (!uis.is_open()) {
383 cerr << "Opening \"" << argv[i] << "\": " << uis.open_error() << endl;
384 exit(1);
385 }
386
387 for (;;) {
388 int c = uis.get();
389 if (c == EOF) break;
390 cout << (char) c;
391 }
392 }
393
394 return 0;
395 }
396
397 #endif /* } */
398
399 /* ------------------------------------------------------------------------- */
400