1 
2  /***************************************************************************/
3 
4 /*
5  * Portions Copyright (c) 1999 GMRS Software GmbH
6  * Carl-von-Linde-Str. 38, D-85716 Unterschleissheim, http://www.gmrs.de
7  * All rights reserved.
8  *
9  * Author: Arno Unkrig <arno@unkrig.de>
10  */
11 
12 /* This program is free software; you can redistribute it and/or modify
13  * it under the terms of the GNU General Public License as published by
14  * the Free Software Foundation; either version 2 of the License, or
15  * (at your option) any later version.
16  *
17  * This program is distributed in the hope that it will be useful,
18  * but WITHOUT ANY WARRANTY; without even the implied warranty of
19  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20  * GNU General Public License in the file COPYING for more details.
21  */
22 
23  /***************************************************************************/
24 
25 /*
26  * Changes to version 1.2.2 were made by Martin Bayer <mbayer@zedat.fu-berlin.de>
27  * Dates and reasons of modifications:
28  * Thu Oct  4 21:49:09 CEST 2001: ported to g++ 3.0
29  * Sun Apr  7 11:59:03 CEST 2002: Handle URLs with missing node
 * Mon Jul 22 13:53:02 CEST 2002: Finally made reading from STDIN work.
31  */
32 
33  /***************************************************************************/
34 
35 
36 #include <stdlib.h>
37 #include <stdio.h>
38 #include <string.h>
39 #include <ctype.h>
40 #include <errno.h>
41 #include <sys/types.h>
42 #include <sys/socket.h>
43 #include <fcntl.h>
44 #include <unistd.h>
45 #ifdef SYS_POLL_MISSING /* { */
46 struct pollfd {
47   int fd;           /* file descriptor */
48   short events;     /* requested events */
49   short revents;    /* returned events */
50 };
51 extern "C" int poll(struct pollfd *ufds, unsigned int nfds, int timeout);
52 #define POLLIN      0x0001    /* There is data to read */
53 #define POLLPRI     0x0002    /* There is urgent data to read */
54 #define POLLOUT     0x0004    /* Writing now will not block */
55 #define POLLERR     0x0008    /* Error condition */
56 #define POLLHUP     0x0010    /* Hung up */
57 #define POLLNVAL    0x0020    /* Invalid request: fd not open */
58 #else /* } { */
59 #include <sys/poll.h>
60 #endif /* } */
61 #include <netinet/in.h>
62 #include <arpa/inet.h>
63 #include <netdb.h>
64 #include <signal.h>
65 
66 #include "urlistream.h"
67 
68 using std::ios;
69 
70 /* ------------------------------------------------------------------------- */
71 
72 /*
73  * Compile with
74  *
75  *     CC -DTESTING urlistream.C -o urlcat
76  */
77 
78 /* ------------------------------------------------------------------------- */
79 
80 void
open(const char * url,int timeout)81 urlistream::open(
82   const char *url,
83   int        timeout /* = default_timeout */  // Milliseconds
84 )
85 {
86   if (is_open()) close();
87 
88   /*
89    * "http:" <address> URL?
90    */
91   if (!memcmp(url, "http:", 5)) {
92     http_open(url + 5, timeout);
93     return;
94   }
95 
96   /*
97    * "file:" <file-name> URL?
98    */
99   if (!memcmp(url, "file:", 5)) {
100     file_open(url + 5);
101     return;
102   }
103 
104   /*
105    * is the URL a bare file name?
106    */
107   if (strchr(url, ':') == NULL) {
108     file_open(url);
109     return;
110   }
111 
112   open_error_ = "Unknown protocol (only \"file:\" and \"http:\" allowed)";
113 }
114 
115 void
open(const string & url,int timeout)116 urlistream::open(
117   const string &url,
118   int          timeout /* = default_timeout */  // Milliseconds
119 )
120 {
121   open(url.c_str(), timeout);
122 }
123 
124 /* ------------------------------------------------------------------------- */
125 
126 void
http_open(const char * address,int timeout)127 urlistream::http_open(
128   const char *address,    // The URL portion after "http:"
129   int        timeout      // Milliseconds
130 )
131 {
132 
133   /*
134    * Break up the HTTP address:
135    *
136    *   "//" <host> [ ":" <port> ] <node>
137    *
138    * A missing node is interpreted as node "/" - Arno
139    */
140   char host_name[100];
141   char port_name[100];
142   char node_name[1000];
143   {
144     const char *p = address;
145     char       *q;
146 
147     if (*p++ != '/' || *p++ != '/') {
148       open_error_ = "HTTP address does not begin with \"//\"";
149       return;
150     }
151 
152     for (q = host_name; *p && *p != ':' && *p != '/'; ++p) {
153       if (q < host_name + sizeof(host_name) - 1) *q++ = *p;
154     }
155     *q = '\0';
156 
157     if (*p == ':') {
158       ++p;
159       for (q = port_name; *p && *p != '/'; ++p) {
160 	if (q < port_name + sizeof(port_name) - 1) *q++ = *p;
161       }
162       *q = '\0';
163     } else {
164       strcpy(port_name, "80");
165     }
166 
167     for (q = node_name; *p && *p != '#'; ++p) {
168       if (q < node_name + sizeof(node_name) - 1) *q++ = *p;
169     }
170     *q = '\0';
171     if (!node_name[0]) {
172       strcpy(node_name, "/");
173     }
174   }
175 
176   struct sockaddr_in soc_address;
177   soc_address.sin_family = AF_INET;
178 
179   /*
180    * Parse the host name.
181    */
182   {
183     const char *p;
184     int dot_count = 0;
185     for (p = host_name; *p; ++p) {
186       if (*p == '.') { ++dot_count; } else if (!isdigit(*p)) break;
187     }
188     if (*p == '\0' && dot_count == 3) {
189       soc_address.sin_addr.s_addr = inet_addr(host_name);
190     } else {
191       struct hostent *h = gethostbyname(host_name);
192       if (
193         h == 0 ||
194         h->h_addrtype != AF_INET ||
195         h->h_length != sizeof(struct in_addr)
196       ) {
197 	open_error_ = "Could not resolve host name";
198 	return;
199       }
200       soc_address.sin_addr = *(struct in_addr *) h->h_addr;
201     }
202   }
203 
204   /*
205    * Parse the port name.
206    */
207   if (isdigit(port_name[0])) {
208     soc_address.sin_port = htons(atoi(port_name));
209   } else {
210     struct servent *s = getservbyname(port_name, 0);
211     soc_address.sin_port = htons(s ? s->s_port : 80);
212   }
213 
214   /*
215    * Strip the "#anchor" suffix from the node name.
216    */
217   { char *p = strchr(node_name, '#'); if (p) *p = '\0'; }
218 
219   /*
220    * On-the-fly definition of "FileHandle" which closes a UNIX file descriptor
221    * on destruction.
222    */
223   class FileHandle {
224   public:
225     FileHandle() : fd(-1) {}
226     ~FileHandle() { if (fd != -1) ::close(fd); }
227     operator int() { return fd; }
228     int operator=(int x) { return (fd = x); }
229   private:
230     int fd;
231   } fd;
232 
233   fd = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);
234   if (fd == -1) {
235     open_error_ = strerror(errno);
236     return;
237   }
238 
239   /*
240    * Make the socket non-blocking, so the "connect()" can be canceled. This
241    * means that when we issue the "connect()" we should NOT have to wait for
242    * the accept on the other end.
243    */
244   if (fcntl(fd, F_SETFL, O_NONBLOCK) == -1) {
245     open_error_ = strerror(errno);
246     return;
247   }
248 
249   /*
250    * Issue the "connect()". Since the server can't do an instantaneous
251    * "accept()" and we are non-blocking, this will almost certainly return
252    * a negative status.
253    */
254   if (connect(
255     fd,
256     (struct sockaddr *) &soc_address, sizeof(soc_address)
257   ) == -1) {
258     if (errno != EINPROGRESS && errno != EAGAIN) {
259       open_error_ = strerror(errno);
260       return;
261     }
262 
263     for (;;) {
264       struct pollfd p;
265       int           res;
266 
267       p.fd     = fd;
268       p.events = POLLOUT;
269       res = poll(&p, 1, timeout);
270 
271       /*
272        * Interrupted "poll()"?
273        */
274       if (res == -1 && errno == EINTR) continue;
275 
276       /*
277        * Check for errors.
278        */
279       if (res == -1 && errno != EALREADY) {
280         open_error_ = strerror(errno);
281 	return;
282       }
283 
284       /*
285        * Check for timeout.
286        */
287       if (res == 0) {
288 	open_error_ = "\"connect()\" timed out";
289 	return;
290       }
291 
292       /*
293        * Extra check here for connection success, if we try to
294        * connect again, and get EISCONN, it means we have a
295        * successful connection.
296        *
297        * Notice: On SINIX 5.43 B2000, the "poll()" returns "1" when the
298        * timeout occurs (!?). When we call "poll()", we encounter a SIGPIPE
299        * (!?). If we ignore it, "poll()" returns EINVAL (22) (!?).
300        */
301       void (*sigpipe_handler)(int) = signal(SIGPIPE, SIG_IGN); /* { */
302       res = connect(fd, (struct sockaddr *) &soc_address, sizeof(soc_address));
303       (void) signal(SIGPIPE, sigpipe_handler); /* } */
304 
305       if (res == -1 && errno == EISCONN) break;
306       if (res == -1 && errno != EALREADY) {
307         open_error_ = strerror(errno);
308 	return;
309       }
310     }
311   }
312 
313   /*
314    * Make the socket blocking again on good "connect()".
315    */
316   if (fcntl(fd, F_SETFL, 0) == -1) {
317     open_error_ = strerror(errno);
318     return;
319   }
320 
321   /*
322    * Issue the HTTP request.
323    *
324    * Notice: "GET xyz" means "return the document without a header".
325    */
326   char command[4 + (sizeof(node_name) - 1) + 4 + 1];
327   sprintf(command, "GET %s\r\n\r\n", node_name);
328   ssize_t command_length = strlen(command);
329 
330   if (::write(fd, command, command_length) != command_length) {
331     open_error_ = "Error sending HTTP GET request";
332     return;
333   }
334 
335   /*
336    * Attach the file descriptor to the ifstream.
337    */
338   fd_ = fd;
339 
340   fd = -1;    // ...so that it is not implicitly "::close()"'d.
341 }
342 
343 /* ------------------------------------------------------------------------- */
344 
345 void
file_open(const char * file_name)346 urlistream::file_open(const char *file_name)
347 {
348   fd_ = !strcmp(file_name, "-") ? ::dup(0) : ::open(file_name, O_RDONLY);
349   open_error_ = strerror(errno);
350 }
351 
352 /* ------------------------------------------------------------------------- */
353 
354 const char *
open_error()355 urlistream::open_error() const
356 {
357   return open_error_ ? open_error_ : "No error";
358 }
359 
360 /* ------------------------------------------------------------------------- */
361 
362 int
get()363 urlistream::get()
364 {
365   char ch;
366   int ret = ::read(fd_, &ch, 1);
367   return (ret > 0 ? ch : -1);
368 }
369 
370 #ifdef TESTING /* { */
371 
372 int
main(int argc,char ** argv)373 main(int argc, char **argv)
374 {
375   if (argc < 2) {
376     cerr << "Usage:  urlcat <url> [ ... ]" << endl;
377     exit(1);
378   }
379 
380   for (int i = 1; i < argc; ++i) {
381     urlistream uis(argv[i]);
382     if (!uis.is_open()) {
383       cerr << "Opening \"" << argv[i] << "\": " << uis.open_error() << endl;
384       exit(1);
385     }
386 
387     for (;;) {
388       int c = uis.get();
389       if (c == EOF) break;
390       cout << (char) c;
391     }
392   }
393 
394   return 0;
395 }
396 
397 #endif /* } */
398 
399 /* ------------------------------------------------------------------------- */
400