1 /*************************************************************************/
2 /*                                                                       */
3 /*                  Language Technologies Institute                      */
4 /*                     Carnegie Mellon University                        */
5 /*                         Copyright (c) 2011                            */
6 /*                        All Rights Reserved.                           */
7 /*                                                                       */
8 /*  Permission is hereby granted, free of charge, to use and distribute  */
9 /*  this software and its documentation without restriction, including   */
10 /*  without limitation the rights to use, copy, modify, merge, publish,  */
11 /*  distribute, sublicense, and/or sell copies of this work, and to      */
12 /*  permit persons to whom this work is furnished to do so, subject to   */
13 /*  the following conditions:                                            */
14 /*   1. The code must retain the above copyright notice, this list of    */
15 /*      conditions and the following disclaimer.                         */
16 /*   2. Any modifications must be clearly marked as such.                */
17 /*   3. Original authors' names are not deleted.                         */
18 /*   4. The authors' names are not used to endorse or promote products   */
19 /*      derived from this software without specific prior written        */
20 /*      permission.                                                      */
21 /*                                                                       */
22 /*  CARNEGIE MELLON UNIVERSITY AND THE CONTRIBUTORS TO THIS WORK         */
23 /*  DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING      */
24 /*  ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT   */
25 /*  SHALL CARNEGIE MELLON UNIVERSITY NOR THE CONTRIBUTORS BE LIABLE      */
26 /*  FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES    */
27 /*  WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN   */
28 /*  AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,          */
29 /*  ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF       */
30 /*  THIS SOFTWARE.                                                       */
31 /*                                                                       */
32 /*************************************************************************/
33 /*             Author:  Alan W Black (awb@cs.cmu.edu)                    */
34 /*               Date:  November 2011                                    */
35 /*************************************************************************/
36 /*                                                                       */
37 /*  Support to access (some forms of) url                                */
38 /*      http:  and file:                                                 */
39 /*  Should support libcurl if available -- but a simple form if not      */
40 /*                                                                       */
41 /*  Only support http: if sockets are available                          */
42 /*                                                                       */
43 /*************************************************************************/
44 #include "cst_math.h"
45 #include "cst_file.h"
46 #include "cst_string.h"
47 #include "cst_tokenstream.h"
48 #include "cst_socket.h"
49 
50 #ifndef CST_NO_SOCKETS
51 #ifndef _MSC_VER
52 #include <stdlib.h>
53 #include <unistd.h>
54 #else
55 #include <io.h>
56 #include <WinSock.h>
57 #endif
58 #endif
59 
cst_urlp(const char * url)60 int cst_urlp(const char *url)
61 {
62     /* Return 1 if url is a url, 0 otherwise */
63     /* This is decided by the initial substring being "http:" or "file:" */
64     if ((cst_strlen(url) > 4) &&
65         (cst_streqn("http:",url,5) ||
66          cst_streqn("file:",url,5)))
67         return TRUE;
68     else
69         return FALSE;
70 }
71 
cst_url_open(const char * url)72 cst_file cst_url_open(const char *url)
73 {
74     /* Always opens it for reading */
75     cst_tokenstream *urlts;
76     const cst_string *protocol;
77     int port;
78     cst_string *host;
79     int fd;
80     char *url_request;
81     char *path;
82     cst_file ofd;
83     int state,n;
84     char c;
85 
86     urlts = ts_open_string(url, "", ":/", "", "");
87 
88     protocol = ts_get(urlts);
89     if (cst_streq(protocol,"http"))
90     {
91 #ifdef CST_NO_SOCKETS
92         ts_close(urlts);
93         return NULL;
94 #else
95         if (!cst_streq(ts_get(urlts),":") ||
96             !cst_streq(ts_get(urlts),"/") ||
97             !cst_streq(ts_get(urlts),"/"))
98         {
99             ts_close(urlts);
100             return NULL;
101         }
102         host = cst_strdup(ts_get(urlts));
103         if (cst_streq(ts_get(urlts),":"))
104             port = (int)cst_atof(ts_get(urlts));
105         else
106             port = 80;
107 
108         /* Open port to web server */
109         fd = cst_socket_open(host,port);
110         if (fd < 0)
111         {
112             cst_free(host);
113             ts_close(urlts);
114             return NULL;
115         }
116 
117         url_request = cst_alloc(char,cst_strlen(url)+17);
118         cst_sprintf(url_request,"GET %s HTTP/1.2\n\n",url);
119         n = write(fd,url_request,cst_strlen(url_request));
120         cst_free(url_request);
121 
122         /* Skip http header -- until \n\n */
123         state=0;
124         while (state != 4)
125         {
126             n=read(fd,&c,1);
127             if (n == 0)
128             {   /* eof or link gone down */
129                 cst_free(host);
130                 ts_close(urlts);
131                 return NULL;
132             }
133             if ((state == 0) && (c == '\r'))
134                 state=1;
135             else if ((state == 1) && (c == '\n'))
136                 state=2;
137             else if ((state == 2) && (c == '\r'))
138                 state=3;
139             else if ((state == 3) && (c == '\n'))
140                 state=4;
141             /* Not sure you can get no CRs in the stream */
142             else if ((state == 0) && (c == '\n'))
143                 state=2;
144             else if ((state == 2) && (c == '\n'))
145                 state=4;
146             else
147                 state = 0;
148         }
149 
150         ofd = fdopen(fd,"rb");
151 
152         ts_close(urlts);
153         cst_free(host);
154 
155         return ofd;
156 #endif
157     }
158     else if (cst_streq(protocol,"file"))
159     {
160         if (!cst_streq(ts_get(urlts),":") ||
161             !cst_streq(ts_get(urlts),"/") ||
162             !cst_streq(ts_get(urlts),"/"))
163         {
164             ts_close(urlts);
165             return NULL;
166         }
167         path = cst_strdup(&urlts->string_buffer[urlts->file_pos-1]);
168         /* printf("awb_debug fileurl %s\n",path); */
169 
170         ofd = cst_fopen(path,CST_OPEN_READ);
171 
172         ts_close(urlts);
173         cst_free(path);
174 
175         return ofd;
176     }
177     else
178     {   /* Unsupported protocol */
179         return NULL;
180     }
181 }
182 
183