1 /*************************************************************************/
2 /* */
3 /* Language Technologies Institute */
4 /* Carnegie Mellon University */
5 /* Copyright (c) 2011 */
6 /* All Rights Reserved. */
7 /* */
8 /* Permission is hereby granted, free of charge, to use and distribute */
9 /* this software and its documentation without restriction, including */
10 /* without limitation the rights to use, copy, modify, merge, publish, */
11 /* distribute, sublicense, and/or sell copies of this work, and to */
12 /* permit persons to whom this work is furnished to do so, subject to */
13 /* the following conditions: */
14 /* 1. The code must retain the above copyright notice, this list of */
15 /* conditions and the following disclaimer. */
16 /* 2. Any modifications must be clearly marked as such. */
17 /* 3. Original authors' names are not deleted. */
18 /* 4. The authors' names are not used to endorse or promote products */
19 /* derived from this software without specific prior written */
20 /* permission. */
21 /* */
22 /* CARNEGIE MELLON UNIVERSITY AND THE CONTRIBUTORS TO THIS WORK */
23 /* DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING */
24 /* ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT */
25 /* SHALL CARNEGIE MELLON UNIVERSITY NOR THE CONTRIBUTORS BE LIABLE */
26 /* FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES */
27 /* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN */
28 /* AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, */
29 /* ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF */
30 /* THIS SOFTWARE. */
31 /* */
32 /*************************************************************************/
33 /* Author: Alan W Black (awb@cs.cmu.edu) */
34 /* Date: November 2011 */
35 /*************************************************************************/
36 /* */
37 /* Support to access (some forms of) url */
38 /* http: and file: */
39 /* Should support libcurl if available -- but a simple form if not */
40 /* */
41 /* Only support http: if sockets are available */
42 /* */
43 /*************************************************************************/
44 #include "cst_math.h"
45 #include "cst_file.h"
46 #include "cst_string.h"
47 #include "cst_tokenstream.h"
48 #include "cst_socket.h"
49
50 #ifndef CST_NO_SOCKETS
51 #ifndef _MSC_VER
52 #include <stdlib.h>
53 #include <unistd.h>
54 #else
55 #include <io.h>
56 #include <WinSock.h>
57 #endif
58 #endif
59
cst_urlp(const char * url)60 int cst_urlp(const char *url)
61 {
62 /* Return 1 if url is a url, 0 otherwise */
63 /* This is decided by the initial substring being "http:" or "file:" */
64 if ((cst_strlen(url) > 4) &&
65 (cst_streqn("http:",url,5) ||
66 cst_streqn("file:",url,5)))
67 return TRUE;
68 else
69 return FALSE;
70 }
71
cst_url_open(const char * url)72 cst_file cst_url_open(const char *url)
73 {
74 /* Always opens it for reading */
75 cst_tokenstream *urlts;
76 const cst_string *protocol;
77 int port;
78 cst_string *host;
79 int fd;
80 char *url_request;
81 char *path;
82 cst_file ofd;
83 int state,n;
84 char c;
85
86 urlts = ts_open_string(url, "", ":/", "", "");
87
88 protocol = ts_get(urlts);
89 if (cst_streq(protocol,"http"))
90 {
91 #ifdef CST_NO_SOCKETS
92 ts_close(urlts);
93 return NULL;
94 #else
95 if (!cst_streq(ts_get(urlts),":") ||
96 !cst_streq(ts_get(urlts),"/") ||
97 !cst_streq(ts_get(urlts),"/"))
98 {
99 ts_close(urlts);
100 return NULL;
101 }
102 host = cst_strdup(ts_get(urlts));
103 if (cst_streq(ts_get(urlts),":"))
104 port = (int)cst_atof(ts_get(urlts));
105 else
106 port = 80;
107
108 /* Open port to web server */
109 fd = cst_socket_open(host,port);
110 if (fd < 0)
111 {
112 cst_free(host);
113 ts_close(urlts);
114 return NULL;
115 }
116
117 url_request = cst_alloc(char,cst_strlen(url)+17);
118 cst_sprintf(url_request,"GET %s HTTP/1.2\n\n",url);
119 n = write(fd,url_request,cst_strlen(url_request));
120 cst_free(url_request);
121
122 /* Skip http header -- until \n\n */
123 state=0;
124 while (state != 4)
125 {
126 n=read(fd,&c,1);
127 if (n == 0)
128 { /* eof or link gone down */
129 cst_free(host);
130 ts_close(urlts);
131 return NULL;
132 }
133 if ((state == 0) && (c == '\r'))
134 state=1;
135 else if ((state == 1) && (c == '\n'))
136 state=2;
137 else if ((state == 2) && (c == '\r'))
138 state=3;
139 else if ((state == 3) && (c == '\n'))
140 state=4;
141 /* Not sure you can get no CRs in the stream */
142 else if ((state == 0) && (c == '\n'))
143 state=2;
144 else if ((state == 2) && (c == '\n'))
145 state=4;
146 else
147 state = 0;
148 }
149
150 ofd = fdopen(fd,"rb");
151
152 ts_close(urlts);
153 cst_free(host);
154
155 return ofd;
156 #endif
157 }
158 else if (cst_streq(protocol,"file"))
159 {
160 if (!cst_streq(ts_get(urlts),":") ||
161 !cst_streq(ts_get(urlts),"/") ||
162 !cst_streq(ts_get(urlts),"/"))
163 {
164 ts_close(urlts);
165 return NULL;
166 }
167 path = cst_strdup(&urlts->string_buffer[urlts->file_pos-1]);
168 /* printf("awb_debug fileurl %s\n",path); */
169
170 ofd = cst_fopen(path,CST_OPEN_READ);
171
172 ts_close(urlts);
173 cst_free(path);
174
175 return ofd;
176 }
177 else
178 { /* Unsupported protocol */
179 return NULL;
180 }
181 }
182
183