1 // This file is part of BOINC.
2 // http://boinc.berkeley.edu
3 // Copyright (C) 2009 University of California
4 //
5 // BOINC is free software; you can redistribute it and/or modify it
6 // under the terms of the GNU Lesser General Public License
7 // as published by the Free Software Foundation,
8 // either version 3 of the License, or (at your option) any later version.
9 //
10 // BOINC is distributed in the hope that it will be useful,
11 // but WITHOUT ANY WARRANTY; without even the implied warranty of
12 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
13 // See the GNU Lesser General Public License for more details.
14 //
15 // You should have received a copy of the GNU Lesser General Public License
16 // along with BOINC. If not, see <http://www.gnu.org/licenses/>.
17
18 #if defined(_WIN32) && !defined(__STDWX_H__)
19 #include "boinc_win.h"
20 #elif defined(_WIN32) && defined(__STDWX_H__)
21 #include "stdwx.h"
22 #else
23 #include "config.h"
24 #include <string>
25 #include <stdio.h>
26 #include <string.h>
27 #include <ctype.h>
28 #include <stdlib.h>
29 #endif
30
31 #include "str_util.h"
32 #include "str_replace.h"
33
34 #include "url.h"
35
36 using std::string;
37
38 // Break a URL down into its protocol, server, port and file components
39 // URL format:
40 // [{http|https|socks}://][user[:passwd]@]host.dom.dom[:port][/dir/file]
41 //
parse_url(const char * url,PARSED_URL & purl)42 void parse_url(const char* url, PARSED_URL& purl) {
43 char* p, *q, *buf;
44 char _buf[256];
45
46 // strip off the protocol if present
47 //
48 if (strncmp(url, "http://", 7) == 0) {
49 safe_strcpy(_buf, url+7);
50 purl.protocol = URL_PROTOCOL_HTTP;
51 } else if (strncmp(url, "https://", 8) == 0) {
52 safe_strcpy(_buf, url+8);
53 purl.protocol = URL_PROTOCOL_HTTPS;
54 } else if (strncmp(url, "socks://", 8) == 0) {
55 safe_strcpy(_buf, url+8);
56 purl.protocol = URL_PROTOCOL_SOCKS;
57 } else {
58 safe_strcpy(_buf, url);
59 purl.protocol = URL_PROTOCOL_UNKNOWN;
60 }
61 buf = _buf;
62
63 // parse user name and password
64 //
65 safe_strcpy(purl.user, "");
66 safe_strcpy(purl.passwd, "");
67 p = strchr(buf, '@');
68 if (p) {
69 *p = 0;
70 q = strchr(buf, ':');
71 if (q) {
72 *q = 0;
73 safe_strcpy(purl.user, buf);
74 safe_strcpy(purl.passwd, q+1);
75 } else {
76 safe_strcpy(purl.user, buf);
77 }
78 buf = p+1;
79 }
80
81 // parse and strip off file part if present
82 //
83 p = strchr(buf, '/');
84 if (p) {
85 safe_strcpy(purl.file, p+1);
86 *p = 0;
87 } else {
88 safe_strcpy(purl.file, "");
89 }
90
91 // parse and strip off port if present
92 //
93 p = strchr(buf,':');
94 if (p) {
95 purl.port = atol(p+1);
96 *p = 0;
97 } else {
98 // CMC note: if they didn't pass in a port #,
99 // but the url starts with https://, assume they
100 // want a secure port (HTTPS, port 443)
101 purl.port = (purl.protocol == URL_PROTOCOL_HTTPS) ? 443 : 80;
102 }
103
104 // what remains is the host
105 //
106 safe_strcpy(purl.host, buf);
107 }
108
// Convert the two hexadecimal digits what[0] and what[1]
// into the byte they encode ("2F" and "2f" both give '/').
// No validation is done: the caller must make sure
// both characters really are hex digits.
//
static char x2c(char *what) {
    int value = 0;
    for (int i = 0; i < 2; i++) {
        const char c = what[i];
        // for letters, clearing bit 0x20 folds 'a'..'f' onto 'A'..'F'
        const int nibble = (c >= 'A') ? ((c & 0xdf) - 'A') + 10 : (c - '0');
        value = value * 16 + nibble;
    }
    return (char)value;
}
117
// Replace the decimal number stored in "what" by its two-digit,
// upper-case hexadecimal form, in place ("47" becomes "2F").
// Only the low 8 bits of the number are used, so the negative value
// obtained by printing a signed char with %d (e.g. "-61")
// yields the hex form of the underlying byte ("C3").
// "what" must have room for at least 3 bytes.
//
void c2x(char *what) {
    static const char hex_digits[] = "0123456789ABCDEF";
    const unsigned char value = (unsigned char)atoi(what);
    what[0] = hex_digits[value >> 4];
    what[1] = hex_digits[value & 0x0f];
    what[2] = 0;
}
134
135 // The following functions do "URL-escaping", i.e. escaping GET arguments
136 // to be passed in a URL
137
138 // size not really needed since unescaping can only shrink
139 //
unescape_url(char * url,int url_size)140 void unescape_url(char *url, int url_size) {
141 int x,y;
142
143 for (x=0,y=0; url[y] && (x<url_size);++x,++y) {
144 if ((url[x] = url[y]) == '%') {
145 url[x] = x2c(&url[y+1]);
146 y+=2;
147 }
148 }
149 url[x] = '\0';
150 }
151
152 // unescape_url needs to be able to handle potentially hostile URLs
153 //
unescape_url(string & url)154 void unescape_url(string& url) {
155 char buf[1024];
156 strlcpy(buf, url.c_str(), sizeof(buf));
157 unescape_url(buf, sizeof(buf));
158 url = buf;
159 }
160
// URL-escape "in" into "out": ASCII letters and digits are copied,
// every other byte becomes %XX (two upper-case hex digits).
// "out" holds out_size bytes and is always NUL-terminated
// when out_size >= 1; nothing is written otherwise.
// Encoding stops silently once fewer than 4 bytes remain in "out",
// so a full escape plus the terminator always fits.
//
void escape_url(const char *in, char*out, int out_size) {
    static const char hex_digits[] = "0123456789ABCDEF";

    if (out_size < 1) return;

    int y = 0;
    for (int x = 0; in[x] && (y < out_size-3); ++x) {
        // go through unsigned char: passing a negative value to isalnum()
        // is undefined, and the hex digits must come from the raw byte
        //
        const unsigned char c = (unsigned char)in[x];
        if (isalnum(c)) {
            out[y++] = in[x];
        } else {
            out[y++] = '%';
            out[y++] = hex_digits[c >> 4];
            out[y++] = hex_digits[c & 0x0f];
        }
    }
    out[y] = 0;
}
180
181 // escape_url needs to be able to handle potentially hostile URLs
182 //
escape_url(string & url)183 void escape_url(string& url) {
184 char buf[1024];
185 escape_url(url.c_str(), buf, sizeof(buf));
186 url = buf;
187 }
188
189 // Escape a URL for the project directory, cutting off the "http://",
// converting everything other than alphanumerics, ., - and _ to "_".
191 //
// Write a directory-name-friendly form of "in" to "out":
// everything up to and including the first "://" is dropped,
// then letters, digits, '.', '-' and '_' are copied
// and every other byte is replaced by '_'.
// "out" is not bounds-checked: it needs room for strlen(in)+1 bytes.
//
void escape_url_readable(char *in, char* out) {
    char* scheme_end = strstr(in, "://");
    if (scheme_end) {
        in = scheme_end + 3;
    }
    for (; *in; ++in, ++out) {
        // isalnum() requires a value representable as unsigned char
        const unsigned char c = (unsigned char)*in;
        const bool keep = isalnum(c) || c == '.' || c == '-' || c == '_';
        *out = keep ? *in : '_';
    }
    *out = 0;
}
211
212
213 // Canonicalize a master url.
214 // - Convert the first part of a URL (before the "://") to http://,
215 // or prepend it
216 // - Remove double slashes in the rest
217 // - Add a trailing slash if necessary
218 //
// Rewrite "url" (a buffer of "len" bytes) in canonical form:
//   http[s]://<rest>/
// - "https" is kept only if the scheme really is https (any case);
//   every other scheme, or no scheme at all, becomes "http".
// - Runs of '/' after the scheme are collapsed to a single '/'.
// - A trailing '/' is appended if missing.
// The part after the scheme is limited to 1023 bytes, and the result
// is truncated to fit in "len" bytes (always NUL-terminated).
// Nothing is done if len <= 0.
//
void canonicalize_master_url(char* url, int len) {
    char buf[1024];
    bool bSSL = false;      // keep track if they sent in https://

    if (len <= 0) return;

    const char* rest = url;
    const char* sep = strstr(url, "://");
    if (sep) {
        // only a 5-character scheme can be "https"; compare ignoring case
        if (sep == url + 5) {
            static const char https[] = "https";
            bSSL = true;
            for (int i = 0; i < 5; i++) {
                if (tolower((unsigned char)url[i]) != https[i]) {
                    bSSL = false;
                    break;
                }
            }
        }
        rest = sep + 3;
    }

    // copy the remainder, dropping any '/' that directly follows a '/'
    //
    size_t n = 0;
    for (const char* s = rest; *s && n < sizeof(buf)-1; ++s) {
        if (*s == '/' && n > 0 && buf[n-1] == '/') continue;
        buf[n++] = *s;
    }
    buf[n] = 0;

    // n == 0 is checked first so that buf[n-1] is never read
    // for an empty remainder
    //
    if ((n == 0 || buf[n-1] != '/') && (n < sizeof(buf)-2)) {
        buf[n++] = '/';
        buf[n] = 0;
    }
    snprintf(url, len, "http%s://%s", (bSSL ? "s" : ""), buf);
    url[len-1] = 0;
}
243
canonicalize_master_url(string & url)244 void canonicalize_master_url(string& url) {
245 char buf[1024];
246 safe_strcpy(buf, url.c_str());
247 canonicalize_master_url(buf, sizeof(buf));
248 url = buf;
249 }
250
251 // is the string a valid master URL, in canonical form?
252 //
// Return true if "buf" looks like a canonical master URL:
// - it starts with "http://" or "https://" (lower case),
// - a '.' occurs somewhere after the scheme, but not right after it,
// - a '/' occurs somewhere after that '.', but not right after it,
// - the last character is '/'.
// Note that the '.' is searched in everything after the scheme,
// not just in the host name.
//
bool valid_master_url(char* buf) {
    size_t scheme_len;
    if (strncmp(buf, "http://", 7) == 0) {
        scheme_len = 7;
    } else if (strncmp(buf, "https://", 8) == 0) {
        scheme_len = 8;
    } else {
        return false;   // no http or https, it's bad!
    }

    const char* host = buf + scheme_len;
    const char* dot = strchr(host, '.');
    if (!dot || dot == host) return false;

    const char* after_dot = dot + 1;
    const char* slash = strchr(after_dot, '/');
    if (!slash || slash == after_dot) return false;

    // buf is non-empty here: it starts with the scheme
    return buf[strlen(buf) - 1] == '/';
}
280
escape_project_url(char * in,char * out)281 void escape_project_url(char *in, char* out) {
282 escape_url_readable(in, out);
283 char& last = out[strlen(out)-1];
284 // remove trailing _
285 if (last == '_') {
286 last = '\0';
287 }
288 }
289
// Return true if "url" begins with "https://".
// The comparison is case-sensitive.
//
bool is_https(const char* url) {
    static const char prefix[] = "https://";
    // sizeof counts the terminating NUL, hence the -1
    const size_t prefix_len = sizeof(prefix) - 1;
    if (strncmp(url, prefix, prefix_len) != 0) {
        return false;
    }
    return true;
}
293