1 // This file is part of BOINC.
2 // http://boinc.berkeley.edu
3 // Copyright (C) 2009 University of California
4 //
5 // BOINC is free software; you can redistribute it and/or modify it
6 // under the terms of the GNU Lesser General Public License
7 // as published by the Free Software Foundation,
8 // either version 3 of the License, or (at your option) any later version.
9 //
10 // BOINC is distributed in the hope that it will be useful,
11 // but WITHOUT ANY WARRANTY; without even the implied warranty of
12 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
13 // See the GNU Lesser General Public License for more details.
14 //
15 // You should have received a copy of the GNU Lesser General Public License
16 // along with BOINC.  If not, see <http://www.gnu.org/licenses/>.
17 
18 #if   defined(_WIN32) && !defined(__STDWX_H__)
19 #include "boinc_win.h"
20 #elif defined(_WIN32) && defined(__STDWX_H__)
21 #include "stdwx.h"
22 #else
23 #include "config.h"
24 #include <string>
25 #include <stdio.h>
26 #include <string.h>
27 #include <ctype.h>
28 #include <stdlib.h>
29 #endif
30 
31 #include "str_util.h"
32 #include "str_replace.h"
33 
34 #include "url.h"
35 
36 using std::string;
37 
38 // Break a URL down into its protocol, server, port and file components
39 // URL format:
40 // [{http|https|socks}://][user[:passwd]@]host.dom.dom[:port][/dir/file]
41 //
parse_url(const char * url,PARSED_URL & purl)42 void parse_url(const char* url, PARSED_URL& purl) {
43     char* p, *q, *buf;
44     char _buf[256];
45 
46     // strip off the protocol if present
47     //
48     if (strncmp(url, "http://", 7) == 0) {
49         safe_strcpy(_buf, url+7);
50         purl.protocol = URL_PROTOCOL_HTTP;
51     } else if (strncmp(url, "https://", 8) == 0) {
52         safe_strcpy(_buf, url+8);
53         purl.protocol = URL_PROTOCOL_HTTPS;
54     } else if (strncmp(url, "socks://", 8) == 0) {
55         safe_strcpy(_buf, url+8);
56         purl.protocol = URL_PROTOCOL_SOCKS;
57     } else {
58         safe_strcpy(_buf, url);
59         purl.protocol = URL_PROTOCOL_UNKNOWN;
60     }
61     buf = _buf;
62 
63     // parse user name and password
64     //
65     safe_strcpy(purl.user, "");
66     safe_strcpy(purl.passwd, "");
67     p = strchr(buf, '@');
68     if (p) {
69         *p = 0;
70         q = strchr(buf, ':');
71         if (q) {
72             *q = 0;
73             safe_strcpy(purl.user, buf);
74             safe_strcpy(purl.passwd, q+1);
75         } else {
76             safe_strcpy(purl.user, buf);
77         }
78         buf = p+1;
79     }
80 
81     // parse and strip off file part if present
82     //
83     p = strchr(buf, '/');
84     if (p) {
85         safe_strcpy(purl.file, p+1);
86         *p = 0;
87     } else {
88         safe_strcpy(purl.file, "");
89     }
90 
91     // parse and strip off port if present
92     //
93     p = strchr(buf,':');
94     if (p) {
95         purl.port = atol(p+1);
96         *p = 0;
97     } else {
98         // CMC note:  if they didn't pass in a port #,
99         //    but the url starts with https://, assume they
100         //    want a secure port (HTTPS, port 443)
101         purl.port = (purl.protocol == URL_PROTOCOL_HTTPS) ? 443 : 80;
102     }
103 
104     // what remains is the host
105     //
106     safe_strcpy(purl.host, buf);
107 }
108 
// Convert the two hexadecimal digits what[0] and what[1] into the byte
// they encode (e.g. "2F" or "2f" -> '/').
// No validation is done: the caller must make sure both characters
// are hex digits, otherwise the result is meaningless.
//
static char x2c(char *what) {
    int value = 0;

    for (int i=0; i<2; i++) {
        const int c = what[i];
        // For letters, clearing bit 0x20 folds lower case onto upper case
        // before the distance from 'A' is taken.
        const int nibble = (c >= 'A') ? ((c & 0xdf) - 'A') + 10 : (c - '0');
        value = value*16 + nibble;
    }
    return (char)value;
}
117 
// Replace, in place, the decimal number held as text in "what"
// by its two-digit upper-case hexadecimal form (e.g. "47" -> "2F").
// The number is parsed with atoi() and then treated as an unsigned byte,
// so the negative text produced by printing a signed char with "%d"
// (e.g. "-61") yields the hex of the underlying byte ("C3").
// "what" must have room for at least 3 bytes (2 digits + NUL).
//
void c2x(char *what) {
    static const char hex_digits[] = "0123456789ABCDEF";
    // The cast reduces the parsed value modulo 256; this is what maps
    // negative signed-char values back onto 0x80..0xFF.
    const unsigned char value = (unsigned char)atoi(what);
    char buf[3];

    buf[0] = hex_digits[value >> 4];
    buf[1] = hex_digits[value & 0x0f];
    buf[2] = 0;

    strcpy(what, buf);
}
134 
135 // The following functions do "URL-escaping", i.e. escaping GET arguments
136 // to be passed in a URL
137 
138 // size not really needed since unescaping can only shrink
139 //
unescape_url(char * url,int url_size)140 void unescape_url(char *url, int url_size) {
141     int x,y;
142 
143     for (x=0,y=0; url[y] && (x<url_size);++x,++y) {
144         if ((url[x] = url[y]) == '%') {
145             url[x] = x2c(&url[y+1]);
146             y+=2;
147         }
148     }
149     url[x] = '\0';
150 }
151 
152 // unescape_url needs to be able to handle potentially hostile URLs
153 //
unescape_url(string & url)154 void unescape_url(string& url) {
155     char buf[1024];
156     strlcpy(buf, url.c_str(), sizeof(buf));
157     unescape_url(buf, sizeof(buf));
158     url = buf;
159 }
160 
// URL-escape "in" into "out": ASCII letters and digits are copied,
// every other byte is written as '%' followed by two upper-case
// hex digits (e.g. ' ' -> "%20", byte 0xC3 -> "%C3").
//   in        NUL-terminated input
//   out       receives the NUL-terminated result
//   out_size  size of out in bytes; nothing is written if it is <= 0
// Output stops early (silently truncating) once fewer than 4 bytes
// remain, which is the room needed for one escape plus the NUL.
//
void escape_url(const char *in, char*out, int out_size) {
    static const char hex_digits[] = "0123456789ABCDEF";
    int x, y;

    if (out_size <= 0) return;

    for (x=0, y=0; in[x] && (y<out_size-3); ++x) {
        // <ctype.h> functions need an unsigned char value; also used
        // below so that bytes >= 0x80 get their real hex value
        const unsigned char c = (unsigned char)in[x];
        if (isalnum(c)) {
            out[y++] = in[x];
        } else {
            out[y++] = '%';
            out[y++] = hex_digits[c >> 4];
            out[y++] = hex_digits[c & 0x0f];
        }
    }
    out[y] = 0;
}
180 
181 // escape_url needs to be able to handle potentially hostile URLs
182 //
escape_url(string & url)183 void escape_url(string& url) {
184     char buf[1024];
185     escape_url(url.c_str(), buf, sizeof(buf));
186     url = buf;
187 }
188 
// Escape a URL for the project directory, cutting off everything up to
// and including the "://" (e.g. "http://"), and converting everything
// other than alphanumerics, ., - and _ to "_".
//
//   in   NUL-terminated URL (not modified)
//   out  receives the NUL-terminated result; no size is passed, so the
//        caller must provide room for strlen(in)+1 bytes
// Each input byte after the scheme yields exactly one output byte.
//
void escape_url_readable(char *in, char* out) {
    // skip the scheme, if any
    char* sep = strstr(in, "://");
    if (sep) {
        in = sep + strlen("://");
    }

    int y = 0;
    for (int x=0; in[x]; ++x) {
        // <ctype.h> functions need an unsigned char value; passing a
        // negative plain char is undefined behaviour
        const unsigned char c = (unsigned char)in[x];
        const bool keep = isalnum(c) || c=='.' || c=='-' || c=='_';
        out[y++] = keep ? in[x] : '_';
    }
    out[y] = 0;
}
211 
212 
// Canonicalize a master url.
//   - Convert the scheme (the part before the "://") to http://,
//     keeping https:// when that is what was given;
//     prepend http:// if there is no scheme
//   - Remove double slashes in the rest
//   - Add a trailing slash if necessary
//
//   url  NUL-terminated URL; overwritten with the canonical form
//   len  size in bytes of the buffer holding url; nothing is done
//        if url is null or len <= 0
// The result is "http://" or "https://" followed by the rest of the
// URL with runs of '/' collapsed to one and a trailing '/' ensured.
// "https://" is produced only when the given scheme is exactly "https"
// (compared case-insensitively); any other scheme becomes "http://".
// The rest of the URL is limited to 1023 bytes, and the final result
// is truncated to fit in len bytes.
//
void canonicalize_master_url(char* url, int len) {
    char buf[1024];
    bool bSSL = false; // keep track if they sent in https://

    if (!url || len <= 0) return;

    // find where the part after the scheme starts
    //
    const char* rest = url;
    const char* sep = strstr(url, "://");
    if (sep) {
        if (sep == url + 5) {
            static const char https[] = "https";
            bSSL = true;
            for (int i=0; i<5; i++) {
                if (tolower((unsigned char)url[i]) != https[i]) {
                    bSSL = false;
                    break;
                }
            }
        }
        rest = sep + 3;
    }

    // copy the rest, dropping any '/' that directly follows another '/'
    //
    size_t n = 0;
    for (const char* s = rest; *s && n < sizeof(buf)-1; ++s) {
        if (*s == '/' && n > 0 && buf[n-1] == '/') continue;
        buf[n++] = *s;
    }
    buf[n] = 0;

    // add a trailing slash if there is none
    // (the n == 0 test keeps buf[n-1] from being read for empty input)
    //
    if ((n == 0 || buf[n-1] != '/') && (n<sizeof(buf)-2)) {
        buf[n++] = '/';
        buf[n] = 0;
    }

    snprintf(url, len, "http%s://%s", (bSSL ? "s" : ""), buf);
    // make sure the result is terminated even if it was truncated
    url[len-1] = 0;
}
243 
canonicalize_master_url(string & url)244 void canonicalize_master_url(string& url) {
245     char buf[1024];
246     safe_strcpy(buf, url.c_str());
247     canonicalize_master_url(buf, sizeof(buf));
248     url = buf;
249 }
250 
251 // is the string a valid master URL, in canonical form?
252 //
// Returns true if buf
//   - starts with "http://" or "https://",
//   - has, after that prefix, at least one character followed by a '.',
//   - has, after that '.', at least one character followed by a '/',
//   - and ends with '/'.
// Returns false otherwise.
//
bool valid_master_url(char* buf) {
    const char* host;

    // the scheme must be http or https
    if (strncmp(buf, "http://", 7) == 0) {
        host = buf + 7;
    } else if (strncmp(buf, "https://", 8) == 0) {
        host = buf + 8;
    } else {
        return false;
    }

    // a '.' must follow, but not as the very first character
    const char* dot = strchr(host, '.');
    if (!dot || dot == host) {
        return false;
    }

    // a '/' must follow the '.', but not immediately
    const char* after_dot = dot + 1;
    const char* slash = strchr(after_dot, '/');
    if (!slash || slash == after_dot) {
        return false;
    }

    // the last character must be '/'
    return buf[strlen(buf) - 1] == '/';
}
280 
escape_project_url(char * in,char * out)281 void escape_project_url(char *in, char* out) {
282     escape_url_readable(in, out);
283     char& last = out[strlen(out)-1];
284     // remove trailing _
285     if (last == '_') {
286         last = '\0';
287     }
288 }
289 
// Returns true if url begins with "https://" (case-sensitive comparison).
//
bool is_https(const char* url) {
    static const char prefix[] = "https://";
    // sizeof counts the terminating NUL, which is not part of the prefix
    return !strncmp(url, prefix, sizeof(prefix) - 1);
}
293