xref: /reactos/dll/win32/winhttp/url.c (revision 682f85ad)
1 /*
2  * Copyright 2008 Hans Leidekker for CodeWeavers
3  *
4  * This library is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU Lesser General Public
6  * License as published by the Free Software Foundation; either
7  * version 2.1 of the License, or (at your option) any later version.
8  *
9  * This library is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
12  * Lesser General Public License for more details.
13  *
14  * You should have received a copy of the GNU Lesser General Public
15  * License along with this library; if not, write to the Free Software
16  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
17  */
18 
19 #include "config.h"
20 #include "ws2tcpip.h"
21 #include <stdarg.h>
22 
23 #include "windef.h"
24 #include "winbase.h"
25 #include "winreg.h"
26 #include "winhttp.h"
27 #include "shlwapi.h"
28 
29 #include "wine/debug.h"
30 #include "winhttp_private.h"
31 
32 WINE_DEFAULT_DEBUG_CHANNEL(winhttp);
33 
34 static const WCHAR scheme_http[] = {'h','t','t','p',0};
35 static const WCHAR scheme_https[] = {'h','t','t','p','s',0};
36 
37 struct url_component
38 {
39     WCHAR **str;
40     DWORD  *len;
41 };
42 
43 static DWORD set_component( struct url_component *comp, WCHAR *value, DWORD len, DWORD flags, BOOL *overflow )
44 {
45     if (*comp->str && !*comp->len) return ERROR_INVALID_PARAMETER;
46     if (!*comp->len) return ERROR_SUCCESS;
47     if (!*comp->str)
48     {
49         if (len && *comp->len && (flags & (ICU_DECODE|ICU_ESCAPE))) return ERROR_INVALID_PARAMETER;
50         *comp->str = value;
51         *comp->len = len;
52     }
53     else
54     {
55         if (len >= *comp->len)
56         {
57             *comp->len = len + 1;
58             *overflow = TRUE;
59             return ERROR_SUCCESS;
60         }
61         memcpy( *comp->str, value, len * sizeof(WCHAR) );
62         (*comp->str)[len] = 0;
63         *comp->len = len;
64     }
65     return ERROR_SUCCESS;
66 }
67 
68 static WCHAR *decode_url( LPCWSTR url, DWORD *len )
69 {
70     const WCHAR *p = url;
71     WCHAR hex[3], *q, *ret;
72 
73     if (!(ret = heap_alloc( *len * sizeof(WCHAR) ))) return NULL;
74     q = ret;
75     while (*len > 0)
76     {
77         if (p[0] == '%' && isxdigitW( p[1] ) && isxdigitW( p[2] ))
78         {
79             hex[0] = p[1];
80             hex[1] = p[2];
81             hex[2] = 0;
82             *q++ = strtolW( hex, NULL, 16 );
83             p += 3;
84             *len -= 3;
85         }
86         else
87         {
88             *q++ = *p++;
89             *len -= 1;
90         }
91     }
92     *len = q - ret;
93     return ret;
94 }
95 
96 static inline BOOL need_escape( WCHAR ch )
97 {
98     static const WCHAR escapes[] = {' ','"','#','%','<','>','[','\\',']','^','`','{','|','}','~',0};
99     const WCHAR *p = escapes;
100 
101     if (ch <= 31 || ch >= 127) return TRUE;
102     while (*p)
103     {
104         if (ch == *p++) return TRUE;
105     }
106     return FALSE;
107 }
108 
109 static BOOL escape_string( const WCHAR *src, DWORD src_len, WCHAR *dst, DWORD *dst_len )
110 {
111     static const WCHAR hex[] = {'0','1','2','3','4','5','6','7','8','9','A','B','C','D','E','F'};
112     WCHAR *p = dst;
113     DWORD i;
114 
115     *dst_len = src_len;
116     for (i = 0; i < src_len; i++)
117     {
118         if (src[i] > 0xff) return FALSE;
119         if (need_escape( src[i] ))
120         {
121             if (dst)
122             {
123                 p[0] = '%';
124                 p[1] = hex[(src[i] >> 4) & 0xf];
125                 p[2] = hex[src[i] & 0xf];
126                 p += 3;
127             }
128             *dst_len += 2;
129         }
130         else if (dst) *p++ = src[i];
131     }
132 
133     if (dst) dst[*dst_len] = 0;
134     return TRUE;
135 }
136 
137 static DWORD escape_url( const WCHAR *url, DWORD *len, WCHAR **ret )
138 {
139     const WCHAR *p;
140     DWORD len_base, len_path;
141 
142     if ((p = strrchrW( url, '/' )))
143     {
144         len_base = p - url;
145         if (!escape_string( p, *len - len_base, NULL, &len_path )) return ERROR_INVALID_PARAMETER;
146     }
147     else
148     {
149         len_base = *len;
150         len_path = 0;
151     }
152 
153     if (!(*ret = heap_alloc( (len_base + len_path + 1) * sizeof(WCHAR) ))) return ERROR_OUTOFMEMORY;
154     memcpy( *ret, url, len_base * sizeof(WCHAR) );
155 
156     if (p) escape_string( p, *len - (p - url), *ret + len_base, &len_path );
157     (*ret)[len_base + len_path] = 0;
158 
159     *len = len_base + len_path;
160     return ERROR_SUCCESS;
161 }
162 
163 static DWORD parse_port( const WCHAR *str, DWORD len, INTERNET_PORT *ret )
164 {
165     const WCHAR *p = str;
166     DWORD port = 0;
167     while (len && isdigitW( *p ))
168     {
169         if ((port = port * 10 + *p - '0') > 65535) return ERROR_WINHTTP_INVALID_URL;
170         p++; len--;
171     }
172     *ret = port;
173     return ERROR_SUCCESS;
174 }
175 
176 /***********************************************************************
177  *          WinHttpCrackUrl (winhttp.@)
178  */
179 BOOL WINAPI WinHttpCrackUrl( LPCWSTR url, DWORD len, DWORD flags, LPURL_COMPONENTSW uc )
180 {
181     WCHAR *p, *q, *r, *url_decoded = NULL, *url_escaped = NULL;
182     INTERNET_SCHEME scheme_number = 0;
183     struct url_component scheme, username, password, hostname, path, extra;
184     BOOL overflow = FALSE;
185     DWORD err;
186 
187     TRACE("%s, %d, %x, %p\n", debugstr_wn(url, len), len, flags, uc);
188 
189     if (!url || !uc || uc->dwStructSize != sizeof(*uc))
190     {
191         SetLastError( ERROR_INVALID_PARAMETER );
192         return FALSE;
193     }
194     if (!len) len = strlenW( url );
195 
196     if (flags & ICU_ESCAPE)
197     {
198         if ((err = escape_url( url, &len, &url_escaped )))
199         {
200             SetLastError( err );
201             return FALSE;
202         }
203         url = url_escaped;
204     }
205     else if (flags & ICU_DECODE)
206     {
207         if (!(url_decoded = decode_url( url, &len )))
208         {
209             SetLastError( ERROR_OUTOFMEMORY );
210             return FALSE;
211         }
212         url = url_decoded;
213     }
214     if (!(p = strchrW( url, ':' )))
215     {
216         SetLastError( ERROR_WINHTTP_UNRECOGNIZED_SCHEME );
217         return FALSE;
218     }
219     if (p - url == 4 && !strncmpiW( url, scheme_http, 4 )) scheme_number = INTERNET_SCHEME_HTTP;
220     else if (p - url == 5 && !strncmpiW( url, scheme_https, 5 )) scheme_number = INTERNET_SCHEME_HTTPS;
221     else
222     {
223         err = ERROR_WINHTTP_UNRECOGNIZED_SCHEME;
224         goto exit;
225     }
226 
227     scheme.str = &uc->lpszScheme;
228     scheme.len = &uc->dwSchemeLength;
229 
230     if ((err = set_component( &scheme, (WCHAR *)url, p - url, flags, &overflow ))) goto exit;
231 
232     p++; /* skip ':' */
233     if (!p[0] || p[0] != '/' || p[1] != '/')
234     {
235         err = ERROR_WINHTTP_INVALID_URL;
236         goto exit;
237     }
238     p += 2;
239     if (!p[0])
240     {
241         err = ERROR_WINHTTP_INVALID_URL;
242         goto exit;
243     }
244 
245     username.str = &uc->lpszUserName;
246     username.len = &uc->dwUserNameLength;
247 
248     password.str = &uc->lpszPassword;
249     password.len = &uc->dwPasswordLength;
250 
251     if ((q = memchrW( p, '@', len - (p - url) )) && !(memchrW( p, '/', q - p )))
252     {
253 
254         if ((r = memchrW( p, ':', q - p )))
255         {
256             if ((err = set_component( &username, p, r - p, flags, &overflow ))) goto exit;
257             r++;
258             if ((err = set_component( &password, r, q - r, flags, &overflow ))) goto exit;
259         }
260         else
261         {
262             if ((err = set_component( &username, p, q - p, flags, &overflow ))) goto exit;
263             if ((err = set_component( &password, NULL, 0, flags, &overflow ))) goto exit;
264         }
265         p = q + 1;
266     }
267     else
268     {
269         if ((err = set_component( &username, NULL, 0, flags, &overflow ))) goto exit;
270         if ((err = set_component( &password, NULL, 0, flags, &overflow ))) goto exit;
271     }
272 
273     hostname.str = &uc->lpszHostName;
274     hostname.len = &uc->dwHostNameLength;
275 
276     path.str = &uc->lpszUrlPath;
277     path.len = &uc->dwUrlPathLength;
278 
279     extra.str = &uc->lpszExtraInfo;
280     extra.len = &uc->dwExtraInfoLength;
281 
282     if ((q = memchrW( p, '/', len - (p - url) )))
283     {
284         if ((r = memchrW( p, ':', q - p )))
285         {
286             if ((err = set_component( &hostname, p, r - p, flags, &overflow ))) goto exit;
287             r++;
288             if ((err = parse_port( r, q - r, &uc->nPort ))) goto exit;
289         }
290         else
291         {
292             if ((err = set_component( &hostname, p, q - p, flags, &overflow ))) goto exit;
293             if (scheme_number == INTERNET_SCHEME_HTTP) uc->nPort = INTERNET_DEFAULT_HTTP_PORT;
294             if (scheme_number == INTERNET_SCHEME_HTTPS) uc->nPort = INTERNET_DEFAULT_HTTPS_PORT;
295         }
296 
297         if ((r = memchrW( q, '?', len - (q - url) )))
298         {
299             if (*extra.len)
300             {
301                 if ((err = set_component( &path, q, r - q, flags, &overflow ))) goto exit;
302                 if ((err = set_component( &extra, r, len - (r - url), flags, &overflow ))) goto exit;
303             }
304             else if ((err = set_component( &path, q, len - (q - url), flags, &overflow ))) goto exit;
305         }
306         else
307         {
308             if ((err = set_component( &path, q, len - (q - url), flags, &overflow ))) goto exit;
309             if ((err = set_component( &extra, (WCHAR *)url + len, 0, flags, &overflow ))) goto exit;
310         }
311     }
312     else
313     {
314         if ((r = memchrW( p, ':', len - (p - url) )))
315         {
316             if ((err = set_component( &hostname, p, r - p, flags, &overflow ))) goto exit;
317             r++;
318             if ((err = parse_port( r, len - (r - url), &uc->nPort ))) goto exit;
319         }
320         else
321         {
322             if ((err = set_component( &hostname, p, len - (p - url), flags, &overflow ))) goto exit;
323             if (scheme_number == INTERNET_SCHEME_HTTP) uc->nPort = INTERNET_DEFAULT_HTTP_PORT;
324             if (scheme_number == INTERNET_SCHEME_HTTPS) uc->nPort = INTERNET_DEFAULT_HTTPS_PORT;
325         }
326         if ((err = set_component( &path, (WCHAR *)url + len, 0, flags, &overflow ))) goto exit;
327         if ((err = set_component( &extra, (WCHAR *)url + len, 0, flags, &overflow ))) goto exit;
328     }
329 
330     TRACE("scheme(%s) host(%s) port(%d) path(%s) extra(%s)\n", debugstr_wn(*scheme.str, *scheme.len),
331           debugstr_wn(*hostname.str, *hostname.len ), uc->nPort, debugstr_wn(*path.str, *path.len),
332           debugstr_wn(*extra.str, *extra.len));
333 
334 exit:
335     if (!err)
336     {
337         if (overflow) err = ERROR_INSUFFICIENT_BUFFER;
338         uc->nScheme = scheme_number;
339     }
340     heap_free( url_decoded );
341     heap_free( url_escaped );
342     SetLastError( err );
343     return !err;
344 }
345 
346 static INTERNET_SCHEME get_scheme( const WCHAR *scheme, DWORD len )
347 {
348     if (!strncmpW( scheme, scheme_http, len )) return INTERNET_SCHEME_HTTP;
349     if (!strncmpW( scheme, scheme_https, len )) return INTERNET_SCHEME_HTTPS;
350     return 0;
351 }
352 
353 static const WCHAR *get_scheme_string( INTERNET_SCHEME scheme )
354 {
355     if (scheme == INTERNET_SCHEME_HTTP) return scheme_http;
356     if (scheme == INTERNET_SCHEME_HTTPS) return scheme_https;
357     return NULL;
358 }
359 
360 static BOOL uses_default_port( INTERNET_SCHEME scheme, INTERNET_PORT port )
361 {
362     if ((scheme == INTERNET_SCHEME_HTTP) && (port == INTERNET_DEFAULT_HTTP_PORT)) return TRUE;
363     if ((scheme == INTERNET_SCHEME_HTTPS) && (port == INTERNET_DEFAULT_HTTPS_PORT)) return TRUE;
364     return FALSE;
365 }
366 
367 static DWORD get_comp_length( DWORD len, DWORD flags, WCHAR *comp )
368 {
369     DWORD ret;
370     unsigned int i;
371 
372     ret = len ? len : strlenW( comp );
373     if (!(flags & ICU_ESCAPE)) return ret;
374     for (i = 0; i < len; i++) if (need_escape( comp[i] )) ret += 2;
375     return ret;
376 }
377 
378 static BOOL get_url_length( URL_COMPONENTS *uc, DWORD flags, DWORD *len )
379 {
380     static const WCHAR formatW[] = {'%','u',0};
381     INTERNET_SCHEME scheme;
382 
383     *len = 0;
384     if (uc->lpszScheme)
385     {
386         DWORD scheme_len = get_comp_length( uc->dwSchemeLength, 0, uc->lpszScheme );
387         *len += scheme_len;
388         scheme = get_scheme( uc->lpszScheme, scheme_len );
389     }
390     else
391     {
392         scheme = uc->nScheme;
393         if (!scheme) scheme = INTERNET_SCHEME_HTTP;
394         *len += strlenW( get_scheme_string( scheme ) );
395     }
396     *len += 3; /* "://" */
397 
398     if (uc->lpszUserName)
399     {
400         *len += get_comp_length( uc->dwUserNameLength, 0, uc->lpszUserName );
401         *len += 1; /* "@" */
402     }
403     else
404     {
405         if (uc->lpszPassword)
406         {
407             SetLastError( ERROR_INVALID_PARAMETER );
408             return FALSE;
409         }
410     }
411     if (uc->lpszPassword)
412     {
413         *len += 1; /* ":" */
414         *len += get_comp_length( uc->dwPasswordLength, 0, uc->lpszPassword );
415     }
416     if (uc->lpszHostName)
417     {
418         *len += get_comp_length( uc->dwHostNameLength, 0, uc->lpszHostName );
419 
420         if (!uses_default_port( scheme, uc->nPort ))
421         {
422             WCHAR port[sizeof("65535")];
423 
424             *len += sprintfW( port, formatW, uc->nPort );
425             *len += 1; /* ":" */
426         }
427         if (uc->lpszUrlPath && *uc->lpszUrlPath != '/') *len += 1; /* '/' */
428     }
429     if (uc->lpszUrlPath) *len += get_comp_length( uc->dwUrlPathLength, flags, uc->lpszUrlPath );
430     if (uc->lpszExtraInfo) *len += get_comp_length( uc->dwExtraInfoLength, flags, uc->lpszExtraInfo );
431     return TRUE;
432 }
433 
434 /***********************************************************************
435  *          WinHttpCreateUrl (winhttp.@)
436  */
437 BOOL WINAPI WinHttpCreateUrl( LPURL_COMPONENTS uc, DWORD flags, LPWSTR url, LPDWORD required )
438 {
439     static const WCHAR formatW[] = {'%','u',0};
440     DWORD len, len_escaped;
441     INTERNET_SCHEME scheme;
442 
443     TRACE("%p, 0x%08x, %p, %p\n", uc, flags, url, required);
444 
445     if (!uc || uc->dwStructSize != sizeof(URL_COMPONENTS) || !required)
446     {
447         SetLastError( ERROR_INVALID_PARAMETER );
448         return FALSE;
449     }
450 
451     if (!get_url_length( uc, flags, &len )) return FALSE;
452 
453     if (*required < len)
454     {
455         *required = len + 1;
456         SetLastError( ERROR_INSUFFICIENT_BUFFER );
457         return FALSE;
458     }
459     if (!url)
460     {
461         SetLastError( ERROR_INVALID_PARAMETER );
462         return FALSE;
463     }
464 
465     url[0] = 0;
466     *required = len;
467     if (uc->lpszScheme)
468     {
469         len = get_comp_length( uc->dwSchemeLength, 0, uc->lpszScheme );
470         memcpy( url, uc->lpszScheme, len * sizeof(WCHAR) );
471         url += len;
472 
473         scheme = get_scheme( uc->lpszScheme, len );
474     }
475     else
476     {
477         const WCHAR *schemeW;
478         scheme = uc->nScheme;
479 
480         if (!scheme) scheme = INTERNET_SCHEME_HTTP;
481 
482         schemeW = get_scheme_string( scheme );
483         len = strlenW( schemeW );
484         memcpy( url, schemeW, len * sizeof(WCHAR) );
485         url += len;
486     }
487 
488     *url++ = ':';
489     *url++ = '/';
490     *url++ = '/';
491 
492     if (uc->lpszUserName)
493     {
494         len = get_comp_length( uc->dwUserNameLength, 0, uc->lpszUserName );
495         memcpy( url, uc->lpszUserName, len * sizeof(WCHAR) );
496         url += len;
497 
498         if (uc->lpszPassword)
499         {
500             *url++ = ':';
501             len = get_comp_length( uc->dwPasswordLength, 0, uc->lpszPassword );
502             memcpy( url, uc->lpszPassword, len * sizeof(WCHAR) );
503             url += len;
504         }
505         *url++ = '@';
506     }
507     if (uc->lpszHostName)
508     {
509         len = get_comp_length( uc->dwHostNameLength, 0, uc->lpszHostName );
510         memcpy( url, uc->lpszHostName, len * sizeof(WCHAR) );
511         url += len;
512 
513         if (!uses_default_port( scheme, uc->nPort ))
514         {
515             *url++ = ':';
516             url += sprintfW( url, formatW, uc->nPort );
517         }
518 
519         /* add slash between hostname and path if necessary */
520         if (uc->lpszUrlPath && *uc->lpszUrlPath != '/')
521         {
522             *url++ = '/';
523         }
524     }
525     if (uc->lpszUrlPath)
526     {
527         len = get_comp_length( uc->dwUrlPathLength, 0, uc->lpszUrlPath );
528         if (flags & ICU_ESCAPE)
529         {
530             if (!escape_string( uc->lpszUrlPath, len, url, &len_escaped ))
531             {
532                 SetLastError( ERROR_INVALID_PARAMETER );
533                 return FALSE;
534             }
535             url += len_escaped;
536         }
537         else
538         {
539             memcpy( url, uc->lpszUrlPath, len * sizeof(WCHAR) );
540             url += len;
541         }
542     }
543     if (uc->lpszExtraInfo)
544     {
545         len = get_comp_length( uc->dwExtraInfoLength, 0, uc->lpszExtraInfo );
546         if (flags & ICU_ESCAPE)
547         {
548             if (!escape_string( uc->lpszExtraInfo, len, url, &len_escaped ))
549             {
550                 SetLastError( ERROR_INVALID_PARAMETER );
551                 return FALSE;
552             }
553             url += len_escaped;
554         }
555         else
556         {
557             memcpy( url, uc->lpszExtraInfo, len * sizeof(WCHAR) );
558             url += len;
559         }
560     }
561     *url = 0;
562     SetLastError( ERROR_SUCCESS );
563     return TRUE;
564 }
565