1 /*
2  * Copyright (c) 2012 Tim Ruehsen
3  * Copyright (c) 2015-2021 Free Software Foundation, Inc.
4  *
5  * This file is part of libwget.
6  *
7  * Libwget is free software: you can redistribute it and/or modify
8  * it under the terms of the GNU Lesser General Public License as published by
9  * the Free Software Foundation, either version 3 of the License, or
10  * (at your option) any later version.
11  *
12  * Libwget is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15  * GNU Lesser General Public License for more details.
16  *
17  * You should have received a copy of the GNU Lesser General Public License
18  * along with libwget.  If not, see <https://www.gnu.org/licenses/>.
19  *
20  *
21  * Cookie parsing routines
22  *
23  * Changelog
24  * 23.10.2012  Tim Ruehsen  created
25  * 14.08.2019  Tim Ruehsen  split out from cookie.c
26  *
27  * see https://tools.ietf.org/html/rfc6265
28  *
29  */
30 
31 #include <config.h>
32 
33 #include <stdio.h>
34 #include <stdlib.h>
35 #include <string.h>
36 #include <limits.h>
37 #include <ctype.h>
38 #include <time.h>
39 
40 #include <c-ctype.h>
41 
42 #include <wget.h>
43 #include "private.h"
44 #include "cookie.h"
45 
/**
 * Check whether a request host falls under a cookie domain
 * (domain matching as described in RFC 6265, section 5.1.3).
 *
 * The comparison is done byte-wise with strcmp(), so it is case-sensitive.
 *
 * @param domain Domain of the cookie.
 * @param host   Host name to test against `domain`.
 * @return true if both strings are identical, or if `host` ends with
 *         `domain` and the character right in front of that suffix is a '.'.
 *         false otherwise, also if one of the arguments is NULL.
 */
bool cookie_domain_match(const char *domain, const char *host)
{
	if (!domain || !host)
		return false; // nothing to compare

	debug_printf("domain_match(%s,%s)\n", domain, host);

	if (!strcmp(domain, host))
		return true; // an exact match

	size_t domain_length = strlen(domain);
	size_t host_length = strlen(host);

	// An empty domain must not act as a wildcard suffix: without this check
	// every host ending in '.' would match it.
	if (domain_length == 0)
		return false;

	if (domain_length >= host_length)
		return false; // host is too short

	// host is strictly longer than domain here, so suffix[-1] is still inside host
	const char *suffix = host + host_length - domain_length;

	return suffix[-1] == '.' && !strcmp(suffix, domain);
}
68 
/**
 * Check whether a cookie path matches a request path, based on the
 * algorithm described in RFC 6265, section 5.1.4.
 *
 * One leading '/' is skipped on both paths before comparing. The part of the
 * request path in front of its last '/' limits how long a matching cookie
 * path may be; a request path without any '/' (after skipping the leading
 * one) is treated as empty.
 *
 * @param cookie_path  Path attribute of the cookie.
 * @param request_path Path of the request, may be NULL.
 * @return true if the cookie path matches the request path, false otherwise
 *         (also if `cookie_path` is NULL).
 */
bool cookie_path_match(const char *cookie_path, const char *request_path)
{
	const char *last_slash;
	size_t cookie_path_length, iri_path_length;
	bool cookie_path_slash = false;

	if (!cookie_path)
		return false; // without a cookie-path nothing can match

	if (*cookie_path == '/') {
		cookie_path++;
		cookie_path_slash = true;
	}

	if (request_path && *request_path == '/')
		request_path++;

	debug_printf("path_match(/%s,/%s)\n", cookie_path, request_path ? request_path : "");

	if (!request_path || !(last_slash = strrchr(request_path, '/'))) {
		request_path = "";
		iri_path_length = 0;
	} else {
		iri_path_length = last_slash - request_path;
	}

	cookie_path_length = strlen(cookie_path);

	if (iri_path_length < cookie_path_length)
		// cookie-path is not a prefix of request-path
		return false;

	if (cookie_path_length == 0 && (cookie_path_slash || iri_path_length == 0))
		// The cookie-path was "/" (matches every request-path), or both
		// paths are empty (slash matches slash).
		return true;

	if (strncmp(cookie_path, request_path, cookie_path_length))
		// cookie-path is not a prefix of request-path
		return false;

	if (!request_path[cookie_path_length])
		// the cookie-path and the request-path are identical
		return true;

	// Note: a leading '/' of the cookie-path alone does not make a plain
	// string prefix a match, else "/ab" would match "/abc/d".
	if (cookie_path_length > 0 && cookie_path[cookie_path_length - 1] == '/')
		// the cookie-path is a prefix of the request-path, and the last
		// character of the cookie-path is %x2F ("/").
		return true;

	// the cookie-path is a prefix of the request-path, and the first
	// character of the request-path that is not included in the cookie-
	// path has to be a %x2F ("/") character.
	return request_path[cookie_path_length] == '/';
}
126 
/**
 * Initialize a cookie structure.
 *
 * All fields are zeroed, then the creation and last-access timestamps are
 * set to the current time.
 *
 * @param cookie Cookie to (re-)initialize, or NULL to allocate a new one
 *               with wget_calloc().
 * @return The initialized cookie, or NULL if a new cookie had to be
 *         allocated and the allocation failed.
 */
wget_cookie *wget_cookie_init(wget_cookie *cookie)
{
	if (!cookie) {
		cookie = wget_calloc(1, sizeof(*cookie));
		if (!cookie)
			return NULL; // out of memory, don't touch a NULL pointer below
	} else {
		memset(cookie, 0, sizeof(*cookie));
	}

	cookie->last_access = cookie->creation = time(NULL);

	return cookie;
}
138 
/**
 * Release the strings owned by a cookie (name, value, domain and path).
 *
 * The cookie structure itself is not released.
 *
 * @param cookie Cookie to clean up; NULL is accepted and ignored.
 */
void wget_cookie_deinit(wget_cookie *cookie)
{
	if (!cookie)
		return;

	xfree(cookie->name);
	xfree(cookie->value);
	xfree(cookie->domain);
	xfree(cookie->path);
}
148 
/**
 * Release a cookie: first the strings it owns, then the structure itself.
 *
 * @param cookie Address of the cookie pointer; NULL is accepted and ignored.
 *               NOTE(review): xfree() presumably resets `*cookie` to NULL,
 *               callers in this file rely on that - confirm in its definition.
 */
void wget_cookie_free(wget_cookie **cookie)
{
	if (!cookie)
		return;

	wget_cookie_deinit(*cookie);
	xfree(*cookie);
}
156 
/**
 * Release a cookie passed as an untyped pointer: its strings first, then
 * the structure itself. Meant to be used for vector destruction.
 *
 * @param cookie Cookie to release; NULL is accepted and ignored.
 */
void cookie_free(void *cookie)
{
	if (!cookie)
		return;

	wget_cookie_deinit(cookie);
	xfree(cookie);
}
165 
166 /*
167 int wget_cookie_equals(wget_cookie *cookie1, wget_cookie *cookie2)
168 {
169 	if (!cookie1)
170 		return !cookie2;
171 
172 	if (!cookie2)
173 		return 0;
174 
175 	if (wget_strcmp(cookie1->name, cookie2->name) ||
176 		wget_strcmp(cookie1->value, cookie2->value) ||
177 		wget_strcmp(cookie1->domain, cookie2->domain) ||
178 		wget_strcmp(cookie1->path, cookie2->path) ||
179 		cookie1->domain_dot != cookie2->domain_dot ||
180 		cookie1->normalized != cookie2->normalized ||
181 		cookie1->persistent != cookie2->persistent ||
182 		cookie1->host_only != cookie2->host_only ||
183 		cookie1->secure_only != cookie2->secure_only ||
184 		cookie1->http_only != cookie2->http_only)
185 	{
186 		return 0;
187 	}
188 
189 	return 1;
190 }
191 */
192 
/**
 * Create a Set-Cookie style string from a cookie.
 *
 * The result has the form
 *   name=value[; expires=DATE][; path=PATH][; domain=[.]DOMAIN][; HttpOnly][; Secure]
 * where the domain gets a leading '.' unless the cookie is host-only.
 * Attributes whose value is not set (NULL string, or `expires` being 0) are
 * left out; a NULL name or value is written as an empty string.
 *
 * @param cookie Cookie to print, may be NULL.
 * @return String created by wget_aprintf(), or a copy of "(null)" created by
 *         wget_strdup() if `cookie` is NULL.
 *         NOTE(review): presumably the caller has to free the result - confirm.
 */
char *wget_cookie_to_setcookie(wget_cookie *cookie)
{
	char expires[32] = "";

	if (!cookie)
		return wget_strdup("(null)");

	if (cookie->expires)
		wget_http_print_date(cookie->expires, expires, sizeof(expires)); // date format from RFC 6265

	// Never hand a NULL pointer to %s. The domain for example is NULL for a
	// freshly parsed cookie that came without Domain attribute.
	const char *name = cookie->name ? cookie->name : "";
	const char *value = cookie->value ? cookie->value : "";
	const char *path = cookie->path ? cookie->path : "";
	const char *domain = cookie->domain ? cookie->domain : "";

	return wget_aprintf("%s=%s" "%s%s" "%s%s" "%s%s%s" "%s%s",
		name, value,
		*expires ? "; expires=" : "", expires,
		cookie->path ? "; path=" : "", path,
		cookie->domain ? "; domain=" : "",
		cookie->domain && !cookie->host_only ? "." : "",
		domain,
		cookie->http_only ? "; HttpOnly" : "",
		cookie->secure_only ? "; Secure" : "");
}
211 
212 /*
213  RFC 6265
214 
215  set-cookie-header = "Set-Cookie:" SP set-cookie-string
216  set-cookie-string = cookie-pair *( ";" SP cookie-av )
217  cookie-pair       = cookie-name "=" cookie-value
218  cookie-name       = token
219  cookie-value      = *cookie-octet / ( DQUOTE *cookie-octet DQUOTE )
220  cookie-octet      = %x21 / %x23-2B / %x2D-3A / %x3C-5B / %x5D-7E
221                        ; US-ASCII characters excluding CTLs,
222                        ; whitespace DQUOTE, comma, semicolon,
223                        ; and backslash
224  token             = <token, defined in [RFC2616], Section 2.2>
225 
226  cookie-av         = expires-av / max-age-av / domain-av /
227                      path-av / secure-av / httponly-av /
228                      extension-av
229  expires-av        = "Expires=" sane-cookie-date
230  sane-cookie-date  = <rfc1123-date, defined in [RFC2616], Section 3.3.1>
231  max-age-av        = "Max-Age=" non-zero-digit *DIGIT
232                        ; In practice, both expires-av and max-age-av
233                        ; are limited to dates representable by the
234                        ; user agent.
235  non-zero-digit    = %x31-39
236                        ; digits 1 through 9
237  domain-av         = "Domain=" domain-value
238  domain-value      = <subdomain>
239                        ; defined in [RFC1034], Section 3.5, as
240                        ; enhanced by [RFC1123], Section 2.1
241  path-av           = "Path=" path-value
242  path-value        = <any CHAR except CTLs or ";">
243  secure-av         = "Secure"
244  httponly-av       = "HttpOnly"
245  extension-av      = <any CHAR except CTLs or ";">
246 */
wget_cookie_parse_setcookie(const char * s,wget_cookie ** _cookie)247 const char *wget_cookie_parse_setcookie(const char *s, wget_cookie **_cookie)
248 {
249 	const char *name, *p;
250 	wget_cookie *cookie = wget_cookie_init(NULL);
251 
252 	// remove leading whitespace from cookie name
253 	while (c_isspace(*s)) s++;
254 
255 	// s = wget_http_parse_token(s, &cookie->name);
256 	// also accept UTF-8 (NON-ASCII) characters in cookie name
257 	for (p = s; (*s >= 32 && *s <= 126 && *s != '=' && *s != ';') || *s < 0; s++);
258 
259 	// remove trailing whitespace from cookie name
260 	while (s > p && c_isspace(s[-1])) s--;
261 	cookie->name = wget_strmemdup(p, s - p);
262 
263 	// advance to next delimiter
264 	while (c_isspace(*s)) s++;
265 
266 	if (cookie->name && *cookie->name && *s == '=') {
267 		// *cookie-octet / ( DQUOTE *cookie-octet DQUOTE )
268 
269 		// skip over delimiter and remove leading whitespace from cookie value
270 		for (s++; c_isspace(*s);) s++;
271 
272 /* RFC compliance is too strict
273 		if (*s == '\"')
274 			s++;
275 		// cookie-octet      = %x21 / %x23-2B / %x2D-3A / %x3C-5B / %x5D-7E
276 		for (p = s; *s > 32 && *s <= 126 && *s != '\\' && *s != ',' && *s != ';' && *s != '\"'; s++);
277 */
278 
279 		// also accept UTF-8 (NON-ASCII) characters in cookie value
280 		for (p = s; (*s >= 32 && *s <= 126 && *s != ';') || *s < 0; s++);
281 
282 		// remove trailing whitespace from cookie value
283 		while (s > p && c_isspace(s[-1])) s--;
284 
285 		cookie->value = wget_strmemdup(p, s - p);
286 
287 		do {
288 			// find next delimiter
289 			while (*s && *s != ';') s++;
290 			if (!*s) break;
291 
292 			// skip delimiter and remove leading spaces from attribute name
293 			for (s++; c_isspace(*s);) s++;
294 			if (!*s) break;
295 
296 			s = wget_http_parse_token(s, &name);
297 
298 			if (name) {
299 				// find next delimiter
300 				while (*s && *s != '=' && *s != ';') s++;
301 				// if (!*s) break;
302 
303 				if (*s == '=') {
304 					// find end of value
305 					for (s++; c_isspace(*s);) s++;
306 					for (p = s; (*s >= 32 && *s <= 126 && *s != ';') || *s < 0; s++);
307 
308 					if (!wget_strcasecmp_ascii(name, "expires")) {
309 						cookie->expires = wget_http_parse_full_date(p);
310 					} else if (!wget_strcasecmp_ascii(name, "max-age")) {
311 						long offset = atol(p);
312 
313 						if (offset > 0) {
314 							// limit offset to avoid integer overflow
315 							if (offset > INT_MAX)
316 								offset = INT_MAX;
317 							cookie->maxage = time(NULL) + offset;
318 						} else
319 							cookie->maxage = 0;
320 					} else if (!wget_strcasecmp_ascii(name, "domain")) {
321 						if (p != s) {
322 							if (*p == '.') { // RFC 6265 5.2.3
323 								do { p++; } while (*p == '.');
324 								cookie->domain_dot = 1;
325 							} else
326 								cookie->domain_dot = 0;
327 
328 							// remove trailing whitespace from attribute value
329 							while (s > p && c_isspace(s[-1])) s--;
330 
331 							xfree(cookie->domain);
332 							cookie->domain = wget_strmemdup(p, s - p);
333 						}
334 					} else if (!wget_strcasecmp_ascii(name, "path")) {
335 						// remove trailing whitespace from attribute value
336 						while (s > p && c_isspace(s[-1])) s--;
337 
338 						xfree(cookie->path);
339 						cookie->path = wget_strmemdup(p, s - p);
340 					} else if (!wget_strcasecmp_ascii(name, "secure")) {
341 						// here we ignore the value
342 						cookie->secure_only = 1;
343 					} else if (!wget_strcasecmp_ascii(name, "httponly")) {
344 						// here we ignore the value
345 						cookie->http_only = 1;
346 					} else {
347 						debug_printf("Unsupported cookie-av '%s'\n", name);
348 					}
349 				} else if (!wget_strcasecmp_ascii(name, "secure")) {
350 					cookie->secure_only = 1;
351 				} else if (!wget_strcasecmp_ascii(name, "httponly")) {
352 					cookie->http_only = 1;
353 				} else {
354 					debug_printf("Unsupported cookie-av '%s'\n", name);
355 				}
356 
357 				xfree(name);
358 			}
359 		} while (*s);
360 
361 	} else {
362 		wget_cookie_free(&cookie);
363 		error_printf(_("Cookie without name or assignment ignored\n"));
364 	}
365 
366 	if (_cookie)
367 		*_cookie = cookie;
368 	else
369 		wget_cookie_free(&cookie);
370 
371 	return s;
372 }
373 
374 // normalize/sanitize and store cookies
cookie_normalize_cookie(const wget_iri * iri,wget_cookie * cookie)375 static int cookie_normalize_cookie(const wget_iri *iri, wget_cookie *cookie)
376 {
377 /*
378 	debug_printf("normalize cookie %s=%s\n", cookie->name, cookie->value);
379 	debug_printf("<  %s=%s\n", cookie->name, cookie->value);
380 	debug_printf("<  expires=%lld max-age=%lld\n", (long long)cookie->expires, (long long)cookie->maxage);
381 	debug_printf("<  domain=%s\n", cookie->domain);
382 	debug_printf("<  path=%s\n", cookie->path);
383 	debug_printf("<  normalized=%d persistent=%d hostonly=%d secure=%d httponly=%d\n",
384 		cookie->normalized, cookie->persistent, cookie->host_only, cookie->secure_only, cookie->http_only);
385 */
386 	cookie->normalized = 0;
387 
388 	if (cookie->maxage)
389 		cookie->expires = cookie->maxage;
390 
391 	cookie->persistent = cookie->expires != 0;
392 
393 	// convert domain to lowercase
394 	wget_strtolower((char *)cookie->domain);
395 
396 	if (iri) {
397 		// cookies comes from a HTTP header and needs checking
398 
399 		// check prefixes as proposed in https://tools.ietf.org/html/draft-ietf-httpbis-cookie-prefixes-00
400 		if (!wget_strncmp(cookie->name, "__Secure-", 9)) {
401 			if (!cookie->secure_only || iri->scheme != WGET_IRI_SCHEME_HTTPS) {
402 				debug_printf("Cookie prefix requires secure origin: %s %s\n", cookie->name, iri->host);
403 				return -1; // ignore cookie
404 			}
405 		}
406 		else if (!wget_strncmp(cookie->name, "__Host-", 7)) {
407 			if (!cookie->secure_only || iri->scheme != WGET_IRI_SCHEME_HTTPS) {
408 				debug_printf("Cookie prefix requires secure origin: %s %s\n", cookie->name, iri->host);
409 				return -1; // ignore cookie
410 			}
411 			if (!cookie->host_only) {
412 				debug_printf("Cookie prefix requires hostonly flag: %s %s\n", cookie->name, iri->host);
413 				return -1; // ignore cookie
414 			}
415 			if (wget_strcmp(cookie->path, "/")) {
416 				debug_printf("Cookie prefix requires path \"/\": %s %s\n", cookie->name, iri->host);
417 				return -1; // ignore cookie
418 			}
419 		}
420 
421 		if (cookie->domain && *cookie->domain) {
422 			if (!strcmp(cookie->domain, iri->host)) {
423 				cookie->host_only = 1;
424 			} else if (cookie_domain_match(cookie->domain, iri->host)) {
425 				cookie->host_only = 0;
426 			} else {
427 				debug_printf("Domain mismatch: %s %s\n", cookie->domain, iri->host);
428 				return -1; // ignore cookie
429 			}
430 		} else {
431 			xfree(cookie->domain);
432 			cookie->domain = wget_strdup(iri->host);
433 			cookie->host_only = 1;
434 		}
435 
436 		if (!cookie->path || *cookie->path != '/') {
437 			const char *p = iri->path ? strrchr(iri->path, '/') : NULL;
438 
439 			xfree(cookie->path);
440 
441 			if (p && p != iri->path) {
442 				cookie->path = wget_strmemdup(iri->path, p - iri->path);
443 			} else {
444 				cookie->path = wget_strdup("/");
445 				// err_printf(_("Unexpected URI without '/': %s\n"), iri->path);
446 				// return -1; // ignore cookie
447 			}
448 		}
449 	}
450 
451 	cookie->normalized = 1;
452 
453 /*
454 	debug_printf(">  %s=%s\n", cookie->name, cookie->value);
455 	debug_printf(">  expires=%lld max-age=%lld\n", (long long)cookie->expires, (long long)cookie->maxage);
456 	debug_printf(">  domain=%s\n", cookie->domain);
457 	debug_printf(">  path=%s\n", cookie->path);
458 	debug_printf(">  normalized=%d persistent=%d hostonly=%d secure=%d httponly=%d\n",
459 		cookie->normalized, cookie->persistent, cookie->host_only, cookie->secure_only, cookie->http_only);
460 */
461 
462 	return 0;
463 }
464 
/**
 * Normalize and check a single cookie.
 *
 * Thin public wrapper around cookie_normalize_cookie(); no locking is done here.
 *
 * @param iri    IRI the cookie was received from, or NULL to skip the
 *               checks against the origin.
 * @param cookie Cookie to normalize.
 * @return The result of cookie_normalize_cookie(): 0 on success, -1 if the
 *         cookie has to be ignored.
 */
int wget_cookie_normalize(const wget_iri *iri, wget_cookie *cookie)
{
	return cookie_normalize_cookie(iri, cookie);
}
475 
/**
 * Normalize and check every cookie of a vector.
 *
 * The result of the individual normalizations is not evaluated here, and no
 * locking is done.
 *
 * @param iri     IRI the cookies were received from, or NULL to skip the
 *                checks against the origin.
 * @param cookies Vector of cookies to normalize.
 */
void wget_cookie_normalize_cookies(const wget_iri *iri, const wget_vector *cookies)
{
	int pos = 0;

	while (pos < wget_vector_size(cookies)) {
		cookie_normalize_cookie(iri, wget_vector_get(cookies, pos));
		pos++;
	}
}
485