1 /*
2  * Copyright (c) 2012 Tim Ruehsen
3  * Copyright (c) 2015-2021 Free Software Foundation, Inc.
4  *
5  * This file is part of libwget.
6  *
7  * Libwget is free software: you can redistribute it and/or modify
8  * it under the terms of the GNU Lesser General Public License as published by
9  * the Free Software Foundation, either version 3 of the License, or
10  * (at your option) any later version.
11  *
12  * Libwget is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15  * GNU Lesser General Public License for more details.
16  *
17  * You should have received a copy of the GNU Lesser General Public License
18  * along with libwget.  If not, see <https://www.gnu.org/licenses/>.
19  *
20  *
21  * Cookie database routines
22  *
23  * Changelog
24  * 23.10.2012  Tim Ruehsen  created
25  *
26  * see https://tools.ietf.org/html/rfc6265
27  *
28  */
29 
30 #include <config.h>
31 
32 #include <stdio.h>
33 #include <stdlib.h>
34 #include <string.h>
35 #include <limits.h>
36 #include <ctype.h>
37 #include <time.h>
38 #include <errno.h>
39 #ifdef WITH_LIBPSL
40 #  include <libpsl.h>
41 #endif
42 
43 #include <wget.h>
44 #include "private.h"
45 #include "cookie.h"
46 
47 struct wget_cookie_db_st {
48 	wget_vector *
49 		cookies;
50 #ifdef WITH_LIBPSL
51 	psl_ctx_t
52 		*psl; // libpsl Publix Suffix List context
53 #endif
54 	wget_thread_mutex
55 		mutex;
56 	unsigned int
57 		age;
58 	bool
59 		keep_session_cookies : 1; // whether or not session cookies are saved
60 };
61 
62 // by this kind of sorting, we can easily see if a domain matches or not (match = supercookie !)
63 
// Replace the Public Suffix List of 'cookie_db' by the one loaded from file 'fname'.
//
// If 'fname' is NULL, the current PSL context is released and none is set,
// wget_cookie_check_psl() then accepts every domain.
//
// Returns 0 on success.
// Returns -1 if 'cookie_db' is NULL, if the file could not be loaded or if
// libwget was built without libpsl. On a load failure the PSL context that
// was in place before the call is left untouched.
int wget_cookie_db_load_psl(wget_cookie_db *cookie_db, const char *fname)
{
#ifdef WITH_LIBPSL
	if (!cookie_db)
		return -1;

	if (fname) {
		psl_ctx_t *psl = psl_load_file(fname);

		// psl_load_file() returns NULL on failure. Installing that NULL would
		// silently switch off the public suffix check, so keep what we have
		// and report the error instead.
		if (!psl)
			return -1;

		psl_free(cookie_db->psl);
		cookie_db->psl = psl;
	} else {
		psl_free(cookie_db->psl);
		cookie_db->psl = NULL;
	}

	return 0;
#else
	(void) cookie_db; (void) fname;
	return -1;
#endif
}
86 
87 // this is how we sort the entries in a cookie db
88 WGET_GCC_NONNULL_ALL WGET_GCC_PURE
compare_cookie(const wget_cookie * c1,const wget_cookie * c2)89 static int compare_cookie(const wget_cookie *c1, const wget_cookie *c2)
90 {
91 	int n;
92 
93 	if (!(n = wget_strcmp(c1->domain, c2->domain))) {
94 		if (!(n = wget_strcmp(c1->name, c2->name))) {
95 			n = wget_strcmp(c1->path, c2->path);
96 		}
97 	}
98 
99 	return n;
100 }
101 
102 // this is how we sort the entries when constructing a Cookie: header field
103 WGET_GCC_NONNULL_ALL WGET_GCC_PURE
compare_cookie2(const wget_cookie * c1,const wget_cookie * c2)104 static int compare_cookie2(const wget_cookie *c1, const wget_cookie *c2)
105 {
106 	// RFC 6265 5.4 demands sorting by 1. longer paths first, 2. earlier creation time first.
107 	size_t len1 = strlen(c1->path);
108 	size_t len2 = strlen(c2->path);
109 
110 	if (len1 < len2)
111 		return 1;
112 
113 	if (len1 > len2)
114 		return -1;
115 
116 	if (c1->sort_age < c2->sort_age)
117 		return -1;
118 
119 	if (c1->sort_age > c2->sort_age)
120 		return 1;
121 
122 	return 0;
123 }
124 
// Check the domain of 'cookie' against the Public Suffix List of 'cookie_db'.
//
// Returns -1 if the domain is a public suffix, else 0.
// Also returns 0 if no PSL context is set or if built without libpsl.
int wget_cookie_check_psl(const wget_cookie_db *cookie_db, const wget_cookie *cookie)
{
#ifdef WITH_LIBPSL
	// no list to check against: accept the cookie
	if (!cookie_db->psl)
		return 0;

	if (psl_is_public_suffix(cookie_db->psl, cookie->domain))
		return -1;

	return 0;
#else
	(void) cookie_db; (void) cookie;
	return 0;
#endif
}
145 
// Store 'cookie' in 'cookie_db'. A cookie that compares equal to an already
// stored one (see compare_cookie()) replaces it, inheriting the creation time
// and sort_age of the old entry. Any other cookie is inserted as a new entry.
//
// Except when 'cookie' is NULL, this function takes ownership of 'cookie':
// it either ends up in the database or it is free'd here.
//
// Returns WGET_E_SUCCESS if the cookie has been stored.
// Returns WGET_E_INVALID if 'cookie' or 'cookie_db' is NULL, if the cookie is
// not normalized or if its domain is a public suffix.
// Returns the (negative) error of the vector operation if storing failed.
int wget_cookie_store_cookie(wget_cookie_db *cookie_db, wget_cookie *cookie)
{
	wget_cookie *old;
	int pos, rc;

	if (!cookie)
		return WGET_E_INVALID;

	if (!cookie_db) {
		wget_cookie_free(&cookie);
		return WGET_E_INVALID;
	}

	debug_printf("got cookie %s=%s\n", cookie->name, cookie->value);

	if (!cookie->normalized) {
		debug_printf("cookie '%s' dropped, it wasn't normalized\n", cookie->name);
		wget_cookie_free(&cookie);
		return WGET_E_INVALID;
	}

	if (wget_cookie_check_psl(cookie_db, cookie) != 0) {
		debug_printf("cookie '%s' dropped, domain '%s' is a public suffix\n", cookie->name, cookie->domain);
		wget_cookie_free(&cookie);
		return WGET_E_INVALID;
	}

	wget_thread_mutex_lock(cookie_db->mutex);

	pos = wget_vector_find(cookie_db->cookies, cookie);
	old = wget_vector_get(cookie_db->cookies, pos);

	if (old) {
		debug_printf("replace old cookie %s=%s\n", cookie->name, cookie->value);
		cookie->creation = old->creation;
		cookie->sort_age = old->sort_age;
		rc = wget_vector_replace(cookie_db->cookies, cookie, pos);
	} else {
		debug_printf("store new cookie %s=%s\n", cookie->name, cookie->value);
		cookie->sort_age = ++cookie_db->age;
		rc = wget_vector_insert_sorted(cookie_db->cookies, cookie);
	}

	wget_thread_mutex_unlock(cookie_db->mutex);

	if (rc < 0) {
		// The vector did not take the cookie (e.g. out of memory or no vector).
		// We still own it, so free it instead of leaking it and reporting success.
		debug_printf("cookie '%s' dropped, storing failed (%d)\n", cookie->name, rc);
		wget_cookie_free(&cookie);
		return rc;
	}

	return WGET_E_SUCCESS;
}
192 
// Hand every cookie of the vector 'cookies' over to 'cookie_db'.
// Afterwards 'cookies' is emptied without free'ing its entries.
// Nothing happens if 'cookie_db' is NULL.
void wget_cookie_store_cookies(wget_cookie_db *cookie_db, wget_vector *cookies)
{
	if (!cookie_db)
		return;

	// wget_cookie_store_cookie() takes ownership of each cookie
	for (int idx = 0; idx < wget_vector_size(cookies); idx++)
		wget_cookie_store_cookie(cookie_db, wget_vector_get(cookies, idx));

	// the entries are no longer ours, so remove them without free'ing
	wget_vector_clear_nofree(cookies);
}
207 
// Build the value of a Cookie: request header for 'iri' from the cookies in 'cookie_db'.
//
// A stored cookie is used if
//  - its domain fits the host of 'iri' (exact match for host-only cookies),
//  - it has not expired,
//  - it is not a secure-only cookie while the scheme is not HTTPS,
//  - its path matches the path of 'iri'.
// The selected cookies are sorted by compare_cookie2() and joined as "name=value; name=value".
//
// Returns the data of the filled buffer, or NULL if 'cookie_db' or 'iri' is NULL
// or if no cookie matched. The buffer is not deinitialized here, its data is
// handed to the caller (presumably to be free'd there).
char *wget_cookie_create_request_header(wget_cookie_db *cookie_db, const wget_iri *iri)
{
	int idx, nmatches, have_header = 0;
	int64_t now = time(NULL);
	wget_vector *matches = NULL;
	wget_buffer buf;

	if (!cookie_db || !iri)
		return NULL;

	debug_printf("cookie_create_request_header for host=%s path=%s\n", iri->host, iri->path);

	wget_thread_mutex_lock(cookie_db->mutex);

	for (idx = 0; idx < wget_vector_size(cookie_db->cookies); idx++) {
		wget_cookie *cookie = wget_vector_get(cookie_db->cookies, idx);

		if (cookie->host_only) {
			// host-only cookies need an exact host match
			if (strcmp(cookie->domain, iri->host) != 0) {
				debug_printf("cookie host match failed (%s,%s)\n", cookie->domain, iri->host);
				continue;
			}
		} else if (!cookie_domain_match(cookie->domain, iri->host)) {
			debug_printf("cookie domain match failed (%s,%s)\n", cookie->domain, iri->host);
			continue;
		}

		// an expiry time of 0 means 'no expiry time set'
		if (cookie->expires && cookie->expires <= now) {
			debug_printf("cookie expired (%lld <= %lld)\n", (long long)cookie->expires, (long long)now);
			continue;
		}

		if (cookie->secure_only && iri->scheme != WGET_IRI_SCHEME_HTTPS) {
			debug_printf("cookie ignored, not secure\n");
			continue;
		}

		if (!cookie_path_match(cookie->path, iri->path)) {
			debug_printf("cookie path doesn't match (%s, %s)\n", cookie->path, iri->path);
			continue;
		}

		debug_printf("found %s=%s\n", cookie->name, cookie->value);

		// the vector for the matches is created on demand
		if (!matches)
			matches = wget_vector_create(16, (wget_vector_compare_fn *) compare_cookie2);

		// collect matching cookies (just pointers, no allocation)
		wget_vector_add(matches, cookie);
	}

	// sort cookies regarding RFC 6265
	wget_vector_sort(matches);

	// the buffer is only set up if there is something to put into it
	nmatches = wget_vector_size(matches);
	if (nmatches > 0) {
		wget_buffer_init(&buf, NULL, 128);
		have_header = 1;
	}

	// now create cookie header value
	for (idx = 0; idx < nmatches; idx++) {
		wget_cookie *cookie = wget_vector_get(matches, idx);

		if (!buf.length)
			wget_buffer_printf_append(&buf, "%s=%s", cookie->name, cookie->value);
		else
			wget_buffer_printf_append(&buf, "; %s=%s", cookie->name, cookie->value);
	}

	// The vector just borrowed the cookies from the database:
	// empty it without free'ing the content, then free the vector itself.
	wget_vector_clear_nofree(matches);
	wget_vector_free(&matches);

	wget_thread_mutex_unlock(cookie_db->mutex);

	if (!have_header)
		return NULL;

	return buf.data;
}
285 
// Initialize a cookie database.
//
// If 'cookie_db' is NULL, the structure is allocated here, else the given
// structure is used. In both cases it is zeroed before being set up with
// an empty cookie vector, a mutex and - if built with libpsl - a PSL context.
//
// Returns the initialized database or NULL if the allocation failed.
wget_cookie_db *wget_cookie_db_init(wget_cookie_db *cookie_db)
{
	wget_cookie_db *db = cookie_db;

	if (!db) {
		db = wget_malloc(sizeof(wget_cookie_db));
		if (!db)
			return NULL;
	}

	memset(db, 0, sizeof(*db));

	// the vector is kept sorted by compare_cookie() and uses cookie_free as destructor
	db->cookies = wget_vector_create(32, (wget_vector_compare_fn *) compare_cookie);
	wget_vector_set_destructor(db->cookies, cookie_free);

	wget_thread_mutex_init(&db->mutex);

#ifdef WITH_LIBPSL
#if ((PSL_VERSION_MAJOR > 0) || (PSL_VERSION_MAJOR == 0 && PSL_VERSION_MINOR >= 16))
	// psl_latest() is only used with libpsl 0.16 or newer
	db->psl = psl_latest(NULL);
#else
	db->psl = (psl_ctx_t *)psl_builtin();
#endif
#endif

	return db;
}
308 
// Release everything a cookie database holds: the PSL context (if built with
// libpsl), the cookie vector and the mutex. The structure itself is not free'd.
// Nothing happens if 'cookie_db' is NULL.
void wget_cookie_db_deinit(wget_cookie_db *cookie_db)
{
	if (!cookie_db)
		return;

#ifdef WITH_LIBPSL
	psl_free(cookie_db->psl);
	cookie_db->psl = NULL;
#endif

	// the vector is free'd while holding the lock
	wget_thread_mutex_lock(cookie_db->mutex);
	wget_vector_free(&cookie_db->cookies);
	wget_thread_mutex_unlock(cookie_db->mutex);

	wget_thread_mutex_destroy(&cookie_db->mutex);
}
322 
// Deinitialize the cookie database that 'cookie_db' points to and free it via xfree().
// Nothing happens if 'cookie_db' itself is NULL.
void wget_cookie_db_free(wget_cookie_db **cookie_db)
{
	if (!cookie_db)
		return;

	wget_cookie_db_deinit(*cookie_db);
	xfree(*cookie_db);
}
330 
// Set whether session cookies are kept when loading and saving the cookie database.
// Nothing happens if 'cookie_db' is NULL.
void wget_cookie_set_keep_session_cookies(wget_cookie_db *cookie_db, bool keep)
{
	if (!cookie_db)
		return;

	cookie_db->keep_session_cookies = keep;
}
336 
// Read cookies from the stream 'fp' and store them into 'cookie_db'.
//
// Each non-empty line that is not a comment holds one cookie as TAB separated fields:
//   [#HttpOnly_][.]domain <TAB> TRUE|FALSE <TAB> path <TAB> TRUE|FALSE <TAB> expires <TAB> name <TAB> value
// The second field is the inverse of host_only, the fourth field is secure_only.
// Lines starting with '#' are comments, except for the "#HttpOnly_" prefix.
//
// Expired cookies are dropped. Session cookies (expires is 0) are dropped
// unless keep_session_cookies is set. Lines without a name are reported and dropped.
//
// Returns the number of cookies handed to the database, or -1 if the stream
// has its error indicator set.
static int cookie_db_load(wget_cookie_db *cookie_db, FILE *fp)
{
	wget_cookie cookie;
	int ncookies = 0;
	char *buf = NULL, *linep, *p;
	size_t bufsize = 0;
	ssize_t buflen;
	int64_t now = time(NULL);

	wget_cookie_init(&cookie);

	while ((buflen = wget_getline(&buf, &bufsize, fp)) >= 0) {
		linep = buf;

		// ignore leading whitespace (cast: isspace() is undefined for negative char values)
		while (isspace((unsigned char) *linep)) linep++;
		if (!*linep) continue; // skip empty lines

		if (*linep == '#') {
			if (strncmp(linep, "#HttpOnly_", 10))
				continue; // skip comments

			linep = linep + 10;
			cookie.http_only = 1;
		} else {
			cookie.http_only = 0;
		}

		// strip off \r\n
		// buf[buflen] is the terminating 0 byte, the last character of the line is buf[buflen - 1]
		while (buflen > 0 && (buf[buflen - 1] == '\n' || buf[buflen - 1] == '\r'))
			buf[--buflen] = 0;

		// parse domain
		for (p = linep; *linep && *linep != '\t';) linep++;
		// set the flag in both directions, else it would be inherited from a previous line
		cookie.domain_dot = (*p == '.');
		if (cookie.domain_dot)
			p++;
		cookie.domain = wget_strmemdup(p, linep - p);

		// parse inverse host_only (FALSE: host_only=1)
		for (p = *linep ? ++linep : linep; *linep && *linep != '\t';) linep++;
		cookie.host_only = wget_strncasecmp_ascii(p, "TRUE", 4) != 0;

		// parse path
		for (p = *linep ? ++linep : linep; *linep && *linep != '\t';) linep++;
		if (p != linep)
			cookie.path = wget_strmemdup(p, linep - p);
		else
			cookie.path = wget_strmemdup("/", 1); // allow empty paths

		// parse secure_only
		for (p = *linep ? ++linep : linep; *linep && *linep != '\t';) linep++;
		cookie.secure_only = !wget_strncasecmp_ascii(p, "TRUE", 4);

		// parse expires
		for (p = *linep ? ++linep : linep; *linep && *linep != '\t';) linep++;
		cookie.expires = (int64_t) atoll(p);
		if (cookie.expires && cookie.expires <= now) {
			// drop expired cookie
			wget_cookie_deinit(&cookie);
			continue;
		}
		if (!cookie.expires && !cookie_db->keep_session_cookies) {
			// drop session cookies
			wget_cookie_deinit(&cookie);
			continue;
		}

		// parse name
		for (p = *linep ? ++linep : linep; *linep && *linep != '\t';) linep++;
		if (linep == p) {
			error_printf(_("Incomplete cookie entry: %s\n"), buf);
			wget_cookie_deinit(&cookie);
			continue;
		}
		cookie.name = wget_strmemdup(p, linep - p);

		// parse value, until end of line
		for (p = *linep ? ++linep : linep; *linep;) linep++;
		cookie.value = wget_strmemdup(p, linep - p);

		if (wget_cookie_normalize(NULL, &cookie) == 0 && wget_cookie_check_psl(cookie_db, &cookie) == 0) {
			wget_cookie *stored = wget_memdup(&cookie, sizeof(cookie));

			if (stored) {
				ncookies++;
				wget_cookie_store_cookie(cookie_db, stored); // takes ownership of 'stored'

				// The strings now belong to 'stored'. Reset our working copy,
				// else a later wget_cookie_deinit() would free them a second time.
				wget_cookie_init(&cookie);
			} else {
				// no copy, so the strings are still ours
				wget_cookie_deinit(&cookie);
			}
		} else
			wget_cookie_deinit(&cookie);
	}

	xfree(buf);

	if (ferror(fp)) {
		return -1;
	}

	return ncookies;
}
433 
// Load the cookies from file 'fname' into 'cookie_db'.
//
// Returns 0 on success and also if there is nothing to do
// ('cookie_db' or 'fname' is NULL, or 'fname' is an empty string).
// Returns -1 if wget_update_file() reported a failure.
int wget_cookie_db_load(wget_cookie_db *cookie_db, const char *fname)
{
	if (!cookie_db || !fname || !*fname)
		return 0;

	// load-only: no save callback is given
	int failed = wget_update_file(fname, (wget_update_load_fn *) cookie_db_load, NULL, cookie_db);

	if (failed) {
		error_printf(_("Failed to read cookies\n"));
		return -1;
	}

	debug_printf("Fetched cookies from '%s'\n", fname);
	return 0;
}
447 
448 // save the cookie store to a flat file
449 
cookie_db_save(wget_cookie_db * cookie_db,FILE * fp)450 static int cookie_db_save(wget_cookie_db *cookie_db, FILE *fp)
451 {
452 	if (wget_vector_size(cookie_db->cookies) > 0) {
453 		int it;
454 		int64_t now = time(NULL);
455 
456 		fputs("# HTTP Cookie File\n", fp);
457 		fputs("#Generated by libwget " PACKAGE_VERSION ". Edit at your own risk.\n\n", fp);
458 
459 		for (it = 0; it < wget_vector_size(cookie_db->cookies); it++) {
460 			wget_cookie *cookie = wget_vector_get(cookie_db->cookies, it);
461 
462 			if (cookie->persistent) {
463 				if (cookie->expires <= now)
464 					continue;
465 			} else if (!cookie_db->keep_session_cookies)
466 				continue;
467 
468 			wget_fprintf(fp, "%s%s%s\t%s\t%s\t%s\t%lld\t%s\t%s\n",
469 				cookie->http_only ? "#HttpOnly_" : "",
470 				cookie->domain_dot ? "." : "", // compatibility, irrelevant since RFC 6562
471 				cookie->domain,
472 				cookie->host_only ? "FALSE" : "TRUE",
473 				cookie->path, cookie->secure_only ? "TRUE" : "FALSE",
474 				(long long)cookie->expires,
475 				cookie->name, cookie->value);
476 
477 			if (ferror(fp))
478 				return -1;
479 		}
480 	}
481 
482 	return 0;
483 }
484 
485 // Save the HSTS cache to a flat file
486 // Protected by flock()
487 
wget_cookie_db_save(wget_cookie_db * cookie_db,const char * fname)488 int wget_cookie_db_save(wget_cookie_db *cookie_db, const char *fname)
489 {
490 	int size;
491 
492 	if (!cookie_db || !fname || !*fname)
493 		return -1;
494 
495 	if (wget_update_file(fname,
496 		(wget_update_load_fn *) cookie_db_load,
497 		(wget_update_save_fn *) cookie_db_save, cookie_db))
498 	{
499 		error_printf(_("Failed to write cookie file '%s'\n"), fname);
500 		return -1;
501 	}
502 
503 	if ((size = wget_vector_size(cookie_db->cookies)))
504 		debug_printf("Saved %d cookie%s into '%s'\n", size, size != 1 ? "s" : "", fname);
505 	else
506 		debug_printf("No cookies to save. Table is empty.\n");
507 
508 	return 0;
509 }
510