1 /*
2 * Copyright (c) 2012 Tim Ruehsen
3 * Copyright (c) 2015-2021 Free Software Foundation, Inc.
4 *
5 * This file is part of libwget.
6 *
7 * Libwget is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU Lesser General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
11 *
12 * Libwget is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU Lesser General Public License for more details.
16 *
17 * You should have received a copy of the GNU Lesser General Public License
18 * along with libwget. If not, see <https://www.gnu.org/licenses/>.
19 *
20 *
21 * Cookie database routines
22 *
23 * Changelog
24 * 23.10.2012 Tim Ruehsen created
25 *
26 * see https://tools.ietf.org/html/rfc6265
27 *
28 */
29
30 #include <config.h>
31
32 #include <stdio.h>
33 #include <stdlib.h>
34 #include <string.h>
35 #include <limits.h>
36 #include <ctype.h>
37 #include <time.h>
38 #include <errno.h>
39 #ifdef WITH_LIBPSL
40 # include <libpsl.h>
41 #endif
42
43 #include <wget.h>
44 #include "private.h"
45 #include "cookie.h"
46
47 struct wget_cookie_db_st {
48 wget_vector *
49 cookies;
50 #ifdef WITH_LIBPSL
51 psl_ctx_t
52 *psl; // libpsl Publix Suffix List context
53 #endif
54 wget_thread_mutex
55 mutex;
56 unsigned int
57 age;
58 bool
59 keep_session_cookies : 1; // whether or not session cookies are saved
60 };
61
62 // by this kind of sorting, we can easily see if a domain matches or not (match = supercookie !)
63
/**
 * Replace the Public Suffix List used by a cookie database.
 *
 * @param cookie_db  Cookie database to update
 * @param fname      PSL file to load, or NULL to drop the current list
 *                   (which disables the public suffix check)
 * @return 0 on success; -1 if `cookie_db` is NULL, if the file could not be
 *         loaded, or if the library was built without libpsl
 *
 * If loading `fname` fails, the list that is currently installed stays in
 * place. Freeing it and reporting success would silently switch off the
 * public suffix check for every cookie stored afterwards.
 */
int wget_cookie_db_load_psl(wget_cookie_db *cookie_db, const char *fname)
{
#ifdef WITH_LIBPSL
	if (!cookie_db)
		return -1;

	if (fname) {
		psl_ctx_t *psl = psl_load_file(fname);

		if (!psl)
			return -1; // load failed: keep the current list untouched

		psl_free(cookie_db->psl);
		cookie_db->psl = psl;
	} else {
		psl_free(cookie_db->psl);
		cookie_db->psl = NULL;
	}

	return 0;
#else
	(void) cookie_db; (void) fname;
	return -1;
#endif
}
86
87 // this is how we sort the entries in a cookie db
88 WGET_GCC_NONNULL_ALL WGET_GCC_PURE
compare_cookie(const wget_cookie * c1,const wget_cookie * c2)89 static int compare_cookie(const wget_cookie *c1, const wget_cookie *c2)
90 {
91 int n;
92
93 if (!(n = wget_strcmp(c1->domain, c2->domain))) {
94 if (!(n = wget_strcmp(c1->name, c2->name))) {
95 n = wget_strcmp(c1->path, c2->path);
96 }
97 }
98
99 return n;
100 }
101
102 // this is how we sort the entries when constructing a Cookie: header field
103 WGET_GCC_NONNULL_ALL WGET_GCC_PURE
compare_cookie2(const wget_cookie * c1,const wget_cookie * c2)104 static int compare_cookie2(const wget_cookie *c1, const wget_cookie *c2)
105 {
106 // RFC 6265 5.4 demands sorting by 1. longer paths first, 2. earlier creation time first.
107 size_t len1 = strlen(c1->path);
108 size_t len2 = strlen(c2->path);
109
110 if (len1 < len2)
111 return 1;
112
113 if (len1 > len2)
114 return -1;
115
116 if (c1->sort_age < c2->sort_age)
117 return -1;
118
119 if (c1->sort_age > c2->sort_age)
120 return 1;
121
122 return 0;
123 }
124
/**
 * Check a cookie's domain against the database's Public Suffix List.
 *
 * @param cookie_db  Cookie database holding the PSL context (not checked for NULL)
 * @param cookie     Cookie whose domain is tested (not checked for NULL)
 * @return -1 if the domain is a public suffix; 0 otherwise, which includes the
 *         cases that no list is loaded or the library was built without libpsl
 *
 * No locking is performed here.
 */
int wget_cookie_check_psl(const wget_cookie_db *cookie_db, const wget_cookie *cookie)
{
#ifdef WITH_LIBPSL
	if (cookie_db->psl && psl_is_public_suffix(cookie_db->psl, cookie->domain))
		return -1;

	return 0;
#else
	(void) cookie_db; (void) cookie;
	return 0;
#endif
}
145
/**
 * Store a cookie in the database, taking ownership of it.
 *
 * @param cookie_db  Cookie database
 * @param cookie     Cookie to store; on every path except `cookie == NULL`
 *                   the cookie is either kept by the database or freed here
 * @return WGET_E_SUCCESS if the cookie was stored; WGET_E_INVALID if an
 *         argument is NULL, the cookie is not normalized or its domain is a
 *         public suffix; the (negative) error code of the vector if inserting
 *         a new cookie failed
 *
 * A cookie that compares equal to a stored one replaces it but inherits the
 * old creation time and sort_age. A new cookie gets the next sort_age.
 */
int wget_cookie_store_cookie(wget_cookie_db *cookie_db, wget_cookie *cookie)
{
	wget_cookie *old;
	int pos, rc = WGET_E_SUCCESS;

	if (!cookie)
		return WGET_E_INVALID;

	if (!cookie_db) {
		wget_cookie_free(&cookie);
		return WGET_E_INVALID;
	}

	debug_printf("got cookie %s=%s\n", cookie->name, cookie->value);

	if (!cookie->normalized) {
		debug_printf("cookie '%s' dropped, it wasn't normalized\n", cookie->name);
		wget_cookie_free(&cookie);
		return WGET_E_INVALID;
	}

	if (wget_cookie_check_psl(cookie_db, cookie) != 0) {
		debug_printf("cookie '%s' dropped, domain '%s' is a public suffix\n", cookie->name, cookie->domain);
		wget_cookie_free(&cookie);
		return WGET_E_INVALID;
	}

	wget_thread_mutex_lock(cookie_db->mutex);

	// a failed lookup yields a negative position, for which the vector returns NULL
	pos = wget_vector_find(cookie_db->cookies, cookie);
	old = wget_vector_get(cookie_db->cookies, pos);

	if (old) {
		debug_printf("replace old cookie %s=%s\n", cookie->name, cookie->value);
		cookie->creation = old->creation;
		cookie->sort_age = old->sort_age;
		wget_vector_replace(cookie_db->cookies, cookie, pos);
	} else {
		debug_printf("store new cookie %s=%s\n", cookie->name, cookie->value);
		cookie->sort_age = ++cookie_db->age;

		int inserted = wget_vector_insert_sorted(cookie_db->cookies, cookie);
		if (inserted < 0)
			rc = inserted; // the vector did not take the cookie
	}

	wget_thread_mutex_unlock(cookie_db->mutex);

	if (rc != WGET_E_SUCCESS) {
		// we own the cookie, so free it instead of leaking it and reporting success
		debug_printf("cookie '%s' dropped, failed to store it\n", cookie->name);
		wget_cookie_free(&cookie);
	}

	return rc;
}
192
/**
 * Move all cookies of a vector into the cookie database.
 *
 * @param cookie_db  Cookie database; if NULL, nothing happens and `cookies` is left untouched
 * @param cookies    Vector of cookies; it is emptied (without freeing the entries)
 *                   because each entry has been handed to wget_cookie_store_cookie()
 */
void wget_cookie_store_cookies(wget_cookie_db *cookie_db, wget_vector *cookies)
{
	if (!cookie_db)
		return;

	// wget_cookie_store_cookie() takes ownership of each entry
	for (int idx = 0; idx < wget_vector_size(cookies); idx++)
		wget_cookie_store_cookie(cookie_db, wget_vector_get(cookies, idx));

	// the entries are no longer ours: drop them from the vector without free'ing
	wget_vector_clear_nofree(cookies);
}
207
/**
 * Build the value of a Cookie: request header for the given IRI.
 *
 * Under the database mutex, every stored cookie is tested against the IRI
 * (host or domain match, not expired, secure-only requires HTTPS, path match).
 * The matching cookies are sorted as demanded by RFC 6265 and joined as
 * "name=value; name=value".
 *
 * @param cookie_db  Cookie database to search
 * @param iri        Request target
 * @return The data pointer of the assembled wget_buffer, or NULL if an argument
 *         is NULL or no cookie matched. NOTE(review): presumably the caller has
 *         to free the returned string - confirm against wget_buffer_init().
 */
char *wget_cookie_create_request_header(wget_cookie_db *cookie_db, const wget_iri *iri)
{
	int64_t now = time(NULL);
	wget_vector *matches = NULL;
	wget_buffer buf;
	bool have_buf = false;

	if (!cookie_db || !iri)
		return NULL;

	debug_printf("cookie_create_request_header for host=%s path=%s\n", iri->host, iri->path);

	wget_thread_mutex_lock(cookie_db->mutex);

	for (int idx = 0; idx < wget_vector_size(cookie_db->cookies); idx++) {
		wget_cookie *cookie = wget_vector_get(cookie_db->cookies, idx);

		if (cookie->host_only) {
			// host-only cookies need an exact host match
			if (strcmp(cookie->domain, iri->host) != 0) {
				debug_printf("cookie host match failed (%s,%s)\n", cookie->domain, iri->host);
				continue;
			}
		} else if (!cookie_domain_match(cookie->domain, iri->host)) {
			debug_printf("cookie domain match failed (%s,%s)\n", cookie->domain, iri->host);
			continue;
		}

		// an expiry of 0 means "no expiry set"
		if (cookie->expires && cookie->expires <= now) {
			debug_printf("cookie expired (%lld <= %lld)\n", (long long)cookie->expires, (long long)now);
			continue;
		}

		if (cookie->secure_only && iri->scheme != WGET_IRI_SCHEME_HTTPS) {
			debug_printf("cookie ignored, not secure\n");
			continue;
		}

		if (!cookie_path_match(cookie->path, iri->path)) {
			debug_printf("cookie path doesn't match (%s, %s)\n", cookie->path, iri->path);
			continue;
		}

		debug_printf("found %s=%s\n", cookie->name, cookie->value);

		// the helper vector is created lazily, on the first match
		if (!matches)
			matches = wget_vector_create(16, (wget_vector_compare_fn *) compare_cookie2);

		// collect matching cookies (just pointers, no allocation)
		wget_vector_add(matches, cookie);
	}

	// sort cookies regarding RFC 6265
	wget_vector_sort(matches);

	// now create cookie header value
	for (int idx = 0; idx < wget_vector_size(matches); idx++) {
		wget_cookie *cookie = wget_vector_get(matches, idx);

		if (!have_buf) {
			wget_buffer_init(&buf, NULL, 128);
			have_buf = true;
		}

		if (buf.length == 0)
			wget_buffer_printf_append(&buf, "%s=%s", cookie->name, cookie->value);
		else
			wget_buffer_printf_append(&buf, "; %s=%s", cookie->name, cookie->value);
	}

	// release the helper vector WITHOUT free'ing its content:
	// the cookies it points to still belong to the cookie db
	wget_vector_clear_nofree(matches);
	wget_vector_free(&matches);

	wget_thread_mutex_unlock(cookie_db->mutex);

	return have_buf ? buf.data : NULL;
}
285
/**
 * Initialize a cookie database.
 *
 * @param cookie_db  Storage to initialize, or NULL to have it allocated here
 * @return The initialized database, or NULL if the allocation failed
 *
 * The structure is zeroed, then the cookie vector, the mutex and (when built
 * with libpsl) the Public Suffix List context are set up.
 */
wget_cookie_db *wget_cookie_db_init(wget_cookie_db *cookie_db)
{
	if (!cookie_db && !(cookie_db = wget_malloc(sizeof(wget_cookie_db))))
		return NULL;

	memset(cookie_db, 0, sizeof(*cookie_db));

	// cookies are kept sorted by compare_cookie() and released via cookie_free()
	cookie_db->cookies = wget_vector_create(32, (wget_vector_compare_fn *) compare_cookie);
	wget_vector_set_destructor(cookie_db->cookies, cookie_free);

	wget_thread_mutex_init(&cookie_db->mutex);

#ifdef WITH_LIBPSL
#if ((PSL_VERSION_MAJOR > 0) || (PSL_VERSION_MAJOR == 0 && PSL_VERSION_MINOR >= 16))
	cookie_db->psl = psl_latest(NULL);
#else
	cookie_db->psl = (psl_ctx_t *)psl_builtin();
#endif
#endif

	return cookie_db;
}
308
/**
 * Release everything a cookie database holds, but not the structure itself.
 *
 * @param cookie_db  Cookie database; NULL is accepted and ignored
 */
void wget_cookie_db_deinit(wget_cookie_db *cookie_db)
{
	if (!cookie_db)
		return;

#ifdef WITH_LIBPSL
	psl_free(cookie_db->psl);
	cookie_db->psl = NULL;
#endif

	// the vector is released while holding the mutex, then the mutex itself goes away
	wget_thread_mutex_lock(cookie_db->mutex);
	wget_vector_free(&cookie_db->cookies);
	wget_thread_mutex_unlock(cookie_db->mutex);

	wget_thread_mutex_destroy(&cookie_db->mutex);
}
322
/**
 * Deinitialize a cookie database and free the structure itself.
 *
 * @param cookie_db  Address of the database pointer; NULL is accepted and ignored
 */
void wget_cookie_db_free(wget_cookie_db **cookie_db)
{
	if (!cookie_db)
		return;

	wget_cookie_db_deinit(*cookie_db);
	xfree(*cookie_db);
}
330
/**
 * Select whether session cookies are kept when loading and saving the database.
 *
 * @param cookie_db  Cookie database; NULL is accepted and ignored
 * @param keep       true to keep session cookies, false to drop them
 */
void wget_cookie_set_keep_session_cookies(wget_cookie_db *cookie_db, bool keep)
{
	if (!cookie_db)
		return;

	cookie_db->keep_session_cookies = keep;
}
336
/*
 * Read cookies from 'fp' and store them in 'cookie_db'.
 *
 * Each data line consists of TAB separated fields, in this order:
 *   domain, "TRUE"/"FALSE" (inverse host_only), path, "TRUE"/"FALSE" (secure_only),
 *   expiry time, name, value
 * A leading "#HttpOnly_" marks a http_only cookie, any other line starting
 * with '#' is a comment. Empty lines are skipped.
 *
 * Expired cookies are dropped. Cookies with an expiry of 0 (session cookies)
 * are dropped unless keep_session_cookies is set.
 *
 * Returns the number of accepted cookies, or -1 if the stream has its error
 * indicator set.
 */
static int cookie_db_load(wget_cookie_db *cookie_db, FILE *fp)
{
	wget_cookie cookie;
	int ncookies = 0;
	char *buf = NULL, *linep, *p;
	size_t bufsize = 0;
	ssize_t buflen;
	int64_t now = time(NULL);

	wget_cookie_init(&cookie);

	while ((buflen = wget_getline(&buf, &bufsize, fp)) >= 0) {
		linep = buf;

		// ignore leading whitespace (the cast keeps isspace() defined for bytes >= 0x80)
		while (isspace((unsigned char) *linep)) linep++;
		if (!*linep) continue; // skip empty lines

		if (*linep == '#') {
			if (strncmp(linep, "#HttpOnly_", 10))
				continue; // skip comments

			linep = linep + 10;
			cookie.http_only = 1;
		} else {
			cookie.http_only = 0;
		}

		// strip off \r\n - the last character of the line is at buflen - 1,
		// buf[buflen] is the terminating 0 byte
		while (buflen > 0 && (buf[buflen - 1] == '\n' || buf[buflen - 1] == '\r'))
			buf[--buflen] = 0;

		// parse domain
		for (p = linep; *linep && *linep != '\t';) linep++;
		// assign on every line, so the flag of an earlier line can't stick
		cookie.domain_dot = (*p == '.');
		if (cookie.domain_dot)
			p++;
		cookie.domain = wget_strmemdup(p, linep - p);

		// parse inverse host_only (FALSE: host_only=1)
		for (p = *linep ? ++linep : linep; *linep && *linep != '\t';) linep++;
		cookie.host_only = wget_strncasecmp_ascii(p, "TRUE", 4) != 0;

		// parse path
		for (p = *linep ? ++linep : linep; *linep && *linep != '\t';) linep++;
		if (p != linep)
			cookie.path = wget_strmemdup(p, linep - p);
		else
			cookie.path = wget_strmemdup("/", 1); // allow empty paths

		// parse secure_only
		for (p = *linep ? ++linep : linep; *linep && *linep != '\t';) linep++;
		cookie.secure_only = !wget_strncasecmp_ascii(p, "TRUE", 4);

		// parse expires
		for (p = *linep ? ++linep : linep; *linep && *linep != '\t';) linep++;
		cookie.expires = (int64_t) atoll(p);
		if (cookie.expires && cookie.expires <= now) {
			// drop expired cookie
			wget_cookie_deinit(&cookie);
			continue;
		}
		if (!cookie.expires && !cookie_db->keep_session_cookies) {
			// drop session cookies
			wget_cookie_deinit(&cookie);
			continue;
		}

		// parse name
		for (p = *linep ? ++linep : linep; *linep && *linep != '\t';) linep++;
		if (linep == p) {
			error_printf(_("Incomplete cookie entry: %s\n"), buf);
			wget_cookie_deinit(&cookie);
			continue;
		}
		cookie.name = wget_strmemdup(p, linep - p);

		// parse value, until end of line
		for (p = *linep ? ++linep : linep; *linep;) linep++;
		cookie.value = wget_strmemdup(p, linep - p);

		if (wget_cookie_normalize(NULL, &cookie) == 0 && wget_cookie_check_psl(cookie_db, &cookie) == 0) {
			wget_cookie *stored = wget_memdup(&cookie, sizeof(cookie));

			if (stored) {
				ncookies++;
				wget_cookie_store_cookie(cookie_db, stored); // takes ownership of 'stored'

				// The strings now belong to 'stored'. Forget our copies of the
				// pointers, else a later wget_cookie_deinit() on a dropped line
				// would free memory that is owned by the cookie db.
				wget_cookie_init(&cookie);
			} else {
				// out of memory: the strings are still ours
				wget_cookie_deinit(&cookie);
			}
		} else
			wget_cookie_deinit(&cookie);
	}

	xfree(buf);

	if (ferror(fp)) {
		return -1;
	}

	return ncookies;
}
433
/**
 * Load cookies from a file into the cookie database.
 *
 * @param cookie_db  Cookie database
 * @param fname      Name of the cookie file
 * @return 0 on success and also when `cookie_db` is NULL or `fname` is NULL or empty
 *         (nothing is done in that case); -1 if wget_update_file() reports a failure
 */
int wget_cookie_db_load(wget_cookie_db *cookie_db, const char *fname)
{
	const bool have_args = cookie_db && fname && *fname;

	if (!have_args)
		return 0;

	// load only: no save callback is passed
	if (wget_update_file(fname, (wget_update_load_fn *) cookie_db_load, NULL, cookie_db) != 0) {
		error_printf(_("Failed to read cookies\n"));
		return -1;
	}

	debug_printf("Fetched cookies from '%s'\n", fname);
	return 0;
}
447
448 // save the cookie store to a flat file
449
cookie_db_save(wget_cookie_db * cookie_db,FILE * fp)450 static int cookie_db_save(wget_cookie_db *cookie_db, FILE *fp)
451 {
452 if (wget_vector_size(cookie_db->cookies) > 0) {
453 int it;
454 int64_t now = time(NULL);
455
456 fputs("# HTTP Cookie File\n", fp);
457 fputs("#Generated by libwget " PACKAGE_VERSION ". Edit at your own risk.\n\n", fp);
458
459 for (it = 0; it < wget_vector_size(cookie_db->cookies); it++) {
460 wget_cookie *cookie = wget_vector_get(cookie_db->cookies, it);
461
462 if (cookie->persistent) {
463 if (cookie->expires <= now)
464 continue;
465 } else if (!cookie_db->keep_session_cookies)
466 continue;
467
468 wget_fprintf(fp, "%s%s%s\t%s\t%s\t%s\t%lld\t%s\t%s\n",
469 cookie->http_only ? "#HttpOnly_" : "",
470 cookie->domain_dot ? "." : "", // compatibility, irrelevant since RFC 6562
471 cookie->domain,
472 cookie->host_only ? "FALSE" : "TRUE",
473 cookie->path, cookie->secure_only ? "TRUE" : "FALSE",
474 (long long)cookie->expires,
475 cookie->name, cookie->value);
476
477 if (ferror(fp))
478 return -1;
479 }
480 }
481
482 return 0;
483 }
484
485 // Save the HSTS cache to a flat file
486 // Protected by flock()
487
wget_cookie_db_save(wget_cookie_db * cookie_db,const char * fname)488 int wget_cookie_db_save(wget_cookie_db *cookie_db, const char *fname)
489 {
490 int size;
491
492 if (!cookie_db || !fname || !*fname)
493 return -1;
494
495 if (wget_update_file(fname,
496 (wget_update_load_fn *) cookie_db_load,
497 (wget_update_save_fn *) cookie_db_save, cookie_db))
498 {
499 error_printf(_("Failed to write cookie file '%s'\n"), fname);
500 return -1;
501 }
502
503 if ((size = wget_vector_size(cookie_db->cookies)))
504 debug_printf("Saved %d cookie%s into '%s'\n", size, size != 1 ? "s" : "", fname);
505 else
506 debug_printf("No cookies to save. Table is empty.\n");
507
508 return 0;
509 }
510