1 /* Support for cookies.
2 Copyright (C) 2001-2011, 2015, 2018-2021 Free Software Foundation,
3 Inc.
4
5 This file is part of GNU Wget.
6
7 GNU Wget is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3 of the License, or (at
10 your option) any later version.
11
12 GNU Wget is distributed in the hope that it will be useful, but
13 WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with Wget. If not, see <http://www.gnu.org/licenses/>.
19
20 Additional permission under GNU GPL version 3 section 7
21
22 If you modify this program, or any covered work, by linking or
23 combining it with the OpenSSL project's OpenSSL library (or a
24 modified version of that library), containing parts covered by the
25 terms of the OpenSSL or SSLeay licenses, the Free Software Foundation
26 grants you additional permission to convey the resulting work.
27 Corresponding Source for a non-source form of such a combination
28 shall include the source code for the parts of OpenSSL used as well
29 as that of the covered work. */
30
31 /* Written by Hrvoje Niksic. Parts are loosely inspired by the
32 cookie patch submitted by Tomasz Wegrzanowski.
33
34 This implements the client-side cookie support, as specified
35 (loosely) by Netscape's "preliminary specification", currently
36 available at:
37
38 http://wp.netscape.com/newsref/std/cookie_spec.html
39
40 rfc2109 is not supported because of its incompatibilities with the
41 above widely-used specification. rfc2965 is entirely ignored,
42 since popular client software doesn't implement it, and even the
43 sites that do send Set-Cookie2 also emit Set-Cookie for
44 compatibility. */
45
46 #include "wget.h"
47
48 #include <stdint.h>
49 #include <stdio.h>
50 #include <string.h>
51 #include <stdlib.h>
52 #include <assert.h>
53 #include <errno.h>
54 #include <time.h>
55 #ifdef HAVE_LIBPSL
56 # include <libpsl.h>
57 #endif
58 #include "utils.h"
59 #include "hash.h"
60 #include "cookies.h"
61 #include "http.h" /* for http_atotm */
62 #include "c-strcase.h"
63
64
65 /* Declarations of `struct cookie' and the most basic functions. */
66
/* Cookie jar serves as cookie storage and a means of retrieving
   cookies efficiently.  All cookies with the same domain are stored
   in a linked list called "chain".  A cookie chain can be reached by
   looking up the domain in the cookie jar's `chains' table.

   For example, to reach all the cookies under google.com, one must
   execute hash_table_get(jar->chains, "google.com").  Of
   course, when sending a cookie to `www.google.com', one must search
   for cookies that belong to either `www.google.com' or `google.com'
   -- but the point is that the code doesn't need to go through *all*
   the cookies.  */
78
/* The container that owns every cookie chain.  */
struct cookie_jar {
  /* Cookie chains indexed by domain.  The table is created with
     make_nocase_string_hash_table; each value stored in it is the
     head of a linked list of `struct cookie'.  */
  struct hash_table *chains;

  int cookie_count;             /* number of cookies in the jar. */
};
85
/* Value set by entry point functions, so that the low-level
   routines don't need to call time() all the time.  In this file it
   is assigned from time (NULL) by cookie_handle_set_cookie and read
   by cookie_expired_p and parse_set_cookie.  */
static time_t cookies_now;
89
90 struct cookie_jar *
cookie_jar_new(void)91 cookie_jar_new (void)
92 {
93 struct cookie_jar *jar = xnew (struct cookie_jar);
94 jar->chains = make_nocase_string_hash_table (0);
95 jar->cookie_count = 0;
96 return jar;
97 }
98
/* A single cookie.  Cookies of the same domain are linked through
   NEXT into a chain that is stored in the jar's hash table.  */
struct cookie {
  char *domain;                 /* domain of the cookie */
  int port;                     /* port number, or PORT_ANY when the
                                   cookie is not tied to a port */
  char *path;                   /* path prefix of the cookie */

  unsigned discard_requested :1;/* whether cookie was created to
                                   request discarding another
                                   cookie. */

  unsigned secure :1;           /* whether cookie must only be
                                   transmitted over secure (https)
                                   connections. */
  unsigned domain_exact :1;     /* whether DOMAIN must match as a
                                   whole. */

  unsigned permanent :1;        /* whether the cookie should outlive
                                   the session. */
  time_t expiry_time;           /* time when the cookie expires, 0
                                   means undetermined. */

  char *attr;                   /* cookie attribute name */
  char *value;                  /* cookie attribute value */

  struct cookie *next;          /* used for chaining of cookies in the
                                   same domain. */
};

/* Value of the `port' member that places no restriction on the port
   of the request the cookie is sent with.  */
#define PORT_ANY (-1)
127
128 /* Allocate and return a new, empty cookie structure. */
129
130 static struct cookie *
cookie_new(void)131 cookie_new (void)
132 {
133 struct cookie *cookie = xnew0 (struct cookie);
134
135 /* Both cookie->permanent and cookie->expiry_time are now 0. This
136 means that the cookie doesn't expire, but is only valid for this
137 session (i.e. not written out to disk). */
138
139 cookie->port = PORT_ANY;
140 return cookie;
141 }
142
143 /* Non-zero if the cookie has expired. Assumes cookies_now has been
144 set by one of the entry point functions. */
145
146 static bool
cookie_expired_p(const struct cookie * c)147 cookie_expired_p (const struct cookie *c)
148 {
149 return c->expiry_time != 0 && c->expiry_time < cookies_now;
150 }
151
152 /* Deallocate COOKIE and its components. */
153
154 static void
delete_cookie(struct cookie * cookie)155 delete_cookie (struct cookie *cookie)
156 {
157 xfree (cookie->domain);
158 xfree (cookie->path);
159 xfree (cookie->attr);
160 xfree (cookie->value);
161 xfree (cookie);
162 }
163
164 /* Functions for storing cookies.
165
166 All cookies can be reached beginning with jar->chains. The key in
167 that table is the domain name, and the value is a linked list of
168 all cookies from that domain. Every new cookie is placed on the
169 head of the list. */
170
171 /* Find and return a cookie in JAR whose domain, path, and attribute
172 name correspond to COOKIE. If found, PREVPTR will point to the
173 location of the cookie previous in chain, or NULL if the found
174 cookie is the head of a chain.
175
176 If no matching cookie is found, return NULL. */
177
178 static struct cookie *
find_matching_cookie(struct cookie_jar * jar,struct cookie * cookie,struct cookie ** prevptr)179 find_matching_cookie (struct cookie_jar *jar, struct cookie *cookie,
180 struct cookie **prevptr)
181 {
182 struct cookie *chain, *prev;
183
184 chain = hash_table_get (jar->chains, cookie->domain);
185 if (!chain)
186 goto nomatch;
187
188 prev = NULL;
189 for (; chain; prev = chain, chain = chain->next)
190 if (0 == strcmp (cookie->path, chain->path)
191 && 0 == strcmp (cookie->attr, chain->attr)
192 && cookie->port == chain->port)
193 {
194 *prevptr = prev;
195 return chain;
196 }
197
198 nomatch:
199 *prevptr = NULL;
200 return NULL;
201 }
202
203 /* Store COOKIE to the jar.
204
205 This is done by placing COOKIE at the head of its chain. However,
206 if COOKIE matches a cookie already in memory, as determined by
207 find_matching_cookie, the old cookie is unlinked and destroyed.
208
209 The key of each chain's hash table entry is allocated only the
210 first time; next hash_table_put's reuse the same key. */
211
212 static void
store_cookie(struct cookie_jar * jar,struct cookie * cookie)213 store_cookie (struct cookie_jar *jar, struct cookie *cookie)
214 {
215 struct cookie *chain_head;
216 char *chain_key;
217
218 if (hash_table_get_pair (jar->chains, cookie->domain,
219 &chain_key, &chain_head))
220 {
221 /* A chain of cookies in this domain already exists. Check for
222 duplicates -- if an extant cookie exactly matches our domain,
223 port, path, and name, replace it. */
224 struct cookie *prev;
225 struct cookie *victim = find_matching_cookie (jar, cookie, &prev);
226
227 if (victim)
228 {
229 /* Remove VICTIM from the chain. COOKIE will be placed at
230 the head. */
231 if (prev)
232 {
233 prev->next = victim->next;
234 cookie->next = chain_head;
235 }
236 else
237 {
238 /* prev is NULL; apparently VICTIM was at the head of
239 the chain. This place will be taken by COOKIE, so
240 all we need to do is: */
241 cookie->next = victim->next;
242 }
243 delete_cookie (victim);
244 --jar->cookie_count;
245 DEBUGP (("Deleted old cookie (to be replaced.)\n"));
246 }
247 else
248 cookie->next = chain_head;
249 }
250 else
251 {
252 /* We are now creating the chain. Use a copy of cookie->domain
253 as the key for the life-time of the chain. Using
254 cookie->domain would be unsafe because the life-time of the
255 chain may exceed the life-time of the cookie. (Cookies may
256 be deleted from the chain by this very function.) */
257 cookie->next = NULL;
258 chain_key = xstrdup (cookie->domain);
259 }
260
261 hash_table_put (jar->chains, chain_key, cookie);
262 ++jar->cookie_count;
263
264 IF_DEBUG
265 {
266 time_t exptime = cookie->expiry_time;
267 DEBUGP (("\nStored cookie %s %d%s %s <%s> <%s> [expiry %s] %s %s\n",
268 cookie->domain, cookie->port,
269 cookie->port == PORT_ANY ? " (ANY)" : "",
270 cookie->path,
271 cookie->permanent ? "permanent" : "session",
272 cookie->secure ? "secure" : "insecure",
273 cookie->expiry_time ? datetime_str (exptime) : "none",
274 cookie->attr, cookie->value));
275 }
276 }
277
278 /* Discard a cookie matching COOKIE's domain, port, path, and
279 attribute name. This gets called when we encounter a cookie whose
280 expiry date is in the past, or whose max-age is set to 0. The
281 former corresponds to netscape cookie spec, while the latter is
282 specified by rfc2109. */
283
284 static void
discard_matching_cookie(struct cookie_jar * jar,struct cookie * cookie)285 discard_matching_cookie (struct cookie_jar *jar, struct cookie *cookie)
286 {
287 struct cookie *prev, *victim;
288
289 if (!hash_table_count (jar->chains))
290 /* No elements == nothing to discard. */
291 return;
292
293 victim = find_matching_cookie (jar, cookie, &prev);
294 if (victim)
295 {
296 if (prev)
297 /* Simply unchain the victim. */
298 prev->next = victim->next;
299 else
300 {
301 /* VICTIM was head of its chain. We need to place a new
302 cookie at the head. */
303 char *chain_key = NULL;
304 int res;
305
306 res = hash_table_get_pair (jar->chains, victim->domain,
307 &chain_key, NULL);
308
309 if (res == 0)
310 {
311 logprintf (LOG_VERBOSE, _("Unable to get cookie for %s\n"),
312 victim->domain);
313 }
314 if (!victim->next)
315 {
316 /* VICTIM was the only cookie in the chain. Destroy the
317 chain and deallocate the chain key. */
318 hash_table_remove (jar->chains, victim->domain);
319 xfree (chain_key);
320 }
321 else
322 hash_table_put (jar->chains, chain_key, victim->next);
323 }
324 delete_cookie (victim);
325 DEBUGP (("Discarded old cookie.\n"));
326 }
327 }
328
329 /* Functions for parsing the `Set-Cookie' header, and creating new
330 cookies from the wire. */
331
332 #define TOKEN_IS(token, string_literal) \
333 BOUNDED_EQUAL_NO_CASE (token.b, token.e, string_literal)
334
335 #define TOKEN_NON_EMPTY(token) (token.b != NULL && token.b != token.e)
336
337 /* Parse the contents of the `Set-Cookie' header. The header looks
338 like this:
339
340 name1=value1; name2=value2; ...
341
342 Trailing semicolon is optional; spaces are allowed between all
343 tokens. Additionally, values may be quoted.
344
345 A new cookie is returned upon success, NULL otherwise.
346
347 The first name-value pair will be used to set the cookie's
348 attribute name and value. Subsequent parameters will be checked
349 against field names such as `domain', `path', etc. Recognized
350 fields will be parsed and the corresponding members of COOKIE
351 filled. */
352
353 static struct cookie *
parse_set_cookie(const char * set_cookie,bool silent)354 parse_set_cookie (const char *set_cookie, bool silent)
355 {
356 const char *ptr = set_cookie;
357 struct cookie *cookie = cookie_new ();
358 param_token name, value;
359
360 if (!extract_param (&ptr, &name, &value, ';', NULL))
361 goto error;
362 if (!value.b)
363 goto error;
364
365 /* If the value is quoted, do not modify it. */
366 if (*(value.b - 1) == '"')
367 value.b--;
368 if (*value.e == '"')
369 value.e++;
370
371 cookie->attr = strdupdelim (name.b, name.e);
372 cookie->value = strdupdelim (value.b, value.e);
373
374 while (extract_param (&ptr, &name, &value, ';', NULL))
375 {
376 if (TOKEN_IS (name, "domain"))
377 {
378 if (!TOKEN_NON_EMPTY (value))
379 goto error;
380 xfree (cookie->domain);
381 /* Strictly speaking, we should set cookie->domain_exact if the
382 domain doesn't begin with a dot. But many sites set the
383 domain to "foo.com" and expect "subhost.foo.com" to get the
384 cookie, and it apparently works in browsers. */
385 if (*value.b == '.')
386 ++value.b;
387 cookie->domain = strdupdelim (value.b, value.e);
388 }
389 else if (TOKEN_IS (name, "path"))
390 {
391 if (!TOKEN_NON_EMPTY (value))
392 goto error;
393 xfree (cookie->path);
394 cookie->path = strdupdelim (value.b, value.e);
395 }
396 else if (TOKEN_IS (name, "expires"))
397 {
398 char value_copy[128];
399 size_t value_len = value.e - value.b;
400 time_t expires;
401
402 if (!TOKEN_NON_EMPTY (value) || value_len >= sizeof (value_copy))
403 goto error;
404
405 memcpy (value_copy, value.b, value_len);
406 value_copy[value_len] = 0;
407
408 /* Check if expiration spec is valid.
409 If not, assume default (cookie doesn't expire, but valid only for
410 this session.) */
411 expires = http_atotm (value_copy);
412 if (expires != (time_t) -1)
413 {
414 cookie->permanent = 1;
415 cookie->expiry_time = expires;
416 /* According to netscape's specification, expiry time in
417 the past means that discarding of a matching cookie
418 is requested. */
419 if (cookie->expiry_time < cookies_now)
420 cookie->discard_requested = 1;
421 }
422 }
423 else if (TOKEN_IS (name, "max-age"))
424 {
425 double maxage = -1;
426 char value_copy[32];
427 size_t value_len = value.e - value.b;
428
429 if (!TOKEN_NON_EMPTY (value) || value_len >= sizeof (value_copy))
430 goto error;
431
432 memcpy (value_copy, value.b, value_len);
433 value_copy[value_len] = 0;
434
435 sscanf (value_copy, "%lf", &maxage);
436 if (maxage == -1)
437 /* something went wrong. */
438 goto error;
439 cookie->permanent = 1;
440 cookie->expiry_time = cookies_now + (time_t) maxage;
441
442 /* According to rfc2109, a cookie with max-age of 0 means that
443 discarding of a matching cookie is requested. */
444 if (maxage == 0)
445 cookie->discard_requested = 1;
446 }
447 else if (TOKEN_IS (name, "secure"))
448 {
449 /* ignore value completely */
450 cookie->secure = 1;
451 }
452 /* else: Ignore unrecognized attribute. */
453 }
454 if (*ptr)
455 /* extract_param has encountered a syntax error */
456 goto error;
457
458 /* The cookie has been successfully constructed; return it. */
459 return cookie;
460
461 error:
462 if (!silent)
463 logprintf (LOG_NOTQUIET,
464 _("Syntax error in Set-Cookie: %s at position %d.\n"),
465 quotearg_style (escape_quoting_style, set_cookie),
466 (int) (ptr - set_cookie));
467 delete_cookie (cookie);
468 return NULL;
469 }
470
471 #undef TOKEN_IS
472 #undef TOKEN_NON_EMPTY
473
/* Sanity checks.  These are important, otherwise it is possible for
   malicious attackers to destroy important cookie information and/or
   violate your privacy.  */
477
478
/* Scanning helpers.  P must be a modifiable pointer lvalue.  Both
   macros contain a `return false', so they make the *enclosing
   function* return when the expected text is not found at P.  */

/* Require at least one decimal digit at P and advance P past the
   whole run of digits.  */
#define REQUIRE_DIGITS(p) do {                  \
    if (!c_isdigit (*p))                        \
      return false;                             \
    for (++p; c_isdigit (*p); p++)              \
      ;                                         \
  } while (0)

/* Require a '.' at P and advance P past it.  P is incremented even
   when the character is not a dot, but the enclosing function
   returns at that point anyway.  */
#define REQUIRE_DOT(p) do {                     \
    if (*p++ != '.')                            \
      return false;                             \
  } while (0)
490
/* Return true if ADDR has the form <digits>.<digits>.<digits>.<digits>
   with nothing before or after it, false otherwise.  The digit groups
   are not range-checked and may be of any length.

   No network function such as inet_addr() is involved: all that is
   needed is a syntactic check that is small, fast and well-defined.
   Digits are recognized by plain ASCII range comparison, independent
   of the locale.  */

static bool
numeric_address_p (const char *addr)
{
  const char *cursor = addr;
  int group;

  for (group = 0; group < 4; group++)
    {
      /* Every group but the first is introduced by a dot.  */
      if (group > 0 && *cursor++ != '.')
        return false;

      /* A group consists of at least one digit...  */
      if (*cursor < '0' || *cursor > '9')
        return false;

      /* ...followed by any number of further digits.  */
      do
        cursor++;
      while (*cursor >= '0' && *cursor <= '9');
    }

  /* Nothing may follow the fourth group.  */
  return *cursor == '\0';
}
514
/* Check whether COOKIE_DOMAIN is an appropriate domain for HOST.
   Originally I tried to make the check compliant with rfc2109, but
   the sites deviated too often, so I had to fall back to "tail
   matching", as defined by the original Netscape's cookie spec.

   Wget now uses libpsl to check domain names against a public suffix
   list to see if they are valid.  However, since we don't provide a
   psl on our own, if libpsl is compiled without a public suffix list,
   fall back to using the original "tail matching" heuristic.  Also if
   libpsl is unable to convert the domain to lowercase, which means that
   it doesn't have any runtime conversion support, we again fall back to
   "tail matching" since libpsl states the results are unpredictable with
   upper case strings.

   Returns true if HOST may set a cookie for COOKIE_DOMAIN, false
   otherwise.  */

#ifdef HAVE_LIBPSL
/* Public suffix list context.  It is loaded on the first call of
   check_domain_match and stays NULL when no list could be obtained.  */
static psl_ctx_t *psl;
#endif

static bool
check_domain_match (const char *cookie_domain, const char *host)
{
#ifdef HAVE_LIBPSL
  static int init_psl;
  char *cookie_domain_lower = NULL;
  char *host_lower = NULL;
  int is_acceptable;

  DEBUGP (("cdm: 1\n"));
  if (!init_psl)
    {
      init_psl = 1;

#ifdef HAVE_PSL_LATEST
      if ((psl = psl_latest (NULL)))
        goto have_psl;

      DEBUGP (("\nPSL: Failed to load any PSL data. "
               "Falling back to insecure heuristics.\n"));
      /* PSL is NULL here.  Without this jump the very first call
         would continue at have_psl and query libpsl with a NULL
         context instead of using the heuristic announced above.  */
      goto no_psl;
#else
      if ((psl = psl_builtin ()) && !psl_builtin_outdated ())
        goto have_psl;

      DEBUGP (("\nPSL: built-in data outdated. "
               "Trying to load data from %s.\n",
              quote (psl_builtin_filename ())));

      if ((psl = psl_load_file (psl_builtin_filename ())))
        goto have_psl;

      DEBUGP (("\nPSL: %s not found or not readable. "
               "Falling back to built-in data.\n",
              quote (psl_builtin_filename ())));

      if (!(psl = psl_builtin ()))
        {
          DEBUGP (("\nPSL: libpsl not built with a public suffix list. "
                   "Falling back to insecure heuristics.\n"));
          goto no_psl;
        }
#endif
    }
  else if (!psl)
    /* An earlier call already failed to obtain a list.  */
    goto no_psl;

 have_psl:
  if (psl_str_to_utf8lower (cookie_domain, NULL, NULL, &cookie_domain_lower) == PSL_SUCCESS &&
      psl_str_to_utf8lower (host, NULL, NULL, &host_lower) == PSL_SUCCESS)
    {
      is_acceptable = psl_is_cookie_domain_acceptable (psl, host_lower, cookie_domain_lower);
    }
  else
    {
      DEBUGP (("libpsl unable to parse domain name. "
               "Falling back to simple heuristics.\n"));
      goto no_psl;
    }

  xfree (cookie_domain_lower);
  xfree (host_lower);

  return is_acceptable == 1;

 no_psl:
  /* Cleanup the PSL pointers first */
  xfree (cookie_domain_lower);
  xfree (host_lower);
#endif

  /* From here on: the "tail matching" heuristic.  */
  DEBUGP (("cdm: 2\n"));

  /* For the sake of efficiency, check for exact match first. */
  if (0 == strcasecmp (cookie_domain, host))
    return true;

  DEBUGP (("cdm: 3\n"));

  /* COOKIE_DOMAIN must match the tail of HOST. */
  if (!match_tail (host, cookie_domain, true))
    return false;

  /* We know that COOKIE_DOMAIN is a subset of HOST; however, we must
     make sure that somebody is not trying to set the cookie for a
     subdomain shared by many entities.  For example, "company.co.uk"
     must not be allowed to set a cookie for ".co.uk".  On the other
     hand, "sso.redhat.de" should be able to set a cookie for
     ".redhat.de".

     The only marginally sane way to handle this I can think of is to
     reject on the basis of the length of the second-level domain name
     (but when the top-level domain is unknown), with the assumption
     that those of three or less characters could be reserved.  For
     example:

          .co.org -> works because the TLD is known
           .co.uk -> doesn't work because "co" is only two chars long
          .com.au -> doesn't work because "com" is only 3 chars long
          .cnn.uk -> doesn't work because "cnn" is also only 3 chars long (ugh)
          .cnn.de -> doesn't work for the same reason (ugh!!)
         .abcd.de -> works because "abcd" is 4 chars long
      .img.cnn.de -> works because it's not trying to set the 2nd level domain
       .cnn.co.uk -> works for the same reason

    That should prevent misuse, while allowing reasonable usage.  If
    someone knows of a better way to handle this, please let me
    know.  */
  {
    const char *p = cookie_domain;
    int dccount = 1;            /* number of domain components */
    int ldcl  = 0;              /* last domain component length */
    int nldcl = 0;              /* next to last domain component length */
    int out;
    if (*p == '.')
      /* Ignore leading period in this calculation. */
      ++p;
    DEBUGP (("cdm: 4\n"));
    for (out = 0; !out; p++)
      switch (*p)
        {
        case '\0':
          out = 1;
          break;
        case '.':
          if (ldcl == 0)
            /* Empty domain component found -- the domain is invalid. */
            return false;
          if (*(p + 1) == '\0')
            {
              /* Tolerate trailing '.' by not treating the domain as
                 one ending with an empty domain component.  */
              out = 1;
              break;
            }
          nldcl = ldcl;
          ldcl  = 0;
          ++dccount;
          break;
        default:
          ++ldcl;
        }

    DEBUGP (("cdm: 5\n"));

    if (dccount < 2)
      return false;

    DEBUGP (("cdm: 6\n"));

    if (dccount == 2)
      {
        size_t i;
        int known_toplevel = false;
        static const char *known_toplevel_domains[] = {
          ".com", ".edu", ".net", ".org", ".gov", ".mil", ".int"
        };
        for (i = 0; i < countof (known_toplevel_domains); i++)
          if (match_tail (cookie_domain, known_toplevel_domains[i], true))
            {
              known_toplevel = true;
              break;
            }
        if (!known_toplevel && nldcl <= 3)
          return false;
      }
  }

  DEBUGP (("cdm: 7\n"));

  /* Don't allow the host "foobar.com" to set a cookie for domain
     "bar.com".  */
  if (*cookie_domain != '.')
    {
      size_t dlen = strlen (cookie_domain);
      size_t hlen = strlen (host);
      /* cookie host:    hostname.foobar.com */
      /* desired domain:             bar.com */
      /* '.' must be here in host-> ^        */
      if (hlen > dlen && host[hlen - dlen - 1] != '.')
        return false;
    }

  DEBUGP (("cdm: 8\n"));

  return true;
}
721
static int path_matches (const char *, const char *);

/* Return true if COOKIE_PATH is a leading part of PATH, as decided by
   path_matches.  */

static bool
check_path_match (const char *cookie_path, const char *path)
{
  int goodness = path_matches (path, cookie_path);

  return goodness != 0;
}
731
732 /* Process the HTTP `Set-Cookie' header. This results in storing the
733 cookie or discarding a matching one, or ignoring it completely, all
734 depending on the contents. */
735
736 void
cookie_handle_set_cookie(struct cookie_jar * jar,const char * host,int port,const char * path,const char * set_cookie)737 cookie_handle_set_cookie (struct cookie_jar *jar,
738 const char *host, int port,
739 const char *path, const char *set_cookie)
740 {
741 struct cookie *cookie;
742 cookies_now = time (NULL);
743 char buf[1024], *tmp;
744 size_t pathlen = strlen(path);
745
746 /* Wget's paths don't begin with '/' (blame rfc1808), but cookie
747 usage assumes /-prefixed paths. Until the rest of Wget is fixed,
748 simply prepend slash to PATH. */
749 if (pathlen < sizeof (buf) - 1)
750 tmp = buf;
751 else
752 tmp = xmalloc (pathlen + 2);
753
754 *tmp = '/';
755 memcpy (tmp + 1, path, pathlen + 1);
756 path = tmp;
757
758 cookie = parse_set_cookie (set_cookie, false);
759 if (!cookie)
760 goto out;
761
762 /* Sanitize parts of cookie. */
763
764 if (!cookie->domain)
765 {
766 cookie->domain = xstrdup (host);
767 cookie->domain_exact = 1;
768 /* Set the port, but only if it's non-default. */
769 if (port != 80 && port != 443)
770 cookie->port = port;
771 }
772 else
773 {
774 if (!check_domain_match (cookie->domain, host))
775 {
776 logprintf (LOG_NOTQUIET,
777 _("Cookie coming from %s attempted to set domain to "),
778 quotearg_style (escape_quoting_style, host));
779 logprintf (LOG_NOTQUIET,
780 _("%s\n"),
781 quotearg_style (escape_quoting_style, cookie->domain));
782 cookie->discard_requested = true;
783 }
784 }
785
786 if (!cookie->path)
787 {
788 /* The cookie doesn't set path: set it to the URL path, sans the
789 file part ("/dir/file" truncated to "/dir/"). */
790 char *trailing_slash = strrchr (path, '/');
791 if (trailing_slash)
792 cookie->path = strdupdelim (path, trailing_slash + 1);
793 else
794 /* no slash in the string -- can this even happen? */
795 cookie->path = xstrdup (path);
796 }
797 else
798 {
799 /* The cookie sets its own path; verify that it is legal. */
800 if (!check_path_match (cookie->path, path))
801 {
802 DEBUGP (("Attempt to fake the path: %s, %s\n",
803 cookie->path, path));
804 goto out;
805 }
806 }
807
808 /* Now store the cookie, or discard an existing cookie, if
809 discarding was requested. */
810
811 if (cookie->discard_requested)
812 {
813 discard_matching_cookie (jar, cookie);
814 goto out;
815 }
816
817 store_cookie (jar, cookie);
818 if (tmp != buf)
819 xfree (tmp);
820 return;
821
822 out:
823 if (cookie)
824 delete_cookie (cookie);
825 if (tmp != buf)
826 xfree (tmp);
827 }
828
/* Support for sending out cookies in HTTP requests, based on
   previously stored cookies.  Entry point is
   `cookie_header'.  */
832
/* Return the number of characters in STRING that are equal to CHR.
   The terminating NUL is not examined, so the result for CHR == '\0'
   is always 0.  */

static int
count_char (const char *string, char chr)
{
  int hits = 0;

  while (*string != '\0')
    {
      if (*string == chr)
        hits++;
      string++;
    }

  return hits;
}
845
846 /* Find the cookie chains whose domains match HOST and store them to
847 DEST.
848
849 A cookie chain is the head of a list of cookies that belong to a
850 host/domain. Given HOST "img.search.xemacs.org", this function
851 will return the chains for "img.search.xemacs.org",
852 "search.xemacs.org", and "xemacs.org" -- those of them that exist
853 (if any), that is.
854
855 DEST should be large enough to accept (in the worst case) as many
856 elements as there are domain components of HOST. */
857
858 static int
find_chains_of_host(struct cookie_jar * jar,const char * host,struct cookie * dest[])859 find_chains_of_host (struct cookie_jar *jar, const char *host,
860 struct cookie *dest[])
861 {
862 int dest_count = 0;
863 int passes, passcnt;
864
865 /* Bail out quickly if there are no cookies in the jar. */
866 if (!hash_table_count (jar->chains))
867 return 0;
868
869 if (numeric_address_p (host))
870 /* If host is an IP address, only check for the exact match. */
871 passes = 1;
872 else
873 /* Otherwise, check all the subdomains except the top-level (last)
874 one. As a domain with N components has N-1 dots, the number of
875 passes equals the number of dots. */
876 passes = count_char (host, '.');
877
878 passcnt = 0;
879
880 /* Find chains that match HOST, starting with exact match and
881 progressing to less specific domains. For instance, given HOST
882 fly.srk.fer.hr, first look for fly.srk.fer.hr's chain, then
883 srk.fer.hr's, then fer.hr's. */
884 while (1)
885 {
886 struct cookie *chain = hash_table_get (jar->chains, host);
887 if (chain)
888 dest[dest_count++] = chain;
889 if (++passcnt >= passes)
890 break;
891 host = strchr (host, '.') + 1;
892 }
893
894 return dest_count;
895 }
896
/* If FULL_PATH begins with PREFIX, return the length of PREFIX plus
   one; otherwise return zero.  Thanks to the added one, an empty
   PREFIX still yields a non-zero result, and longer prefixes yield
   larger values.  */

static int
path_matches (const char *full_path, const char *prefix)
{
  int prefix_len = strlen (prefix);

  /* The length of PREFIX determines the quality of the match.  */
  return strncmp (full_path, prefix, prefix_len) == 0 ? prefix_len + 1 : 0;
}
912
913 /* Return true if COOKIE matches the provided parameters of the URL
914 being downloaded: HOST, PORT, PATH, and SECFLAG.
915
916 If PATH_GOODNESS is non-NULL, store the "path goodness" value
917 there. That value is a measure of how closely COOKIE matches PATH,
918 used for ordering cookies. */
919
920 static bool
cookie_matches_url(const struct cookie * cookie,const char * host,int port,const char * path,bool secflag,int * path_goodness)921 cookie_matches_url (const struct cookie *cookie,
922 const char *host, int port, const char *path,
923 bool secflag, int *path_goodness)
924 {
925 int pg;
926
927 if (cookie_expired_p (cookie))
928 /* Ignore stale cookies. Don't bother unchaining the cookie at
929 this point -- Wget is a relatively short-lived application, and
930 stale cookies will not be saved by `save_cookies'. On the
931 other hand, this function should be as efficient as
932 possible. */
933 return false;
934
935 if (cookie->secure && !secflag)
936 /* Don't transmit secure cookies over insecure connections. */
937 return false;
938 if (cookie->port != PORT_ANY && cookie->port != port)
939 return false;
940
941 /* If exact domain match is required, verify that cookie's domain is
942 equal to HOST. If not, assume success on the grounds of the
943 cookie's chain having been found by find_chains_of_host. */
944 if (cookie->domain_exact
945 && 0 != strcasecmp (host, cookie->domain))
946 return false;
947
948 pg = path_matches (path, cookie->path);
949 if (pg == 0)
950 return false;
951
952 if (path_goodness)
953 /* If the caller requested path_goodness, we return it. This is
954 an optimization, so that the caller doesn't need to call
955 path_matches() again. */
956 *path_goodness = pg;
957 return true;
958 }
959
/* A structure that points to a cookie, along with the additional
   information about the cookie's "goodness".  This allows us to sort
   the cookies when returning them to the server, as required by the
   spec.  goodness_comparator orders these by decreasing
   domain_goodness and then by decreasing path_goodness.  */

struct weighed_cookie {
  struct cookie *cookie;        /* the cookie being weighed */
  int domain_goodness;          /* larger sorts first;
                                   NOTE(review): computed by the code
                                   that fills the array -- confirm how */
  int path_goodness;            /* larger sorts first; presumably the
                                   value produced by path_matches --
                                   confirm against the caller */
};
970
971 /* Comparator used for uniquifying the list. */
972
973 static int
equality_comparator(const void * p1,const void * p2)974 equality_comparator (const void *p1, const void *p2)
975 {
976 struct weighed_cookie *wc1 = (struct weighed_cookie *)p1;
977 struct weighed_cookie *wc2 = (struct weighed_cookie *)p2;
978
979 int namecmp = strcmp (wc1->cookie->attr, wc2->cookie->attr);
980 int valuecmp = strcmp (wc1->cookie->value, wc2->cookie->value);
981
982 /* We only really care whether both name and value are equal. We
983 return them in this order only for consistency... */
984 return namecmp ? namecmp : valuecmp;
985 }
986
987 /* Eliminate duplicate cookies. "Duplicate cookies" are any two
988 cookies with the same attr name and value. Whenever a duplicate
989 pair is found, one of the cookies is removed. */
990
991 static int
eliminate_dups(struct weighed_cookie * outgoing,int count)992 eliminate_dups (struct weighed_cookie *outgoing, int count)
993 {
994 struct weighed_cookie *h; /* hare */
995 struct weighed_cookie *t; /* tortoise */
996 struct weighed_cookie *end = outgoing + count;
997
998 /* We deploy a simple uniquify algorithm: first sort the array
999 according to our sort criteria, then copy it to itself, comparing
1000 each cookie to its neighbor and ignoring the duplicates. */
1001
1002 qsort (outgoing, count, sizeof (struct weighed_cookie), equality_comparator);
1003
1004 /* "Hare" runs through all the entries in the array, followed by
1005 "tortoise". If a duplicate is found, the hare skips it.
1006 Non-duplicate entries are copied to the tortoise ptr. */
1007
1008 for (h = t = outgoing; h < end; h++)
1009 {
1010 if (h != end - 1)
1011 {
1012 struct cookie *c0 = h[0].cookie;
1013 struct cookie *c1 = h[1].cookie;
1014 if (!strcmp (c0->attr, c1->attr) && !strcmp (c0->value, c1->value))
1015 continue; /* ignore the duplicate */
1016 }
1017
1018 /* If the hare has advanced past the tortoise (because of
1019 previous dups), make sure the values get copied. Otherwise,
1020 no copying is necessary. */
1021 if (h != t)
1022 *t++ = *h;
1023 else
1024 t++;
1025 }
1026 return t - outgoing;
1027 }
1028
1029 /* Comparator used for sorting by quality. */
1030
1031 static int
goodness_comparator(const void * p1,const void * p2)1032 goodness_comparator (const void *p1, const void *p2)
1033 {
1034 struct weighed_cookie *wc1 = (struct weighed_cookie *)p1;
1035 struct weighed_cookie *wc2 = (struct weighed_cookie *)p2;
1036
1037 /* Subtractions take `wc2' as the first argument becauase we want a
1038 sort in *decreasing* order of goodness. */
1039 int dgdiff = wc2->domain_goodness - wc1->domain_goodness;
1040 int pgdiff = wc2->path_goodness - wc1->path_goodness;
1041
1042 /* Sort by domain goodness; if these are the same, sort by path
1043 goodness. (The sorting order isn't really specified; maybe it
1044 should be the other way around.) */
1045 return dgdiff ? dgdiff : pgdiff;
1046 }
1047
1048 /* Generate a `Cookie' header for a request that goes to HOST:PORT and
1049 requests PATH from the server. The resulting string is allocated
1050 with `malloc', and the caller is responsible for freeing it. If no
1051 cookies pertain to this request, i.e. no cookie header should be
1052 generated, NULL is returned. */
1053
1054 char *
cookie_header(struct cookie_jar * jar,const char * host,int port,const char * path,bool secflag)1055 cookie_header (struct cookie_jar *jar, const char *host,
1056 int port, const char *path, bool secflag)
1057 {
1058 struct cookie *chains[32];
1059 int chain_count;
1060
1061 struct cookie *cookie;
1062 struct weighed_cookie *outgoing;
1063 size_t count, i, ocnt;
1064 char *result = NULL;
1065 int result_size, pos;
1066 char pathbuf[1024];
1067
1068 /* First, find the cookie chains whose domains match HOST. */
1069
1070 /* Allocate room for find_chains_of_host to write to. The number of
1071 chains can at most equal the number of subdomains, hence
1072 1+<number of dots>. We ignore cookies with more than 32 labels. */
1073 chain_count = 1 + count_char (host, '.');
1074 if (chain_count > (int) countof (chains))
1075 return NULL;
1076 chain_count = find_chains_of_host (jar, host, chains);
1077
1078 /* No cookies for this host. */
1079 if (chain_count <= 0)
1080 return NULL;
1081
1082 /* Wget's paths don't begin with '/' (blame rfc1808), but cookie
1083 usage assumes /-prefixed paths. Until the rest of Wget is fixed,
1084 simply prepend slash to PATH. */
1085 {
1086 char *tmp;
1087 size_t pathlen = strlen(path);
1088
1089 if (pathlen < sizeof (pathbuf) - 1)
1090 tmp = pathbuf;
1091 else
1092 tmp = xmalloc (pathlen + 2);
1093
1094 *tmp = '/';
1095 memcpy (tmp + 1, path, pathlen + 1);
1096 path = tmp;
1097 }
1098
1099 cookies_now = time (NULL);
1100
1101 /* Now extract from the chains those cookies that match our host
1102 (for domain_exact cookies), port (for cookies with port other
1103 than PORT_ANY), etc. See matching_cookie for details. */
1104
1105 /* Count the number of matching cookies. */
1106 count = 0;
1107 for (i = 0; i < (unsigned) chain_count; i++)
1108 for (cookie = chains[i]; cookie; cookie = cookie->next)
1109 if (cookie_matches_url (cookie, host, port, path, secflag, NULL))
1110 ++count;
1111 if (!count)
1112 goto out; /* no cookies matched */
1113
1114 /* Allocate the array. */
1115 if (count > SIZE_MAX / sizeof (struct weighed_cookie))
1116 goto out; /* unable to process so many cookies */
1117 outgoing = xmalloc (count * sizeof (struct weighed_cookie));
1118
1119 /* Fill the array with all the matching cookies from the chains that
1120 match HOST. */
1121 ocnt = 0;
1122 for (i = 0; i < (unsigned) chain_count; i++)
1123 for (cookie = chains[i]; cookie; cookie = cookie->next)
1124 {
1125 int pg;
1126 if (!cookie_matches_url (cookie, host, port, path, secflag, &pg))
1127 continue;
1128 outgoing[ocnt].cookie = cookie;
1129 outgoing[ocnt].domain_goodness = strlen (cookie->domain);
1130 outgoing[ocnt].path_goodness = pg;
1131 ++ocnt;
1132 }
1133 assert (ocnt == count);
1134
1135 /* Eliminate duplicate cookies; that is, those whose name and value
1136 are the same. */
1137 count = eliminate_dups (outgoing, count);
1138
1139 /* Sort the array so that best-matching domains come first, and
1140 that, within one domain, best-matching paths come first. */
1141 qsort (outgoing, count, sizeof (struct weighed_cookie), goodness_comparator);
1142
1143 /* Count the space the name=value pairs will take. */
1144 result_size = 0;
1145 for (i = 0; i < count; i++)
1146 {
1147 struct cookie *c = outgoing[i].cookie;
1148 /* name=value */
1149 result_size += strlen (c->attr) + 1 + strlen (c->value);
1150 }
1151
1152 /* Allocate output buffer:
1153 name=value pairs -- result_size
1154 "; " separators -- (count - 1) * 2
1155 \0 terminator -- 1 */
1156 result_size = result_size + (count - 1) * 2 + 1;
1157 result = xmalloc (result_size);
1158 pos = 0;
1159 for (i = 0; i < count; i++)
1160 {
1161 struct cookie *c = outgoing[i].cookie;
1162 int namlen = strlen (c->attr);
1163 int vallen = strlen (c->value);
1164
1165 memcpy (result + pos, c->attr, namlen);
1166 pos += namlen;
1167 result[pos++] = '=';
1168 memcpy (result + pos, c->value, vallen);
1169 pos += vallen;
1170 if (i < count - 1)
1171 {
1172 result[pos++] = ';';
1173 result[pos++] = ' ';
1174 }
1175 }
1176 result[pos++] = '\0';
1177 xfree (outgoing);
1178 assert (pos == result_size);
1179
1180 out:
1181 if (path != pathbuf)
1182 xfree (path);
1183
1184 return result;
1185 }
1186
1187 /* Support for loading and saving cookies. The format used for
1188 loading and saving should be the format of the `cookies.txt' file
1189 used by Netscape and Mozilla, at least the Unix versions.
1190 (Apparently IE can export cookies in that format as well.) The
1191 format goes like this:
1192
1193 DOMAIN DOMAIN-FLAG PATH SECURE-FLAG TIMESTAMP ATTR-NAME ATTR-VALUE
1194
1195 DOMAIN -- cookie domain, optionally followed by :PORT
1196 DOMAIN-FLAG -- whether all hosts in the domain match
1197 PATH -- cookie path
1198 SECURE-FLAG -- whether cookie requires secure connection
1199 TIMESTAMP -- expiry timestamp, number of seconds since epoch
1200 ATTR-NAME -- name of the cookie attribute
1201 ATTR-VALUE -- value of the cookie attribute (empty if absent)
1202
   The fields are separated by TABs.  All fields are mandatory, except
   for ATTR-VALUE.  The `-FLAG' fields are boolean, their legal values
   being "TRUE" and "FALSE".  Empty lines, lines consisting of
   whitespace only, and comment lines (beginning with # optionally
   preceded by whitespace) are ignored.
1208
1209 Example line from cookies.txt (split in two lines for readability):
1210
1211 .google.com TRUE / FALSE 2147368447 \
1212 PREF ID=34bb47565bbcd47b:LD=en:NR=20:TM=985172580:LM=985739012
1213
1214 */
1215
/* If the region [DOMAIN_B, DOMAIN_E) ends with :<digits>, parse the
   number, return it, and store the new boundary (location of the `:')
   to DOMAIN_E_PTR.

   Return 0 and leave DOMAIN_E_PTR untouched if there is no colon, if
   anything other than decimal digits follows the first colon, or if
   the number exceeds 65535 (the largest valid port; rejecting it
   early also keeps the accumulation from overflowing on long digit
   runs).  A colon followed by nothing yields 0 as well, but in that
   case the boundary is still moved to the colon.  */

static int
domain_port (const char *domain_b, const char *domain_e,
             const char **domain_e_ptr)
{
  int port = 0;
  const char *p;
  const char *colon = memchr (domain_b, ':', domain_e - domain_b);

  if (!colon)
    return 0;

  for (p = colon + 1; p < domain_e; p++)
    {
      /* Anything outside '0'..'9' wraps to a value above 9 here.  */
      unsigned int digit = (unsigned char) *p - '0';

      if (digit > 9)
        /* Garbage following port number.  */
        return 0;

      port = 10 * port + (int) digit;
      if (port > 65535)
        /* Not a valid port number.  */
        return 0;
    }

  *domain_e_ptr = colon;
  return port;
}
1237
/* Extract the next TAB-terminated field starting at P: set B to its
   first character and E to the terminating TAB, then advance P past
   that TAB.  If the field is empty, or the string ends before a TAB
   is found, jump to the `next' label, which the using function must
   provide.  P, B and E are evaluated several times, so they must be
   plain lvalues without side effects.  */

#define GET_WORD(p, b, e) do {                          \
    for (b = p; *p != '\0' && *p != '\t'; ++p)          \
      ;                                                 \
    e = p;                                              \
    if (e == b || *p == '\0')                           \
      goto next;                                        \
    ++p;                                                \
  } while (0)
1247
1248 /* Load cookies from FILE. */
1249
1250 void
cookie_jar_load(struct cookie_jar * jar,const char * file)1251 cookie_jar_load (struct cookie_jar *jar, const char *file)
1252 {
1253 char *line = NULL;
1254 size_t bufsize = 0;
1255
1256 FILE *fp = fopen (file, "r");
1257 if (!fp)
1258 {
1259 logprintf (LOG_NOTQUIET, _("Cannot open cookies file %s: %s\n"),
1260 quote (file), strerror (errno));
1261 return;
1262 }
1263
1264 cookies_now = time (NULL);
1265
1266 while (getline (&line, &bufsize, fp) > 0)
1267 {
1268 struct cookie *cookie;
1269 char *p = line;
1270
1271 double expiry;
1272 int port;
1273
1274 char *domain_b = NULL, *domain_e = NULL;
1275 char *domflag_b = NULL, *domflag_e = NULL;
1276 char *path_b = NULL, *path_e = NULL;
1277 char *secure_b = NULL, *secure_e = NULL;
1278 char *expires_b = NULL, *expires_e = NULL;
1279 char *name_b = NULL, *name_e = NULL;
1280 char *value_b = NULL, *value_e = NULL;
1281
1282 /* Skip leading white-space. */
1283 while (*p && c_isspace (*p))
1284 ++p;
1285 /* Ignore empty lines. */
1286 if (!*p || *p == '#')
1287 continue;
1288
1289 GET_WORD (p, domain_b, domain_e);
1290 GET_WORD (p, domflag_b, domflag_e);
1291 GET_WORD (p, path_b, path_e);
1292 GET_WORD (p, secure_b, secure_e);
1293 GET_WORD (p, expires_b, expires_e);
1294 GET_WORD (p, name_b, name_e);
1295
1296 /* Don't use GET_WORD for value because it ends with newline,
1297 not TAB. */
1298 value_b = p;
1299 value_e = p + strlen (p);
1300 if (value_e > value_b && value_e[-1] == '\n')
1301 --value_e;
1302 if (value_e > value_b && value_e[-1] == '\r')
1303 --value_e;
1304 /* Empty values are legal (I think), so don't bother checking. */
1305
1306 cookie = cookie_new ();
1307
1308 cookie->attr = strdupdelim (name_b, name_e);
1309 cookie->value = strdupdelim (value_b, value_e);
1310 cookie->path = strdupdelim (path_b, path_e);
1311 cookie->secure = BOUNDED_EQUAL (secure_b, secure_e, "TRUE");
1312
1313 /* Curl source says, quoting Andre Garcia: "flag: A TRUE/FALSE
1314 value indicating if all machines within a given domain can
1315 access the variable. This value is set automatically by the
1316 browser, depending on the value set for the domain." */
1317 cookie->domain_exact = !BOUNDED_EQUAL (domflag_b, domflag_e, "TRUE");
1318
1319 /* DOMAIN needs special treatment because we might need to
1320 extract the port. */
1321 port = domain_port (domain_b, domain_e, (const char **)&domain_e);
1322 if (port)
1323 cookie->port = port;
1324
1325 if (*domain_b == '.')
1326 ++domain_b; /* remove leading dot internally */
1327 cookie->domain = strdupdelim (domain_b, domain_e);
1328
1329 /* safe default in case EXPIRES field is garbled. */
1330 expiry = (double)cookies_now - 1;
1331
1332 /* I don't like changing the line, but it's safe here. (line is
1333 malloced.) */
1334 *expires_e = '\0';
1335 sscanf (expires_b, "%lf", &expiry);
1336
1337 if (expiry == 0)
1338 {
1339 /* EXPIRY can be 0 for session cookies saved because the
1340 user specified `--keep-session-cookies' in the past.
1341 They remain session cookies, and will be saved only if
1342 the user has specified `keep-session-cookies' again. */
1343 }
1344 else
1345 {
1346 if (expiry < cookies_now)
1347 goto abort_cookie; /* ignore stale cookie. */
1348 cookie->expiry_time = (time_t) expiry;
1349 cookie->permanent = 1;
1350 }
1351
1352 store_cookie (jar, cookie);
1353
1354 next:
1355 continue;
1356
1357 abort_cookie:
1358 delete_cookie (cookie);
1359 }
1360
1361 xfree(line);
1362 fclose (fp);
1363 }
1364
1365 /* Save cookies, in format described above, to FILE. */
1366
1367 void
cookie_jar_save(struct cookie_jar * jar,const char * file)1368 cookie_jar_save (struct cookie_jar *jar, const char *file)
1369 {
1370 FILE *fp;
1371 hash_table_iterator iter;
1372
1373 DEBUGP (("Saving cookies to %s.\n", file));
1374
1375 cookies_now = time (NULL);
1376
1377 fp = fopen (file, "w");
1378 if (!fp)
1379 {
1380 logprintf (LOG_NOTQUIET, _("Cannot open cookies file %s: %s\n"),
1381 quote (file), strerror (errno));
1382 return;
1383 }
1384
1385 fputs ("# HTTP Cookie File\n", fp);
1386 fprintf (fp, "# Generated by Wget on %s.\n", datetime_str (cookies_now));
1387 fputs ("# Edit at your own risk.\n\n", fp);
1388
1389 for (hash_table_iterate (jar->chains, &iter);
1390 hash_table_iter_next (&iter);
1391 )
1392 {
1393 const char *domain = iter.key;
1394 struct cookie *cookie = iter.value;
1395 for (; cookie; cookie = cookie->next)
1396 {
1397 if (!cookie->permanent && !opt.keep_session_cookies)
1398 continue;
1399 if (cookie_expired_p (cookie))
1400 continue;
1401 if (!cookie->domain_exact)
1402 fputc ('.', fp);
1403 fputs (domain, fp);
1404 if (cookie->port != PORT_ANY)
1405 fprintf (fp, ":%d", cookie->port);
1406 fprintf (fp, "\t%s\t%s\t%s\t%.0f\t%s\t%s\n",
1407 cookie->domain_exact ? "FALSE" : "TRUE",
1408 cookie->path, cookie->secure ? "TRUE" : "FALSE",
1409 (double)cookie->expiry_time,
1410 cookie->attr, cookie->value);
1411 if (ferror (fp))
1412 goto out;
1413 }
1414 }
1415 out:
1416 if (ferror (fp))
1417 logprintf (LOG_NOTQUIET, _("Error writing to %s: %s\n"),
1418 quote (file), strerror (errno));
1419 if (fclose (fp) < 0)
1420 logprintf (LOG_NOTQUIET, _("Error closing %s: %s\n"),
1421 quote (file), strerror (errno));
1422
1423 DEBUGP (("Done saving cookies.\n"));
1424 }
1425
1426 /* Clean up cookie-related data. */
1427
1428 void
cookie_jar_delete(struct cookie_jar * jar)1429 cookie_jar_delete (struct cookie_jar *jar)
1430 {
1431 /* Iterate over chains (indexed by domain) and free them. */
1432 hash_table_iterator iter;
1433 for (hash_table_iterate (jar->chains, &iter); hash_table_iter_next (&iter); )
1434 {
1435 struct cookie *chain = iter.value;
1436 xfree (iter.key);
1437 /* Then all cookies in this chain. */
1438 while (chain)
1439 {
1440 struct cookie *next = chain->next;
1441 delete_cookie (chain);
1442 chain = next;
1443 }
1444 }
1445 hash_table_destroy (jar->chains);
1446 xfree (jar);
1447
1448 #ifdef HAVE_LIBPSL
1449 psl_free (psl);
1450 psl = NULL;
1451 #endif
1452 }
1453
/* Test cases.  Currently these only exercise parse_set_cookie and
   extract_param.  To use, recompile Wget with -DTEST_COOKIES and call
   test_cookies() from main.  Problems are reported on stdout; nothing
   is returned.  */

#ifdef TEST_COOKIES
void
test_cookies (void)
{
  /* Tests expected to succeed: DATA must parse, and extract_param
     must yield the name/value pairs listed in RESULTS (terminated by
     NULL).  */
  static struct {
    const char *data;
    const char *results[10];
  } tests_succ[] = {
    { "arg=value", {"arg", "value", NULL} },
    { "arg1=value1;arg2=value2", {"arg1", "value1", "arg2", "value2", NULL} },
    { "arg1=value1; arg2=value2", {"arg1", "value1", "arg2", "value2", NULL} },
    { "arg1=value1; arg2=value2;", {"arg1", "value1", "arg2", "value2", NULL} },
    { "arg1=value1; arg2=value2; ", {"arg1", "value1", "arg2", "value2", NULL} },
    { "arg1=\"value1\"; arg2=\"\"", {"arg1", "value1", "arg2", "", NULL} },
    { "arg=", {"arg", "", NULL} },
    { "arg1=; arg2=", {"arg1", "", "arg2", "", NULL} },
    { "arg1 = ; arg2= ", {"arg1", "", "arg2", "", NULL} },
  };

  /* Tests expected to fail: */
  static char *tests_fail[] = {
    ";",
    "arg=\"unterminated",
    "=empty-name",
    "arg1=;=another-empty-name",
  };
  size_t i;

  for (i = 0; i < countof (tests_succ); i++)
    {
      const char *data = tests_succ[i].data;
      const char **expected = tests_succ[i].results;
      struct cookie *c;

      c = parse_set_cookie (data, true);
      if (!c)
        {
          printf ("NULL cookie returned for valid data: %s\n", data);
          continue;
        }

      /* Test whether extract_param handles these cases correctly.  */
      {
        param_token name, value;
        const char *ptr = data;
        int j = 0;
        while (extract_param (&ptr, &name, &value, ';', NULL))
          {
            char *n, *v;

            /* Check for surplus parameters before copying anything,
               so that bailing out does not leak the copies.  */
            if (!expected[j])
              {
                printf ("Too many parameters for '%s'\n", data);
                break;
              }

            n = strdupdelim (name.b, name.e);
            v = strdupdelim (value.b, value.e);
            if (0 != strcmp (expected[j], n))
              printf ("Invalid name %d for '%s' (expected '%s', got '%s')\n",
                      j / 2 + 1, data, expected[j], n);
            if (0 != strcmp (expected[j + 1], v))
              printf ("Invalid value %d for '%s' (expected '%s', got '%s')\n",
                      j / 2 + 1, data, expected[j + 1], v);
            j += 2;
            xfree (n);
            xfree (v);
          }
        if (expected[j])
          printf ("Too few parameters for '%s'\n", data);
      }

      /* The parsed cookie was only needed as proof of success.  */
      delete_cookie (c);
    }

  for (i = 0; i < countof (tests_fail); i++)
    {
      struct cookie *c;
      char *data = tests_fail[i];
      c = parse_set_cookie (data, true);
      if (c)
        {
          printf ("Failed to report error on invalid data: %s\n", data);
          delete_cookie (c);
        }
    }
}
#endif /* TEST_COOKIES */
1540