1 /* Support for cookies.
2    Copyright (C) 2001-2011, 2015, 2018-2021 Free Software Foundation,
3    Inc.
4 
5 This file is part of GNU Wget.
6 
7 GNU Wget is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3 of the License, or (at
10 your option) any later version.
11 
12 GNU Wget is distributed in the hope that it will be useful, but
13 WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15 General Public License for more details.
16 
17 You should have received a copy of the GNU General Public License
18 along with Wget.  If not, see <http://www.gnu.org/licenses/>.
19 
20 Additional permission under GNU GPL version 3 section 7
21 
22 If you modify this program, or any covered work, by linking or
23 combining it with the OpenSSL project's OpenSSL library (or a
24 modified version of that library), containing parts covered by the
25 terms of the OpenSSL or SSLeay licenses, the Free Software Foundation
26 grants you additional permission to convey the resulting work.
27 Corresponding Source for a non-source form of such a combination
28 shall include the source code for the parts of OpenSSL used as well
29 as that of the covered work.  */
30 
31 /* Written by Hrvoje Niksic.  Parts are loosely inspired by the
32    cookie patch submitted by Tomasz Wegrzanowski.
33 
34    This implements the client-side cookie support, as specified
35    (loosely) by Netscape's "preliminary specification", currently
36    available at:
37 
38        http://wp.netscape.com/newsref/std/cookie_spec.html
39 
40    rfc2109 is not supported because of its incompatibilities with the
41    above widely-used specification.  rfc2965 is entirely ignored,
42    since popular client software doesn't implement it, and even the
43    sites that do send Set-Cookie2 also emit Set-Cookie for
44    compatibility.  */
45 
46 #include "wget.h"
47 
48 #include <stdint.h>
49 #include <stdio.h>
50 #include <string.h>
51 #include <stdlib.h>
52 #include <assert.h>
53 #include <errno.h>
54 #include <time.h>
55 #ifdef HAVE_LIBPSL
56 # include <libpsl.h>
57 #endif
58 #include "utils.h"
59 #include "hash.h"
60 #include "cookies.h"
61 #include "http.h"               /* for http_atotm */
62 #include "c-strcase.h"
63 
64 
65 /* Declarations of `struct cookie' and the most basic functions. */
66 
/* A cookie jar is the in-memory store for cookies, organized so that
   lookups stay cheap.  Cookies that share a domain are linked into a
   list, the "chain", and the jar's `chains' hash table maps a domain
   name to the head of that domain's chain.

   For instance, every cookie set for google.com is reachable through
   hash_table_get (jar->chains, "google.com").  Sending cookies to
   `www.google.com' still means consulting the chains of both
   `www.google.com' and `google.com' -- but it never requires walking
   through *every* cookie in the jar.  */

struct cookie_jar
{
  /* Maps a domain name to the head of that domain's cookie chain.  */
  struct hash_table *chains;

  /* Number of cookies currently held in the jar.  */
  int cookie_count;
};
85 
/* The current time as seen by the cookie code.  Entry point functions
   refresh it once, which lets the low-level routines compare against
   it instead of calling time() again and again.  */
static time_t cookies_now;
89 
90 struct cookie_jar *
cookie_jar_new(void)91 cookie_jar_new (void)
92 {
93   struct cookie_jar *jar = xnew (struct cookie_jar);
94   jar->chains = make_nocase_string_hash_table (0);
95   jar->cookie_count = 0;
96   return jar;
97 }
98 
/* A single cookie, as kept in a cookie jar chain.  */

struct cookie
{
  /* Domain the cookie belongs to.  */
  char *domain;

  /* Port number the cookie is bound to, or PORT_ANY.  */
  int port;

  /* Path prefix the cookie applies to.  */
  char *path;

  /* Set when the cookie was created only to request that another,
     matching cookie be discarded.  */
  unsigned discard_requested :1;

  /* Set when the cookie must not be transmitted over insecure
     (non-https) connections.  */
  unsigned secure :1;

  /* Set when DOMAIN has to match as a whole.  */
  unsigned domain_exact :1;

  /* Set when the cookie should outlive the session.  */
  unsigned permanent :1;

  /* Time at which the cookie expires; 0 means undetermined.  */
  time_t expiry_time;

  /* Name of the cookie attribute.  */
  char *attr;

  /* Value of the cookie attribute.  */
  char *value;

  /* Next cookie in the chain of cookies for the same domain.  */
  struct cookie *next;
};
125 
126 #define PORT_ANY (-1)
127 
128 /* Allocate and return a new, empty cookie structure. */
129 
130 static struct cookie *
cookie_new(void)131 cookie_new (void)
132 {
133   struct cookie *cookie = xnew0 (struct cookie);
134 
135   /* Both cookie->permanent and cookie->expiry_time are now 0.  This
136      means that the cookie doesn't expire, but is only valid for this
137      session (i.e. not written out to disk).  */
138 
139   cookie->port = PORT_ANY;
140   return cookie;
141 }
142 
143 /* Non-zero if the cookie has expired.  Assumes cookies_now has been
144    set by one of the entry point functions.  */
145 
146 static bool
cookie_expired_p(const struct cookie * c)147 cookie_expired_p (const struct cookie *c)
148 {
149   return c->expiry_time != 0 && c->expiry_time < cookies_now;
150 }
151 
152 /* Deallocate COOKIE and its components. */
153 
154 static void
delete_cookie(struct cookie * cookie)155 delete_cookie (struct cookie *cookie)
156 {
157   xfree (cookie->domain);
158   xfree (cookie->path);
159   xfree (cookie->attr);
160   xfree (cookie->value);
161   xfree (cookie);
162 }
163 
164 /* Functions for storing cookies.
165 
166    All cookies can be reached beginning with jar->chains.  The key in
167    that table is the domain name, and the value is a linked list of
168    all cookies from that domain.  Every new cookie is placed on the
169    head of the list.  */
170 
171 /* Find and return a cookie in JAR whose domain, path, and attribute
172    name correspond to COOKIE.  If found, PREVPTR will point to the
173    location of the cookie previous in chain, or NULL if the found
174    cookie is the head of a chain.
175 
176    If no matching cookie is found, return NULL. */
177 
178 static struct cookie *
find_matching_cookie(struct cookie_jar * jar,struct cookie * cookie,struct cookie ** prevptr)179 find_matching_cookie (struct cookie_jar *jar, struct cookie *cookie,
180                       struct cookie **prevptr)
181 {
182   struct cookie *chain, *prev;
183 
184   chain = hash_table_get (jar->chains, cookie->domain);
185   if (!chain)
186     goto nomatch;
187 
188   prev = NULL;
189   for (; chain; prev = chain, chain = chain->next)
190     if (0 == strcmp (cookie->path, chain->path)
191         && 0 == strcmp (cookie->attr, chain->attr)
192         && cookie->port == chain->port)
193       {
194         *prevptr = prev;
195         return chain;
196       }
197 
198  nomatch:
199   *prevptr = NULL;
200   return NULL;
201 }
202 
203 /* Store COOKIE to the jar.
204 
205    This is done by placing COOKIE at the head of its chain.  However,
206    if COOKIE matches a cookie already in memory, as determined by
207    find_matching_cookie, the old cookie is unlinked and destroyed.
208 
209    The key of each chain's hash table entry is allocated only the
210    first time; next hash_table_put's reuse the same key.  */
211 
212 static void
store_cookie(struct cookie_jar * jar,struct cookie * cookie)213 store_cookie (struct cookie_jar *jar, struct cookie *cookie)
214 {
215   struct cookie *chain_head;
216   char *chain_key;
217 
218   if (hash_table_get_pair (jar->chains, cookie->domain,
219                            &chain_key, &chain_head))
220     {
221       /* A chain of cookies in this domain already exists.  Check for
222          duplicates -- if an extant cookie exactly matches our domain,
223          port, path, and name, replace it.  */
224       struct cookie *prev;
225       struct cookie *victim = find_matching_cookie (jar, cookie, &prev);
226 
227       if (victim)
228         {
229           /* Remove VICTIM from the chain.  COOKIE will be placed at
230              the head. */
231           if (prev)
232             {
233               prev->next = victim->next;
234               cookie->next = chain_head;
235             }
236           else
237             {
238               /* prev is NULL; apparently VICTIM was at the head of
239                  the chain.  This place will be taken by COOKIE, so
240                  all we need to do is:  */
241               cookie->next = victim->next;
242             }
243           delete_cookie (victim);
244           --jar->cookie_count;
245           DEBUGP (("Deleted old cookie (to be replaced.)\n"));
246         }
247       else
248         cookie->next = chain_head;
249     }
250   else
251     {
252       /* We are now creating the chain.  Use a copy of cookie->domain
253          as the key for the life-time of the chain.  Using
254          cookie->domain would be unsafe because the life-time of the
255          chain may exceed the life-time of the cookie.  (Cookies may
256          be deleted from the chain by this very function.)  */
257       cookie->next = NULL;
258       chain_key = xstrdup (cookie->domain);
259     }
260 
261   hash_table_put (jar->chains, chain_key, cookie);
262   ++jar->cookie_count;
263 
264   IF_DEBUG
265     {
266       time_t exptime = cookie->expiry_time;
267       DEBUGP (("\nStored cookie %s %d%s %s <%s> <%s> [expiry %s] %s %s\n",
268                cookie->domain, cookie->port,
269                cookie->port == PORT_ANY ? " (ANY)" : "",
270                cookie->path,
271                cookie->permanent ? "permanent" : "session",
272                cookie->secure ? "secure" : "insecure",
273                cookie->expiry_time ? datetime_str (exptime) : "none",
274                cookie->attr, cookie->value));
275     }
276 }
277 
278 /* Discard a cookie matching COOKIE's domain, port, path, and
279    attribute name.  This gets called when we encounter a cookie whose
280    expiry date is in the past, or whose max-age is set to 0.  The
281    former corresponds to netscape cookie spec, while the latter is
282    specified by rfc2109.  */
283 
284 static void
discard_matching_cookie(struct cookie_jar * jar,struct cookie * cookie)285 discard_matching_cookie (struct cookie_jar *jar, struct cookie *cookie)
286 {
287   struct cookie *prev, *victim;
288 
289   if (!hash_table_count (jar->chains))
290     /* No elements == nothing to discard. */
291     return;
292 
293   victim = find_matching_cookie (jar, cookie, &prev);
294   if (victim)
295     {
296       if (prev)
297         /* Simply unchain the victim. */
298         prev->next = victim->next;
299       else
300         {
301           /* VICTIM was head of its chain.  We need to place a new
302              cookie at the head.  */
303           char *chain_key = NULL;
304           int res;
305 
306           res = hash_table_get_pair (jar->chains, victim->domain,
307                                      &chain_key, NULL);
308 
309           if (res == 0)
310             {
311               logprintf (LOG_VERBOSE, _("Unable to get cookie for %s\n"),
312                          victim->domain);
313             }
314           if (!victim->next)
315             {
316               /* VICTIM was the only cookie in the chain.  Destroy the
317                  chain and deallocate the chain key.  */
318               hash_table_remove (jar->chains, victim->domain);
319               xfree (chain_key);
320             }
321           else
322             hash_table_put (jar->chains, chain_key, victim->next);
323         }
324       delete_cookie (victim);
325       DEBUGP (("Discarded old cookie.\n"));
326     }
327 }
328 
329 /* Functions for parsing the `Set-Cookie' header, and creating new
330    cookies from the wire.  */
331 
332 #define TOKEN_IS(token, string_literal)                         \
333   BOUNDED_EQUAL_NO_CASE (token.b, token.e, string_literal)
334 
335 #define TOKEN_NON_EMPTY(token) (token.b != NULL && token.b != token.e)
336 
337 /* Parse the contents of the `Set-Cookie' header.  The header looks
338    like this:
339 
340    name1=value1; name2=value2; ...
341 
342    Trailing semicolon is optional; spaces are allowed between all
343    tokens.  Additionally, values may be quoted.
344 
345    A new cookie is returned upon success, NULL otherwise.
346 
347    The first name-value pair will be used to set the cookie's
348    attribute name and value.  Subsequent parameters will be checked
349    against field names such as `domain', `path', etc.  Recognized
350    fields will be parsed and the corresponding members of COOKIE
351    filled.  */
352 
353 static struct cookie *
parse_set_cookie(const char * set_cookie,bool silent)354 parse_set_cookie (const char *set_cookie, bool silent)
355 {
356   const char *ptr = set_cookie;
357   struct cookie *cookie = cookie_new ();
358   param_token name, value;
359 
360   if (!extract_param (&ptr, &name, &value, ';', NULL))
361     goto error;
362   if (!value.b)
363     goto error;
364 
365   /* If the value is quoted, do not modify it.  */
366   if (*(value.b - 1) == '"')
367     value.b--;
368   if (*value.e == '"')
369     value.e++;
370 
371   cookie->attr = strdupdelim (name.b, name.e);
372   cookie->value = strdupdelim (value.b, value.e);
373 
374   while (extract_param (&ptr, &name, &value, ';', NULL))
375     {
376       if (TOKEN_IS (name, "domain"))
377         {
378           if (!TOKEN_NON_EMPTY (value))
379             goto error;
380           xfree (cookie->domain);
381           /* Strictly speaking, we should set cookie->domain_exact if the
382              domain doesn't begin with a dot.  But many sites set the
383              domain to "foo.com" and expect "subhost.foo.com" to get the
384              cookie, and it apparently works in browsers.  */
385           if (*value.b == '.')
386             ++value.b;
387           cookie->domain = strdupdelim (value.b, value.e);
388         }
389       else if (TOKEN_IS (name, "path"))
390         {
391           if (!TOKEN_NON_EMPTY (value))
392             goto error;
393           xfree (cookie->path);
394           cookie->path = strdupdelim (value.b, value.e);
395         }
396       else if (TOKEN_IS (name, "expires"))
397         {
398           char value_copy[128];
399           size_t value_len = value.e - value.b;
400           time_t expires;
401 
402           if (!TOKEN_NON_EMPTY (value) || value_len >= sizeof (value_copy))
403             goto error;
404 
405           memcpy (value_copy, value.b, value_len);
406           value_copy[value_len] = 0;
407 
408           /* Check if expiration spec is valid.
409              If not, assume default (cookie doesn't expire, but valid only for
410              this session.) */
411           expires = http_atotm (value_copy);
412           if (expires != (time_t) -1)
413             {
414               cookie->permanent = 1;
415               cookie->expiry_time = expires;
416               /* According to netscape's specification, expiry time in
417                  the past means that discarding of a matching cookie
418                  is requested.  */
419               if (cookie->expiry_time < cookies_now)
420                 cookie->discard_requested = 1;
421             }
422         }
423       else if (TOKEN_IS (name, "max-age"))
424         {
425           double maxage = -1;
426           char value_copy[32];
427           size_t value_len = value.e - value.b;
428 
429           if (!TOKEN_NON_EMPTY (value) || value_len >= sizeof (value_copy))
430             goto error;
431 
432           memcpy (value_copy, value.b, value_len);
433           value_copy[value_len] = 0;
434 
435           sscanf (value_copy, "%lf", &maxage);
436           if (maxage == -1)
437             /* something went wrong. */
438             goto error;
439           cookie->permanent = 1;
440           cookie->expiry_time = cookies_now + (time_t) maxage;
441 
442           /* According to rfc2109, a cookie with max-age of 0 means that
443              discarding of a matching cookie is requested.  */
444           if (maxage == 0)
445             cookie->discard_requested = 1;
446         }
447       else if (TOKEN_IS (name, "secure"))
448         {
449           /* ignore value completely */
450           cookie->secure = 1;
451         }
452       /* else: Ignore unrecognized attribute. */
453     }
454   if (*ptr)
455     /* extract_param has encountered a syntax error */
456     goto error;
457 
458   /* The cookie has been successfully constructed; return it. */
459   return cookie;
460 
461  error:
462   if (!silent)
463     logprintf (LOG_NOTQUIET,
464                _("Syntax error in Set-Cookie: %s at position %d.\n"),
465                quotearg_style (escape_quoting_style, set_cookie),
466                (int) (ptr - set_cookie));
467   delete_cookie (cookie);
468   return NULL;
469 }
470 
471 #undef TOKEN_IS
472 #undef TOKEN_NON_EMPTY
473 
/* Sanity checks.  These are important, otherwise it is possible for
   malicious attackers to destroy important cookie information and/or
   violate your privacy.  */
477 
478 
/* Advance P past a run of one or more digits, or make the enclosing
   function return false if P does not point at a digit.  */
#define REQUIRE_DIGITS(p) do {                  \
  if (!c_isdigit (*(p)))                        \
    return false;                               \
  do                                            \
    ++(p);                                      \
  while (c_isdigit (*(p)));                     \
} while (0)

/* Advance P past a single '.', or make the enclosing function return
   false if P does not point at one.  */
#define REQUIRE_DOT(p) do {                     \
  if (*(p) != '.')                              \
    return false;                               \
  ++(p);                                        \
} while (0)

/* Return true if ADDR has the shape
   <digits>.<digits>.<digits>.<digits> and nothing else.

   Network functions such as inet_addr() are deliberately avoided: a
   purely syntactic check is all that is needed, and it is small, fast
   and well-defined.  */

static bool
numeric_address_p (const char *addr)
{
  const char *cursor = addr;

  REQUIRE_DIGITS (cursor);      /* first number */
  REQUIRE_DOT (cursor);
  REQUIRE_DIGITS (cursor);      /* second number */
  REQUIRE_DOT (cursor);
  REQUIRE_DIGITS (cursor);      /* third number */
  REQUIRE_DOT (cursor);
  REQUIRE_DIGITS (cursor);      /* fourth number */

  /* Anything after the fourth number disqualifies ADDR.  */
  return *cursor == '\0';
}
514 
/* Check whether COOKIE_DOMAIN is an appropriate domain for HOST, i.e.
   whether a response coming from HOST may set a cookie for
   COOKIE_DOMAIN.  Return true if so.

   Originally I tried to make the check compliant with rfc2109, but
   the sites deviated too often, so I had to fall back to "tail
   matching", as defined by the original Netscape's cookie spec.

   Wget now uses libpsl to check domain names against a public suffix
   list to see if they are valid.  However, since we don't provide a
   psl on our own, if libpsl has no public suffix list to offer, fall
   back to using the original "tail matching" heuristic.  Also if
   libpsl is unable to convert the domain to lowercase, which means that
   it doesn't have any runtime conversion support, we again fall back to
   "tail matching" since libpsl states the results are unpredictable with
   upper case strings.  */

#ifdef HAVE_LIBPSL
static psl_ctx_t *psl;
#endif

static bool
check_domain_match (const char *cookie_domain, const char *host)
{
#ifdef HAVE_LIBPSL
  static int init_psl;
  char *cookie_domain_lower = NULL;
  char *host_lower = NULL;
  int is_acceptable;

  DEBUGP (("cdm: 1\n"));
  if (!init_psl)
    {
      /* First call: try once to obtain PSL data.  A failure is
         remembered by PSL staying NULL.  */
      init_psl = 1;

#ifdef HAVE_PSL_LATEST
      if ((psl = psl_latest (NULL)))
        goto have_psl;

      DEBUGP (("\nPSL: Failed to load any PSL data. "
               "Falling back to insecure heuristics.\n"));
      /* Without this jump the first call would fall through to
         have_psl with a NULL context, instead of using the heuristics
         like every later call does.  */
      goto no_psl;
#else
      if ((psl = psl_builtin ()) && !psl_builtin_outdated ())
        goto have_psl;

      DEBUGP (("\nPSL: built-in data outdated. "
               "Trying to load data from %s.\n",
              quote (psl_builtin_filename ())));

      if ((psl = psl_load_file (psl_builtin_filename ())))
        goto have_psl;

      DEBUGP (("\nPSL: %s not found or not readable. "
               "Falling back to built-in data.\n",
              quote (psl_builtin_filename ())));

      if (!(psl = psl_builtin ()))
        {
          DEBUGP (("\nPSL: libpsl not built with a public suffix list. "
                   "Falling back to insecure heuristics.\n"));
          goto no_psl;
        }
#endif
    }
  else if (!psl)
    goto no_psl;

have_psl:
  if (psl_str_to_utf8lower (cookie_domain, NULL, NULL, &cookie_domain_lower) == PSL_SUCCESS &&
      psl_str_to_utf8lower (host, NULL, NULL, &host_lower) == PSL_SUCCESS)
    {
      is_acceptable = psl_is_cookie_domain_acceptable (psl, host_lower, cookie_domain_lower);
    }
  else
    {
        DEBUGP (("libpsl unable to parse domain name. "
                 "Falling back to simple heuristics.\n"));
        goto no_psl;
    }

  xfree (cookie_domain_lower);
  xfree (host_lower);

  return is_acceptable == 1;

no_psl:
  /* Cleanup the PSL pointers first */
  xfree (cookie_domain_lower);
  xfree (host_lower);
#endif

  /* For efficiency make some elementary checks first */
  DEBUGP (("cdm: 2\n"));

  /* For the sake of efficiency, check for exact match first.  Host
     names are compared independently of the locale.  */
  if (0 == c_strcasecmp (cookie_domain, host))
    return true;

  DEBUGP (("cdm: 3\n"));

  /* COOKIE_DOMAIN must match the tail of HOST. */
  if (!match_tail (host, cookie_domain, true))
    return false;

  /* We know that COOKIE_DOMAIN is a subset of HOST; however, we must
     make sure that somebody is not trying to set the cookie for a
     subdomain shared by many entities.  For example, "company.co.uk"
     must not be allowed to set a cookie for ".co.uk".  On the other
     hand, "sso.redhat.de" should be able to set a cookie for
     ".redhat.de".

     The only marginally sane way to handle this I can think of is to
     reject on the basis of the length of the second-level domain name
     (but only when the top-level domain is unknown), with the
     assumption that those of three or less characters could be
     reserved.  For example:

          .co.org -> works because the TLD is known
           .co.uk -> doesn't work because "co" is only two chars long
          .com.au -> doesn't work because "com" is only 3 chars long
          .cnn.uk -> doesn't work because "cnn" is also only 3 chars long (ugh)
          .cnn.de -> doesn't work for the same reason (ugh!!)
         .abcd.de -> works because "abcd" is 4 chars long
      .img.cnn.de -> works because it's not trying to set the 2nd level domain
       .cnn.co.uk -> works for the same reason

    That should prevent misuse, while allowing reasonable usage.  If
    someone knows of a better way to handle this, please let me
    know.  */
  {
    const char *p = cookie_domain;
    int dccount = 1;            /* number of domain components */
    int ldcl  = 0;              /* last domain component length */
    int nldcl = 0;              /* next to last domain component length */
    int out;
    if (*p == '.')
      /* Ignore leading period in this calculation. */
      ++p;
    DEBUGP (("cdm: 4\n"));
    for (out = 0; !out; p++)
      switch (*p)
        {
        case '\0':
          out = 1;
          break;
        case '.':
          if (ldcl == 0)
            /* Empty domain component found -- the domain is invalid. */
            return false;
          if (*(p + 1) == '\0')
            {
              /* Tolerate trailing '.' by not treating the domain as
                 one ending with an empty domain component.  */
              out = 1;
              break;
            }
          nldcl = ldcl;
          ldcl  = 0;
          ++dccount;
          break;
        default:
          ++ldcl;
        }

    DEBUGP (("cdm: 5\n"));

    if (dccount < 2)
      return false;

    DEBUGP (("cdm: 6\n"));

    if (dccount == 2)
      {
        size_t i;
        bool known_toplevel = false;
        static const char *known_toplevel_domains[] = {
          ".com", ".edu", ".net", ".org", ".gov", ".mil", ".int"
        };
        for (i = 0; i < countof (known_toplevel_domains); i++)
          if (match_tail (cookie_domain, known_toplevel_domains[i], true))
            {
              known_toplevel = true;
              break;
            }
        if (!known_toplevel && nldcl <= 3)
          return false;
      }
  }

  DEBUGP (("cdm: 7\n"));

  /* Don't allow the host "foobar.com" to set a cookie for domain
     "bar.com".  */
  if (*cookie_domain != '.')
    {
      size_t dlen = strlen (cookie_domain);
      size_t hlen = strlen (host);
      /* cookie host:    hostname.foobar.com */
      /* desired domain:             bar.com */
      /* '.' must be here in host-> ^        */
      if (hlen > dlen && host[hlen - dlen - 1] != '.')
        return false;
    }

  DEBUGP (("cdm: 8\n"));

  return true;
}
721 
static int path_matches (const char *full_path, const char *prefix);

/* Return true if PATH starts with COOKIE_PATH.  */

static bool
check_path_match (const char *cookie_path, const char *path)
{
  /* path_matches reports a mismatch as 0.  */
  if (path_matches (path, cookie_path))
    return true;

  return false;
}
731 
732 /* Process the HTTP `Set-Cookie' header.  This results in storing the
733    cookie or discarding a matching one, or ignoring it completely, all
734    depending on the contents.  */
735 
736 void
cookie_handle_set_cookie(struct cookie_jar * jar,const char * host,int port,const char * path,const char * set_cookie)737 cookie_handle_set_cookie (struct cookie_jar *jar,
738                           const char *host, int port,
739                           const char *path, const char *set_cookie)
740 {
741   struct cookie *cookie;
742   cookies_now = time (NULL);
743   char buf[1024], *tmp;
744   size_t pathlen = strlen(path);
745 
746   /* Wget's paths don't begin with '/' (blame rfc1808), but cookie
747      usage assumes /-prefixed paths.  Until the rest of Wget is fixed,
748      simply prepend slash to PATH.  */
749   if (pathlen < sizeof (buf) - 1)
750     tmp = buf;
751   else
752     tmp = xmalloc (pathlen + 2);
753 
754   *tmp = '/';
755   memcpy (tmp + 1, path, pathlen + 1);
756   path = tmp;
757 
758   cookie = parse_set_cookie (set_cookie, false);
759   if (!cookie)
760     goto out;
761 
762   /* Sanitize parts of cookie. */
763 
764   if (!cookie->domain)
765     {
766       cookie->domain = xstrdup (host);
767       cookie->domain_exact = 1;
768       /* Set the port, but only if it's non-default. */
769       if (port != 80 && port != 443)
770         cookie->port = port;
771     }
772   else
773     {
774       if (!check_domain_match (cookie->domain, host))
775         {
776           logprintf (LOG_NOTQUIET,
777                      _("Cookie coming from %s attempted to set domain to "),
778                      quotearg_style (escape_quoting_style, host));
779           logprintf (LOG_NOTQUIET,
780                      _("%s\n"),
781                      quotearg_style (escape_quoting_style, cookie->domain));
782           cookie->discard_requested = true;
783         }
784     }
785 
786   if (!cookie->path)
787     {
788       /* The cookie doesn't set path: set it to the URL path, sans the
789          file part ("/dir/file" truncated to "/dir/").  */
790       char *trailing_slash = strrchr (path, '/');
791       if (trailing_slash)
792         cookie->path = strdupdelim (path, trailing_slash + 1);
793       else
794         /* no slash in the string -- can this even happen? */
795         cookie->path = xstrdup (path);
796     }
797   else
798     {
799       /* The cookie sets its own path; verify that it is legal. */
800       if (!check_path_match (cookie->path, path))
801         {
802           DEBUGP (("Attempt to fake the path: %s, %s\n",
803                    cookie->path, path));
804           goto out;
805         }
806     }
807 
808   /* Now store the cookie, or discard an existing cookie, if
809      discarding was requested.  */
810 
811   if (cookie->discard_requested)
812     {
813       discard_matching_cookie (jar, cookie);
814       goto out;
815     }
816 
817   store_cookie (jar, cookie);
818   if (tmp != buf)
819     xfree (tmp);
820   return;
821 
822  out:
823   if (cookie)
824     delete_cookie (cookie);
825   if (tmp != buf)
826     xfree (tmp);
827 }
828 
829 /* Support for sending out cookies in HTTP requests, based on
830    previously stored cookies.  Entry point is
831    `build_cookies_request'.  */
832 
/* Return the number of occurrences of CHR in STRING.  The terminating
   NUL is never counted.  */

static int
count_char (const char *string, char chr)
{
  int hits = 0;

  while (*string)
    {
      if (*string == chr)
        hits++;
      string++;
    }

  return hits;
}
845 
846 /* Find the cookie chains whose domains match HOST and store them to
847    DEST.
848 
849    A cookie chain is the head of a list of cookies that belong to a
850    host/domain.  Given HOST "img.search.xemacs.org", this function
851    will return the chains for "img.search.xemacs.org",
852    "search.xemacs.org", and "xemacs.org" -- those of them that exist
853    (if any), that is.
854 
855    DEST should be large enough to accept (in the worst case) as many
856    elements as there are domain components of HOST.  */
857 
858 static int
find_chains_of_host(struct cookie_jar * jar,const char * host,struct cookie * dest[])859 find_chains_of_host (struct cookie_jar *jar, const char *host,
860                      struct cookie *dest[])
861 {
862   int dest_count = 0;
863   int passes, passcnt;
864 
865   /* Bail out quickly if there are no cookies in the jar.  */
866   if (!hash_table_count (jar->chains))
867     return 0;
868 
869   if (numeric_address_p (host))
870     /* If host is an IP address, only check for the exact match. */
871     passes = 1;
872   else
873     /* Otherwise, check all the subdomains except the top-level (last)
874        one.  As a domain with N components has N-1 dots, the number of
875        passes equals the number of dots.  */
876     passes = count_char (host, '.');
877 
878   passcnt = 0;
879 
880   /* Find chains that match HOST, starting with exact match and
881      progressing to less specific domains.  For instance, given HOST
882      fly.srk.fer.hr, first look for fly.srk.fer.hr's chain, then
883      srk.fer.hr's, then fer.hr's.  */
884   while (1)
885     {
886       struct cookie *chain = hash_table_get (jar->chains, host);
887       if (chain)
888         dest[dest_count++] = chain;
889       if (++passcnt >= passes)
890         break;
891       host = strchr (host, '.') + 1;
892     }
893 
894   return dest_count;
895 }
896 
/* Check whether FULL_PATH begins with PREFIX.

   Return zero when it does not.  When it does, return the length of
   PREFIX plus one, so that even an empty PREFIX yields a non-zero
   (i.e. "matching") result.  Larger values denote a longer, and
   therefore better, match.  */

static int
path_matches (const char *full_path, const char *prefix)
{
  int len = strlen (prefix);

  return strncmp (full_path, prefix, len) == 0 ? len + 1 : 0;
}
912 
913 /* Return true if COOKIE matches the provided parameters of the URL
914    being downloaded: HOST, PORT, PATH, and SECFLAG.
915 
916    If PATH_GOODNESS is non-NULL, store the "path goodness" value
917    there.  That value is a measure of how closely COOKIE matches PATH,
918    used for ordering cookies.  */
919 
920 static bool
cookie_matches_url(const struct cookie * cookie,const char * host,int port,const char * path,bool secflag,int * path_goodness)921 cookie_matches_url (const struct cookie *cookie,
922                     const char *host, int port, const char *path,
923                     bool secflag, int *path_goodness)
924 {
925   int pg;
926 
927   if (cookie_expired_p (cookie))
928     /* Ignore stale cookies.  Don't bother unchaining the cookie at
929        this point -- Wget is a relatively short-lived application, and
930        stale cookies will not be saved by `save_cookies'.  On the
931        other hand, this function should be as efficient as
932        possible.  */
933     return false;
934 
935   if (cookie->secure && !secflag)
936     /* Don't transmit secure cookies over insecure connections.  */
937     return false;
938   if (cookie->port != PORT_ANY && cookie->port != port)
939     return false;
940 
941   /* If exact domain match is required, verify that cookie's domain is
942      equal to HOST.  If not, assume success on the grounds of the
943      cookie's chain having been found by find_chains_of_host.  */
944   if (cookie->domain_exact
945       && 0 != strcasecmp (host, cookie->domain))
946     return false;
947 
948   pg = path_matches (path, cookie->path);
949   if (pg == 0)
950     return false;
951 
952   if (path_goodness)
953     /* If the caller requested path_goodness, we return it.  This is
954        an optimization, so that the caller doesn't need to call
955        path_matches() again.  */
956     *path_goodness = pg;
957   return true;
958 }
959 
/* Pairs a cookie with the scores used to rank it.  cookie_header
   builds an array of these so that the cookies can be de-duplicated
   and then sorted before they are written into the request header.  */

struct weighed_cookie {
  struct cookie *cookie;        /* the cookie being ranked */
  int domain_goodness;          /* length of the cookie's domain */
  int path_goodness;            /* score reported by path_matches */
};
970 
971 /* Comparator used for uniquifying the list. */
972 
973 static int
equality_comparator(const void * p1,const void * p2)974 equality_comparator (const void *p1, const void *p2)
975 {
976   struct weighed_cookie *wc1 = (struct weighed_cookie *)p1;
977   struct weighed_cookie *wc2 = (struct weighed_cookie *)p2;
978 
979   int namecmp  = strcmp (wc1->cookie->attr, wc2->cookie->attr);
980   int valuecmp = strcmp (wc1->cookie->value, wc2->cookie->value);
981 
982   /* We only really care whether both name and value are equal.  We
983      return them in this order only for consistency...  */
984   return namecmp ? namecmp : valuecmp;
985 }
986 
987 /* Eliminate duplicate cookies.  "Duplicate cookies" are any two
988    cookies with the same attr name and value.  Whenever a duplicate
989    pair is found, one of the cookies is removed.  */
990 
991 static int
eliminate_dups(struct weighed_cookie * outgoing,int count)992 eliminate_dups (struct weighed_cookie *outgoing, int count)
993 {
994   struct weighed_cookie *h;     /* hare */
995   struct weighed_cookie *t;     /* tortoise */
996   struct weighed_cookie *end = outgoing + count;
997 
998   /* We deploy a simple uniquify algorithm: first sort the array
999      according to our sort criteria, then copy it to itself, comparing
1000      each cookie to its neighbor and ignoring the duplicates.  */
1001 
1002   qsort (outgoing, count, sizeof (struct weighed_cookie), equality_comparator);
1003 
1004   /* "Hare" runs through all the entries in the array, followed by
1005      "tortoise".  If a duplicate is found, the hare skips it.
1006      Non-duplicate entries are copied to the tortoise ptr.  */
1007 
1008   for (h = t = outgoing; h < end; h++)
1009     {
1010       if (h != end - 1)
1011         {
1012           struct cookie *c0 = h[0].cookie;
1013           struct cookie *c1 = h[1].cookie;
1014           if (!strcmp (c0->attr, c1->attr) && !strcmp (c0->value, c1->value))
1015             continue;           /* ignore the duplicate */
1016         }
1017 
1018       /* If the hare has advanced past the tortoise (because of
1019          previous dups), make sure the values get copied.  Otherwise,
1020          no copying is necessary.  */
1021       if (h != t)
1022         *t++ = *h;
1023       else
1024         t++;
1025     }
1026   return t - outgoing;
1027 }
1028 
1029 /* Comparator used for sorting by quality. */
1030 
1031 static int
goodness_comparator(const void * p1,const void * p2)1032 goodness_comparator (const void *p1, const void *p2)
1033 {
1034   struct weighed_cookie *wc1 = (struct weighed_cookie *)p1;
1035   struct weighed_cookie *wc2 = (struct weighed_cookie *)p2;
1036 
1037   /* Subtractions take `wc2' as the first argument becauase we want a
1038      sort in *decreasing* order of goodness.  */
1039   int dgdiff = wc2->domain_goodness - wc1->domain_goodness;
1040   int pgdiff = wc2->path_goodness - wc1->path_goodness;
1041 
1042   /* Sort by domain goodness; if these are the same, sort by path
1043      goodness.  (The sorting order isn't really specified; maybe it
1044      should be the other way around.)  */
1045   return dgdiff ? dgdiff : pgdiff;
1046 }
1047 
1048 /* Generate a `Cookie' header for a request that goes to HOST:PORT and
1049    requests PATH from the server.  The resulting string is allocated
1050    with `malloc', and the caller is responsible for freeing it.  If no
1051    cookies pertain to this request, i.e. no cookie header should be
1052    generated, NULL is returned.  */
1053 
1054 char *
cookie_header(struct cookie_jar * jar,const char * host,int port,const char * path,bool secflag)1055 cookie_header (struct cookie_jar *jar, const char *host,
1056                int port, const char *path, bool secflag)
1057 {
1058   struct cookie *chains[32];
1059   int chain_count;
1060 
1061   struct cookie *cookie;
1062   struct weighed_cookie *outgoing;
1063   size_t count, i, ocnt;
1064   char *result = NULL;
1065   int result_size, pos;
1066   char pathbuf[1024];
1067 
1068   /* First, find the cookie chains whose domains match HOST. */
1069 
1070   /* Allocate room for find_chains_of_host to write to.  The number of
1071      chains can at most equal the number of subdomains, hence
1072      1+<number of dots>.  We ignore cookies with more than 32 labels. */
1073   chain_count = 1 + count_char (host, '.');
1074   if (chain_count > (int) countof (chains))
1075     return NULL;
1076   chain_count = find_chains_of_host (jar, host, chains);
1077 
1078   /* No cookies for this host. */
1079   if (chain_count <= 0)
1080     return NULL;
1081 
1082   /* Wget's paths don't begin with '/' (blame rfc1808), but cookie
1083      usage assumes /-prefixed paths.  Until the rest of Wget is fixed,
1084      simply prepend slash to PATH.  */
1085   {
1086     char *tmp;
1087     size_t pathlen = strlen(path);
1088 
1089     if (pathlen < sizeof (pathbuf) - 1)
1090       tmp = pathbuf;
1091     else
1092       tmp = xmalloc (pathlen + 2);
1093 
1094     *tmp = '/';
1095     memcpy (tmp + 1, path, pathlen + 1);
1096     path = tmp;
1097   }
1098 
1099   cookies_now = time (NULL);
1100 
1101   /* Now extract from the chains those cookies that match our host
1102      (for domain_exact cookies), port (for cookies with port other
1103      than PORT_ANY), etc.  See matching_cookie for details.  */
1104 
1105   /* Count the number of matching cookies. */
1106   count = 0;
1107   for (i = 0; i < (unsigned) chain_count; i++)
1108     for (cookie = chains[i]; cookie; cookie = cookie->next)
1109       if (cookie_matches_url (cookie, host, port, path, secflag, NULL))
1110         ++count;
1111   if (!count)
1112     goto out;                /* no cookies matched */
1113 
1114   /* Allocate the array. */
1115   if (count > SIZE_MAX / sizeof (struct weighed_cookie))
1116     goto out;                /* unable to process so many cookies */
1117   outgoing = xmalloc (count * sizeof (struct weighed_cookie));
1118 
1119   /* Fill the array with all the matching cookies from the chains that
1120      match HOST. */
1121   ocnt = 0;
1122   for (i = 0; i < (unsigned) chain_count; i++)
1123     for (cookie = chains[i]; cookie; cookie = cookie->next)
1124       {
1125         int pg;
1126         if (!cookie_matches_url (cookie, host, port, path, secflag, &pg))
1127           continue;
1128         outgoing[ocnt].cookie = cookie;
1129         outgoing[ocnt].domain_goodness = strlen (cookie->domain);
1130         outgoing[ocnt].path_goodness   = pg;
1131         ++ocnt;
1132       }
1133   assert (ocnt == count);
1134 
1135   /* Eliminate duplicate cookies; that is, those whose name and value
1136      are the same.  */
1137   count = eliminate_dups (outgoing, count);
1138 
1139   /* Sort the array so that best-matching domains come first, and
1140      that, within one domain, best-matching paths come first. */
1141   qsort (outgoing, count, sizeof (struct weighed_cookie), goodness_comparator);
1142 
1143   /* Count the space the name=value pairs will take. */
1144   result_size = 0;
1145   for (i = 0; i < count; i++)
1146     {
1147       struct cookie *c = outgoing[i].cookie;
1148       /* name=value */
1149       result_size += strlen (c->attr) + 1 + strlen (c->value);
1150     }
1151 
1152   /* Allocate output buffer:
1153      name=value pairs -- result_size
1154      "; " separators  -- (count - 1) * 2
1155      \0 terminator    -- 1 */
1156   result_size = result_size + (count - 1) * 2 + 1;
1157   result = xmalloc (result_size);
1158   pos = 0;
1159   for (i = 0; i < count; i++)
1160     {
1161       struct cookie *c = outgoing[i].cookie;
1162       int namlen = strlen (c->attr);
1163       int vallen = strlen (c->value);
1164 
1165       memcpy (result + pos, c->attr, namlen);
1166       pos += namlen;
1167       result[pos++] = '=';
1168       memcpy (result + pos, c->value, vallen);
1169       pos += vallen;
1170       if (i < count - 1)
1171         {
1172           result[pos++] = ';';
1173           result[pos++] = ' ';
1174         }
1175     }
1176   result[pos++] = '\0';
1177   xfree (outgoing);
1178   assert (pos == result_size);
1179 
1180 out:
1181   if (path != pathbuf)
1182     xfree (path);
1183 
1184 return result;
1185 }
1186 
1187 /* Support for loading and saving cookies.  The format used for
1188    loading and saving should be the format of the `cookies.txt' file
1189    used by Netscape and Mozilla, at least the Unix versions.
1190    (Apparently IE can export cookies in that format as well.)  The
1191    format goes like this:
1192 
1193        DOMAIN DOMAIN-FLAG PATH SECURE-FLAG TIMESTAMP ATTR-NAME ATTR-VALUE
1194 
1195      DOMAIN      -- cookie domain, optionally followed by :PORT
1196      DOMAIN-FLAG -- whether all hosts in the domain match
1197      PATH        -- cookie path
1198      SECURE-FLAG -- whether cookie requires secure connection
1199      TIMESTAMP   -- expiry timestamp, number of seconds since epoch
1200      ATTR-NAME   -- name of the cookie attribute
1201      ATTR-VALUE  -- value of the cookie attribute (empty if absent)
1202 
1203    The fields are separated by TABs.  All fields are mandatory, except
1204    for ATTR-VALUE.  The `-FLAG' fields are boolean, their legal values
1205    being "TRUE" and "FALSE".  Empty lines, lines consisting of
1206    whitespace only, and comment lines (beginning with # optionally
1207    preceded by whitespace) are ignored.
1208 
1209    Example line from cookies.txt (split in two lines for readability):
1210 
1211        .google.com      TRUE    /       FALSE   2147368447      \
1212        PREF     ID=34bb47565bbcd47b:LD=en:NR=20:TM=985172580:LM=985739012
1213 
1214 */
1215 
/* If the region [DOMAIN_B, DOMAIN_E) ends with :<digits>, parse the
   number, return it, and store the new boundary (the location of the
   `:') to *DOMAIN_E_PTR.

   Return 0 and leave *DOMAIN_E_PTR untouched when the region has no
   colon, when something other than digits follows the colon, or when
   the number is larger than 65535 and so cannot be a port.  (Bounding
   the value also keeps a long run of digits from overflowing the
   accumulator.)  A colon followed by nothing, or by a number equal to
   zero, also yields 0, but in that case the boundary is moved to the
   colon.  */

static int
domain_port (const char *domain_b, const char *domain_e,
             const char **domain_e_ptr)
{
  enum { MAX_PORT = 65535 };
  int port = 0;
  const char *p;
  const char *colon = memchr (domain_b, ':', domain_e - domain_b);

  if (!colon)
    return 0;

  /* Locale-independent digit test; the C standard guarantees that
     '0'..'9' are contiguous.  */
  for (p = colon + 1; p < domain_e && *p >= '0' && *p <= '9'; p++)
    {
      port = 10 * port + (*p - '0');
      if (port > MAX_PORT)
        /* Not a valid port; treat like any other garbage.  */
        return 0;
    }

  if (p < domain_e)
    /* Garbage following port number. */
    return 0;

  *domain_e_ptr = colon;
  return port;
}
1237 
1238 #define GET_WORD(p, b, e) do {                  \
1239   b = p;                                        \
1240   while (*p && *p != '\t')                      \
1241     ++p;                                        \
1242   e = p;                                        \
1243   if (b == e || !*p)                            \
1244     goto next;                                  \
1245   ++p;                                          \
1246 } while (0)
1247 
1248 /* Load cookies from FILE.  */
1249 
1250 void
cookie_jar_load(struct cookie_jar * jar,const char * file)1251 cookie_jar_load (struct cookie_jar *jar, const char *file)
1252 {
1253   char *line = NULL;
1254   size_t bufsize = 0;
1255 
1256   FILE *fp = fopen (file, "r");
1257   if (!fp)
1258     {
1259       logprintf (LOG_NOTQUIET, _("Cannot open cookies file %s: %s\n"),
1260                  quote (file), strerror (errno));
1261       return;
1262     }
1263 
1264   cookies_now = time (NULL);
1265 
1266   while (getline (&line, &bufsize, fp) > 0)
1267     {
1268       struct cookie *cookie;
1269       char *p = line;
1270 
1271       double expiry;
1272       int port;
1273 
1274       char *domain_b  = NULL, *domain_e  = NULL;
1275       char *domflag_b = NULL, *domflag_e = NULL;
1276       char *path_b    = NULL, *path_e    = NULL;
1277       char *secure_b  = NULL, *secure_e  = NULL;
1278       char *expires_b = NULL, *expires_e = NULL;
1279       char *name_b    = NULL, *name_e    = NULL;
1280       char *value_b   = NULL, *value_e   = NULL;
1281 
1282       /* Skip leading white-space. */
1283       while (*p && c_isspace (*p))
1284         ++p;
1285       /* Ignore empty lines.  */
1286       if (!*p || *p == '#')
1287         continue;
1288 
1289       GET_WORD (p, domain_b,  domain_e);
1290       GET_WORD (p, domflag_b, domflag_e);
1291       GET_WORD (p, path_b,    path_e);
1292       GET_WORD (p, secure_b,  secure_e);
1293       GET_WORD (p, expires_b, expires_e);
1294       GET_WORD (p, name_b,    name_e);
1295 
1296       /* Don't use GET_WORD for value because it ends with newline,
1297          not TAB.  */
1298       value_b = p;
1299       value_e = p + strlen (p);
1300       if (value_e > value_b && value_e[-1] == '\n')
1301         --value_e;
1302       if (value_e > value_b && value_e[-1] == '\r')
1303         --value_e;
1304       /* Empty values are legal (I think), so don't bother checking. */
1305 
1306       cookie = cookie_new ();
1307 
1308       cookie->attr    = strdupdelim (name_b, name_e);
1309       cookie->value   = strdupdelim (value_b, value_e);
1310       cookie->path    = strdupdelim (path_b, path_e);
1311       cookie->secure  = BOUNDED_EQUAL (secure_b, secure_e, "TRUE");
1312 
1313       /* Curl source says, quoting Andre Garcia: "flag: A TRUE/FALSE
1314          value indicating if all machines within a given domain can
1315          access the variable.  This value is set automatically by the
1316          browser, depending on the value set for the domain."  */
1317       cookie->domain_exact = !BOUNDED_EQUAL (domflag_b, domflag_e, "TRUE");
1318 
1319       /* DOMAIN needs special treatment because we might need to
1320          extract the port.  */
1321       port = domain_port (domain_b, domain_e, (const char **)&domain_e);
1322       if (port)
1323         cookie->port = port;
1324 
1325       if (*domain_b == '.')
1326         ++domain_b;             /* remove leading dot internally */
1327       cookie->domain  = strdupdelim (domain_b, domain_e);
1328 
1329       /* safe default in case EXPIRES field is garbled. */
1330       expiry = (double)cookies_now - 1;
1331 
1332       /* I don't like changing the line, but it's safe here.  (line is
1333          malloced.)  */
1334       *expires_e = '\0';
1335       sscanf (expires_b, "%lf", &expiry);
1336 
1337       if (expiry == 0)
1338         {
1339           /* EXPIRY can be 0 for session cookies saved because the
1340              user specified `--keep-session-cookies' in the past.
1341              They remain session cookies, and will be saved only if
1342              the user has specified `keep-session-cookies' again.  */
1343         }
1344       else
1345         {
1346           if (expiry < cookies_now)
1347             goto abort_cookie;  /* ignore stale cookie. */
1348           cookie->expiry_time = (time_t) expiry;
1349           cookie->permanent = 1;
1350         }
1351 
1352       store_cookie (jar, cookie);
1353 
1354     next:
1355       continue;
1356 
1357     abort_cookie:
1358       delete_cookie (cookie);
1359     }
1360 
1361   xfree(line);
1362   fclose (fp);
1363 }
1364 
1365 /* Save cookies, in format described above, to FILE. */
1366 
1367 void
cookie_jar_save(struct cookie_jar * jar,const char * file)1368 cookie_jar_save (struct cookie_jar *jar, const char *file)
1369 {
1370   FILE *fp;
1371   hash_table_iterator iter;
1372 
1373   DEBUGP (("Saving cookies to %s.\n", file));
1374 
1375   cookies_now = time (NULL);
1376 
1377   fp = fopen (file, "w");
1378   if (!fp)
1379     {
1380       logprintf (LOG_NOTQUIET, _("Cannot open cookies file %s: %s\n"),
1381                  quote (file), strerror (errno));
1382       return;
1383     }
1384 
1385   fputs ("# HTTP Cookie File\n", fp);
1386   fprintf (fp, "# Generated by Wget on %s.\n", datetime_str (cookies_now));
1387   fputs ("# Edit at your own risk.\n\n", fp);
1388 
1389   for (hash_table_iterate (jar->chains, &iter);
1390        hash_table_iter_next (&iter);
1391        )
1392     {
1393       const char *domain = iter.key;
1394       struct cookie *cookie = iter.value;
1395       for (; cookie; cookie = cookie->next)
1396         {
1397           if (!cookie->permanent && !opt.keep_session_cookies)
1398             continue;
1399           if (cookie_expired_p (cookie))
1400             continue;
1401           if (!cookie->domain_exact)
1402             fputc ('.', fp);
1403           fputs (domain, fp);
1404           if (cookie->port != PORT_ANY)
1405             fprintf (fp, ":%d", cookie->port);
1406           fprintf (fp, "\t%s\t%s\t%s\t%.0f\t%s\t%s\n",
1407                    cookie->domain_exact ? "FALSE" : "TRUE",
1408                    cookie->path, cookie->secure ? "TRUE" : "FALSE",
1409                    (double)cookie->expiry_time,
1410                    cookie->attr, cookie->value);
1411           if (ferror (fp))
1412             goto out;
1413         }
1414     }
1415  out:
1416   if (ferror (fp))
1417     logprintf (LOG_NOTQUIET, _("Error writing to %s: %s\n"),
1418                quote (file), strerror (errno));
1419   if (fclose (fp) < 0)
1420     logprintf (LOG_NOTQUIET, _("Error closing %s: %s\n"),
1421                quote (file), strerror (errno));
1422 
1423   DEBUGP (("Done saving cookies.\n"));
1424 }
1425 
1426 /* Clean up cookie-related data. */
1427 
1428 void
cookie_jar_delete(struct cookie_jar * jar)1429 cookie_jar_delete (struct cookie_jar *jar)
1430 {
1431   /* Iterate over chains (indexed by domain) and free them. */
1432   hash_table_iterator iter;
1433   for (hash_table_iterate (jar->chains, &iter); hash_table_iter_next (&iter); )
1434     {
1435       struct cookie *chain = iter.value;
1436       xfree (iter.key);
1437       /* Then all cookies in this chain. */
1438       while (chain)
1439         {
1440           struct cookie *next = chain->next;
1441           delete_cookie (chain);
1442           chain = next;
1443         }
1444     }
1445   hash_table_destroy (jar->chains);
1446   xfree (jar);
1447 
1448 #ifdef HAVE_LIBPSL
1449   psl_free (psl);
1450   psl = NULL;
1451 #endif
1452 }
1453 
/* Test cases.  Currently these only exercise parse_set_cookie and
   extract_param.  To use, recompile Wget with -DTEST_COOKIES and
   call test_cookies() from main.  Problems are reported on standard
   output; nothing is printed when all tests pass.  */

#ifdef TEST_COOKIES
void
test_cookies (void)
{
  /* Tests expected to succeed.  RESULTS lists the expected
     name/value pairs in order and is terminated by NULL.  */
  static struct {
    const char *data;
    const char *results[10];
  } tests_succ[] = {
    { "arg=value", {"arg", "value", NULL} },
    { "arg1=value1;arg2=value2", {"arg1", "value1", "arg2", "value2", NULL} },
    { "arg1=value1; arg2=value2", {"arg1", "value1", "arg2", "value2", NULL} },
    { "arg1=value1;  arg2=value2;", {"arg1", "value1", "arg2", "value2", NULL} },
    { "arg1=value1;  arg2=value2;  ", {"arg1", "value1", "arg2", "value2", NULL} },
    { "arg1=\"value1\"; arg2=\"\"", {"arg1", "value1", "arg2", "", NULL} },
    { "arg=", {"arg", "", NULL} },
    { "arg1=; arg2=", {"arg1", "", "arg2", "", NULL} },
    { "arg1 = ; arg2= ", {"arg1", "", "arg2", "", NULL} },
  };

  /* Tests expected to fail: */
  static char *tests_fail[] = {
    ";",
    "arg=\"unterminated",
    "=empty-name",
    "arg1=;=another-empty-name",
  };
  size_t i;

  for (i = 0; i < countof (tests_succ); i++)
    {
      const char *data = tests_succ[i].data;
      const char **expected = tests_succ[i].results;
      struct cookie *c;

      c = parse_set_cookie (data, true);
      if (!c)
        {
          printf ("NULL cookie returned for valid data: %s\n", data);
          continue;
        }

      /* Test whether extract_param handles these cases correctly. */
      {
        param_token name, value;
        const char *ptr = data;
        int j = 0;
        while (extract_param (&ptr, &name, &value, ';', NULL))
          {
            char *n = strdupdelim (name.b, name.e);
            char *v = strdupdelim (value.b, value.e);
            if (!expected[j])
              {
                printf ("Too many parameters for '%s'\n", data);
                /* Release the copies before leaving the loop.  */
                xfree (n);
                xfree (v);
                break;
              }
            if (0 != strcmp (expected[j], n))
              printf ("Invalid name %d for '%s' (expected '%s', got '%s')\n",
                      j / 2 + 1, data, expected[j], n);
            if (0 != strcmp (expected[j + 1], v))
              printf ("Invalid value %d for '%s' (expected '%s', got '%s')\n",
                      j / 2 + 1, data, expected[j + 1], v);
            j += 2;
            xfree (n);
            xfree (v);
          }
        if (expected[j])
          printf ("Too few parameters for '%s'\n", data);
      }

      /* The parsed cookie was only needed to check that parsing
         succeeded.  */
      delete_cookie (c);
    }

  for (i = 0; i < countof (tests_fail); i++)
    {
      struct cookie *c;
      char *data = tests_fail[i];
      c = parse_set_cookie (data, true);
      if (c)
        {
          printf ("Failed to report error on invalid data: %s\n", data);
          delete_cookie (c);
        }
    }
}
#endif /* TEST_COOKIES */
1540