1 /* retawq/cookie.c - HTTP state management
2    This file is part of retawq (<http://retawq.sourceforge.net/>), a network
3    client created by Arne Thomassen; retawq is basically released under certain
4    versions of the GNU General Public License and WITHOUT ANY WARRANTY.
5    Read the file COPYING for license details, README for program information.
6    Copyright (C) 2001-2006 Arne Thomassen <arne@arne-thomassen.de>
7 */
8 
9 /* This code is part of the resource management; it's taken out for the only
10    reason that source files are smaller this way. Look how it's #include'd from
11    resource.c; maybe dirty, but simple, fast (inlined) and functional...
12 */
13 
14 #include <time.h> /* for ctime() */
15 
/* Set this to 1 in order to tokenize unquoted attribute values of Set-Cookie2
   headers strictly by the HTTP token rules; with 0, the lenient "real-life"
   tokenizer is used for every header. */
#define COOKIE_STRICT_ATTRVALUES 0

/* Artificial upper bounds for the number and size of cookies. They are much
   more generous than the minimum mentioned in RFC2965, 5.3, and they are
   really necessary as a protection against servers which go nuts or even
   start attacks intentionally (RFC2965, 5.3.1). */
#define COOKIE_MAXNUM_PER_HOST (40)   /* cookies stored per host */
#define COOKIE_MAXNUM_PORTS    (100)  /* port numbers per cookie */
#define COOKIE_MAXLEN          (10 * 1024) /* length of a cookie data string */
#define COOKIE_MAXNUM_SEND     (1000) /* cookies per "Cookie:" header */
26 
/* Yields non-zero if <ch> is a blank, a tab, a carriage return or a line feed.
   Note that <ch> is evaluated several times, so it must not have side
   effects. */
#define is_cookie_whitespace(ch) \
  ( ((ch) == ' ')  || \
    ((ch) == '\t') || \
    ((ch) == '\r') || \
    ((ch) == '\n') )
29 
/* State bits of a cookie; they are combined in tCookie.flags. */
enum
{ coofNone       = 0,
  coofUseExpiry  = 0x01, /* the expiry time of the cookie is meaningful */
  coofExpired    = 0x02, /* the expiry time was found to have passed */
  coofNotToDisk  = 0x04, /* set for the "discard" attribute */
  coofSc2        = 0x08, /* (presumably: came from Set-Cookie2 - confirm) */
  coofPort       = 0x10, /* a "port" attribute was given */
  coofPortvalues = 0x20, /* the "port" attribute contained port numbers */
  coofTlsOnly    = 0x40  /* "secure" attribute, received via a TLS-like
                            protocol */
};
typedef unsigned char tCookieFlags; /* (all bits fit into one byte) */

typedef unsigned char tCookieVersion; /* value of the "version" attribute */
37 
38 typedef struct tCookie
39 { struct tCookie* next;
40   char *name, *value, *path, *portstr, *comment;
41   const char* domain;
42   tPortnumber* portlist; /* (in network byte order) */
43   time_t expiry;
44   unsigned char portlistlen;
45   tCookieVersion version;
46   tCookieFlags flags;
47 } tCookie;
48 
49 
50 /* Helper functions */
51 
52 static one_caller __sallocator /*@out@*/ tCookie* __callocator
cookie_allocate(void)53   cookie_allocate(void)
54 { return((tCookie*) __memory_allocate(sizeof(tCookie), mapOther));
55 }
56 
__cookie_deallocate(const tCookie * cookie)57 static void __cookie_deallocate(const tCookie* cookie)
58 { __dealloc(cookie->name); __dealloc(cookie->value); __dealloc(cookie->domain);
59   __dealloc(cookie->path); __dealloc(cookie->portstr);
60   __dealloc(cookie->comment); __dealloc(cookie->portlist);
61 }
62 
cookie_deallocate(const tCookie * cookie)63 static my_inline void cookie_deallocate(/*@only@*/ const tCookie* cookie)
64 { __cookie_deallocate(cookie);
65   memory_deallocate(cookie);
66 }
67 
has_embedded_dots(const char * str)68 static tBoolean has_embedded_dots(const char* str)
69 { size_t len = strlen(str);
70   if (len < 3) return(falsE); /* can't have _any_ "embedded" thing */
71   return(cond2boolean(my_strnchr(str + 1, '.', len - 2) != NULL));
72 }
73 
static const char* effective_hostname(const char* hostname, tBoolean want_copy)
/* calculates the "effective hostname" of <hostname> (RFC2965, 1.): a name
   without any dot gets ".local" appended. If the name contains a dot, the
   result is <hostname> itself or - if <want_copy> - a duplicate of it; a
   name with ".local" appended is always newly allocated. */
{ char* spfbuf;
  if (my_strchr(hostname, '.') != NULL) /* the most likely case */
    return(want_copy ? my_strdup(hostname) : hostname);
  my_spf(NULL, 0, &spfbuf, "%s.local", hostname);
  return(my_spf_use(spfbuf));
}
86 
/* Call this if you didn't want a copy from effective_hostname(): <orig> is
   the hostname which was passed to that function, <eff> is the result. The
   result is deallocated only if it is a new string, i.e. differs from <orig>.
   (The arguments are parenthesized in the comparison so that arbitrary
   expressions can be passed safely.) */
#define effective_hostname_cleanup(orig, eff) \
  do { if ((eff) != (orig)) memory_deallocate(eff); } while (0)
90 
is_hostname_numerical(const char * hostname)91 static tBoolean is_hostname_numerical(const char* hostname)
92 /* returns whether a hostname "looks" like a numerical IP address; we can't
93    simply check for a "num.num.num.num" pattern, e.g. because IPv6 addresses
94    look different; and we also can't use things like inet_addr() or inet_pton()
95    because that's not portable; however, the current algorithm is wrong too, so
96    FIXME! */
97 { const size_t len = strlen(hostname);
98   char a, z; /* first and last character of <hostname> */
99   return(cond2boolean( (len > 0) && ( (a = *hostname) >= '0' ) && (a <= '9')
100     && ( (z = hostname[len - 1]) >= '0' ) && (z <= '9') ));
101 }
102 
103 #define is_hostname_hdn(hostname) (!is_hostname_numerical(hostname))
104 
cookie_domainmatch(const char * A,const char * B)105 static tBoolean cookie_domainmatch(const char* A, const char* B)
106 /* domain-match relation (RFC2965, 1.); case-insensitivity is given because all
107    domain strings were converted to lowercase */
108 { if (!strcmp(A, B)) return(truE);
109   if ( (*B == '.') && is_suffix(A, B) && is_hostname_hdn(B + 1) &&
110        is_hostname_hdn(A) )
111     return(truE);
112   return(falsE);
113 }
114 
cookie_pathmatch(const char * P1,const char * P2)115 static one_caller tBoolean cookie_pathmatch(const char* P1, const char* P2)
116 /* path-match relation (RFC2965, 1.) - or was that "patch-math"? :-) */
117 { const size_t len2 = strlen(P2);
118   return(cond2boolean( (len2 <= strlen(P1)) && (!strncmp(P1, P2, len2)) ));
119 }
120 
cookie_expired(tCookie * cookie)121 static tBoolean cookie_expired(tCookie* cookie)
122 { tCookieFlags flags = cookie->flags;
123   if ( (!(flags & coofExpired)) && (flags & coofUseExpiry) &&
124        (cookie->expiry <= my_time()) )
125   { cookie->flags |= coofExpired; flags = cookie->flags; }
126   return(cond2boolean(flags & coofExpired));
127 }
128 
static void cookie_remove(tCachedHostInformation* hostinfo,
  const tCookie* cookie)
/* detaches the <cookie> from the <hostinfo> cookie list and deallocates it;
   the cookie is deallocated and the counter decremented even if the cookie
   isn't found in the list ("should not happen") */
{ tCookie *prev = NULL, *cur = hostinfo->cookies;
  while ( (cur != NULL) && (cur != cookie) ) { prev = cur; cur = cur->next; }
  if (cur != NULL) /* found it, so unlink it */
  { if (prev == NULL) hostinfo->cookies = cookie->next; /* was list head */
    else prev->next = cookie->next;
  }
  cookie_deallocate(cookie);
  if (hostinfo->cookiecount > 0) /* "should" be true */
    hostinfo->cookiecount--;
}
145 
cookie_remove_expired(tCachedHostInformation * hostinfo)146 static one_caller void cookie_remove_expired(tCachedHostInformation* hostinfo)
147 /* removes all expired cookies from the hostinfo's cookie list */
148 { tCookie* cookie = hostinfo->cookies;
149   while (cookie != NULL)
150   { tCookie* next = cookie->next;
151     if (cookie_expired(cookie)) cookie_remove(hostinfo, cookie);
152     cookie = next;
153   }
154 }
155 
156 static one_caller /*@null@*/ tCookie*
cookie_lookup(const tCachedHostInformation * hostinfo,const tCookie * cookie)157   cookie_lookup(const tCachedHostInformation* hostinfo, const tCookie* cookie)
158 /* returns a cookie from the hostinfo->cookies list which is "the same" as
159    <cookie>; RFC2965, 3.3.3: "If a user agent receives a Set-Cookie2 response
160    header whose NAME is the same as that of a cookie it has previously stored,
161    the new cookie supersedes the old when: the old and new Domain attribute
162    values compare equal, using a case-insensitive string-compare; and, the old
163    and new Path attribute values string-compare equal (case-sensitive)." Case-
164    insensitivity for domain strings is given because they were converted to
165    lowercase. */
166 { tCookie* c = hostinfo->cookies;
167   if (c != NULL)
168   { const char *name = cookie->name, *domain = null2empty(cookie->domain),
169       *path = null2empty(cookie->path);
170     while (c != NULL)
171     { if ( (!strcmp(c->name, name)) &&
172            (!strcmp(null2empty(c->domain), domain)) &&
173            (!strcmp(null2empty(c->path), path)) )
174         break; /* found */
175       c = c->next;
176     }
177   }
178   return(c);
179 }
180 
cookie_config_allows(const tResource * resource)181 static tBoolean cookie_config_allows(const tResource* resource)
182 /* returns whether the configuration allows to store/send/... any cookies for
183    the <resource> */
184 { tBoolean retval = falsE; /* default */
185   const tConfigCookie* cc;
186   const tCachedHostInformation* hostinfo;
187   const char* hostname;
188   switch (resource->protocol)
189   { case rpHttp: cc = config.http_cookies; break;
190 #if OPTION_TLS
191     case rpHttps: cc = config.https_cookies; break;
192 #endif
193     default: cc = NULL; break;
194   }
195   if (cc == NULL) goto out; /* nothing allowed */
196   if ( (hostinfo = resource2textual_host(resource)) == NULL ) goto out;
197   if ( ( (hostname = hostinfo->hostname) == NULL ) || (*hostname == '\0') )
198     goto out;
199   while (cc != NULL)
200   { const char* pattern = cc->hosts_pattern;
201     if ( (pattern != NULL) && (my_pattern_matcher(pattern, hostname)) )
202     { if (cc->flags & ccfAllowed) retval = truE;
203       break;
204     }
205     cc = cc->next;
206   }
207   out:
208   return(retval);
209 }
210 
211 
212 /* Expiry values (date-time parsing) */
213 
/* Month data for the date-time parser. The rows are sorted alphabetically by
   <name>, so that the table can be binary-searched; <monthnumber> and
   <namelen> belong to the name in the same row. CAUTION: <monthlength> does
   _not_ belong to the name in the same row! That column is in real-world
   calendar order instead, i.e. row i contains the number of days of month
   number i+1 in a non-leap year (row 0: January, row 1: February, ...). */
static const struct
{ const char* name;
  unsigned char monthnumber; /* 1..12 */
  unsigned char namelen;     /* strlen(name) */
  unsigned char monthlength; /* (calendar order - see above) */
} month_table[12] =
{ /* name   monthnumber  namelen  monthlength */
  { "april",     4,         5,       31 },
  { "august",    8,         6,       28 },
  { "december", 12,         8,       31 },
  { "february",  2,         8,       30 },
  { "january",   1,         7,       31 },
  { "july",      7,         4,       30 },
  { "june",      6,         4,       31 },
  { "march",     3,         5,       31 },
  { "may",       5,         3,       30 },
  { "november", 11,         8,       31 },
  { "october",  10,         7,       30 },
  { "september", 9,         9,       31 }
};
232 
/* nomod(y, d): whether <y> is divisible by <d> without remainder.
   is_leapyear(y): Gregorian leap year rule - divisible by 4, except for
   centuries which aren't divisible by 400.
   The macro arguments are fully parenthesized, so expressions like
   "base + offset" can be passed safely; they are evaluated several times, so
   they must not have side effects. */
#define nomod(y, d) ( ((y) % (d)) == 0 )
#define is_leapyear(y) ( nomod(y, 4) && (nomod(y, 400) || (!nomod(y, 100))) )
235 
month_maxday(unsigned char month,unsigned short year)236 static one_caller unsigned char month_maxday(unsigned char month, /* 1..12 */
237   unsigned short year)
238 /* maxday is the lexicographical predecessor of mayday... :-) */
239 { unsigned char maxday = month_table[month - 1].monthlength;
240   if ( (month == 2) && (is_leapyear(year)) ) maxday++; /* February */
241   return(maxday);
242 }
243 
dmy2yday(unsigned char day,unsigned char month,unsigned short year)244 static one_caller unsigned short dmy2yday(unsigned char day,
245   unsigned char month /*1..12*/, unsigned short year)
246 /* transforms a date to the corresponding year-day */
247 { static const unsigned short daysum[12] =
248   { 0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334 };
249   unsigned short yday = daysum[month - 1];
250   if ( (month > 2) && (is_leapyear(year)) ) yday++;
251   yday += day;
252   yday--; /* year-day values are zero-based ("days since January 1") */
253   return(yday);
254 }
255 
parse_month_long(const char * str)256 static one_caller tMbsIndex parse_month_long(const char* str)
257 { my_binary_search(0, 11, streqcase3(str, month_table[idx].name),
258     return(idx))
259 }
260 
parse_month_short(const char * str)261 static one_caller tMbsIndex parse_month_short(const char* str)
262 { my_binary_search(0, 11, strneqcase3(str, month_table[idx].name, 3),
263     return(idx))
264 }
265 
266 static const_after_init time_t localtime_offset = 0;
267 
do_parse_datetime(const char * str,const char * pattern,time_t * _t)268 static one_caller tBoolean do_parse_datetime(const char* str,
269   const char* pattern, /*@out@*/ time_t* _t)
270 { const char *s = str, *p = pattern;
271   time_t t, tm_year;
272   char ch;
273   unsigned short year, yday;
274   unsigned char day, month, timepart[3], hour, minute, second;
275   year = yday = 0;
276   day = month = timepart[0] = timepart[1] = timepart[2]
277       = hour = minute = second = 0;
278   while ( (ch = *p++) != '\0' )
279   { if (ch == 'w') /* weekday */
280     { /* (We accept any combination of letters and ignore them.) */
281       if (my_isalpha(*s)) { do { s++; } while (my_isalpha(*s)); }
282       else goto failed;
283     }
284     else if (ch == 'W') /* _optional_ weekday, followed by "," and opt. ws */
285     { if (my_isalpha(*s))
286       { do { s++; } while (my_isalpha(*s));
287         if (*s++ != ',') goto failed;
288         while (is_cookie_whitespace(*s)) s++;
289       }
290     }
291     else if (ch == 'd') /* day */
292     { if (!my_isdigit(*s)) goto failed;
293       day = *s++ - '0';
294       if (my_isdigit(*s)) { day = 10 * day + (*s++ - '0'); }
295     }
296     else if (ch == 'm') /* month name */
297     { tMbsIndex idx;
298       if (!my_isalpha(*s)) goto failed; /* (quick pre-check) */
299       idx = parse_month_long(s);
300       if (idx >= 0) s += month_table[idx].namelen;
301       else
302       { idx = parse_month_short(s);
303         if (idx < 0) goto failed;
304         s += 3;
305       }
306       month = month_table[idx].monthnumber;
307     }
308     else if (ch == 'y') /* year */
309     { /* Some servers send only a single digit as the year... For "low" year
310          values, retawq follows RFC2822, 4.3 (which seems to be the most recent
311          definition of this transformation): "If a two digit year is
312          encountered whose value is between 00 and 49, the year is interpreted
313          by adding 2000, ending up with a value between 2000 and 2049. If a two
314          digit year is encountered with a value between 50 and 99, or any three
315          digit year is encountered, the year is interpreted by adding 1900." */
316       unsigned char i;
317       if (!my_isdigit(*s)) goto failed;
318       i = 0;
319       yearloop:
320       year = 10 * year + (*s++ - '0');
321       if ( (i < 3) && (my_isdigit(*s)) ) { i++; goto yearloop; }
322       if (year <= 49) year += 2000;
323       else if (year <= 999) year += 1900;
324       /* "else": hope it's usable, don't change it */
325     }
326     else if (ch == 't') /* time */
327     { unsigned short i;
328       handle_time:
329       i = 0;
330       timeloop: /* (Nah, not what you think...:-) */
331       if (!my_isdigit(*s)) goto failed;
332       timepart[i] = *s++ - '0';
333       if (my_isdigit(*s)) timepart[i] = 10 * timepart[i] + (*s++ - '0');
334       if ( (i < 2) && (*s == ':') ) { i++; s++; goto timeloop; }
335     }
336     else if (ch == 'T') /* _optional_ time at _end_ of string */
337     { while (is_cookie_whitespace(*s)) s++;
338       if (my_isdigit(*s)) goto handle_time;
339     }
340     else if (ch == ' ') /* whitespace */
341     { if (is_cookie_whitespace(*s))
342       { do { s++; } while (is_cookie_whitespace(*s)); }
343       else goto failed;
344     }
345     else if (*s++ != ch) goto failed; /* exact character match required */
346   }
347 
348   /* Parsing worked, now convert the data: */
349   if (year < 2006) { t = 0; goto done; } /* in the past - ignore details */
350   else if (year > 2020) year = 2020; /* "far enough" in the future for us */
351   hour = timepart[0]; minute = timepart[1]; second = timepart[2];
352   if (hour > 23) hour = 23;
353   if (minute > 59) minute = 59;
354   if (second > 59) second = 59;
355   if (month <= 0) month = 1;
356   else if (month > 12) month = 12;
357   if (day <= 0) day = 1;
358   else
359   { const unsigned char maxday = month_maxday(month, year);
360     if (day > maxday) day = maxday;
361   }
362   yday = dmy2yday(day, month, year);
363   tm_year = (time_t) (year - 1900);
364 
365   /* The following algorithm is taken from SUSv3 ("Base Definitions -> General
366      Concepts -> Seconds Since the Epoch"); we can't use library functions like
367      mktime() because they are often buggy or not portable. */
368   t = ((time_t) second) + ((time_t) minute)*60 + ((time_t) hour)*3600 +
369       ((time_t) yday)*86400 + (tm_year-70)*31536000 + ((tm_year-69)/4)*86400 -
370       ((tm_year-1)/100)*86400 + ((tm_year+299)/400)*86400;
371   /* ...and the next line finally converts UTC/GMT to local time. */
372   t += localtime_offset;
373 
374   done:
375   *_t = t;
376   return(truE);
377   failed:
378   return(falsE);
379 }
380 
parse_datetime(const char * str,time_t * t)381 static tBoolean parse_datetime(const char* str, /*@out@*/ time_t* t)
382 /* tries to convert an "expires" cookie attribute value string to a time_t and
383    returns whether that worked; we're very lenient in order to handle as many
384    (buggy) real-world expiry strings as possible... */
385 { enum { num = 3 };
386   /* Currently, only the formats mentioned in RFC2616, 3.3.1, are handled: */
387   static const char* const pattern[num] =
388   { "Wd m yT",  /* RFC822/1123/2822: "[ day "," ] dd mm [yy]yy hh:mm:ss zzz" */
389     "Wd-m-yT",  /* RFC850 (obsolete): "Weekday, DD-Mon-YY HH:MM:SS TIMEZONE" */
390     "w m d t y" /* asctime(), ctime() */
391   };
392   unsigned char i;
393   for (i = 0; i < num; i++)
394   { if (do_parse_datetime(str, pattern[i], t)) return(truE);
395   }
396   return(falsE); /* couldn't convert it - format unknown */
397 }
398 
399 
400 /* Storing */
401 
cookie_accept_any(const tResource * resource)402 static one_caller tBoolean cookie_accept_any(const tResource* resource)
403 /* returns whether any cookies may be accepted (i.e. stored in a hostinfo
404    cookie list) for the <resource> */
405 {
406 #if OPTION_LOCAL_CGI
407 #define not_local_cgi (resource->protocol != rpLocalCgi) &&
408 #else
409 #define not_local_cgi /* nothing */
410 #endif
411   return(cond2boolean(cookie_config_allows(resource) && not_local_cgi /* && */
412     (!(resource->flags & (rfIsRedirection | rfIsEmbedded))) ));
413   /* (redirection/embedded: RFC2965, 3.3.6) */
414 #undef not_local_cgi
415 }
416 
cookie_storable_c(const tCookie * cookie)417 static one_caller tBoolean cookie_storable_c(const tCookie* cookie)
418 /* returns whether the cookie components have a decent size */
419 { const char *name = cookie->name, *value = cookie->value,
420     *domain = cookie->domain, *path = cookie->path, *portstr = cookie->portstr,
421     *comment = cookie->comment;
422   if ( (strlen(name) > COOKIE_MAXLEN) ||
423        ( (value != NULL) && (strlen(value) > COOKIE_MAXLEN) ) ||
424        ( (domain != NULL) && (strlen(domain) > COOKIE_MAXLEN) ) ||
425        ( (path != NULL) && (strlen(path) > COOKIE_MAXLEN) ) ||
426        ( (portstr != NULL) && (strlen(portstr) > COOKIE_MAXLEN) ) ||
427        ( (comment != NULL) && (strlen(comment) > COOKIE_MAXLEN) ) )
428     return(falsE); /* crumblet too big to swallow */
429   return(truE);
430 }
431 
cookie_storable_h(tCachedHostInformation * hostinfo)432 static one_caller tBoolean cookie_storable_h(tCachedHostInformation* hostinfo)
433 /* returns whether there's enough room left in the hostinfo's cookie list to
434    store one more cookie. */
435 { if (hostinfo->cookiecount <= COOKIE_MAXNUM_PER_HOST) return(truE);
436   else
437   { /* Try to remove an expired cookie to get free room: */
438     tCookie* c = hostinfo->cookies;
439     tBoolean removed_one = falsE;
440     while (c != NULL)
441     { if (cookie_expired(c))
442       { cookie_remove(hostinfo, c); removed_one = truE; break; }
443       c = c->next;
444     }
445     return(removed_one);
446   }
447 }
448 
static void cookie_skip_whitespace(const char** _ptr)
/* advances <*_ptr> to the first character which isn't cookie whitespace */
{ const char* cursor = *_ptr;
  while (is_cookie_whitespace(*cursor)) cursor++;
  *_ptr = cursor;
}
458 
/* Cookie attribute names ("can"); the values are indexes into strCan[], so
   the order here must be the same as the order of the strings there. */
enum
{ canDontCare   = 0, /* unknown/uninteresting attribute */
  canComment    = 1,
  canCommentUri = 2,
  canDiscard    = 3,
  canDomain     = 4,
  canExpires    = 5,
  canMaxAge     = 6,
  canPath       = 7,
  canPort       = 8,
  canSecure     = 9,
  canVersion    = 10
};
typedef unsigned char tCookieAttributeName;
#define MAX_CAN (10) /* the highest can... value */
466 
467 static const char* const strCan[MAX_CAN + 1] =
468 { strA /*don't care*/, "comment", "commenturl", "discard", strDomain,
469   "expires", "max-age", "path", "port", "secure", strVersion
470 };
471 
cookie_do_lookup_attrname(const char * str)472 static one_caller tMbsIndex cookie_do_lookup_attrname(const char* str)
473 { my_binary_search(0, MAX_CAN, streqcase3(str, strCan[idx]), return(idx))
474 }
475 
cookie_lookup_attrname(const char * str)476 static one_caller tCookieAttributeName cookie_lookup_attrname(const char* str)
477 { tMbsIndex idx = cookie_do_lookup_attrname(str);
478   if (idx < 0) idx = 0; /* canDontCare */
479   return((tCookieAttributeName) idx);
480 }
481 
#if COOKIE_STRICT_ATTRVALUES
/* whether the header text which is currently handled came from a Set-Cookie2
   header; set by cookie_handle_text(), read by cookie_token() */
static tBoolean cookie_used_setcookie2;

static one_caller tBoolean is_tokenchar(unsigned char ch)
/* returns whether <ch> may appear in an HTTP token (RFC2616, 2.2); the
   decision is taken from a 256-bit bitfield (one bit per character value) in
   which the bits of all non-token characters are set */
{ static const unsigned char notok[32] =
  { 255, 255, 255, 255,   5, 147,   0, 252,
      1,   0,   0,  56,   0,   0,   0, 168,
      0,   0,   0,   0,   0,   0,   0,   0,
      0,   0,   0,   0,   0,   0,   0,   0 };
  return(cond2boolean(!my_bit_test(notok, ch)));
}
#endif
491 
492 static __sallocator /*@notnull@*/ char* __callocator
cookie_token(const char ** _ptr,tBoolean is_value,tBoolean is_expires_value)493   cookie_token(const char** _ptr, tBoolean is_value, tBoolean is_expires_value)
494 /* returns a single cookie token (or quoted string) */
495 { const char *ptr = *_ptr, *start, *ptr_wss = NULL;
496   char ch, *retval;
497   unsigned char* utemp;
498   if ( (is_value) && (*ptr == '"') ) /* quoted string */
499   { ptr++; start = ptr;
500     while (1)
501     { ch = *ptr;
502       if ( (ch == '\0') || (ch == '"') ) /* reached end of string */
503         break;
504       ptr++;
505     }
506     if (ch == '"') ptr_wss = ptr + 1;
507   }
508   else /* unquoted token */
509   { start = ptr;
510 #if COOKIE_STRICT_ATTRVALUES
511     /* This is the strictly correct algorithm for RFC2965... */
512     if (cookie_used_setcookie2)
513     { while (is_tokenchar(*ptr)) ptr++;
514     }
515     else
516 #endif
517     /* ...and this is the algorithm that should work with all those header
518        lines in real life which contain unquoted attribute values with
519        non-token characters, e.g. path=/ */
520     { while (1)
521       { ch = *ptr;
522         if ( (ch == '\0') || (ch == ';') ||
523              ( (!is_value) && ((ch == '=') || (is_cookie_whitespace(ch))) ) )
524           break;
525         if ( (ch == ',') && (!is_expires_value) )
526         { /* Handle a horrible syntax definition bug: in RFC2109, "," is used
527              as a separator of different cookies in a set-cookie header; but in
528              Netscape's proposal there are "expires=..." attributes which can
529              have a "," after a weekday name. We try to distinguish these cases
530              this way: if is_expires_value, the cookie follows Netscape's
531              proposal and not RFC2109 (because the latter doesn't define an
532              "expires=..." attribute), so the "," isn't a cookie separator but
533              a normal attrvalue character. So we _break_ on a "," only if we're
534              _not_ inside an "expires=..." attrvalue. (Note that Netscape's
535              proposal doesn't use "," for cookie separation because it doesn't
536              define cookie separation at all.) */
537           break;
538         }
539         ptr++;
540       }
541       if ( (is_value) && (ptr > start) ) /* trim trailing whitespace */
542       { ptr--;
543         while (ptr > start)
544         { ch = *ptr;
545           if (!is_cookie_whitespace(ch)) break;
546           ptr--;
547         }
548         ptr++;
549       }
550     }
551   }
552   retval = my_strndup(start, ptr - start);
553   utemp = (unsigned char*) retval;
554   while (1)
555   { const unsigned char uch = *utemp;
556     if (uch == '\0') break;
557     else if (is_control_char(uch)) *utemp = '?';
558     utemp++;
559   }
560   if (ptr_wss != NULL) ptr = ptr_wss; /* setup for whitespace-skipping */
561   cookie_skip_whitespace(&ptr);
562   *_ptr = ptr;
563   return(retval);
564 }
565 
/* whether the bit for the attribute <can> is set in the bitfield variable
   "canbits", which must be in scope where this macro is used */
#define is_attr_set(can) my_bit_test(canbits, can)
567 
cookie_handle_text(tResource * resource,const char * text,tBoolean used_setcookie2)568 static one_caller void cookie_handle_text(tResource* resource,
569   const char* text, tBoolean used_setcookie2)
570 /* decides whether the cookie data (an HTTP header text snippet) may be stored
571    in the hostinfo cookie list and stores it if so; this function may only be
572    called if a former cookie_accept_any() call agreed. */
573 { enum { canbytes = (((MAX_CAN + 1) + 7) / 8) };
574   unsigned char canbits[canbytes]; /* bitfield */
575   tCachedHostInformation* hostinfo = resource2textual_host(resource);
576   tCookie cookie, *lcookie; /* ("l" as in "lookup") */
577 
578   if (hostinfo == NULL) return; /* "should not happen" */
579 #if COOKIE_STRICT_ATTRVALUES
580   cookie_used_setcookie2 = used_setcookie2;
581 #endif
582 
583   /* Parse the HTTP header text for a single cookie */
584 
585   loop:
586   my_memclr_var(cookie); my_memclr_arr(canbits);
587   cookie.name = cookie_token(&text, falsE, falsE);
588   if ( (cookie.name[0] == '\0') || (*text != '=') )
589   { /* fundamental syntax error; ignore all the further text */
590     memory_deallocate(cookie.name);
591     return;
592   }
593   text++; cookie_skip_whitespace(&text);
594   cookie.value = cookie_token(&text, truE, falsE);
595 
596   while (*text == ';')
597   { char *attrname, *attrvalue;
598     tCookieAttributeName can;
599 
600     text++; cookie_skip_whitespace(&text);
601     attrname = cookie_token(&text, falsE, falsE);
602     can = cookie_lookup_attrname(attrname);
603     memory_deallocate(attrname);
604 
605     if (*text != '=') attrvalue = NULL;
606     else
607     { text++; cookie_skip_whitespace(&text);
608       attrvalue = cookie_token(&text, truE, cond2boolean(can == canExpires));
609     }
610 
611     if (is_attr_set(can))
612     { /* RFC2965, 3.2.2: "If an attribute appears more than once in a cookie,
613          the client SHALL use only the value associated with the first
614          appearance of the attribute." */
615       goto skip_attribute;
616     }
617     my_bit_set(canbits, can);
618 
619     switch (can)
620     { case canComment: case canCommentUri:
621         if ( (attrvalue != NULL) && (*attrvalue != '\0') )
622         { tBoolean is_uri = cond2boolean(can == canCommentUri);
623           const char* comment = cookie.comment;
624           char* spfbuf;
625           if (comment != NULL) /* already stored text or URI formerly */
626           { my_spf(NULL, 0, &spfbuf, (is_uri ? "%s <%s>" : "%s %s"), comment,
627               attrvalue);
628             memory_deallocate(comment); /* forget old text */
629             cookie.comment = my_spf_use(spfbuf); /* store new text */
630           }
631           else
632           { if (is_uri)
633             { my_spf(NULL, 0, &spfbuf, "<%s>", attrvalue);
634               cookie.comment = my_spf_use(spfbuf);
635             }
636             else { cookie.comment = attrvalue; attrvalue = NULL; }
637           }
638         }
639         break;
640       case canDiscard: cookie.flags |= coofNotToDisk; break;
641       case canDomain:
642         if ( (attrvalue != NULL) && (*attrvalue != '\0') )
643         { if ( (used_setcookie2) && (*attrvalue != '.') )
644           { /* RFC2965, 3.2.2: we must prepend a dot */
645             char* d = __memory_allocate(strlen(attrvalue) + 1 + 1, mapString);
646             *d = '.'; my_strcpy_tolower(d + 1, attrvalue); cookie.domain = d;
647           }
648           else cookie.domain = my_strdup_tolower(attrvalue);
649         }
650         break;
651       case canExpires:
652         if ( (attrvalue != NULL) && (*attrvalue != '\0') &&
653              (!is_attr_set(canMaxAge)) && (!used_setcookie2) )
654         { /* (We don't let the old-fashioned canExpires override canMaxAge. And
655               we don't handle canExpires attrvalues for set-cookie2 headers
656               ("should not" appear there anyway, but...) due to the "," anomaly
657               - cf. the is_expires_value comment above.) */
658           time_t t;
659           if (parse_datetime(attrvalue, &t))
660           { cookie.expiry = t; cookie.flags |= coofUseExpiry; }
661         }
662         break;
663       case canMaxAge:
664         /* IMPLEMENTME: make this more precise, using Date and Age headers! */
665         if ( (attrvalue != NULL) && (my_isdigit(*attrvalue)) )
666         { int i;
667           my_atoi(attrvalue, &i, NULL, MY_ATOI_INT_MAX);
668           cookie.expiry = my_time() + ((time_t) i);
669           cookie.flags |= coofUseExpiry;
670         }
671         break;
672       case canPath: cookie.path = attrvalue; attrvalue = NULL; break;
673       case canPort:
674         cookie.flags |= coofPort;
675         if (attrvalue != NULL) cookie.portstr = my_strdup(attrvalue);
676         if ( (attrvalue != NULL) && (my_isdigit(*attrvalue)) )
677         { char *s = attrvalue, *p; /* start, ptr */
678           tPortnumber portnum[COOKIE_MAXNUM_PORTS];
679           unsigned short portcount = 0;
680           char ch;
681           int i;
682           next_port:
683           p = s;
684           while (1)
685           { ch = *p;
686             if (ch == '\0') break;
687             else if (ch == ',') { *p = '\0'; break; }
688             p++;
689           }
690           my_atoi(s, &i, NULL, 99999);
691           if ( (i >= 0) && (i <= 65535) ) /* store it (unless duplicate) */
692           { const tPortnumber port = (tPortnumber) htons((tPortnumber) i);
693             tBoolean found = falsE;
694             unsigned short count;
695             for (count = 0; count < portcount; count++)
696             { if (portnum[count] == port) { found = truE; break; }
697             }
698             if (!found) portnum[portcount++] = port;
699           }
700           if ( (ch != '\0') && (portcount < COOKIE_MAXNUM_PORTS) )
701           { s = p + 1; goto next_port; }
702           if (portcount > 0) /* actually got some port numbers */
703           { const size_t size = portcount * sizeof(tPortnumber);
704             tPortnumber* pn = (tPortnumber*) __memory_allocate(size, mapOther);
705             my_memcpy(pn, portnum, size);
706             cookie.portlist = pn; cookie.portlistlen = portcount;
707             cookie.flags |= coofPortvalues;
708           }
709         }
710         else if (used_setcookie2) /* RFC2965, 3.3.4, "Port Selection", 2. */
711         { tPortnumber* p = cookie.portlist =
712             (tPortnumber*) __memory_allocate(sizeof(tPortnumber), mapOther);
713           *p = resource->uri_data->portnumber; cookie.portlistlen = 1;
714         }
715         break;
716 #if OPTION_TLS
717       case canSecure:
718         /* RFC2965, 3.2.2: "When it sends a "secure" cookie back to a server,
719            the user agent SHOULD use no less than the same level of security as
720            was used when it received the cookie from the server." */
721         if (is_tlslike(resource->protocol)) cookie.flags |= coofTlsOnly;
722         break;
723 #endif
724       case canVersion:
725         if (attrvalue != NULL)
726         { int i;
727           my_atoi(attrvalue, &i, NULL, 99);
728           cookie.version = (tCookieVersion) i;
729         }
730         break;
731     }
732     skip_attribute:
733     __dealloc(attrvalue);
734   }
735 
736   /* Handle this cookie */
737 
738   if (cookie.name[0] == '$') goto ignore_cookie; /* RFC2965, 3.2.2 */
739   if ( (used_setcookie2) && (!is_attr_set(canVersion)) )
740     goto ignore_cookie; /* RFC2965, 3.3.2 */
741   if (cookie.domain != NULL)
742   { const char *domain = cookie.domain, *reshn;
743     if (used_setcookie2)
744     { if ( (!has_embedded_dots(domain)) && (!strcmp(domain, ".local")) )
745         goto ignore_cookie; /* RFC2965, 3.3.2, *2 */
746     }
747     else
748     { if ( (!has_embedded_dots(domain)) || (*domain != '.') )
749         goto ignore_cookie; /* RFC2109, 4.3.2, *2 */
750     }
751     if ( (reshn = resource2textual_host(resource)->hostname) != NULL )
752     { const char* eff = effective_hostname(reshn, falsE);
753       tBoolean is_good = cookie_domainmatch(eff, domain);
754       effective_hostname_cleanup(reshn, eff);
755       if (!is_good) goto ignore_cookie; /* RFC2965, 3.3.2, *3 */
756       if ( (is_hostname_hdn(reshn)) && (is_suffix(reshn, domain)) )
757       { ssize_t pos = (ssize_t) (strlen(reshn) - strlen(domain));
758         while (--pos >= 0)
759         { if (reshn[pos] == '.') goto ignore_cookie; /* RFC2965, 3.3.2, *4 */
760         }
761       }
762     }
763   }
764   if (cookie.path != NULL)
765   { const char* uri_path = null2empty(resource->uri_data->path);
766     const size_t plen = strlen(cookie.path), uri_plen = strlen(uri_path);
767     if ( (plen > uri_plen) || (strncmp(cookie.path, uri_path, plen)) )
768       goto ignore_cookie; /* RFC2965, 3.3.2, *1 */
769   }
770   if (cookie.portlist != NULL)
771   { const tPortnumber resport = resource->uri_data->portnumber,
772       *p = cookie.portlist;
773     unsigned short i;
774     tBoolean found = falsE;
775     for (i = 0; i < cookie.portlistlen; i++)
776     { if (p[i] == resport) { found = truE; break; } }
777     if (!found) goto ignore_cookie; /* RFC2965, 3.3.2, *5 */
778   }
779 
780   if (cookie.domain == NULL) /* use default (RFC2965, 3.3.1) */
781   { const char* hostname = resource2textual_host(resource)->hostname;
782     cookie.domain = effective_hostname(null2empty(hostname), truE);
783   }
784   if (cookie.path == NULL) /* use default (RFC2965, 3.3.1) */
785   { const char *path = resource->uri_data->path, *slash;
786     if ( (path != NULL) && ( (slash = my_strrchr(path, chDirsep)) != NULL ) )
787       cookie.path = my_strndup(path, slash - path + 1);
788     else cookie.path = my_strdup(strSlash);
789   }
790   if (used_setcookie2) cookie.flags |= coofSc2;
791 
792   if (!cookie_storable_c(&cookie)) goto ignore_cookie; /* too big */
793   lcookie = cookie_lookup(hostinfo, &cookie);
794   if (lcookie != NULL) /* a cookie with that name/... already exists */
795   { if ( ( (cookie.flags & coofSc2) || (!(lcookie->flags & coofSc2)) ) &&
796          (cookie.version >= lcookie->version) )
797     { /* (The condition is a consequence of RFC2965, 9.1.) */
798       resource->flags |= rfCookieStorer;
799       if (cookie_expired(&cookie)) cookie_remove(hostinfo, lcookie); /*forget*/
800       else { __cookie_deallocate(lcookie); *lcookie=cookie; goto next_cookie; }
801     }
802   }
803   else /* it's a "new" cookie */
804   { if ( (!cookie_expired(&cookie)) && (cookie_storable_h(hostinfo)) )
805     { resource->flags |= rfCookieStorer;
806       lcookie = cookie_allocate();
807       *lcookie = cookie; lcookie->next = hostinfo->cookies;
808       hostinfo->cookies = lcookie; hostinfo->cookiecount++;
809       goto next_cookie;
810     }
811   }
812 
813   ignore_cookie:
814   __cookie_deallocate(&cookie);
815 
816   /* Look for further cookies */
817 
818   next_cookie:
819   if (*text == ',') { text++; cookie_skip_whitespace(&text); goto loop; }
820 }
821 
822 #undef is_attr_set
823 
824 
825 /* Sending */
826 
typedef struct
{ const char* text; /* what gets sent for one cookie ("name=value...") */
  unsigned short domainlen; /* strlen() of the cookie's domain string */
  unsigned short pathlen; /* strlen() of the cookie's path string */
} tCookieSorterElement;

static int cookie_sorter(const void* _a, const void* _b)
/* qsort() comparison callback for tCookieSorterElement arrays: elements with
   longer paths come first (required by RFC2965, 3.3.4); among elements with
   equal path lengths, those with longer domains come first (voluntary) */
{ const tCookieSorterElement* const lhs = (const tCookieSorterElement*) _a;
  const tCookieSorterElement* const rhs = (const tCookieSorterElement*) _b;
  int diff = ((int) rhs->pathlen) - ((int) lhs->pathlen);
  if (diff == 0) /* same path length, so let the domain length decide */
    diff = ((int) rhs->domainlen) - ((int) lhs->domainlen);
  return(diff);
}
842 
cookie_collect(tResource * resource)843 static one_caller const char* cookie_collect(tResource* resource)
844 /* collects all cookies which should be sent for the <resource> */
845 { const char *retval = strEmpty, *reshn, *reseff, *respath;
846   tHashIndex bucket;
847   unsigned short count, maxcount; /* number of cookies to be sent */
848   /*@relnull@*/ tCookieSorterElement* sorter_base; /* cookie texts */
849   tCookieVersion highest_version;
850 
851   if (!cookie_config_allows(resource)) goto out0; /* the most likely case */
852   if (resource->flags & (rfIsRedirection | rfIsEmbedded)) /* RFC2965, 3.3.6 */
853     goto out0;
854 
855   reshn = resource2textual_host(resource)->hostname;
856   if (reshn == NULL) goto out0; /* "should not happen" */
857   reseff = effective_hostname(reshn, falsE);
858 
859   retval = my_strdup("Cookie2: $Version=\"1\"\r\n"); /* RFC2965, 3.3.5, 9.1 */
860   sorter_base = NULL; count = maxcount = 0; highest_version = 0;
861   respath = null2empty(resource->uri_data->path);
862 
863   /* Traverse the cookie lists of all hosts and collect applicable cookies */
864 
865   for (bucket = 0; bucket < HASHTABLESIZE_CHI; bucket++)
866   { tCachedHostInformation* hostinfo = chi_head[bucket];
867     while (hostinfo != NULL)
868     { tCookie* cookie = hostinfo->cookies;
869       tBoolean found_expired = falsE;
870       while (cookie != NULL)
871       { static const char strAttrPort[] = "; $Port";
872         const char *domain, *path, *quote, *spfdomain, *spfpath, *spfport;
873         char *spfbuf, *temp;
874         unsigned char portlistlen;
875         tCookieFlags flags;
876         tCookieVersion version;
877 
878         /* Check whether this cookie should/may be sent */
879 
880         if (cookie_expired(cookie)) { found_expired = truE; goto cont; }
881         flags = cookie->flags;
882         if ( (flags & coofTlsOnly)
883 #if OPTION_TLS
884              && (!is_tlslike(resource->protocol))
885 #endif
886            )
887           goto cont;
888         domain = cookie->domain;
889         if (!cookie_domainmatch(reseff, null2empty(domain))) goto cont;
890         path = cookie->path;
891         if (!cookie_pathmatch(respath, null2empty(path))) goto cont;
892 
893         portlistlen = cookie->portlistlen;
894         if (portlistlen > 0)
895         { tPortnumber resport = resource->uri_data->portnumber,
896             *p = cookie->portlist;
897           unsigned short i;
898           tBoolean allowed = falsE;
899           for (i = 0; i < portlistlen; i++)
900           { if (p[i] == resport) { allowed = truE; break; }
901           }
902           if (!allowed) goto cont;
903         }
904 
905         /* Okay, actually wanna send this cookie */
906 
907         if (count >= maxcount) /* need to allocate more space */
908         { if (maxcount >= COOKIE_MAXNUM_SEND) goto out; /* too many cookies */
909           maxcount += 20;
910           sorter_base = memory_reallocate(sorter_base,
911             maxcount * sizeof(tCookieSorterElement), mapOther);
912         }
913         version = cookie->version;
914         quote = ( (version > 0) ? strDoubleQuote : strEmpty );
915         if ( (domain == NULL) || (version == 0) ) spfdomain = strEmpty;
916         else
917         { my_spf(NULL, 0, &temp, "; $Domain=\"%s\"", domain);
918           spfdomain = my_spf_use(temp);
919         }
920         if ( (path == NULL) || (version == 0) ) spfpath = strEmpty;
921         else
922         { my_spf(NULL, 0, &temp, "; $Path=\"%s\"", path);
923           spfpath = my_spf_use(temp);
924         }
925 
926         if (flags & coofPort)
927         { const char* portstr = cookie->portstr;
928           if (portstr == NULL) spfport = strAttrPort;
929           else
930           { my_spf(NULL, 0, &temp, "%s=\"%s\"", strAttrPort, portstr);
931             spfport = my_spf_use(temp);
932           }
933         }
934         else spfport = strEmpty;
935 
936         my_spf(NULL, 0, &spfbuf, "%s=%s%s%s%s%s%s", cookie->name, quote,
937           cookie->value, quote, spfpath, spfdomain, spfport);
938         my_spf_cleanup(strEmpty, spfdomain); my_spf_cleanup(strEmpty, spfpath);
939         if ( (spfport != strEmpty) && (spfport != strAttrPort) )
940           memory_deallocate(spfport);
941         sorter_base[count].text = my_spf_use(spfbuf);
942         sorter_base[count].domainlen = strlen(null2empty(domain));
943         sorter_base[count].pathlen = strlen(null2empty(path));
944         count++;
945         if (highest_version < version) highest_version = version;
946         cont:
947         cookie = cookie->next;
948       } /* cookies */
949       if (found_expired) cookie_remove_expired(hostinfo);
950       hostinfo = hostinfo->next;
951     } /* hostinfos */
952   } /* buckets */
953   out:
954   if (count > 0) /* actually send a "Cookie:" header */
955   { char *spfbuf, *cookies, *dest;
956     unsigned short cnt;
957     size_t len = 0;
958     resource->flags |= rfCookieSender;
959     qsort(sorter_base, count, sizeof(tCookieSorterElement), cookie_sorter);
960     for (cnt = 0; cnt < count; cnt++) len += strlen(sorter_base[cnt].text);
961     len += 2 * (count - 1) + 1; /* for the "; " separators and trailing '\0' */
962     cookies = dest = __memory_allocate(len, mapString);
963     for (cnt = 0; cnt < count; cnt++)
964     { const char *text = sorter_base[cnt].text, *src = text;
965       char ch;
966       if (cnt > 0) { *dest++ = ';'; *dest++ = ' '; }
967       while ( (ch = *src++) != '\0' ) *dest++ = ch;
968       memory_deallocate(text);
969     }
970     memory_deallocate(sorter_base);
971     *dest = '\0';
972     my_spf(NULL, 0, &spfbuf, "%sCookie: %s%s\r\n", retval,
973       ( (highest_version > 0) ? "$Version=\"1\"; " : strEmpty ), /* CHECKME! */
974       cookies);
975     memory_deallocate(retval); memory_deallocate(cookies);
976     retval = my_spf_use(spfbuf);
977   }
978   effective_hostname_cleanup(reshn, reseff);
979   out0:
980   return(retval);
981 }
982 
/* Releases a string returned by cookie_collect(); strEmpty is the one result
   that wasn't allocated and thus must not be deallocated. The argument is
   parenthesized so that non-trivial expressions (e.g. "a ? b : c") are
   compared as a whole; note that it is evaluated twice, so it should not have
   side effects. */
#define cookie_collect_cleanup(ptr) \
  do { if ((ptr) != strEmpty) memory_deallocate(ptr); } while (0)
985 
986 
987 /* Inspection */
988 
cookie_reviewlist(const tCachedHostInformation * hostinfo)989 static one_caller char* cookie_reviewlist(const tCachedHostInformation*
990   hostinfo)
991 /* constructs a "review list" (an HTML page snippet) for "about:hostinfo"
992    resources */
993 { char* retval;
994   tCookie* cookie = hostinfo->cookies;
995   if (cookie == NULL) return(NULL);
996 
997   retval = my_strdup(_("\n<br>cookies:"));
998   while (cookie != NULL)
999   { const char *portstr, *expiry, *cn = cookie->name, *cv = cookie->value,
1000       *cd = null2empty(cookie->domain), *cp = null2empty(cookie->path),
1001       *cc = null2empty(cookie->comment), *hn = htmlify(cn), *hv = htmlify(cv),
1002       *hd = htmlify(cd), *hp = htmlify(cp), *hc = htmlify(cc);
1003     char *spfbuf;
1004     tCookieFlags flags = cookie->flags;
1005 
1006     if (cookie->portlistlen <= 0) portstr = strEmpty;
1007     else
1008     { char* p = strbuf;
1009       tPortnumber* portlist = cookie->portlist;
1010       tBoolean is_first = truE;
1011       unsigned short i;
1012       p += sprint_safe(p, _(", ports="));
1013       for (i = 0; i < cookie->portlistlen; i++)
1014       { if (is_first) is_first = falsE;
1015         else p += sprint_safe(p, ",");
1016         p += sprint_safe(p, strPercd, ntohs(portlist[i]));
1017       }
1018       portstr = strbuf;
1019     }
1020 
1021     if (flags & coofUseExpiry)
1022     { sprint_safe(strbuf2, _("expiry=%ld, "), cookie->expiry);
1023       expiry = strbuf2;
1024     }
1025     else expiry = strEmpty;
1026 
1027     my_spf(NULL, 0, &spfbuf,
1028       _("%s\n<br>%s=%s, domain=%s, path=%s, comment=%s, %sflags=%d%s%s%s"),
1029       retval, hn, hv, hd, hp, hc, expiry, cookie->flags, portstr,
1030       ( (flags & coofExpired) ? _(" (expired)") : strEmpty ),
1031       ( (flags & coofTlsOnly) ? _(" (secure)") : strEmpty ));
1032     memory_deallocate(retval); retval = my_spf_use(spfbuf);
1033     htmlify_cleanup(cn, hn); htmlify_cleanup(cv, hv); htmlify_cleanup(cd, hd);
1034     htmlify_cleanup(cp, hp); htmlify_cleanup(cc, hc);
1035     cookie = cookie->next;
1036   }
1037   return(retval);
1038 }
1039 
1040 
1041 /* Initialization */
1042 
cookie_initialize(void)1043 static one_caller void __init cookie_initialize(void)
1044 { /* Calculate the time offset between UTC/GMT and the local time. This code is
1045      a good candidate for the Ugliest Time Calculation / Gross Mental Teardown
1046      award... The point is to avoid buggy or non-portable library functions
1047      (and "struct tm" as a whole). */
1048   time_t now = my_time(), test; /* <now> is the UTC time */
1049   const char* str = ctime(&now); /* convert UTC number to local-time string */
1050   if ( (str != NULL) && (parse_datetime(str, &test)) )
1051   { /* "should" always work; this converts local-time string to local-time
1052        number; now we just have to compare UTC number and local-time number: */
1053     localtime_offset = test - now;
1054   }
1055 }
1056