1 /** @file
2 
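  Selection of cached HTTP alternates: ETag comparison helpers and
  Accept / Accept-Charset / Accept-Encoding / Accept-Language quality scoring.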
4 
5   @section license License
6 
7   Licensed to the Apache Software Foundation (ASF) under one
8   or more contributor license agreements.  See the NOTICE file
9   distributed with this work for additional information
10   regarding copyright ownership.  The ASF licenses this file
11   to you under the Apache License, Version 2.0 (the
12   "License"); you may not use this file except in compliance
13   with the License.  You may obtain a copy of the License at
14 
15       http://www.apache.org/licenses/LICENSE-2.0
16 
17   Unless required by applicable law or agreed to in writing, software
18   distributed under the License is distributed on an "AS IS" BASIS,
19   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
20   See the License for the specific language governing permissions and
21   limitations under the License.
22  */
23 
24 #include "tscore/ink_platform.h"
25 
26 #include "HttpTransact.h"
27 #include "HttpTransactHeaders.h"
28 #include "HttpTransactCache.h"
29 #include <ctime>
30 #include "HTTP.h"
31 #include "HttpCompat.h"
32 #include "tscore/InkErrno.h"
33 
/**
  Locate the opaque value of an entity tag.

  A leading "W/" weak-validator prefix is skipped. If what follows begins
  with a double quote, the result starts just after that quote and ends
  before the next double quote (or at the end of the field when there is
  no closing quote). Otherwise the remainder of the field is returned as is.

  @param raw_tag_field     start of the raw tag text; only the first
                           raw_tag_field_len bytes are examined.
  @param raw_tag_field_len number of bytes in raw_tag_field.
  @param length            receives the length of the located value.
  @return pointer into raw_tag_field at the first byte of the value.

*/
inline static const char *
find_etag(const char *raw_tag_field, int raw_tag_field_len, int *length)
{
  const char *const limit = raw_tag_field + raw_tag_field_len;
  const char *cursor      = raw_tag_field;

  // Step over the weak-validator marker, if present.
  if (raw_tag_field_len >= 2 && cursor[0] == 'W' && cursor[1] == '/') {
    cursor += 2;
  }

  // Nothing left, or an unquoted value: the tag is whatever remains.
  if (cursor >= limit || *cursor != '"') {
    *length = static_cast<int>(limit - cursor);
    return cursor;
  }

  // Quoted value: drop the opening quote and stop at the closing one.
  // A missing closing quote means the value runs to the end of the field.
  ++cursor;
  const int remaining = static_cast<int>(limit - cursor);
  const char *closing = static_cast<const char *>(memchr(cursor, '"', remaining));

  *length = closing ? static_cast<int>(closing - cursor) : remaining;
  return cursor;
}
64 
65 /**
66   Match an etag raw_tag_field with a list of tags in the comma-separated
67   string field_to_match, using strong rules.
68 
69 */
70 inline static bool
do_strings_match_strongly(const char * raw_tag_field,int raw_tag_field_len,const char * comma_sep_tag_list,int comma_sep_tag_list_len)71 do_strings_match_strongly(const char *raw_tag_field, int raw_tag_field_len, const char *comma_sep_tag_list,
72                           int comma_sep_tag_list_len)
73 {
74   StrList tag_list;
75   const char *etag_start;
76   int n, etag_length;
77 
78   // Can never match a weak tag with a strong compare
79   if ((raw_tag_field_len >= 2) && (raw_tag_field[0] == 'W' && raw_tag_field[1] == '/')) {
80     return false;
81   }
82   // Find the unalterated tag
83   etag_start = find_etag(raw_tag_field, raw_tag_field_len, &etag_length);
84 
85   // Rip the field list into a comma-separated field list
86   HttpCompat::parse_comma_list(&tag_list, comma_sep_tag_list, comma_sep_tag_list_len);
87 
88   // Loop over all the tags in the tag list
89   for (Str *tag = tag_list.head; tag; tag = tag->next) {
90     // If field is "*", then we got a match
91     if ((tag->len == 1) && (tag->str[0] == '*')) {
92       return true;
93     }
94 
95     n = 0;
96 
97     if ((static_cast<int>(tag->len - n) == etag_length) && (strncmp(etag_start, tag->str + n, etag_length) == 0)) {
98       return true;
99     }
100   }
101 
102   return false;
103 }
104 
105 /**
106   Match an etag raw_tag_field with a list of tags in the comma-separated
107   string field_to_match, using weak rules.
108 
109 */
110 inline static bool
do_strings_match_weakly(const char * raw_tag_field,int raw_tag_field_len,const char * comma_sep_tag_list,int comma_sep_tag_list_len)111 do_strings_match_weakly(const char *raw_tag_field, int raw_tag_field_len, const char *comma_sep_tag_list,
112                         int comma_sep_tag_list_len)
113 {
114   StrList tag_list;
115   const char *etag_start;
116   const char *cur_tag;
117   int etag_length, cur_tag_len;
118 
119   // Find the unalterated tag
120   etag_start = find_etag(raw_tag_field, raw_tag_field_len, &etag_length);
121 
122   // Rip the field list into a comma-separated field list
123   HttpCompat::parse_comma_list(&tag_list, comma_sep_tag_list, comma_sep_tag_list_len);
124 
125   for (Str *tag = tag_list.head; tag; tag = tag->next) {
126     // If field is "*", then we got a match
127     if ((tag->len == 1) && (tag->str[0] == '*')) {
128       return true;
129     }
130 
131     // strip off the leading 'W/' and quotation marks from the
132     // current tag, then compare for equality with above tag.
133     cur_tag = find_etag(tag->str, tag->len, &cur_tag_len);
134     if ((cur_tag_len == etag_length) && (strncmp(cur_tag, etag_start, cur_tag_len) == 0)) {
135       return true;
136     }
137   }
138   return false;
139 }
140 
/**
  Tell whether a string is exactly "*".

  The argument is only read, so it is taken as a pointer to const; callers
  holding a non-const char * can still pass it unchanged.

  @param s NUL-terminated string; must not be null.
  @return true if s consists of the single character '*'.

*/
inline static bool
is_asterisk(const char *s)
{
  // s[1] is only inspected when s[0] is '*', so an empty string is safe.
  return (s[0] == '*') && (s[1] == '\0');
}
146 
/**
  Tell whether a string is empty.

  The argument is only read, so it is taken as a pointer to const; callers
  holding a non-const char * can still pass it unchanged.

  @param s NUL-terminated string; must not be null.
  @return true if the first character of s is the terminating NUL.

*/
inline static bool
is_empty(const char *s)
{
  return s[0] == '\0';
}
152 
153 /**
154   Given a set of alternates, select the best match.
155 
156   The current school of thought: quality 1st, freshness 2nd.  Loop through
157   alternates and find the one with the highest quality factor. Then
158   determine if it is fresh enough. If not, find the next best match. In
159   keeping with "quality is job 1", subsequent matches will only be
160   considered if their quality is equal to the quality of the first match.
161 
162   @return index in cache alternates vector.
163 
164 */
165 int
SelectFromAlternates(CacheHTTPInfoVector * cache_vector,HTTPHdr * client_request,const OverridableHttpConfigParams * http_config_params)166 HttpTransactCache::SelectFromAlternates(CacheHTTPInfoVector *cache_vector, HTTPHdr *client_request,
167                                         const OverridableHttpConfigParams *http_config_params)
168 {
169   time_t current_age, best_age = CacheHighAgeWatermark;
170   time_t t_now         = 0;
171   int best_index       = -1;
172   float best_Q         = -1.0;
173   float unacceptable_Q = 0.0;
174 
175   int alt_count = cache_vector->count();
176   if (alt_count == 0) {
177     return -1;
178   }
179 
180   Debug("http_match", "[SelectFromAlternates] # alternates = %d", alt_count);
181   Debug("http_seq", "[SelectFromAlternates] %d alternates for this cached doc", alt_count);
182   if (is_debug_tag_set("http_alts")) {
183     fprintf(stderr, "[alts] There are %d alternates for this request header.\n", alt_count);
184   }
185 
186   // so that plugins can make cache reads for http
187   // docs to check if the doc exists in the cache
188   if (!client_request->valid()) {
189     return 0;
190   }
191 
192   for (int i = 0; i < alt_count; i++) {
193     float Q;
194     CacheHTTPInfo *obj       = cache_vector->get(i);
195     HTTPHdr *cached_request  = obj->request_get();
196     HTTPHdr *cached_response = obj->response_get();
197 
198     if (!(obj->object_key_get() == zero_key)) {
199       ink_assert(cached_request->valid());
200       ink_assert(cached_response->valid());
201 
202       Q = calculate_quality_of_match(http_config_params, client_request, cached_request, cached_response);
203 
204       if (alt_count > 1) {
205         if (t_now == 0) {
206           t_now = ink_local_time();
207         }
208         current_age = HttpTransactHeaders::calculate_document_age(obj->request_sent_time_get(), obj->response_received_time_get(),
209                                                                   cached_response, cached_response->get_date(), t_now);
210         // Overflow?
211         if (current_age < 0) {
212           current_age = CacheHighAgeWatermark;
213         }
214       } else {
215         current_age = static_cast<time_t>(0);
216       }
217 
218       if (is_debug_tag_set("http_alts")) {
219         fprintf(stderr, "[alts] ---- alternate #%d (Q = %g) has these request/response hdrs:\n", i + 1, Q);
220         char b[4096];
221         int used, tmp, offset;
222         int done;
223 
224         offset = 0;
225         do {
226           used = 0;
227           tmp  = offset;
228           done = cached_request->print(b, sizeof(b) - 1, &used, &tmp);
229           offset += used;
230           b[used] = '\0';
231           fprintf(stderr, "%s", b);
232         } while (!done);
233 
234         offset = 0;
235         do {
236           used = 0;
237           tmp  = offset;
238           done = cached_response->print(b, sizeof(b) - 1, &used, &tmp);
239           offset += used;
240           b[used] = '\0';
241           fprintf(stderr, "%s", b);
242         } while (!done);
243       }
244 
245       if ((Q > best_Q) || ((Q == best_Q) && (current_age <= best_age))) {
246         best_Q     = Q;
247         best_age   = current_age;
248         best_index = i;
249       }
250     }
251   }
252   Debug("http_seq", "[SelectFromAlternates] Chosen alternate # %d", best_index);
253   if (is_debug_tag_set("http_alts")) {
254     fprintf(stderr, "[alts] and the winner is alternate number %d\n", best_index);
255   }
256 
257   if ((best_index != -1) && (best_Q > unacceptable_Q)) {
258     return best_index;
259   } else {
260     return -1;
261   }
262 }
263 
/**
  For cached req/res and incoming req, return quality of match.

  The current school of thought: quality 1st, freshness 2nd.  This
  function takes a user agent request client_request and the two headers
  for a cached object (obj_client_request and obj_origin_server_response),
  and returns a floating point number for how well the object matches
  the client's request.

  Two factors currently affect a match: Accept headers, which filter and
  sort the matches, and Vary headers, which constrain whether a dynamic
  document matches a request.

  The steps, in order:
    - A PURGE request matches any alternate with quality 1.0.
    - Accept, Accept-Charset, Accept-Encoding and Accept-Language are
      scored in that order; scoring stops at the first negative score.
      Each check can be skipped through its ignore_*_mismatch setting.
    - The four scores are multiplied; any negative score makes the
      result -1.
    - If the result is positive, every TS_HTTP_SELECT_ALT_HOOK hook is
      invoked and the result is scaled by the product of the hook
      qvalues, each clamped to [0, 1].
    - Unless a hook forced the alternate (qvalue FLT_MAX), a result >= 0
      is replaced by -1 when CalcVariability reports any variability.

  Note: According to the specs, specific matching takes precedence over
  wildcard matching. For example, listed in precedence: text/html;q=0.5,
  text/ascii, image/'*', '*'/'*'. So, ideally, in choosing between
  alternates, we should give preference to those which matched
  specifically over those which matched with wildcards.

  @param http_config_param          supplies the ignore_accept*_mismatch settings.
  @param client_request             incoming user agent request.
  @param obj_client_request         request stored with the cached object.
  @param obj_origin_server_response response stored with the cached object.
  @return quality (-1: no match, otherwise the product of the individual
          scores; exact-match bonuses of 1.001 mean it can slightly exceed 1).

*/
float
HttpTransactCache::calculate_quality_of_match(const OverridableHttpConfigParams *http_config_param, HTTPHdr *client_request,
                                              HTTPHdr *obj_client_request, HTTPHdr *obj_origin_server_response)
{
  // For PURGE requests, any alternate is good really.
  if (client_request->method_get_wksidx() == HTTP_WKSIDX_PURGE) {
    return static_cast<float>(1.0);
  }

  // Now calculate a quality based on all sorts of logic
  // q[0..3] hold the Accept, Accept-Charset, Accept-Encoding and
  // Accept-Language scores respectively; Q is the combined result.
  float q[4], Q;
  MIMEField *accept_field;
  MIMEField *cached_accept_field;
  MIMEField *content_field;

  // vary_skip_mask is used as a bitmask: 0b01 when the cached response has a Vary
  // header, 0b11 when it does not.
  // This allows us to AND each of the four configs against it; Table:
  //
  //   Conf   Mask          Conf   Mask         Conf   Mask
  //   ----   ----          ----   ----         ----   ----
  //    00  &  01 == false   01  &  01 == true   10  &  01 == false
  //    00  &  11 == false   01  &  11 == true   10  &  11 == true
  //
  // A true value means the check for that config can be skipped. Note: from a user's
  // perspective, the configs are simply 0, 1 or 2.
  unsigned int vary_skip_mask = obj_origin_server_response->presence(MIME_PRESENCE_VARY) ? 1 : 3;

  // Make debug output happy: -2.0 marks a score that was never computed
  // because an earlier check already failed.
  q[1] = (q[2] = (q[3] = -2.0));

  // This content_field is used for a couple of headers, so get it first
  content_field = obj_origin_server_response->field_find(MIME_FIELD_CONTENT_TYPE, MIME_LEN_CONTENT_TYPE);

  // Accept: header
  if (http_config_param->ignore_accept_mismatch & vary_skip_mask) {
    // Ignore it
    q[0] = 1.0;
  } else {
    accept_field = client_request->field_find(MIME_FIELD_ACCEPT, MIME_LEN_ACCEPT);

    // A NULL Accept or a NULL Content-Type field are perfect matches.
    if (content_field == nullptr || accept_field == nullptr) {
      q[0] = 1.0; // TODO: Why should this not be 1.001 ?? // leif
    } else {
      q[0] = calculate_quality_of_accept_match(accept_field, content_field);
    }
  }

  // Each later check only runs if the previous one did not fail.
  if (q[0] >= 0.0) {
    // Accept-Charset: header
    if (http_config_param->ignore_accept_charset_mismatch & vary_skip_mask) {
      // Ignore it
      q[1] = 1.0;
    } else {
      accept_field        = client_request->field_find(MIME_FIELD_ACCEPT_CHARSET, MIME_LEN_ACCEPT_CHARSET);
      cached_accept_field = obj_client_request->field_find(MIME_FIELD_ACCEPT_CHARSET, MIME_LEN_ACCEPT_CHARSET);

      // absence in both requests counts as exact match
      if (accept_field == nullptr && cached_accept_field == nullptr) {
        Debug("http_alternate", "Exact match for ACCEPT CHARSET (not in request nor cache)");
        q[1] = 1.001; // slightly higher weight to this guy
      } else {
        // content_field still refers to Content-Type here.
        q[1] = calculate_quality_of_accept_charset_match(accept_field, content_field, cached_accept_field);
      }
    }

    if (q[1] >= 0.0) {
      // Accept-Encoding: header
      if (http_config_param->ignore_accept_encoding_mismatch & vary_skip_mask) {
        // Ignore it
        q[2] = 1.0;
      } else {
        accept_field        = client_request->field_find(MIME_FIELD_ACCEPT_ENCODING, MIME_LEN_ACCEPT_ENCODING);
        content_field       = obj_origin_server_response->field_find(MIME_FIELD_CONTENT_ENCODING, MIME_LEN_CONTENT_ENCODING);
        cached_accept_field = obj_client_request->field_find(MIME_FIELD_ACCEPT_ENCODING, MIME_LEN_ACCEPT_ENCODING);

        // absence in both requests counts as exact match
        if (accept_field == nullptr && cached_accept_field == nullptr) {
          Debug("http_alternate", "Exact match for ACCEPT ENCODING (not in request nor cache)");
          q[2] = 1.001; // slightly higher weight to this guy
        } else {
          q[2] = calculate_quality_of_accept_encoding_match(accept_field, content_field, cached_accept_field);
        }
      }

      if (q[2] >= 0.0) {
        // Accept-Language: header
        if (http_config_param->ignore_accept_language_mismatch & vary_skip_mask) {
          // Ignore it
          q[3] = 1.0;
        } else {
          accept_field        = client_request->field_find(MIME_FIELD_ACCEPT_LANGUAGE, MIME_LEN_ACCEPT_LANGUAGE);
          content_field       = obj_origin_server_response->field_find(MIME_FIELD_CONTENT_LANGUAGE, MIME_LEN_CONTENT_LANGUAGE);
          cached_accept_field = obj_client_request->field_find(MIME_FIELD_ACCEPT_LANGUAGE, MIME_LEN_ACCEPT_LANGUAGE);

          // absence in both requests counts as exact match
          if (accept_field == nullptr && cached_accept_field == nullptr) {
            Debug("http_alternate", "Exact match for ACCEPT LANGUAGE (not in request nor cache)");
            q[3] = 1.001; // slightly higher weight to this guy
          } else {
            q[3] = calculate_quality_of_accept_language_match(accept_field, content_field, cached_accept_field);
          }
        }
      }
    }
  }

  // final quality is the product of the four scores, or -1 if some match failed
  // (a skipped score is still at its -2.0 placeholder and therefore counts as failed)
  Q = ((q[0] < 0) || (q[1] < 0) || (q[2] < 0) || (q[3] < 0)) ? -1.0 : q[0] * q[1] * q[2] * q[3];

  Debug("http_match", "    CalcQualityOfMatch: Accept match = %g", q[0]);
  Debug("http_seq", "    CalcQualityOfMatch: Accept match = %g", q[0]);
  Debug("http_alternate", "Content-Type and Accept %f", q[0]);

  Debug("http_match", "    CalcQualityOfMatch: AcceptCharset match = %g", q[1]);
  Debug("http_seq", "    CalcQualityOfMatch: AcceptCharset match = %g", q[1]);
  Debug("http_alternate", "Content-Type and Accept-Charset %f", q[1]);

  Debug("http_match", "    CalcQualityOfMatch: AcceptEncoding match = %g", q[2]);
  Debug("http_seq", "    CalcQualityOfMatch: AcceptEncoding match = %g", q[2]);
  Debug("http_alternate", "Content-Encoding and Accept-Encoding %f", q[2]);

  Debug("http_match", "    CalcQualityOfMatch: AcceptLanguage match = %g", q[3]);
  Debug("http_seq", "    CalcQualityOfMatch: AcceptLanguage match = %g", q[3]);
  Debug("http_alternate", "Content-Language and Accept-Language %f", q[3]);

  Debug("http_alternate", "Mult's Quality Factor: %f", Q);
  Debug("http_alternate", "----------End of Alternate----------");

  // Set when a select-alt hook reports FLT_MAX; it bypasses the variability check below.
  int force_alt = 0;

  // Hooks are only consulted for alternates that matched with positive quality.
  if (Q > 0.0) {
    APIHook *hook;
    HttpAltInfo info;
    float qvalue;

    hook = http_global_hooks->get(TS_HTTP_SELECT_ALT_HOOK);
    if (hook) {
      info.m_client_req.copy_shallow(client_request);
      info.m_cached_req.copy_shallow(obj_client_request);
      info.m_cached_resp.copy_shallow(obj_origin_server_response);
      qvalue = 1.0;

      while (hook) {
        // Each hook starts from 1.0 and may overwrite info.m_qvalue.
        info.m_qvalue = 1.0;
        hook->invoke(TS_EVENT_HTTP_SELECT_ALT, &info);
        hook = hook->m_link.next;
        // Clamp the hook's answer to [0, 1]; FLT_MAX additionally forces the alternate.
        if (info.m_qvalue < 0.0) {
          info.m_qvalue = 0.0;
        } else if (info.m_qvalue > 1.0) {
          if (info.m_qvalue == FLT_MAX) {
            force_alt = 1;
          }
          info.m_qvalue = 1.0;
        }
        qvalue *= info.m_qvalue;
      }
      Q *= qvalue;

      // Clear out any SDK allocated values from the
      //   hdr handles
      info.m_client_req.clear();
      info.m_cached_req.clear();
      info.m_cached_resp.clear();
    }
  }

  if (Q >= 0.0 && !force_alt) { // make sense to check 'variability' only if Q >= 0.0
    // set quality to -1, if cached copy would vary for this request //
    Variability_t variability = CalcVariability(http_config_param, client_request, obj_client_request, obj_origin_server_response);

    if (variability != VARIABILITY_NONE) {
      Q = -1.0;
    }

    Debug("http_match", "    CalcQualityOfMatch: CalcVariability says variability = %d", (variability != VARIABILITY_NONE));
    Debug("http_seq", "    CalcQualityOfMatch: CalcVariability says variability = %d", (variability != VARIABILITY_NONE));
    Debug("http_match", "    CalcQualityOfMatch: Returning final Q = %g", Q);
    Debug("http_seq", "    CalcQualityOfMatch: Returning final Q = %g", Q);
  }

  return Q;
}
469 
470 /**
471   Match request Accept with response Content-Type.
472 
473   If the Accept field mime-type value is *, do not attempt to match,
474   but note the q value for the wildcard match. If the type is not *,
475   but the subtype is * and the Accept type and Content type match,
476   again do not attempt to match, but note the q value. If neither of
477   these two cases, match, keeping track of the highest q value for the
478   matches. At the end of the loop over the Accept header field values,
479   if the highest q value is -1.0 (there was no specific match), if there
480   was a wildcard subtype match, set the q value to the wildcard subtype q
481   value. If there is still no match, and there is a wildcard type match,
482   set the q value to the wildcard type q value.
483 
484   We allow no Content-Type headers in responses to match with quality 1.0.
485 
486   @return quality (-1: no match, 0..1: poor..good).
487 
488 */
489 float
calculate_quality_of_accept_match(MIMEField * accept_field,MIMEField * content_field)490 HttpTransactCache::calculate_quality_of_accept_match(MIMEField *accept_field, MIMEField *content_field)
491 {
492   float q = -1.0;
493   const char *c_raw, *a_raw;
494   int c_raw_len, a_raw_len;
495   char c_type[32], c_subtype[32];
496   Str *a_value;
497   StrList c_param_list, a_values_list;
498   bool wildcard_type_present    = false;
499   bool wildcard_subtype_present = false;
500   float wildcard_type_q         = 1.0;
501   float wildcard_subtype_q      = 1.0;
502 
503   ink_assert((accept_field != nullptr) && (content_field != nullptr));
504 
505   // Extract the content-type field value before the semicolon.
506   // This has to be done just once because assuming single
507   // content-type in document. If more than one content
508   // type, will have to do as in content-language, content-
509   // encoding matching where we loop over both accept and
510   // content-type fields.
511 
512   c_raw = content_field->value_get(&c_raw_len);
513   HttpCompat::parse_semicolon_list(&c_param_list, c_raw, c_raw_len);
514   Str *c_param = c_param_list.head;
515 
516   if (!c_param) {
517     return (1.0);
518   }
519   // Parse the type and subtype of the Content-Type field.
520   HttpCompat::parse_mime_type(c_param->str, c_type, c_subtype, sizeof(c_type), sizeof(c_subtype));
521 
522   // Special case for webp because Safari is has Accept: */*, but doesn't support webp
523   bool content_type_webp = ((strcasecmp("webp", c_subtype) == 0) && (strcasecmp("image", c_type) == 0));
524 
525   // Now loop over Accept field values.
526   // TODO: Should we check the return value (count) from this?
527   accept_field->value_get_comma_list(&a_values_list);
528 
529   for (a_value = a_values_list.head; a_value; a_value = a_value->next) {
530     // Get the raw string to the current comma-sep Accept field value
531     a_raw     = a_value->str;
532     a_raw_len = a_value->len;
533 
534     // Extract the field value before the semicolon
535     StrList a_param_list;
536     HttpCompat::parse_semicolon_list(&a_param_list, a_raw, a_raw_len);
537 
538     // Read the next type/subtype media-range
539     Str *a_param = a_param_list.head;
540     if (!a_param) {
541       continue;
542     }
543 
544     // Parse the type and subtype of the Accept field
545     char a_type[32], a_subtype[32];
546     HttpCompat::parse_mime_type(a_param->str, a_type, a_subtype, sizeof(a_type), sizeof(a_subtype));
547 
548     Debug("http_match", "matching Content-type; '%s/%s' with Accept value '%s/%s'\n", c_type, c_subtype, a_type, a_subtype);
549 
550     bool wildcard_found = true;
551     // Only do wildcard checks if the content type is not image/webp
552     if (content_type_webp == false) {
553       // Is there a wildcard in the type or subtype?
554       if (is_asterisk(a_type)) {
555         wildcard_type_present = true;
556         wildcard_type_q       = HttpCompat::find_Q_param_in_strlist(&a_param_list);
557       } else if (is_asterisk(a_subtype) && (strcasecmp(a_type, c_type) == 0)) {
558         wildcard_subtype_present = true;
559         wildcard_subtype_q       = HttpCompat::find_Q_param_in_strlist(&a_param_list);
560       } else {
561         wildcard_found = false;
562       }
563     }
564     if (content_type_webp == true || wildcard_found == false) {
565       // No wildcard or the content type is image/webp. Do explicit matching of accept and content values.
566       if ((strcasecmp(a_type, c_type) == 0) && (strcasecmp(a_subtype, c_subtype) == 0)) {
567         float tq;
568         tq = HttpCompat::find_Q_param_in_strlist(&a_param_list);
569         q  = (tq > q ? tq : q);
570       }
571     }
572   }
573 
574   // At this point either there is an explicit match, in
575   // which case q will not be -1.0 and will be returned.
576   // If there was no explicit match, but the accept field
577   // had wildcards, return the wildcard match q value.
578 
579   // No explicit match, but wildcard subtype match
580   if ((q == -1.0) && (wildcard_subtype_present == true)) {
581     q = wildcard_subtype_q;
582   }
583   // No explicit match, but wildcard type match.
584   if ((q == -1.0) && (wildcard_type_present == true)) {
585     q = wildcard_type_q;
586   }
587 
588   return (q);
589 }
590 
591 /**
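  Match request Accept-Charset with response Content-Type. (This comment
  describes HttpTransactCache::calculate_quality_of_accept_charset_match,
  which is defined after the does_charset_match() helper.)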
593 
594   Extract the response charset from the Content-Type field - the charset
595   is after the semicolon. Loop through the charsets in the request's
596   Accept-Charset field. If the Accept-Charset value is a wildcard, do not
597   attempt to match. Otherwise match and note the highest q value. If after
598   the loop the q value is -1, indicating no match, then if Accept-Charset
599   had a wildcard, allow it to match - setting q to the wildcard q value.
600   If there is still no match and the Content-Type was the default charset,
601   allow a match with a q value of 1.0.
602 
603   We allow no Content-Type headers in responses to match with quality 1.0.
604 
605   @return quality (-1: no match, 0..1: poor..good).
606 
607 */
608 static inline bool
does_charset_match(char * charset1,char * charset2)609 does_charset_match(char *charset1, char *charset2)
610 {
611   return (is_asterisk(charset1) || is_empty(charset1) || (strcasecmp(charset1, charset2) == 0));
612 }
613 
614 float
calculate_quality_of_accept_charset_match(MIMEField * accept_field,MIMEField * content_field,MIMEField * cached_accept_field)615 HttpTransactCache::calculate_quality_of_accept_charset_match(MIMEField *accept_field, MIMEField *content_field,
616                                                              MIMEField *cached_accept_field)
617 {
618   float q = -1.0;
619   const char *c_raw, *a_raw, *ca_raw;
620   int c_raw_len, a_raw_len, ca_raw_len;
621   StrList a_values_list;
622   Str *a_value;
623   char c_charset[128];
624   char *a_charset;
625   int a_charset_len;
626   const char *default_charset = "utf-8";
627   bool wildcard_present       = false;
628   float wildcard_q            = 1.0;
629 
630   // prefer exact matches
631   if (accept_field && cached_accept_field) {
632     a_raw  = accept_field->value_get(&a_raw_len);
633     ca_raw = cached_accept_field->value_get(&ca_raw_len);
634     if (a_raw && ca_raw && a_raw_len == ca_raw_len && !strncmp(a_raw, ca_raw, a_raw_len)) {
635       Debug("http_alternate", "Exact match for ACCEPT CHARSET");
636       return static_cast<float>(1.001); // slightly higher weight to this guy
637     }
638   }
639   // return match if either ac or ct is missing
640   // this check is different from accept-encoding
641   if (accept_field == nullptr || content_field == nullptr) {
642     return static_cast<float>(1.0);
643   }
644   // get the charset of this content-type //
645   c_raw = content_field->value_get(&c_raw_len);
646   if (!HttpCompat::lookup_param_in_semicolon_string(c_raw, c_raw_len, "charset", c_charset, sizeof(c_charset) - 1)) {
647     ink_strlcpy(c_charset, default_charset, sizeof(c_charset));
648   }
649   // Now loop over Accept-Charset field values.
650   // TODO: Should we check the return value (count) from this?
651   accept_field->value_get_comma_list(&a_values_list);
652 
653   for (a_value = a_values_list.head; a_value; a_value = a_value->next) {
654     // Get the raw string to the current comma-sep Accept-Charset field value
655     a_raw     = a_value->str;
656     a_raw_len = a_value->len;
657 
658     // Extract the field value before the semicolon
659     StrList a_param_list(true); // FIXME: copies & NUL-terminates strings
660     HttpCompat::parse_semicolon_list(&a_param_list, a_raw, a_raw_len);
661 
662     if (a_param_list.head) {
663       a_charset     = const_cast<char *>(a_param_list.head->str);
664       a_charset_len = a_param_list.head->len;
665     } else {
666       continue;
667     }
668 
669     //      printf("matching Content-type; '%s' with Accept-Charset value '%s'\n",
670     //             c_charset,a_charset);
671 
672     // dont match wildcards //
673     if ((a_charset_len == 1) && (a_charset[0] == '*')) {
674       wildcard_present = true;
675       wildcard_q       = HttpCompat::find_Q_param_in_strlist(&a_param_list);
676     } else {
677       // if type matches, get the Q factor //
678       if (does_charset_match(a_charset, c_charset)) {
679         float tq;
680         tq = HttpCompat::find_Q_param_in_strlist(&a_param_list);
681         q  = (tq > q ? tq : q);
682       }
683     }
684   }
685 
686   // if no match and wildcard present, allow match //
687   if ((q == -1.0) && (wildcard_present == true)) {
688     q = wildcard_q;
689   }
690   // if no match, still allow default_charset //
691   if ((q == -1) && (strcasecmp(c_charset, default_charset) == 0)) {
692     q = 1.0;
693   }
694   return (q);
695 }
696 
697 /**
698   Match request Accept-Encoding with response Content-Encoding.
699 
700   First determine if the cached document has identity encoding. This
701   can be the case if the document has no Content-Encoding header field
702   or if the Content-Encoding field explicitly lists "identity". Then,
703   if there is no Accept-Encoding header and the cached response uses
704   identity encoding return a match. If there is no Accept-Encoding header
705   and the cached document uses some other form of encoding, also return
706   a match, albeit one with a slightly lower q value (0.999).
707 
708   If none of the above cases occurs, compare Content-Encoding with
709   Accept-Encoding, by looping over the Content-Encoding values (there
710   may be more than one, since a document may be gzipped, followed by
711   compressed, etc.). If any of the Content-Encoding values are not in
712   the Accept-Encoding header, exit the loop. Before exiting, if there
713   has not been a match, match a wildcard in the Accept-Encoding field
714   and if still no match, match an identity encoding - this may happen
715   if the request did not list "identity" in the Accept-Encoding field,
716   but the response listed it in the Content-Encoding field. In this last
717   case, match with a q value of 0.001.
718 
719   The return values are:
720     - -1.0: Doesn't match
721     - 0.999: No Accept-Encoding header, and Content-Encoding does not list
722       "identity".
723     - 0.001: Accept-Encoding was not empty, but Content-Encoding was
724       either empty or explicitly listed "identity".
725     - 0.0..1.0: Matches with a quality between 0 (poor) and 1 (good).
726 
727   @return quality (-1: no match, 0..1: poor..good).
728 
729 */
730 static inline bool
does_encoding_match(char * enc1,const char * enc2)731 does_encoding_match(char *enc1, const char *enc2)
732 {
733   if (is_asterisk(enc1) || ((strcasecmp(enc1, enc2)) == 0)) {
734     return true;
735   }
736 
737   // rfc2616,sec3.5: applications SHOULD consider "x-gzip" and "x-compress" to be
738   //                equivalent to "gzip" and "compress" respectively
739   if ((!strcasecmp(enc1, "gzip") && !strcasecmp(enc2, "x-gzip")) || (!strcasecmp(enc1, "x-gzip") && !strcasecmp(enc2, "gzip")) ||
740       (!strcasecmp(enc1, "compress") && !strcasecmp(enc2, "x-compress")) ||
741       (!strcasecmp(enc1, "x-compress") && !strcasecmp(enc2, "compress"))) {
742     return true;
743   }
744 
745   return false;
746 }
747 
748 bool
match_content_encoding(MIMEField * accept_field,const char * encoding_identifier)749 HttpTransactCache::match_content_encoding(MIMEField *accept_field, const char *encoding_identifier)
750 {
751   Str *a_value;
752   const char *a_raw;
753   StrList a_values_list;
754   if (!accept_field) {
755     return false;
756   }
757   // TODO: Should we check the return value (count) here?
758   accept_field->value_get_comma_list(&a_values_list);
759 
760   for (a_value = a_values_list.head; a_value; a_value = a_value->next) {
761     char *a_encoding = nullptr;
762     StrList a_param_list;
763     a_raw = a_value->str;
764     HttpCompat::parse_semicolon_list(&a_param_list, a_raw);
765     if (a_param_list.head) {
766       a_encoding = const_cast<char *>(a_param_list.head->str);
767     } else {
768       continue;
769     }
770     float q;
771     q = HttpCompat::find_Q_param_in_strlist(&a_param_list);
772     if (q != 0 && does_encoding_match(a_encoding, encoding_identifier)) {
773       return true;
774     }
775   }
776   return false;
777 }
778 
779 // TODO: This used to take a length for c_raw, but that was never used, so removed it from the prototype.
780 static inline bool
match_accept_content_encoding(const char * c_raw,MIMEField * accept_field,bool * wildcard_present,float * wildcard_q,float * q)781 match_accept_content_encoding(const char *c_raw, MIMEField *accept_field, bool *wildcard_present, float *wildcard_q, float *q)
782 {
783   Str *a_value;
784   const char *a_raw;
785   StrList a_values_list;
786 
787   if (!accept_field) {
788     return false;
789   }
790   // loop over Accept-Encoding elements, looking for match //
791   // TODO: Should we check the return value (count) here?
792   accept_field->value_get_comma_list(&a_values_list);
793 
794   for (a_value = a_values_list.head; a_value; a_value = a_value->next) {
795     char *a_encoding = nullptr;
796     StrList a_param_list;
797 
798     // Get the raw string to the current comma-sep Accept-Charset field value
799     a_raw = a_value->str;
800 
801     // break Accept-Encoding piece into semi-colon separated parts //
802     HttpCompat::parse_semicolon_list(&a_param_list, a_raw);
803     if (a_param_list.head) {
804       a_encoding = const_cast<char *>(a_param_list.head->str);
805     } else {
806       continue;
807     }
808 
809     if (is_asterisk(a_encoding)) {
810       *wildcard_present = true;
811       *wildcard_q       = HttpCompat::find_Q_param_in_strlist(&a_param_list);
812       return true;
813     } else if (does_encoding_match(a_encoding, c_raw)) {
814       // if type matches, get the Q factor //
815       float tq;
816       tq = HttpCompat::find_Q_param_in_strlist(&a_param_list);
817       *q = (tq > *q ? tq : *q);
818 
819       return true;
820     } else {
821       // so this c_raw value did not match this a_raw value. big deal.
822     }
823   }
824   return false;
825 }
826 
827 float
calculate_quality_of_accept_encoding_match(MIMEField * accept_field,MIMEField * content_field,MIMEField * cached_accept_field)828 HttpTransactCache::calculate_quality_of_accept_encoding_match(MIMEField *accept_field, MIMEField *content_field,
829                                                               MIMEField *cached_accept_field)
830 {
831   float q                   = -1.0;
832   bool is_identity_encoding = false;
833   const char *c_encoding;
834   int c_encoding_len;
835   bool wildcard_present = false;
836   float wildcard_q      = 1.0;
837   StrList c_values_list;
838   Str *c_value;
839   const char *a_raw, *ca_raw;
840   int a_raw_len, ca_raw_len;
841 
842   // prefer exact matches
843   if (accept_field && cached_accept_field) {
844     a_raw  = accept_field->value_get(&a_raw_len);
845     ca_raw = cached_accept_field->value_get(&ca_raw_len);
846     if (a_raw && ca_raw && a_raw_len == ca_raw_len && !strncmp(a_raw, ca_raw, a_raw_len)) {
847       Debug("http_alternate", "Exact match for ACCEPT ENCODING");
848       return static_cast<float>(1.001); // slightly higher weight to this guy
849     }
850   }
851   // return match if both ae and ce are missing
852   // this check is different from accept charset
853   if (accept_field == nullptr && content_field == nullptr) {
854     return static_cast<float>(1.0);
855   }
856   // if no Content-Encoding, treat as "identity" //
857   if (!content_field) {
858     Debug("http_match", "[calculate_quality_accept_encoding_match]: "
859                         "response hdr does not have content-encoding.");
860     is_identity_encoding = true;
861   } else {
862     // TODO: Should we check the return value (count) here?
863     content_field->value_get_comma_list(&c_values_list);
864 
865     content_field->value_get(&c_encoding_len);
866     if (c_encoding_len == 0) {
867       is_identity_encoding = true;
868     } else {
869       // does this document have the identity encoding? //
870       for (c_value = c_values_list.head; c_value; c_value = c_value->next) {
871         c_encoding     = c_value->str;
872         c_encoding_len = c_value->len;
873         if ((c_encoding_len >= 8) && (strncasecmp(c_encoding, "identity", 8) == 0)) {
874           is_identity_encoding = true;
875           break;
876         }
877       }
878     }
879   }
880 
881   ///////////////////////////////////////////////////////////////////////
882   // if no Accept-Encoding header, only match identity                 //
883   //   The 1.1 spec says servers MAY assume that clients will accept   //
884   //   any encoding if no header is sent.  Unforntunately, this does   //
885   //   not work 1.0 clients & is particularly thorny when the proxy    //
886   //   created the encoding as the result of a transform.  Http 1.1   //
887   //   purists would say that if proxy encodes something it's really   //
888   //   a transfer-encoding and not a content-encoding but again this   //
889   //   causes problems with 1.0 clients                                //
890   ///////////////////////////////////////////////////////////////////////
891   if (!accept_field) {
892     if (is_identity_encoding) {
893       if (!cached_accept_field) {
894         return (static_cast<float>(1.0));
895       } else {
896         return (static_cast<float>(0.001));
897       }
898     } else {
899       return (static_cast<float>(-1.0));
900     }
901   }
902 
903   // handle special case where no content-encoding in response, but
904   // request has an accept-encoding header, possibly with the identity
905   // field, with a q value;
906   if (!content_field) {
907     if (!match_accept_content_encoding("identity", accept_field, &wildcard_present, &wildcard_q, &q)) {
908       // CE was not returned, and AE does not have identity
909       if (match_content_encoding(accept_field, "gzip") and match_content_encoding(cached_accept_field, "gzip")) {
910         return 1.0f;
911       }
912       goto encoding_wildcard;
913     }
914     // use q from identity match
915 
916   } else {
917     // "Accept-encoding must correctly handle multiple content encoding"
918     // The combined quality factor is the product of all quality factors.
919     // (Note that there may be other possible choice, eg, min(),
920     // but I think multiplication is the best.)
921     // For example, if "content-encoding: a, b", and quality factors
922     // of a and b (in accept-encoding header) are q_a and q_b, resp,
923     // then the combined quality factor is (q_a * q_b).
924     // If any one of the content-encoding is not matched,
925     // then the q value will not be changed.
926     float combined_q = 1.0;
927     for (c_value = c_values_list.head; c_value; c_value = c_value->next) {
928       float this_q = -1.0;
929       if (!match_accept_content_encoding(c_value->str, accept_field, &wildcard_present, &wildcard_q, &this_q)) {
930         goto encoding_wildcard;
931       }
932       combined_q *= this_q;
933     }
934     q = combined_q;
935   }
936 
937 encoding_wildcard:
938   // match the wildcard now //
939   if ((q == -1.0) && (wildcard_present == true)) {
940     q = wildcard_q;
941   }
942   /////////////////////////////////////////////////////////////////////////
943   // there was an Accept-Encoding, but it didn't match anything, at      //
944   // any quality level --- if this is an identity-coded document, that's //
945   // still okay, but otherwise, this is just not a match at all.         //
946   /////////////////////////////////////////////////////////////////////////
947   if ((q == -1.0) && is_identity_encoding) {
948     if (match_content_encoding(accept_field, "gzip")) {
949       if (match_content_encoding(cached_accept_field, "gzip")) {
950         return 1.0f;
951       } else {
952         // always try to fetch GZIP content if we have not tried sending AE before
953         return -1.0f;
954       }
955     } else if (cached_accept_field && !match_content_encoding(cached_accept_field, "gzip")) {
956       return 0.001f;
957     } else {
958       return -1.0f;
959     }
960   }
961   //      q = (float)-1.0;
962   return (q);
963 }
964 
965 /**
966   Match request Accept-Language with response Content-Language.
967 
968   Language matching is a little more complicated because of "ranges".
969   First, no Accept-Language header or no Content-Language headers match
970   with q of 1. Otherwise, loop over Content-Languages. If there is a
971   match with a language in the Accept-Language field, keep track of
972   how many characters were in the value. The q value for the longest
973   range is returned. If there was no explicit match or a mismatch,
974   try wildcard matching.
975 
976   @return quality (-1: no match, 0..1: poor..good).
977 
978 */
979 static inline bool
does_language_range_match(const char * range1,const char * range2)980 does_language_range_match(const char *range1, const char *range2)
981 {
982   while (*range1 && *range2 && (ParseRules::ink_tolower(*range1) == ParseRules::ink_tolower(*range2))) {
983     range1 += 1;
984     range2 += 1;
985   }
986 
987   // matches if range equals tag, or if range is a lang prefix of tag
988   if ((((*range1 == NUL) && (*range2 == NUL)) || ((*range1 == NUL) && (*range2 == '-')))) {
989     return true;
990   }
991 
992   return false;
993 }
994 
995 static inline bool
match_accept_content_language(const char * c_raw,MIMEField * accept_field,bool * wildcard_present,float * wildcard_q,float * q,int * a_range_length)996 match_accept_content_language(const char *c_raw, MIMEField *accept_field, bool *wildcard_present, float *wildcard_q, float *q,
997                               int *a_range_length)
998 {
999   const char *a_raw;
1000   int a_raw_len;
1001   StrList a_values_list;
1002   Str *a_value;
1003 
1004   ink_assert(accept_field != nullptr);
1005 
1006   // loop over each language-range pattern //
1007   // TODO: Should we check the return value (count) here?
1008   accept_field->value_get_comma_list(&a_values_list);
1009 
1010   for (a_value = a_values_list.head; a_value; a_value = a_value->next) {
1011     a_raw     = a_value->str;
1012     a_raw_len = a_value->len;
1013 
1014     char *a_range;
1015     StrList a_param_list;
1016 
1017     HttpCompat::parse_semicolon_list(&a_param_list, a_raw, a_raw_len);
1018     float tq = HttpCompat::find_Q_param_in_strlist(&a_param_list);
1019 
1020     /////////////////////////////////////////////////////////////////////
1021     // This algorithm is a bit weird --- the resulting Q factor is     //
1022     // the Q value corresponding to the LONGEST range field that       //
1023     // matched, or if none matched, then the Q value of any asterisk.  //
1024     // Also, if the lang value is "", meaning that no Content-Language //
1025     // was specified, this document matches all accept headers.        //
1026     /////////////////////////////////////////////////////////////////////
1027     if (a_param_list.head) {
1028       a_range         = const_cast<char *>(a_param_list.head->str);
1029       *a_range_length = a_param_list.head->len;
1030     } else {
1031       continue;
1032     }
1033 
1034     if (is_asterisk(a_range)) {
1035       *wildcard_present = true;
1036       *wildcard_q       = HttpCompat::find_Q_param_in_strlist(&a_param_list);
1037       return true;
1038     } else if (does_language_range_match(a_range, c_raw)) {
1039       *q = tq;
1040       return true;
1041     } else {
1042     }
1043   }
1044 
1045   return false;
1046 }
1047 
1048 // FIX: This code is icky, and i suspect wrong in places, particularly
1049 //      because parts of match_accept_content_language are commented out.
1050 //      It looks like lots of hacks were done.  The code should probably
1051 //      be updated to use the code in HttpCompat::match_accept_language.
1052 
1053 float
calculate_quality_of_accept_language_match(MIMEField * accept_field,MIMEField * content_field,MIMEField * cached_accept_field)1054 HttpTransactCache::calculate_quality_of_accept_language_match(MIMEField *accept_field, MIMEField *content_field,
1055                                                               MIMEField *cached_accept_field)
1056 {
1057   float q = -1.0;
1058   int a_range_length;
1059   bool wildcard_present = false;
1060   float wildcard_q      = 1.0;
1061   float min_q           = 1.0;
1062   bool match_found      = false;
1063   StrList c_values_list;
1064   Str *c_value;
1065   const char *c_raw, *a_raw, *ca_raw;
1066   int a_raw_len, ca_raw_len;
1067 
1068   // Bug 2393700 prefer exact matches
1069   if (accept_field && cached_accept_field) {
1070     a_raw  = accept_field->value_get(&a_raw_len);
1071     ca_raw = cached_accept_field->value_get(&ca_raw_len);
1072     if (a_raw && ca_raw && a_raw_len == ca_raw_len && !strncmp(a_raw, ca_raw, a_raw_len)) {
1073       Debug("http_alternate", "Exact match for ACCEPT LANGUAGE");
1074       return static_cast<float>(1.001); // slightly higher weight to this guy
1075     }
1076   }
1077 
1078   if (!accept_field) {
1079     return (1.0);
1080   }
1081   // handle special case where no content-language in response, but
1082   // request has an accept-language header, possibly with the identity
1083   // field, with a q value;
1084 
1085   if (!content_field) {
1086     if (match_accept_content_language("identity", accept_field, &wildcard_present, &wildcard_q, &q, &a_range_length)) {
1087       goto language_wildcard;
1088     }
1089     Debug("http_match", "[calculate_quality_accept_language_match]: "
1090                         "response hdr does not have content-language.");
1091     return (1.0);
1092   }
1093 
1094   // loop over content languages //
1095   // TODO: Should we check the return value (count) here?
1096   content_field->value_get_comma_list(&c_values_list);
1097   for (c_value = c_values_list.head; c_value; c_value = c_value->next) {
1098     c_raw = c_value->str;
1099 
1100     // get Content-Language value //
1101     if (match_accept_content_language(c_raw, accept_field, &wildcard_present, &wildcard_q, &q, &a_range_length)) {
1102       min_q       = (min_q < q ? min_q : q);
1103       match_found = true;
1104     }
1105   }
1106   if (match_found) {
1107     q = min_q;
1108   } else {
1109     q = -1.0;
1110   }
1111 
1112 language_wildcard:
1113   // match the wildcard now //
1114   if ((q == -1.0) && (wildcard_present == true)) {
1115     q = wildcard_q;
1116   }
1117   return (q);
1118 }
1119 
1120 /**
1121   If the cached object contains a Vary header, then the object only
1122   matches if ALL of the headers named in Vary are present in the new
1123   request, and these match the headers in the stored request.  We relax
1124   this rule to allow matches if neither the current nor original client
1125   headers contained a varying header. This is different from what is
1126   stated in the specs.
1127 
1128 */
1129 Variability_t
CalcVariability(const OverridableHttpConfigParams * http_config_params,HTTPHdr * client_request,HTTPHdr * obj_client_request,HTTPHdr * obj_origin_server_response)1130 HttpTransactCache::CalcVariability(const OverridableHttpConfigParams *http_config_params, HTTPHdr *client_request,
1131                                    HTTPHdr *obj_client_request, HTTPHdr *obj_origin_server_response)
1132 {
1133   ink_assert(http_config_params != nullptr);
1134   ink_assert(client_request != nullptr);
1135   ink_assert(obj_client_request != nullptr);
1136   ink_assert(obj_origin_server_response != nullptr);
1137 
1138   Variability_t variability = VARIABILITY_NONE;
1139   if (obj_origin_server_response->presence(MIME_PRESENCE_VARY)) {
1140     StrList vary_list;
1141 
1142     if (obj_origin_server_response->value_get_comma_list(MIME_FIELD_VARY, MIME_LEN_VARY, &vary_list) > 0) {
1143       if (is_debug_tag_set("http_match") && vary_list.head) {
1144         Debug("http_match", "Vary list of %d elements", vary_list.count);
1145         vary_list.dump(stderr);
1146       }
1147 
1148       // for each field that varies, see if current & original hdrs match //
1149       for (Str *field = vary_list.head; field != nullptr; field = field->next) {
1150         if (field->len == 0) {
1151           continue;
1152         }
1153 
1154         /////////////////////////////////////////////////////////////
1155         // If the field name is unhandled, we should probably do a //
1156         // string comparison on the values of this extension field //
1157         // but currently we just treat it equivalent to a '*'.     //
1158         /////////////////////////////////////////////////////////////
1159 
1160         Debug("http_match", "Vary: %s", field->str);
1161         if (((field->str[0] == '*') && (field->str[1] == NUL))) {
1162           Debug("http_match", "Wildcard variability --- object not served from cache");
1163           variability = VARIABILITY_ALL;
1164           break;
1165         }
1166         ////////////////////////////////////////////////////////////////////////////////////////
1167         // Special case: if 'proxy.config.http.global_user_agent_header' set                  //
1168         // we should ignore Vary: User-Agent.                                                 //
1169         ////////////////////////////////////////////////////////////////////////////////////////
1170         if (http_config_params->global_user_agent_header && !strcasecmp(const_cast<char *>(field->str), "User-Agent")) {
1171           continue;
1172         }
1173 
1174         // Disable Vary mismatch checking for Accept-Encoding.  This is only safe to
1175         // set if you are promising to fix any Accept-Encoding/Content-Encoding mismatches.
1176         if (http_config_params->ignore_accept_encoding_mismatch && !strcasecmp(const_cast<char *>(field->str), "Accept-Encoding")) {
1177           continue;
1178         }
1179 
1180         ///////////////////////////////////////////////////////////////////
1181         // Take the current vary field and look up the headers in        //
1182         // the current client, and the original client.  The cached      //
1183         // object varies unless BOTH the current client and the original //
1184         // client contain the header, and the header values are equal.   //
1185         // We relax this to allow a match if NEITHER have the header.    //
1186         //                                                               //
1187         // While header "equality" appears to be header-specific, the    //
1188         // RFC2068 spec implies that matching only needs to account for  //
1189         // differences in whitespace and support for multiple headers    //
1190         // with the same name.  Case is presumably also insignificant.   //
1191         // Other variations (such as q=1 vs. a field with no q factor)   //
1192         // mean that the values DO NOT match.                            //
1193         ///////////////////////////////////////////////////////////////////
1194 
1195         ink_assert(strlen(field->str) == field->len);
1196 
1197         char *field_name_str = const_cast<char *>(hdrtoken_string_to_wks(field->str, field->len));
1198         if (field_name_str == nullptr) {
1199           field_name_str = const_cast<char *>(field->str);
1200         }
1201 
1202         MIMEField *cached_hdr_field  = obj_client_request->field_find(field_name_str, field->len);
1203         MIMEField *current_hdr_field = client_request->field_find(field_name_str, field->len);
1204 
1205         // Header values match? //
1206         if (!HttpCompat::do_vary_header_values_match(cached_hdr_field, current_hdr_field)) {
1207           variability = VARIABILITY_SOME;
1208           break;
1209         }
1210       }
1211     }
1212   }
1213 
1214   return variability;
1215 }
1216 
/**
  Evaluate the conditional headers of a request against a cached response
  and report the status code the conditions lead to.

  The conditions are examined in this order and the first one that
  decides ends the evaluation:

  - If-None-Match, when the response carries an ETag: the tags are
    compared weakly. HTTP_STATUS_NOT_MODIFIED is returned on a match,
    the response's status code otherwise. If-Modified-Since is not
    consulted in this case. When the response has no ETag the header is
    skipped.

  - If-Modified-Since: compared against the response's Last-Modified,
    or its Date when Last-Modified is absent, or response_received_time
    when both are absent. HTTP_STATUS_NOT_MODIFIED is returned unless
    the If-Modified-Since time is earlier than that value (or the value
    is zero), in which case the response's status code is returned.

  - If-Match: the tags are compared strongly. The response's status code
    is returned on a match, HTTP_STATUS_PRECONDITION_FAILED otherwise.

  - If-Unmodified-Since: HTTP_STATUS_PRECONDITION_FAILED is returned
    when the response has no Last-Modified or was modified after the
    given time; otherwise the response's status code is returned.

  - Range together with If-Range: HTTP_STATUS_RANGE_NOT_SATISFIABLE is
    returned if the If-Range condition is not satisfied; that means the
    document is changed and the whole document should be returned with
    200 status code. Otherwise, the response's status code is returned.

  @param request client request carrying the conditional headers.
  @param response cached response; its status must not already be one of
    the three codes listed under return.
  @param response_received_time fallback time for If-Modified-Since when
    the response has neither Last-Modified nor Date.
  @return the response's own status code when the conditions allow the
    response to be used as is, otherwise HTTP_STATUS_NOT_MODIFIED,
    HTTP_STATUS_PRECONDITION_FAILED, or HTTP_STATUS_RANGE_NOT_SATISFIABLE.

*/
HTTPStatus
HttpTransactCache::match_response_to_request_conditionals(HTTPHdr *request, HTTPHdr *response, ink_time_t response_received_time)
{
  // stays HTTP_STATUS_NONE until one of the date based conditions decides
  HTTPStatus response_code = HTTP_STATUS_NONE;

  ink_assert(response->status_get() != HTTP_STATUS_NOT_MODIFIED);
  ink_assert(response->status_get() != HTTP_STATUS_PRECONDITION_FAILED);
  ink_assert(response->status_get() != HTTP_STATUS_RANGE_NOT_SATISFIABLE);

  // fast path: the request carries none of the headers handled below
  if (!(request->presence(MIME_PRESENCE_IF_MODIFIED_SINCE | MIME_PRESENCE_IF_NONE_MATCH | MIME_PRESENCE_IF_UNMODIFIED_SINCE |
                          MIME_PRESENCE_IF_MATCH | MIME_PRESENCE_RANGE))) {
    return response->status_get();
  }

  // If-None-Match: may match weakly //
  if (request->presence(MIME_PRESENCE_IF_NONE_MATCH)) {
    int raw_etags_len, comma_sep_tag_list_len;
    const char *raw_etags          = response->value_get(MIME_FIELD_ETAG, MIME_LEN_ETAG, &raw_etags_len);
    const char *comma_sep_tag_list = nullptr;

    // without an ETag in the response this header cannot be evaluated;
    // fall through to If-Modified-Since
    if (raw_etags) {
      comma_sep_tag_list = request->value_get(MIME_FIELD_IF_NONE_MATCH, MIME_LEN_IF_NONE_MATCH, &comma_sep_tag_list_len);
      if (!comma_sep_tag_list) {
        comma_sep_tag_list     = "";
        comma_sep_tag_list_len = 0;
      }

      ////////////////////////////////////////////////////////////////////////
      // If we have an etag and a if-none-match, we are talking to someone  //
      // who is doing a 1.1 revalidate. Since this is a GET request with no //
      // sub-ranges, we can do a weak validation.                           //
      ////////////////////////////////////////////////////////////////////////
      if (do_strings_match_weakly(raw_etags, raw_etags_len, comma_sep_tag_list, comma_sep_tag_list_len)) {
        return HTTP_STATUS_NOT_MODIFIED;
      } else {
        return response->status_get();
      }
    }
  }

  // If-Modified-Since //
  // The reference time is Last-Modified, else Date, else the time the
  // response was received.
  if (request->presence(MIME_PRESENCE_IF_MODIFIED_SINCE)) {
    if (response->presence(MIME_PRESENCE_LAST_MODIFIED)) {
      ink_time_t lm_value = response->get_last_modified();

      // we won't return NOT_MODIFIED if Last-modified is too recent
      // (a zero value is handled the same way)
      if ((lm_value == 0) || (request->get_if_modified_since() < lm_value)) {
        return response->status_get();
      }

      response_code = HTTP_STATUS_NOT_MODIFIED;
    } else if (response->presence(MIME_PRESENCE_DATE)) {
      ink_time_t date_value = response->get_date();

      // we won't return NOT_MODIFIED if Date is too recent
      // (a zero value is handled the same way)
      if ((date_value == 0) || (request->get_if_modified_since() < date_value)) {
        return response->status_get();
      }

      response_code = HTTP_STATUS_NOT_MODIFIED;
    } else {
      // we won't return NOT_MODIFIED if received time is too recent
      if (request->get_if_modified_since() < response_received_time) {
        return response->status_get();
      }

      response_code = HTTP_STATUS_NOT_MODIFIED;
    }
  }

  // If-None-Match did not decide (absent, or the response has no ETag)
  // and the If-Modified-Since check above selected NOT_MODIFIED,
  // so return it
  if (response_code != HTTP_STATUS_NONE) {
    return response_code;
  }

  // If-Match: must match strongly //
  if (request->presence(MIME_PRESENCE_IF_MATCH)) {
    int raw_etags_len              = 0;
    int comma_sep_tag_list_len     = 0;
    const char *raw_etags          = response->value_get(MIME_FIELD_ETAG, MIME_LEN_ETAG, &raw_etags_len);
    const char *comma_sep_tag_list = nullptr;

    // the request's tag list is only fetched when the response has an ETag
    if (raw_etags) {
      comma_sep_tag_list = request->value_get(MIME_FIELD_IF_MATCH, MIME_LEN_IF_MATCH, &comma_sep_tag_list_len);
    }

    // missing values are compared as empty strings
    if (!comma_sep_tag_list) {
      comma_sep_tag_list     = "";
      comma_sep_tag_list_len = 0;
    }

    if (!raw_etags) {
      raw_etags     = "";
      raw_etags_len = 0;
    }

    if (do_strings_match_strongly(raw_etags, raw_etags_len, comma_sep_tag_list, comma_sep_tag_list_len)) {
      return response->status_get();
    } else {
      return HTTP_STATUS_PRECONDITION_FAILED;
    }
  }

  // If-Unmodified-Since //
  if (request->presence(MIME_PRESENCE_IF_UNMODIFIED_SINCE)) {
    // lm_value is zero if Last-modified not exists
    ink_time_t lm_value = response->get_last_modified();

    // Condition fails if Last-modified not exists
    if ((request->get_if_unmodified_since() < lm_value) || (lm_value == 0)) {
      return HTTP_STATUS_PRECONDITION_FAILED;
    } else {
      response_code = response->status_get();
    }
  }

  // There is no If-match, and If-unmodified-since passed,
  // so return the original response code
  if (response_code != HTTP_STATUS_NONE) {
    return response_code;
  }

  // Handling If-Range header:
  // As Range && If-Range don't occur often, we want to put the
  // If-Range code in the end
  if (request->presence(MIME_PRESENCE_RANGE) && request->presence(MIME_PRESENCE_IF_RANGE)) {
    int raw_len, comma_sep_list_len;

    const char *if_value = request->value_get(MIME_FIELD_IF_RANGE, MIME_LEN_IF_RANGE, &comma_sep_list_len);

    // this is an ETag, similar to If-Match
    // A value is taken to be an ETag when it is missing, starts with a
    // double quote, or has '/' as its second character (presumably the
    // "W/" weak prefix).
    // NOTE(review): if_value[0] is read without checking comma_sep_list_len;
    // confirm value_get() never returns a non-null pointer with length 0.
    if (!if_value || if_value[0] == '"' || (comma_sep_list_len > 1 && if_value[1] == '/')) {
      if (!if_value) {
        if_value           = "";
        comma_sep_list_len = 0;
      }

      const char *raw_etags = response->value_get(MIME_FIELD_ETAG, MIME_LEN_ETAG, &raw_len);

      if (!raw_etags) {
        raw_etags = "";
        raw_len   = 0;
      }

      if (do_strings_match_strongly(raw_etags, raw_len, if_value, comma_sep_list_len)) {
        return response->status_get();
      } else {
        return HTTP_STATUS_RANGE_NOT_SATISFIABLE;
      }
    }
    // this a Date, similar to If-Unmodified-Since but must be an exact match
    else {
      // lm_value is zero if Last-modified not exists
      ink_time_t lm_value = response->get_last_modified();

      // condition fails if Last-modified not exists
      if ((request->get_if_range_date() != lm_value) || (lm_value == 0)) {
        return HTTP_STATUS_RANGE_NOT_SATISFIABLE;
      } else {
        return response->status_get();
      }
    }
  }

  // none of the conditions objected
  return response->status_get();
}
1404