1 /** @file
2
3 A brief file description
4
5 @section license License
6
7 Licensed to the Apache Software Foundation (ASF) under one
8 or more contributor license agreements. See the NOTICE file
9 distributed with this work for additional information
10 regarding copyright ownership. The ASF licenses this file
11 to you under the Apache License, Version 2.0 (the
12 "License"); you may not use this file except in compliance
13 with the License. You may obtain a copy of the License at
14
15 http://www.apache.org/licenses/LICENSE-2.0
16
17 Unless required by applicable law or agreed to in writing, software
18 distributed under the License is distributed on an "AS IS" BASIS,
19 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
20 See the License for the specific language governing permissions and
21 limitations under the License.
22 */
23
24 #include "tscore/ink_platform.h"
25
26 #include "HttpTransact.h"
27 #include "HttpTransactHeaders.h"
28 #include "HttpTransactCache.h"
29 #include <ctime>
30 #include "HTTP.h"
31 #include "HttpCompat.h"
32 #include "tscore/InkErrno.h"
33
/**
  Locate the opaque part of an entity tag.

  A leading weakness marker ("W/") is skipped first. If what follows
  starts with a double quote, that quote is skipped as well and the tag
  ends just before the next double quote (or at the end of the field when
  no closing quote exists). Unquoted input is returned as is.

  @param raw_tag_field start of the raw tag; need not be NUL-terminated.
  @param raw_tag_field_len number of bytes available at raw_tag_field.
  @param length receives the length of the stripped tag.
  @return pointer into raw_tag_field at which the stripped tag starts.

*/
inline static const char *
find_etag(const char *raw_tag_field, int raw_tag_field_len, int *length)
{
  const char *cursor      = raw_tag_field;
  const char *const limit = raw_tag_field + raw_tag_field_len;

  // Step over the weak-validator prefix, if any.
  const bool has_weak_prefix = (raw_tag_field_len >= 2) && (cursor[0] == 'W') && (cursor[1] == '/');
  if (has_weak_prefix) {
    cursor += 2;
  }

  int remaining = static_cast<int>(limit - cursor);

  if ((cursor < limit) && (*cursor == '"')) {
    // Drop the opening quote, then cut the tag at the closing quote when there is one.
    ++cursor;
    --remaining;
    const char *closing = static_cast<const char *>(memchr(cursor, '"', remaining));
    if (closing != nullptr) {
      remaining = static_cast<int>(closing - cursor);
    }
  }

  *length = remaining;
  return cursor;
}
64
65 /**
66 Match an etag raw_tag_field with a list of tags in the comma-separated
67 string field_to_match, using strong rules.
68
69 */
70 inline static bool
do_strings_match_strongly(const char * raw_tag_field,int raw_tag_field_len,const char * comma_sep_tag_list,int comma_sep_tag_list_len)71 do_strings_match_strongly(const char *raw_tag_field, int raw_tag_field_len, const char *comma_sep_tag_list,
72 int comma_sep_tag_list_len)
73 {
74 StrList tag_list;
75 const char *etag_start;
76 int n, etag_length;
77
78 // Can never match a weak tag with a strong compare
79 if ((raw_tag_field_len >= 2) && (raw_tag_field[0] == 'W' && raw_tag_field[1] == '/')) {
80 return false;
81 }
82 // Find the unalterated tag
83 etag_start = find_etag(raw_tag_field, raw_tag_field_len, &etag_length);
84
85 // Rip the field list into a comma-separated field list
86 HttpCompat::parse_comma_list(&tag_list, comma_sep_tag_list, comma_sep_tag_list_len);
87
88 // Loop over all the tags in the tag list
89 for (Str *tag = tag_list.head; tag; tag = tag->next) {
90 // If field is "*", then we got a match
91 if ((tag->len == 1) && (tag->str[0] == '*')) {
92 return true;
93 }
94
95 n = 0;
96
97 if ((static_cast<int>(tag->len - n) == etag_length) && (strncmp(etag_start, tag->str + n, etag_length) == 0)) {
98 return true;
99 }
100 }
101
102 return false;
103 }
104
105 /**
106 Match an etag raw_tag_field with a list of tags in the comma-separated
107 string field_to_match, using weak rules.
108
109 */
110 inline static bool
do_strings_match_weakly(const char * raw_tag_field,int raw_tag_field_len,const char * comma_sep_tag_list,int comma_sep_tag_list_len)111 do_strings_match_weakly(const char *raw_tag_field, int raw_tag_field_len, const char *comma_sep_tag_list,
112 int comma_sep_tag_list_len)
113 {
114 StrList tag_list;
115 const char *etag_start;
116 const char *cur_tag;
117 int etag_length, cur_tag_len;
118
119 // Find the unalterated tag
120 etag_start = find_etag(raw_tag_field, raw_tag_field_len, &etag_length);
121
122 // Rip the field list into a comma-separated field list
123 HttpCompat::parse_comma_list(&tag_list, comma_sep_tag_list, comma_sep_tag_list_len);
124
125 for (Str *tag = tag_list.head; tag; tag = tag->next) {
126 // If field is "*", then we got a match
127 if ((tag->len == 1) && (tag->str[0] == '*')) {
128 return true;
129 }
130
131 // strip off the leading 'W/' and quotation marks from the
132 // current tag, then compare for equality with above tag.
133 cur_tag = find_etag(tag->str, tag->len, &cur_tag_len);
134 if ((cur_tag_len == etag_length) && (strncmp(cur_tag, etag_start, cur_tag_len) == 0)) {
135 return true;
136 }
137 }
138 return false;
139 }
140
/**
  Tell whether a NUL-terminated string is exactly "*".

  The string is only inspected, never modified, so it is taken as a
  pointer to const; callers holding a mutable buffer can still pass it.

  @param s NUL-terminated string; must not be null.
  @return true when s consists of a single asterisk.

*/
inline static bool
is_asterisk(const char *s)
{
  // s[1] is only read when s[0] is '*', so an empty string is never over-read.
  return ((s[0] == '*') && (s[1] == '\0'));
}
146
/**
  Tell whether a NUL-terminated string has no characters.

  The string is only inspected, never modified, so it is taken as a
  pointer to const; callers holding a mutable buffer can still pass it.

  @param s NUL-terminated string; must not be null.
  @return true when the first character of s is the terminator.

*/
inline static bool
is_empty(const char *s)
{
  return (s[0] == '\0');
}
152
153 /**
154 Given a set of alternates, select the best match.
155
156 The current school of thought: quality 1st, freshness 2nd. Loop through
157 alternates and find the one with the highest quality factor. Then
158 determine if it is fresh enough. If not, find the next best match. In
159 keeping with "quality is job 1", subsequent matches will only be
160 considered if their quality is equal to the quality of the first match.
161
162 @return index in cache alternates vector.
163
164 */
165 int
SelectFromAlternates(CacheHTTPInfoVector * cache_vector,HTTPHdr * client_request,const OverridableHttpConfigParams * http_config_params)166 HttpTransactCache::SelectFromAlternates(CacheHTTPInfoVector *cache_vector, HTTPHdr *client_request,
167 const OverridableHttpConfigParams *http_config_params)
168 {
169 time_t current_age, best_age = CacheHighAgeWatermark;
170 time_t t_now = 0;
171 int best_index = -1;
172 float best_Q = -1.0;
173 float unacceptable_Q = 0.0;
174
175 int alt_count = cache_vector->count();
176 if (alt_count == 0) {
177 return -1;
178 }
179
180 Debug("http_match", "[SelectFromAlternates] # alternates = %d", alt_count);
181 Debug("http_seq", "[SelectFromAlternates] %d alternates for this cached doc", alt_count);
182 if (is_debug_tag_set("http_alts")) {
183 fprintf(stderr, "[alts] There are %d alternates for this request header.\n", alt_count);
184 }
185
186 // so that plugins can make cache reads for http
187 // docs to check if the doc exists in the cache
188 if (!client_request->valid()) {
189 return 0;
190 }
191
192 for (int i = 0; i < alt_count; i++) {
193 float Q;
194 CacheHTTPInfo *obj = cache_vector->get(i);
195 HTTPHdr *cached_request = obj->request_get();
196 HTTPHdr *cached_response = obj->response_get();
197
198 if (!(obj->object_key_get() == zero_key)) {
199 ink_assert(cached_request->valid());
200 ink_assert(cached_response->valid());
201
202 Q = calculate_quality_of_match(http_config_params, client_request, cached_request, cached_response);
203
204 if (alt_count > 1) {
205 if (t_now == 0) {
206 t_now = ink_local_time();
207 }
208 current_age = HttpTransactHeaders::calculate_document_age(obj->request_sent_time_get(), obj->response_received_time_get(),
209 cached_response, cached_response->get_date(), t_now);
210 // Overflow?
211 if (current_age < 0) {
212 current_age = CacheHighAgeWatermark;
213 }
214 } else {
215 current_age = static_cast<time_t>(0);
216 }
217
218 if (is_debug_tag_set("http_alts")) {
219 fprintf(stderr, "[alts] ---- alternate #%d (Q = %g) has these request/response hdrs:\n", i + 1, Q);
220 char b[4096];
221 int used, tmp, offset;
222 int done;
223
224 offset = 0;
225 do {
226 used = 0;
227 tmp = offset;
228 done = cached_request->print(b, sizeof(b) - 1, &used, &tmp);
229 offset += used;
230 b[used] = '\0';
231 fprintf(stderr, "%s", b);
232 } while (!done);
233
234 offset = 0;
235 do {
236 used = 0;
237 tmp = offset;
238 done = cached_response->print(b, sizeof(b) - 1, &used, &tmp);
239 offset += used;
240 b[used] = '\0';
241 fprintf(stderr, "%s", b);
242 } while (!done);
243 }
244
245 if ((Q > best_Q) || ((Q == best_Q) && (current_age <= best_age))) {
246 best_Q = Q;
247 best_age = current_age;
248 best_index = i;
249 }
250 }
251 }
252 Debug("http_seq", "[SelectFromAlternates] Chosen alternate # %d", best_index);
253 if (is_debug_tag_set("http_alts")) {
254 fprintf(stderr, "[alts] and the winner is alternate number %d\n", best_index);
255 }
256
257 if ((best_index != -1) && (best_Q > unacceptable_Q)) {
258 return best_index;
259 } else {
260 return -1;
261 }
262 }
263
264 /**
265 For cached req/res and incoming req, return quality of match.
266
267 The current school of thought: quality 1st, freshness 2nd. This
268 function takes a user agent request client_request and the two headers
269 for a cached object (obj_client_request and obj_origin_server_response),
270 and returns a floating point number for how well the object matches
271 the client's request.
272
273 Two factors currently affect a match: Accept headers, which filter and
274 sort the matches, and Vary headers, which constrain whether a dynamic
275 document matches a request.
276
277 Note: According to the specs, specific matching takes precedence over
278 wildcard matching. For example, listed in precedence: text/html;q=0.5,
279 text/ascii, image/'*', '*'/'*'. So, ideally, in choosing between
280 alternates, we should given preference to those which matched
281 specifically over those which matched with wildcards.
282
283 @return quality (-1: no match, 0..1: poor..good).
284
285 */
286 float
calculate_quality_of_match(const OverridableHttpConfigParams * http_config_param,HTTPHdr * client_request,HTTPHdr * obj_client_request,HTTPHdr * obj_origin_server_response)287 HttpTransactCache::calculate_quality_of_match(const OverridableHttpConfigParams *http_config_param, HTTPHdr *client_request,
288 HTTPHdr *obj_client_request, HTTPHdr *obj_origin_server_response)
289 {
290 // For PURGE requests, any alternate is good really.
291 if (client_request->method_get_wksidx() == HTTP_WKSIDX_PURGE) {
292 return static_cast<float>(1.0);
293 }
294
295 // Now calculate a quality based on all sorts of logic
296 float q[4], Q;
297 MIMEField *accept_field;
298 MIMEField *cached_accept_field;
299 MIMEField *content_field;
300
301 // vary_skip_mask is used as a bitmask, 0b01 or 0b11 depending on the presence of Vary.
302 // This allows us to AND each of the four configs against it; Table:
303 //
304 // Conf Mask Conf Mask Conf Mask
305 // ---- ---- ---- ---- ---- ----
306 // 00 & 01 == false 01 & 01 == true 10 & 01 == false
307 // 00 & 11 == false 01 & 11 == true 10 & 11 == true
308 //
309 // A true value means the check for that config can be skipped. Note: from a users
310 // perspective, the configs are simply 0, 1 or 2.
311 unsigned int vary_skip_mask = obj_origin_server_response->presence(MIME_PRESENCE_VARY) ? 1 : 3;
312
313 // Make debug output happy
314 q[1] = (q[2] = (q[3] = -2.0));
315
316 // This content_field is used for a couple of headers, so get it first
317 content_field = obj_origin_server_response->field_find(MIME_FIELD_CONTENT_TYPE, MIME_LEN_CONTENT_TYPE);
318
319 // Accept: header
320 if (http_config_param->ignore_accept_mismatch & vary_skip_mask) {
321 // Ignore it
322 q[0] = 1.0;
323 } else {
324 accept_field = client_request->field_find(MIME_FIELD_ACCEPT, MIME_LEN_ACCEPT);
325
326 // A NULL Accept or a NULL Content-Type field are perfect matches.
327 if (content_field == nullptr || accept_field == nullptr) {
328 q[0] = 1.0; // TODO: Why should this not be 1.001 ?? // leif
329 } else {
330 q[0] = calculate_quality_of_accept_match(accept_field, content_field);
331 }
332 }
333
334 if (q[0] >= 0.0) {
335 // Accept-Charset: header
336 if (http_config_param->ignore_accept_charset_mismatch & vary_skip_mask) {
337 // Ignore it
338 q[1] = 1.0;
339 } else {
340 accept_field = client_request->field_find(MIME_FIELD_ACCEPT_CHARSET, MIME_LEN_ACCEPT_CHARSET);
341 cached_accept_field = obj_client_request->field_find(MIME_FIELD_ACCEPT_CHARSET, MIME_LEN_ACCEPT_CHARSET);
342
343 // absence in both requests counts as exact match
344 if (accept_field == nullptr && cached_accept_field == nullptr) {
345 Debug("http_alternate", "Exact match for ACCEPT CHARSET (not in request nor cache)");
346 q[1] = 1.001; // slightly higher weight to this guy
347 } else {
348 q[1] = calculate_quality_of_accept_charset_match(accept_field, content_field, cached_accept_field);
349 }
350 }
351
352 if (q[1] >= 0.0) {
353 // Accept-Encoding: header
354 if (http_config_param->ignore_accept_encoding_mismatch & vary_skip_mask) {
355 // Ignore it
356 q[2] = 1.0;
357 } else {
358 accept_field = client_request->field_find(MIME_FIELD_ACCEPT_ENCODING, MIME_LEN_ACCEPT_ENCODING);
359 content_field = obj_origin_server_response->field_find(MIME_FIELD_CONTENT_ENCODING, MIME_LEN_CONTENT_ENCODING);
360 cached_accept_field = obj_client_request->field_find(MIME_FIELD_ACCEPT_ENCODING, MIME_LEN_ACCEPT_ENCODING);
361
362 // absence in both requests counts as exact match
363 if (accept_field == nullptr && cached_accept_field == nullptr) {
364 Debug("http_alternate", "Exact match for ACCEPT ENCODING (not in request nor cache)");
365 q[2] = 1.001; // slightly higher weight to this guy
366 } else {
367 q[2] = calculate_quality_of_accept_encoding_match(accept_field, content_field, cached_accept_field);
368 }
369 }
370
371 if (q[2] >= 0.0) {
372 // Accept-Language: header
373 if (http_config_param->ignore_accept_language_mismatch & vary_skip_mask) {
374 // Ignore it
375 q[3] = 1.0;
376 } else {
377 accept_field = client_request->field_find(MIME_FIELD_ACCEPT_LANGUAGE, MIME_LEN_ACCEPT_LANGUAGE);
378 content_field = obj_origin_server_response->field_find(MIME_FIELD_CONTENT_LANGUAGE, MIME_LEN_CONTENT_LANGUAGE);
379 cached_accept_field = obj_client_request->field_find(MIME_FIELD_ACCEPT_LANGUAGE, MIME_LEN_ACCEPT_LANGUAGE);
380
381 // absence in both requests counts as exact match
382 if (accept_field == nullptr && cached_accept_field == nullptr) {
383 Debug("http_alternate", "Exact match for ACCEPT LANGUAGE (not in request nor cache)");
384 q[3] = 1.001; // slightly higher weight to this guy
385 } else {
386 q[3] = calculate_quality_of_accept_language_match(accept_field, content_field, cached_accept_field);
387 }
388 }
389 }
390 }
391 }
392
393 // final quality is minimum Q, or -1, if some match failed //
394 Q = ((q[0] < 0) || (q[1] < 0) || (q[2] < 0) || (q[3] < 0)) ? -1.0 : q[0] * q[1] * q[2] * q[3];
395
396 Debug("http_match", " CalcQualityOfMatch: Accept match = %g", q[0]);
397 Debug("http_seq", " CalcQualityOfMatch: Accept match = %g", q[0]);
398 Debug("http_alternate", "Content-Type and Accept %f", q[0]);
399
400 Debug("http_match", " CalcQualityOfMatch: AcceptCharset match = %g", q[1]);
401 Debug("http_seq", " CalcQualityOfMatch: AcceptCharset match = %g", q[1]);
402 Debug("http_alternate", "Content-Type and Accept-Charset %f", q[1]);
403
404 Debug("http_match", " CalcQualityOfMatch: AcceptEncoding match = %g", q[2]);
405 Debug("http_seq", " CalcQualityOfMatch: AcceptEncoding match = %g", q[2]);
406 Debug("http_alternate", "Content-Encoding and Accept-Encoding %f", q[2]);
407
408 Debug("http_match", " CalcQualityOfMatch: AcceptLanguage match = %g", q[3]);
409 Debug("http_seq", " CalcQualityOfMatch: AcceptLanguage match = %g", q[3]);
410 Debug("http_alternate", "Content-Language and Accept-Language %f", q[3]);
411
412 Debug("http_alternate", "Mult's Quality Factor: %f", Q);
413 Debug("http_alternate", "----------End of Alternate----------");
414
415 int force_alt = 0;
416
417 if (Q > 0.0) {
418 APIHook *hook;
419 HttpAltInfo info;
420 float qvalue;
421
422 hook = http_global_hooks->get(TS_HTTP_SELECT_ALT_HOOK);
423 if (hook) {
424 info.m_client_req.copy_shallow(client_request);
425 info.m_cached_req.copy_shallow(obj_client_request);
426 info.m_cached_resp.copy_shallow(obj_origin_server_response);
427 qvalue = 1.0;
428
429 while (hook) {
430 info.m_qvalue = 1.0;
431 hook->invoke(TS_EVENT_HTTP_SELECT_ALT, &info);
432 hook = hook->m_link.next;
433 if (info.m_qvalue < 0.0) {
434 info.m_qvalue = 0.0;
435 } else if (info.m_qvalue > 1.0) {
436 if (info.m_qvalue == FLT_MAX) {
437 force_alt = 1;
438 }
439 info.m_qvalue = 1.0;
440 }
441 qvalue *= info.m_qvalue;
442 }
443 Q *= qvalue;
444
445 // Clear out any SDK allocated values from the
446 // hdr handles
447 info.m_client_req.clear();
448 info.m_cached_req.clear();
449 info.m_cached_resp.clear();
450 }
451 }
452
453 if (Q >= 0.0 && !force_alt) { // make sense to check 'variability' only if Q >= 0.0
454 // set quality to -1, if cached copy would vary for this request //
455 Variability_t variability = CalcVariability(http_config_param, client_request, obj_client_request, obj_origin_server_response);
456
457 if (variability != VARIABILITY_NONE) {
458 Q = -1.0;
459 }
460
461 Debug("http_match", " CalcQualityOfMatch: CalcVariability says variability = %d", (variability != VARIABILITY_NONE));
462 Debug("http_seq", " CalcQualityOfMatch: CalcVariability says variability = %d", (variability != VARIABILITY_NONE));
463 Debug("http_match", " CalcQualityOfMatch: Returning final Q = %g", Q);
464 Debug("http_seq", " CalcQualityOfMatch: Returning final Q = %g", Q);
465 }
466
467 return Q;
468 }
469
470 /**
471 Match request Accept with response Content-Type.
472
473 If the Accept field mime-type value is *, do not attempt to match,
474 but note the q value for the wildcard match. If the type is not *,
475 but the subtype is * and the Accept type and Content type match,
476 again do not attempt to match, but note the q value. If neither of
477 these two cases, match, keeping track of the highest q value for the
478 matches. At the end of the loop over the Accept header field values,
479 if the highest q value is -1.0 (there was no specific match), if there
480 was a wildcard subtype match, set the q value to the wildcard subtype q
481 value. If there is still no match, and there is a wildcard type match,
482 set the q value to the wildcard type q value.
483
484 We allow no Content-Type headers in responses to match with quality 1.0.
485
486 @return quality (-1: no match, 0..1: poor..good).
487
488 */
489 float
calculate_quality_of_accept_match(MIMEField * accept_field,MIMEField * content_field)490 HttpTransactCache::calculate_quality_of_accept_match(MIMEField *accept_field, MIMEField *content_field)
491 {
492 float q = -1.0;
493 const char *c_raw, *a_raw;
494 int c_raw_len, a_raw_len;
495 char c_type[32], c_subtype[32];
496 Str *a_value;
497 StrList c_param_list, a_values_list;
498 bool wildcard_type_present = false;
499 bool wildcard_subtype_present = false;
500 float wildcard_type_q = 1.0;
501 float wildcard_subtype_q = 1.0;
502
503 ink_assert((accept_field != nullptr) && (content_field != nullptr));
504
505 // Extract the content-type field value before the semicolon.
506 // This has to be done just once because assuming single
507 // content-type in document. If more than one content
508 // type, will have to do as in content-language, content-
509 // encoding matching where we loop over both accept and
510 // content-type fields.
511
512 c_raw = content_field->value_get(&c_raw_len);
513 HttpCompat::parse_semicolon_list(&c_param_list, c_raw, c_raw_len);
514 Str *c_param = c_param_list.head;
515
516 if (!c_param) {
517 return (1.0);
518 }
519 // Parse the type and subtype of the Content-Type field.
520 HttpCompat::parse_mime_type(c_param->str, c_type, c_subtype, sizeof(c_type), sizeof(c_subtype));
521
522 // Special case for webp because Safari is has Accept: */*, but doesn't support webp
523 bool content_type_webp = ((strcasecmp("webp", c_subtype) == 0) && (strcasecmp("image", c_type) == 0));
524
525 // Now loop over Accept field values.
526 // TODO: Should we check the return value (count) from this?
527 accept_field->value_get_comma_list(&a_values_list);
528
529 for (a_value = a_values_list.head; a_value; a_value = a_value->next) {
530 // Get the raw string to the current comma-sep Accept field value
531 a_raw = a_value->str;
532 a_raw_len = a_value->len;
533
534 // Extract the field value before the semicolon
535 StrList a_param_list;
536 HttpCompat::parse_semicolon_list(&a_param_list, a_raw, a_raw_len);
537
538 // Read the next type/subtype media-range
539 Str *a_param = a_param_list.head;
540 if (!a_param) {
541 continue;
542 }
543
544 // Parse the type and subtype of the Accept field
545 char a_type[32], a_subtype[32];
546 HttpCompat::parse_mime_type(a_param->str, a_type, a_subtype, sizeof(a_type), sizeof(a_subtype));
547
548 Debug("http_match", "matching Content-type; '%s/%s' with Accept value '%s/%s'\n", c_type, c_subtype, a_type, a_subtype);
549
550 bool wildcard_found = true;
551 // Only do wildcard checks if the content type is not image/webp
552 if (content_type_webp == false) {
553 // Is there a wildcard in the type or subtype?
554 if (is_asterisk(a_type)) {
555 wildcard_type_present = true;
556 wildcard_type_q = HttpCompat::find_Q_param_in_strlist(&a_param_list);
557 } else if (is_asterisk(a_subtype) && (strcasecmp(a_type, c_type) == 0)) {
558 wildcard_subtype_present = true;
559 wildcard_subtype_q = HttpCompat::find_Q_param_in_strlist(&a_param_list);
560 } else {
561 wildcard_found = false;
562 }
563 }
564 if (content_type_webp == true || wildcard_found == false) {
565 // No wildcard or the content type is image/webp. Do explicit matching of accept and content values.
566 if ((strcasecmp(a_type, c_type) == 0) && (strcasecmp(a_subtype, c_subtype) == 0)) {
567 float tq;
568 tq = HttpCompat::find_Q_param_in_strlist(&a_param_list);
569 q = (tq > q ? tq : q);
570 }
571 }
572 }
573
574 // At this point either there is an explicit match, in
575 // which case q will not be -1.0 and will be returned.
576 // If there was no explicit match, but the accept field
577 // had wildcards, return the wildcard match q value.
578
579 // No explicit match, but wildcard subtype match
580 if ((q == -1.0) && (wildcard_subtype_present == true)) {
581 q = wildcard_subtype_q;
582 }
583 // No explicit match, but wildcard type match.
584 if ((q == -1.0) && (wildcard_type_present == true)) {
585 q = wildcard_type_q;
586 }
587
588 return (q);
589 }
590
591 /**
592 Match request Accept-Charset with response Content-Type.
593
594 Extract the response charset from the Content-Type field - the charset
595 is after the semicolon. Loop through the charsets in the request's
596 Accept-Charset field. If the Accept-Charset value is a wildcard, do not
597 attempt to match. Otherwise match and note the highest q value. If after
598 the loop the q value is -1, indicating no match, then if Accept-Charset
599 had a wildcard, allow it to match - setting q to the wildcard q value.
600 If there is still no match and the Content-Type was the default charset,
601 allow a match with a q value of 1.0.
602
603 We allow no Content-Type headers in responses to match with quality 1.0.
604
605 @return quality (-1: no match, 0..1: poor..good).
606
607 */
608 static inline bool
does_charset_match(char * charset1,char * charset2)609 does_charset_match(char *charset1, char *charset2)
610 {
611 return (is_asterisk(charset1) || is_empty(charset1) || (strcasecmp(charset1, charset2) == 0));
612 }
613
614 float
calculate_quality_of_accept_charset_match(MIMEField * accept_field,MIMEField * content_field,MIMEField * cached_accept_field)615 HttpTransactCache::calculate_quality_of_accept_charset_match(MIMEField *accept_field, MIMEField *content_field,
616 MIMEField *cached_accept_field)
617 {
618 float q = -1.0;
619 const char *c_raw, *a_raw, *ca_raw;
620 int c_raw_len, a_raw_len, ca_raw_len;
621 StrList a_values_list;
622 Str *a_value;
623 char c_charset[128];
624 char *a_charset;
625 int a_charset_len;
626 const char *default_charset = "utf-8";
627 bool wildcard_present = false;
628 float wildcard_q = 1.0;
629
630 // prefer exact matches
631 if (accept_field && cached_accept_field) {
632 a_raw = accept_field->value_get(&a_raw_len);
633 ca_raw = cached_accept_field->value_get(&ca_raw_len);
634 if (a_raw && ca_raw && a_raw_len == ca_raw_len && !strncmp(a_raw, ca_raw, a_raw_len)) {
635 Debug("http_alternate", "Exact match for ACCEPT CHARSET");
636 return static_cast<float>(1.001); // slightly higher weight to this guy
637 }
638 }
639 // return match if either ac or ct is missing
640 // this check is different from accept-encoding
641 if (accept_field == nullptr || content_field == nullptr) {
642 return static_cast<float>(1.0);
643 }
644 // get the charset of this content-type //
645 c_raw = content_field->value_get(&c_raw_len);
646 if (!HttpCompat::lookup_param_in_semicolon_string(c_raw, c_raw_len, "charset", c_charset, sizeof(c_charset) - 1)) {
647 ink_strlcpy(c_charset, default_charset, sizeof(c_charset));
648 }
649 // Now loop over Accept-Charset field values.
650 // TODO: Should we check the return value (count) from this?
651 accept_field->value_get_comma_list(&a_values_list);
652
653 for (a_value = a_values_list.head; a_value; a_value = a_value->next) {
654 // Get the raw string to the current comma-sep Accept-Charset field value
655 a_raw = a_value->str;
656 a_raw_len = a_value->len;
657
658 // Extract the field value before the semicolon
659 StrList a_param_list(true); // FIXME: copies & NUL-terminates strings
660 HttpCompat::parse_semicolon_list(&a_param_list, a_raw, a_raw_len);
661
662 if (a_param_list.head) {
663 a_charset = const_cast<char *>(a_param_list.head->str);
664 a_charset_len = a_param_list.head->len;
665 } else {
666 continue;
667 }
668
669 // printf("matching Content-type; '%s' with Accept-Charset value '%s'\n",
670 // c_charset,a_charset);
671
672 // dont match wildcards //
673 if ((a_charset_len == 1) && (a_charset[0] == '*')) {
674 wildcard_present = true;
675 wildcard_q = HttpCompat::find_Q_param_in_strlist(&a_param_list);
676 } else {
677 // if type matches, get the Q factor //
678 if (does_charset_match(a_charset, c_charset)) {
679 float tq;
680 tq = HttpCompat::find_Q_param_in_strlist(&a_param_list);
681 q = (tq > q ? tq : q);
682 }
683 }
684 }
685
686 // if no match and wildcard present, allow match //
687 if ((q == -1.0) && (wildcard_present == true)) {
688 q = wildcard_q;
689 }
690 // if no match, still allow default_charset //
691 if ((q == -1) && (strcasecmp(c_charset, default_charset) == 0)) {
692 q = 1.0;
693 }
694 return (q);
695 }
696
697 /**
698 Match request Accept-Encoding with response Content-Encoding.
699
700 First determine if the cached document has identity encoding. This
701 can be the case if the document has no Content-Encoding header field
702 or if the Content-Encoding field explicitly lists "identity". Then,
703 if there is no Accept-Encoding header and the cached response uses
704 identity encoding return a match. If there is no Accept-Encoding header
705 and the cached document uses some other form of encoding, also return
706 a match, albeit one with a slightly lower q value (0.999).
707
708 If none of the above cases occurs, compare Content-Encoding with
709 Accept-Encoding, by looping over the Content-Encoding values (there
710 may be more than one, since a document may be gzipped, followed by
711 compressed, etc.). If any of the Content-Encoding values are not in
712 the Accept-Encoding header, exit the loop. Before exiting, if there
713 has not been a match, match a wildcard in the Accept-Encoding field
714 and if still no match, match an identity encoding - this may happen
715 if the request did not list "identity" in the Accept-Encoding field,
716 but the response listed it in the Content-Encoding field. In this last
717 case, match with a q value of 0.001.
718
719 The return values are:
720 - -1.0: Doesn't match
721 - 0.999: No Accept-Encoding header, and Content-Encoding does not list
722 "identity".
723 - 0.001: Accept-Encoding was not empty, but Content-Encoding was
724 either empty or explicitly listed "identity".
725 - 0.0..1.0: Matches with a quality between 0 (poor) and 1 (good).
726
727 @return quality (-1: no match, 0..1: poor..good).
728
729 */
730 static inline bool
does_encoding_match(char * enc1,const char * enc2)731 does_encoding_match(char *enc1, const char *enc2)
732 {
733 if (is_asterisk(enc1) || ((strcasecmp(enc1, enc2)) == 0)) {
734 return true;
735 }
736
737 // rfc2616,sec3.5: applications SHOULD consider "x-gzip" and "x-compress" to be
738 // equivalent to "gzip" and "compress" respectively
739 if ((!strcasecmp(enc1, "gzip") && !strcasecmp(enc2, "x-gzip")) || (!strcasecmp(enc1, "x-gzip") && !strcasecmp(enc2, "gzip")) ||
740 (!strcasecmp(enc1, "compress") && !strcasecmp(enc2, "x-compress")) ||
741 (!strcasecmp(enc1, "x-compress") && !strcasecmp(enc2, "compress"))) {
742 return true;
743 }
744
745 return false;
746 }
747
748 bool
match_content_encoding(MIMEField * accept_field,const char * encoding_identifier)749 HttpTransactCache::match_content_encoding(MIMEField *accept_field, const char *encoding_identifier)
750 {
751 Str *a_value;
752 const char *a_raw;
753 StrList a_values_list;
754 if (!accept_field) {
755 return false;
756 }
757 // TODO: Should we check the return value (count) here?
758 accept_field->value_get_comma_list(&a_values_list);
759
760 for (a_value = a_values_list.head; a_value; a_value = a_value->next) {
761 char *a_encoding = nullptr;
762 StrList a_param_list;
763 a_raw = a_value->str;
764 HttpCompat::parse_semicolon_list(&a_param_list, a_raw);
765 if (a_param_list.head) {
766 a_encoding = const_cast<char *>(a_param_list.head->str);
767 } else {
768 continue;
769 }
770 float q;
771 q = HttpCompat::find_Q_param_in_strlist(&a_param_list);
772 if (q != 0 && does_encoding_match(a_encoding, encoding_identifier)) {
773 return true;
774 }
775 }
776 return false;
777 }
778
779 // TODO: This used to take a length for c_raw, but that was never used, so removed it from the prototype.
780 static inline bool
match_accept_content_encoding(const char * c_raw,MIMEField * accept_field,bool * wildcard_present,float * wildcard_q,float * q)781 match_accept_content_encoding(const char *c_raw, MIMEField *accept_field, bool *wildcard_present, float *wildcard_q, float *q)
782 {
783 Str *a_value;
784 const char *a_raw;
785 StrList a_values_list;
786
787 if (!accept_field) {
788 return false;
789 }
790 // loop over Accept-Encoding elements, looking for match //
791 // TODO: Should we check the return value (count) here?
792 accept_field->value_get_comma_list(&a_values_list);
793
794 for (a_value = a_values_list.head; a_value; a_value = a_value->next) {
795 char *a_encoding = nullptr;
796 StrList a_param_list;
797
798 // Get the raw string to the current comma-sep Accept-Charset field value
799 a_raw = a_value->str;
800
801 // break Accept-Encoding piece into semi-colon separated parts //
802 HttpCompat::parse_semicolon_list(&a_param_list, a_raw);
803 if (a_param_list.head) {
804 a_encoding = const_cast<char *>(a_param_list.head->str);
805 } else {
806 continue;
807 }
808
809 if (is_asterisk(a_encoding)) {
810 *wildcard_present = true;
811 *wildcard_q = HttpCompat::find_Q_param_in_strlist(&a_param_list);
812 return true;
813 } else if (does_encoding_match(a_encoding, c_raw)) {
814 // if type matches, get the Q factor //
815 float tq;
816 tq = HttpCompat::find_Q_param_in_strlist(&a_param_list);
817 *q = (tq > *q ? tq : *q);
818
819 return true;
820 } else {
821 // so this c_raw value did not match this a_raw value. big deal.
822 }
823 }
824 return false;
825 }
826
827 float
calculate_quality_of_accept_encoding_match(MIMEField * accept_field,MIMEField * content_field,MIMEField * cached_accept_field)828 HttpTransactCache::calculate_quality_of_accept_encoding_match(MIMEField *accept_field, MIMEField *content_field,
829 MIMEField *cached_accept_field)
830 {
831 float q = -1.0;
832 bool is_identity_encoding = false;
833 const char *c_encoding;
834 int c_encoding_len;
835 bool wildcard_present = false;
836 float wildcard_q = 1.0;
837 StrList c_values_list;
838 Str *c_value;
839 const char *a_raw, *ca_raw;
840 int a_raw_len, ca_raw_len;
841
842 // prefer exact matches
843 if (accept_field && cached_accept_field) {
844 a_raw = accept_field->value_get(&a_raw_len);
845 ca_raw = cached_accept_field->value_get(&ca_raw_len);
846 if (a_raw && ca_raw && a_raw_len == ca_raw_len && !strncmp(a_raw, ca_raw, a_raw_len)) {
847 Debug("http_alternate", "Exact match for ACCEPT ENCODING");
848 return static_cast<float>(1.001); // slightly higher weight to this guy
849 }
850 }
851 // return match if both ae and ce are missing
852 // this check is different from accept charset
853 if (accept_field == nullptr && content_field == nullptr) {
854 return static_cast<float>(1.0);
855 }
856 // if no Content-Encoding, treat as "identity" //
857 if (!content_field) {
858 Debug("http_match", "[calculate_quality_accept_encoding_match]: "
859 "response hdr does not have content-encoding.");
860 is_identity_encoding = true;
861 } else {
862 // TODO: Should we check the return value (count) here?
863 content_field->value_get_comma_list(&c_values_list);
864
865 content_field->value_get(&c_encoding_len);
866 if (c_encoding_len == 0) {
867 is_identity_encoding = true;
868 } else {
869 // does this document have the identity encoding? //
870 for (c_value = c_values_list.head; c_value; c_value = c_value->next) {
871 c_encoding = c_value->str;
872 c_encoding_len = c_value->len;
873 if ((c_encoding_len >= 8) && (strncasecmp(c_encoding, "identity", 8) == 0)) {
874 is_identity_encoding = true;
875 break;
876 }
877 }
878 }
879 }
880
881 ///////////////////////////////////////////////////////////////////////
882 // if no Accept-Encoding header, only match identity //
883 // The 1.1 spec says servers MAY assume that clients will accept //
884 // any encoding if no header is sent. Unforntunately, this does //
885 // not work 1.0 clients & is particularly thorny when the proxy //
886 // created the encoding as the result of a transform. Http 1.1 //
887 // purists would say that if proxy encodes something it's really //
888 // a transfer-encoding and not a content-encoding but again this //
889 // causes problems with 1.0 clients //
890 ///////////////////////////////////////////////////////////////////////
891 if (!accept_field) {
892 if (is_identity_encoding) {
893 if (!cached_accept_field) {
894 return (static_cast<float>(1.0));
895 } else {
896 return (static_cast<float>(0.001));
897 }
898 } else {
899 return (static_cast<float>(-1.0));
900 }
901 }
902
903 // handle special case where no content-encoding in response, but
904 // request has an accept-encoding header, possibly with the identity
905 // field, with a q value;
906 if (!content_field) {
907 if (!match_accept_content_encoding("identity", accept_field, &wildcard_present, &wildcard_q, &q)) {
908 // CE was not returned, and AE does not have identity
909 if (match_content_encoding(accept_field, "gzip") and match_content_encoding(cached_accept_field, "gzip")) {
910 return 1.0f;
911 }
912 goto encoding_wildcard;
913 }
914 // use q from identity match
915
916 } else {
917 // "Accept-encoding must correctly handle multiple content encoding"
918 // The combined quality factor is the product of all quality factors.
919 // (Note that there may be other possible choice, eg, min(),
920 // but I think multiplication is the best.)
921 // For example, if "content-encoding: a, b", and quality factors
922 // of a and b (in accept-encoding header) are q_a and q_b, resp,
923 // then the combined quality factor is (q_a * q_b).
924 // If any one of the content-encoding is not matched,
925 // then the q value will not be changed.
926 float combined_q = 1.0;
927 for (c_value = c_values_list.head; c_value; c_value = c_value->next) {
928 float this_q = -1.0;
929 if (!match_accept_content_encoding(c_value->str, accept_field, &wildcard_present, &wildcard_q, &this_q)) {
930 goto encoding_wildcard;
931 }
932 combined_q *= this_q;
933 }
934 q = combined_q;
935 }
936
937 encoding_wildcard:
938 // match the wildcard now //
939 if ((q == -1.0) && (wildcard_present == true)) {
940 q = wildcard_q;
941 }
942 /////////////////////////////////////////////////////////////////////////
943 // there was an Accept-Encoding, but it didn't match anything, at //
944 // any quality level --- if this is an identity-coded document, that's //
945 // still okay, but otherwise, this is just not a match at all. //
946 /////////////////////////////////////////////////////////////////////////
947 if ((q == -1.0) && is_identity_encoding) {
948 if (match_content_encoding(accept_field, "gzip")) {
949 if (match_content_encoding(cached_accept_field, "gzip")) {
950 return 1.0f;
951 } else {
952 // always try to fetch GZIP content if we have not tried sending AE before
953 return -1.0f;
954 }
955 } else if (cached_accept_field && !match_content_encoding(cached_accept_field, "gzip")) {
956 return 0.001f;
957 } else {
958 return -1.0f;
959 }
960 }
961 // q = (float)-1.0;
962 return (q);
963 }
964
965 /**
966 Match request Accept-Language with response Content-Language.
967
968 Language matching is a little more complicated because of "ranges".
969 First, no Accept-Language header or no Content-Language headers match
970 with q of 1. Otherwise, loop over Content-Languages. If there is a
971 match with a language in the Accept-Language field, keep track of
972 how many characters were in the value. The q value for the longest
973 range is returned. If there was no explicit match or a mismatch,
974 try wildcard matching.
975
976 @return quality (-1: no match, 0..1: poor..good).
977
978 */
979 static inline bool
does_language_range_match(const char * range1,const char * range2)980 does_language_range_match(const char *range1, const char *range2)
981 {
982 while (*range1 && *range2 && (ParseRules::ink_tolower(*range1) == ParseRules::ink_tolower(*range2))) {
983 range1 += 1;
984 range2 += 1;
985 }
986
987 // matches if range equals tag, or if range is a lang prefix of tag
988 if ((((*range1 == NUL) && (*range2 == NUL)) || ((*range1 == NUL) && (*range2 == '-')))) {
989 return true;
990 }
991
992 return false;
993 }
994
995 static inline bool
match_accept_content_language(const char * c_raw,MIMEField * accept_field,bool * wildcard_present,float * wildcard_q,float * q,int * a_range_length)996 match_accept_content_language(const char *c_raw, MIMEField *accept_field, bool *wildcard_present, float *wildcard_q, float *q,
997 int *a_range_length)
998 {
999 const char *a_raw;
1000 int a_raw_len;
1001 StrList a_values_list;
1002 Str *a_value;
1003
1004 ink_assert(accept_field != nullptr);
1005
1006 // loop over each language-range pattern //
1007 // TODO: Should we check the return value (count) here?
1008 accept_field->value_get_comma_list(&a_values_list);
1009
1010 for (a_value = a_values_list.head; a_value; a_value = a_value->next) {
1011 a_raw = a_value->str;
1012 a_raw_len = a_value->len;
1013
1014 char *a_range;
1015 StrList a_param_list;
1016
1017 HttpCompat::parse_semicolon_list(&a_param_list, a_raw, a_raw_len);
1018 float tq = HttpCompat::find_Q_param_in_strlist(&a_param_list);
1019
1020 /////////////////////////////////////////////////////////////////////
1021 // This algorithm is a bit weird --- the resulting Q factor is //
1022 // the Q value corresponding to the LONGEST range field that //
1023 // matched, or if none matched, then the Q value of any asterisk. //
1024 // Also, if the lang value is "", meaning that no Content-Language //
1025 // was specified, this document matches all accept headers. //
1026 /////////////////////////////////////////////////////////////////////
1027 if (a_param_list.head) {
1028 a_range = const_cast<char *>(a_param_list.head->str);
1029 *a_range_length = a_param_list.head->len;
1030 } else {
1031 continue;
1032 }
1033
1034 if (is_asterisk(a_range)) {
1035 *wildcard_present = true;
1036 *wildcard_q = HttpCompat::find_Q_param_in_strlist(&a_param_list);
1037 return true;
1038 } else if (does_language_range_match(a_range, c_raw)) {
1039 *q = tq;
1040 return true;
1041 } else {
1042 }
1043 }
1044
1045 return false;
1046 }
1047
1048 // FIX: This code is icky, and i suspect wrong in places, particularly
1049 // because parts of match_accept_content_language are commented out.
1050 // It looks like lots of hacks were done. The code should probably
1051 // be updated to use the code in HttpCompat::match_accept_language.
1052
1053 float
calculate_quality_of_accept_language_match(MIMEField * accept_field,MIMEField * content_field,MIMEField * cached_accept_field)1054 HttpTransactCache::calculate_quality_of_accept_language_match(MIMEField *accept_field, MIMEField *content_field,
1055 MIMEField *cached_accept_field)
1056 {
1057 float q = -1.0;
1058 int a_range_length;
1059 bool wildcard_present = false;
1060 float wildcard_q = 1.0;
1061 float min_q = 1.0;
1062 bool match_found = false;
1063 StrList c_values_list;
1064 Str *c_value;
1065 const char *c_raw, *a_raw, *ca_raw;
1066 int a_raw_len, ca_raw_len;
1067
1068 // Bug 2393700 prefer exact matches
1069 if (accept_field && cached_accept_field) {
1070 a_raw = accept_field->value_get(&a_raw_len);
1071 ca_raw = cached_accept_field->value_get(&ca_raw_len);
1072 if (a_raw && ca_raw && a_raw_len == ca_raw_len && !strncmp(a_raw, ca_raw, a_raw_len)) {
1073 Debug("http_alternate", "Exact match for ACCEPT LANGUAGE");
1074 return static_cast<float>(1.001); // slightly higher weight to this guy
1075 }
1076 }
1077
1078 if (!accept_field) {
1079 return (1.0);
1080 }
1081 // handle special case where no content-language in response, but
1082 // request has an accept-language header, possibly with the identity
1083 // field, with a q value;
1084
1085 if (!content_field) {
1086 if (match_accept_content_language("identity", accept_field, &wildcard_present, &wildcard_q, &q, &a_range_length)) {
1087 goto language_wildcard;
1088 }
1089 Debug("http_match", "[calculate_quality_accept_language_match]: "
1090 "response hdr does not have content-language.");
1091 return (1.0);
1092 }
1093
1094 // loop over content languages //
1095 // TODO: Should we check the return value (count) here?
1096 content_field->value_get_comma_list(&c_values_list);
1097 for (c_value = c_values_list.head; c_value; c_value = c_value->next) {
1098 c_raw = c_value->str;
1099
1100 // get Content-Language value //
1101 if (match_accept_content_language(c_raw, accept_field, &wildcard_present, &wildcard_q, &q, &a_range_length)) {
1102 min_q = (min_q < q ? min_q : q);
1103 match_found = true;
1104 }
1105 }
1106 if (match_found) {
1107 q = min_q;
1108 } else {
1109 q = -1.0;
1110 }
1111
1112 language_wildcard:
1113 // match the wildcard now //
1114 if ((q == -1.0) && (wildcard_present == true)) {
1115 q = wildcard_q;
1116 }
1117 return (q);
1118 }
1119
1120 /**
1121 If the cached object contains a Vary header, then the object only
1122 matches if ALL of the headers named in Vary are present in the new
1123 request, and these match the headers in the stored request. We relax
1124 this rule to allow matches if neither the current nor original client
1125 headers contained a varying header. This is different from what is
1126 stated in the specs.
1127
1128 */
1129 Variability_t
CalcVariability(const OverridableHttpConfigParams * http_config_params,HTTPHdr * client_request,HTTPHdr * obj_client_request,HTTPHdr * obj_origin_server_response)1130 HttpTransactCache::CalcVariability(const OverridableHttpConfigParams *http_config_params, HTTPHdr *client_request,
1131 HTTPHdr *obj_client_request, HTTPHdr *obj_origin_server_response)
1132 {
1133 ink_assert(http_config_params != nullptr);
1134 ink_assert(client_request != nullptr);
1135 ink_assert(obj_client_request != nullptr);
1136 ink_assert(obj_origin_server_response != nullptr);
1137
1138 Variability_t variability = VARIABILITY_NONE;
1139 if (obj_origin_server_response->presence(MIME_PRESENCE_VARY)) {
1140 StrList vary_list;
1141
1142 if (obj_origin_server_response->value_get_comma_list(MIME_FIELD_VARY, MIME_LEN_VARY, &vary_list) > 0) {
1143 if (is_debug_tag_set("http_match") && vary_list.head) {
1144 Debug("http_match", "Vary list of %d elements", vary_list.count);
1145 vary_list.dump(stderr);
1146 }
1147
1148 // for each field that varies, see if current & original hdrs match //
1149 for (Str *field = vary_list.head; field != nullptr; field = field->next) {
1150 if (field->len == 0) {
1151 continue;
1152 }
1153
1154 /////////////////////////////////////////////////////////////
1155 // If the field name is unhandled, we should probably do a //
1156 // string comparison on the values of this extension field //
1157 // but currently we just treat it equivalent to a '*'. //
1158 /////////////////////////////////////////////////////////////
1159
1160 Debug("http_match", "Vary: %s", field->str);
1161 if (((field->str[0] == '*') && (field->str[1] == NUL))) {
1162 Debug("http_match", "Wildcard variability --- object not served from cache");
1163 variability = VARIABILITY_ALL;
1164 break;
1165 }
1166 ////////////////////////////////////////////////////////////////////////////////////////
1167 // Special case: if 'proxy.config.http.global_user_agent_header' set //
1168 // we should ignore Vary: User-Agent. //
1169 ////////////////////////////////////////////////////////////////////////////////////////
1170 if (http_config_params->global_user_agent_header && !strcasecmp(const_cast<char *>(field->str), "User-Agent")) {
1171 continue;
1172 }
1173
1174 // Disable Vary mismatch checking for Accept-Encoding. This is only safe to
1175 // set if you are promising to fix any Accept-Encoding/Content-Encoding mismatches.
1176 if (http_config_params->ignore_accept_encoding_mismatch && !strcasecmp(const_cast<char *>(field->str), "Accept-Encoding")) {
1177 continue;
1178 }
1179
1180 ///////////////////////////////////////////////////////////////////
1181 // Take the current vary field and look up the headers in //
1182 // the current client, and the original client. The cached //
1183 // object varies unless BOTH the current client and the original //
1184 // client contain the header, and the header values are equal. //
1185 // We relax this to allow a match if NEITHER have the header. //
1186 // //
1187 // While header "equality" appears to be header-specific, the //
1188 // RFC2068 spec implies that matching only needs to account for //
1189 // differences in whitespace and support for multiple headers //
1190 // with the same name. Case is presumably also insignificant. //
1191 // Other variations (such as q=1 vs. a field with no q factor) //
1192 // mean that the values DO NOT match. //
1193 ///////////////////////////////////////////////////////////////////
1194
1195 ink_assert(strlen(field->str) == field->len);
1196
1197 char *field_name_str = const_cast<char *>(hdrtoken_string_to_wks(field->str, field->len));
1198 if (field_name_str == nullptr) {
1199 field_name_str = const_cast<char *>(field->str);
1200 }
1201
1202 MIMEField *cached_hdr_field = obj_client_request->field_find(field_name_str, field->len);
1203 MIMEField *current_hdr_field = client_request->field_find(field_name_str, field->len);
1204
1205 // Header values match? //
1206 if (!HttpCompat::do_vary_header_values_match(cached_hdr_field, current_hdr_field)) {
1207 variability = VARIABILITY_SOME;
1208 break;
1209 }
1210 }
1211 }
1212 }
1213
1214 return variability;
1215 }
1216
1217 /**
1218 If the request has If-modified-since or If-none-match,
1219 HTTP_STATUS_NOT_MODIFIED is returned if both or the existing one
1220 (if only one exists) fails; otherwise, the response's status code
1221 is returned.
1222
1223 If the request has If-unmodified-since or If-match,
1224 HTTP_STATUS_PRECONDITION_FAILED is returned if one fails; otherwise,
1225 the response's status code is returned.
1226
1227 If the request is a RANGE request with If-range,
1228 HTTP_STATUS_RANGE_NOT_SATISFIABLE is returned if the If-range condition
1229 is not satisfied (or fails); that means the document is changed and
1230 the whole document should be returned with 200 status code. Otherwise,
1231 the response's status code is returned.
1232
1233 @return status code: HTTP_STATUS_NOT_MODIFIED,
1234 HTTP_STATUS_PRECONDITION_FAILED, or HTTP_STATUS_RANGE_NOT_SATISFIABLE.
1235
1236 */
1237 HTTPStatus
match_response_to_request_conditionals(HTTPHdr * request,HTTPHdr * response,ink_time_t response_received_time)1238 HttpTransactCache::match_response_to_request_conditionals(HTTPHdr *request, HTTPHdr *response, ink_time_t response_received_time)
1239 {
1240 HTTPStatus response_code = HTTP_STATUS_NONE;
1241
1242 ink_assert(response->status_get() != HTTP_STATUS_NOT_MODIFIED);
1243 ink_assert(response->status_get() != HTTP_STATUS_PRECONDITION_FAILED);
1244 ink_assert(response->status_get() != HTTP_STATUS_RANGE_NOT_SATISFIABLE);
1245
1246 if (!(request->presence(MIME_PRESENCE_IF_MODIFIED_SINCE | MIME_PRESENCE_IF_NONE_MATCH | MIME_PRESENCE_IF_UNMODIFIED_SINCE |
1247 MIME_PRESENCE_IF_MATCH | MIME_PRESENCE_RANGE))) {
1248 return response->status_get();
1249 }
1250
1251 // If-None-Match: may match weakly //
1252 if (request->presence(MIME_PRESENCE_IF_NONE_MATCH)) {
1253 int raw_etags_len, comma_sep_tag_list_len;
1254 const char *raw_etags = response->value_get(MIME_FIELD_ETAG, MIME_LEN_ETAG, &raw_etags_len);
1255 const char *comma_sep_tag_list = nullptr;
1256
1257 if (raw_etags) {
1258 comma_sep_tag_list = request->value_get(MIME_FIELD_IF_NONE_MATCH, MIME_LEN_IF_NONE_MATCH, &comma_sep_tag_list_len);
1259 if (!comma_sep_tag_list) {
1260 comma_sep_tag_list = "";
1261 comma_sep_tag_list_len = 0;
1262 }
1263
1264 ////////////////////////////////////////////////////////////////////////
1265 // If we have an etag and a if-none-match, we are talking to someone //
1266 // who is doing a 1.1 revalidate. Since this is a GET request with no //
1267 // sub-ranges, we can do a weak validation. //
1268 ////////////////////////////////////////////////////////////////////////
1269 if (do_strings_match_weakly(raw_etags, raw_etags_len, comma_sep_tag_list, comma_sep_tag_list_len)) {
1270 return HTTP_STATUS_NOT_MODIFIED;
1271 } else {
1272 return response->status_get();
1273 }
1274 }
1275 }
1276
1277 // If-Modified-Since //
1278 if (request->presence(MIME_PRESENCE_IF_MODIFIED_SINCE)) {
1279 if (response->presence(MIME_PRESENCE_LAST_MODIFIED)) {
1280 ink_time_t lm_value = response->get_last_modified();
1281
1282 // we won't return NOT_MODIFIED if Last-modified is too recent
1283 if ((lm_value == 0) || (request->get_if_modified_since() < lm_value)) {
1284 return response->status_get();
1285 }
1286
1287 response_code = HTTP_STATUS_NOT_MODIFIED;
1288 } else if (response->presence(MIME_PRESENCE_DATE)) {
1289 ink_time_t date_value = response->get_date();
1290
1291 // we won't return NOT_MODIFIED if Date is too recent
1292 if ((date_value == 0) || (request->get_if_modified_since() < date_value)) {
1293 return response->status_get();
1294 }
1295
1296 response_code = HTTP_STATUS_NOT_MODIFIED;
1297 } else {
1298 // we won't return NOT_MODIFIED if received time is too recent
1299 if (request->get_if_modified_since() < response_received_time) {
1300 return response->status_get();
1301 }
1302
1303 response_code = HTTP_STATUS_NOT_MODIFIED;
1304 }
1305 }
1306
1307 // There is no If-none-match, and If-modified-since failed,
1308 // so return NOT_MODIFIED
1309 if (response_code != HTTP_STATUS_NONE) {
1310 return response_code;
1311 }
1312
1313 // If-Match: must match strongly //
1314 if (request->presence(MIME_PRESENCE_IF_MATCH)) {
1315 int raw_etags_len = 0;
1316 int comma_sep_tag_list_len = 0;
1317 const char *raw_etags = response->value_get(MIME_FIELD_ETAG, MIME_LEN_ETAG, &raw_etags_len);
1318 const char *comma_sep_tag_list = nullptr;
1319
1320 if (raw_etags) {
1321 comma_sep_tag_list = request->value_get(MIME_FIELD_IF_MATCH, MIME_LEN_IF_MATCH, &comma_sep_tag_list_len);
1322 }
1323
1324 if (!comma_sep_tag_list) {
1325 comma_sep_tag_list = "";
1326 comma_sep_tag_list_len = 0;
1327 }
1328
1329 if (!raw_etags) {
1330 raw_etags = "";
1331 raw_etags_len = 0;
1332 }
1333
1334 if (do_strings_match_strongly(raw_etags, raw_etags_len, comma_sep_tag_list, comma_sep_tag_list_len)) {
1335 return response->status_get();
1336 } else {
1337 return HTTP_STATUS_PRECONDITION_FAILED;
1338 }
1339 }
1340
1341 // If-Unmodified-Since //
1342 if (request->presence(MIME_PRESENCE_IF_UNMODIFIED_SINCE)) {
1343 // lm_value is zero if Last-modified not exists
1344 ink_time_t lm_value = response->get_last_modified();
1345
1346 // Condition fails if Last-modified not exists
1347 if ((request->get_if_unmodified_since() < lm_value) || (lm_value == 0)) {
1348 return HTTP_STATUS_PRECONDITION_FAILED;
1349 } else {
1350 response_code = response->status_get();
1351 }
1352 }
1353
1354 // There is no If-match, and If-unmodified-since passed,
1355 // so return the original response code
1356 if (response_code != HTTP_STATUS_NONE) {
1357 return response_code;
1358 }
1359
1360 // Handling If-Range header:
1361 // As Range && If-Range don't occur often, we want to put the
1362 // If-Range code in the end
1363 if (request->presence(MIME_PRESENCE_RANGE) && request->presence(MIME_PRESENCE_IF_RANGE)) {
1364 int raw_len, comma_sep_list_len;
1365
1366 const char *if_value = request->value_get(MIME_FIELD_IF_RANGE, MIME_LEN_IF_RANGE, &comma_sep_list_len);
1367
1368 // this is an ETag, similar to If-Match
1369 if (!if_value || if_value[0] == '"' || (comma_sep_list_len > 1 && if_value[1] == '/')) {
1370 if (!if_value) {
1371 if_value = "";
1372 comma_sep_list_len = 0;
1373 }
1374
1375 const char *raw_etags = response->value_get(MIME_FIELD_ETAG, MIME_LEN_ETAG, &raw_len);
1376
1377 if (!raw_etags) {
1378 raw_etags = "";
1379 raw_len = 0;
1380 }
1381
1382 if (do_strings_match_strongly(raw_etags, raw_len, if_value, comma_sep_list_len)) {
1383 return response->status_get();
1384 } else {
1385 return HTTP_STATUS_RANGE_NOT_SATISFIABLE;
1386 }
1387 }
1388 // this a Date, similar to If-Unmodified-Since but must be an exact match
1389 else {
1390 // lm_value is zero if Last-modified not exists
1391 ink_time_t lm_value = response->get_last_modified();
1392
1393 // condition fails if Last-modified not exists
1394 if ((request->get_if_range_date() != lm_value) || (lm_value == 0)) {
1395 return HTTP_STATUS_RANGE_NOT_SATISFIABLE;
1396 } else {
1397 return response->status_get();
1398 }
1399 }
1400 }
1401
1402 return response->status_get();
1403 }
1404