1 /** @file
2
3 A brief file description
4
5 @section license License
6
7 Licensed to the Apache Software Foundation (ASF) under one
8 or more contributor license agreements. See the NOTICE file
9 distributed with this work for additional information
10 regarding copyright ownership. The ASF licenses this file
11 to you under the Apache License, Version 2.0 (the
12 "License"); you may not use this file except in compliance
13 with the License. You may obtain a copy of the License at
14
15 http://www.apache.org/licenses/LICENSE-2.0
16
17 Unless required by applicable law or agreed to in writing, software
18 distributed under the License is distributed on an "AS IS" BASIS,
19 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
20 See the License for the specific language governing permissions and
21 limitations under the License.
22 */
23
#include "tscore/ink_platform.h"
#include "tscore/HashFNV.h"
#include "tscore/Diags.h"
#include "tscore/ink_memory.h"
#include <cstdio>
#include <strings.h>
#include "tscore/Allocator.h"
#include "HTTP.h"
#include "HdrToken.h"
#include "MIME.h"
#include "tscore/Regex.h"
#include "URL.h"
35
/*
  Any string added here SHOULD also be added to _hdrtoken_commonly_tokenized_strs, with the same ordering.
  ** Important: ordering matters. **

  A regexp like 'Accept' has to come after the "greedier" choices so that it does not match 'Accept-Ranges'
  earlier than it should. The regexps are anchored at the start (^Accept), but I don't see a way with the
  current system to match the word ONLY without turning _hdrtoken_strs into real PCRE patterns, and that
  would break the hashing, because
  hdrtoken_hash("^Accept$") != hdrtoken_hash("Accept")

  So the current hack is to have "Accept" follow "Accept-.*". Lame, I know.

  /ericb
*/
49
50 static const char *_hdrtoken_strs[] = {
51 // MIME Field names
52 "Accept-Charset", "Accept-Encoding", "Accept-Language", "Accept-Ranges", "Accept", "Age", "Allow",
53 "Approved", // NNTP
54 "Authorization",
55 "Bytes", // NNTP
56 "Cache-Control", "Client-ip", "Connection", "Content-Base", "Content-Encoding", "Content-Language", "Content-Length",
57 "Content-Location", "Content-MD5", "Content-Range", "Content-Type",
58 "Control", // NNTP
59 "Cookie", "Date",
60 "Distribution", // NNTP
61 "Etag", "Expect", "Expires",
62 "Followup-To", // NNTP
63 "From", "Host", "If-Match", "If-Modified-Since", "If-None-Match", "If-Range", "If-Unmodified-Since", "Keep-Alive",
64 "Keywords", // NNTP
65 "Last-Modified",
66 "Lines", // NNTP
67 "Location", "Max-Forwards",
68 "Message-ID", // NNTP
69 "MIME-Version",
70 "Newsgroups", // NNTP
71 "Organization", // NNTP
72 "Path", // NNTP
73 "Pragma", "Proxy-Authenticate", "Proxy-Authorization", "Proxy-Connection", "Public", "Range",
74 "References", // NNTP
75 "Referer",
76 "Reply-To", // NNTP
77 "Retry-After",
78 "Sender", // NNTP
79 "Server", "Set-Cookie",
80 "Subject", // NNTP
81 "Summary", // NNTP
82 "Transfer-Encoding", "Upgrade", "User-Agent", "Vary", "Via", "Warning", "Www-Authenticate",
83 "Xref", // NNTP
84 "@Ats-Internal", // Internal Hack
85
86 // Accept-Encoding
87 "compress", "deflate", "gzip", "identity",
88
89 // Cache-Control flags
90 "max-age", "max-stale", "min-fresh", "must-revalidate", "no-cache", "no-store", "no-transform", "only-if-cached", "private",
91 "proxy-revalidate", "s-maxage", "need-revalidate-once",
92
93 // HTTP miscellaneous
94 "none", "chunked", "close",
95
96 // WS
97 "websocket", "Sec-WebSocket-Key", "Sec-WebSocket-Version",
98
99 // HTTP/2 cleartext
100 MIME_UPGRADE_H2C_TOKEN, "HTTP2-Settings",
101
102 // URL schemes
103 "file", "ftp", "gopher", "https", "http", "mailto", "news", "nntp", "prospero", "telnet", "tunnel", "wais", "pnm", "rtspu",
104 "rtsp", "mmsu", "mmst", "mms", "wss", "ws",
105
106 // HTTP methods
107 "CONNECT", "DELETE", "GET", "POST", "HEAD", "OPTIONS", "PURGE", "PUT", "TRACE", "PUSH",
108
109 // Header extensions
110 "X-ID", "X-Forwarded-For", "TE", "Strict-Transport-Security", "100-continue",
111
112 // RFC-2739
113 "Forwarded",
114
115 // RFC-8470
116 "Early-Data"};
117
118 static HdrTokenTypeBinding _hdrtoken_strs_type_initializers[] = {
119 {"file", HDRTOKEN_TYPE_SCHEME},
120 {"ftp", HDRTOKEN_TYPE_SCHEME},
121 {"gopher", HDRTOKEN_TYPE_SCHEME},
122 {"http", HDRTOKEN_TYPE_SCHEME},
123 {"https", HDRTOKEN_TYPE_SCHEME},
124 {"mailto", HDRTOKEN_TYPE_SCHEME},
125 {"news", HDRTOKEN_TYPE_SCHEME},
126 {"nntp", HDRTOKEN_TYPE_SCHEME},
127 {"prospero", HDRTOKEN_TYPE_SCHEME},
128 {"telnet", HDRTOKEN_TYPE_SCHEME},
129 {"tunnel", HDRTOKEN_TYPE_SCHEME},
130 {"wais", HDRTOKEN_TYPE_SCHEME},
131 {"pnm", HDRTOKEN_TYPE_SCHEME},
132 {"rtsp", HDRTOKEN_TYPE_SCHEME},
133 {"rtspu", HDRTOKEN_TYPE_SCHEME},
134 {"mms", HDRTOKEN_TYPE_SCHEME},
135 {"mmsu", HDRTOKEN_TYPE_SCHEME},
136 {"mmst", HDRTOKEN_TYPE_SCHEME},
137 {"wss", HDRTOKEN_TYPE_SCHEME},
138 {"ws", HDRTOKEN_TYPE_SCHEME},
139
140 {"CONNECT", HDRTOKEN_TYPE_METHOD},
141 {"DELETE", HDRTOKEN_TYPE_METHOD},
142 {"GET", HDRTOKEN_TYPE_METHOD},
143 {"HEAD", HDRTOKEN_TYPE_METHOD},
144 {"OPTIONS", HDRTOKEN_TYPE_METHOD},
145 {"POST", HDRTOKEN_TYPE_METHOD},
146 {"PURGE", HDRTOKEN_TYPE_METHOD},
147 {"PUT", HDRTOKEN_TYPE_METHOD},
148 {"TRACE", HDRTOKEN_TYPE_METHOD},
149 {"PUSH", HDRTOKEN_TYPE_METHOD},
150
151 {"max-age", HDRTOKEN_TYPE_CACHE_CONTROL},
152 {"max-stale", HDRTOKEN_TYPE_CACHE_CONTROL},
153 {"min-fresh", HDRTOKEN_TYPE_CACHE_CONTROL},
154 {"must-revalidate", HDRTOKEN_TYPE_CACHE_CONTROL},
155 {"no-cache", HDRTOKEN_TYPE_CACHE_CONTROL},
156 {"no-store", HDRTOKEN_TYPE_CACHE_CONTROL},
157 {"no-transform", HDRTOKEN_TYPE_CACHE_CONTROL},
158 {"only-if-cached", HDRTOKEN_TYPE_CACHE_CONTROL},
159 {"private", HDRTOKEN_TYPE_CACHE_CONTROL},
160 {"proxy-revalidate", HDRTOKEN_TYPE_CACHE_CONTROL},
161 {"public", HDRTOKEN_TYPE_CACHE_CONTROL},
162 {"s-maxage", HDRTOKEN_TYPE_CACHE_CONTROL},
163 {"need-revalidate-once", HDRTOKEN_TYPE_CACHE_CONTROL},
164
165 {(char *)nullptr, static_cast<HdrTokenType>(0)},
166 };
167
168 static HdrTokenFieldInfo _hdrtoken_strs_field_initializers[] = {
169 {"Accept", MIME_SLOTID_ACCEPT, MIME_PRESENCE_ACCEPT, (HTIF_COMMAS | HTIF_MULTVALS)},
170 {"Accept-Charset", MIME_SLOTID_ACCEPT_CHARSET, MIME_PRESENCE_ACCEPT_CHARSET, (HTIF_COMMAS | HTIF_MULTVALS)},
171 {"Accept-Encoding", MIME_SLOTID_ACCEPT_ENCODING, MIME_PRESENCE_ACCEPT_ENCODING, (HTIF_COMMAS | HTIF_MULTVALS)},
172 {"Accept-Language", MIME_SLOTID_ACCEPT_LANGUAGE, MIME_PRESENCE_ACCEPT_LANGUAGE, (HTIF_COMMAS | HTIF_MULTVALS)},
173 {"Accept-Ranges", MIME_SLOTID_NONE, MIME_PRESENCE_ACCEPT_RANGES, (HTIF_COMMAS | HTIF_MULTVALS)},
174 {"Age", MIME_SLOTID_AGE, MIME_PRESENCE_AGE, HTIF_NONE},
175 {"Allow", MIME_SLOTID_NONE, MIME_PRESENCE_ALLOW, (HTIF_COMMAS | HTIF_MULTVALS)},
176 {"Approved", MIME_SLOTID_NONE, MIME_PRESENCE_NONE, HTIF_NONE},
177 {"Authorization", MIME_SLOTID_AUTHORIZATION, MIME_PRESENCE_AUTHORIZATION, HTIF_NONE},
178 {"Bytes", MIME_SLOTID_NONE, MIME_PRESENCE_BYTES, HTIF_NONE},
179 {"Cache-Control", MIME_SLOTID_CACHE_CONTROL, MIME_PRESENCE_CACHE_CONTROL, (HTIF_COMMAS | HTIF_MULTVALS)},
180 {"Client-ip", MIME_SLOTID_CLIENT_IP, MIME_PRESENCE_CLIENT_IP, HTIF_NONE},
181 {"Connection", MIME_SLOTID_CONNECTION, MIME_PRESENCE_CONNECTION, (HTIF_COMMAS | HTIF_MULTVALS | HTIF_HOPBYHOP)},
182 {"Content-Base", MIME_SLOTID_NONE, MIME_PRESENCE_NONE, HTIF_NONE},
183 {"Content-Encoding", MIME_SLOTID_CONTENT_ENCODING, MIME_PRESENCE_CONTENT_ENCODING, (HTIF_COMMAS | HTIF_MULTVALS)},
184 {"Content-Language", MIME_SLOTID_CONTENT_LANGUAGE, MIME_PRESENCE_CONTENT_LANGUAGE, (HTIF_COMMAS | HTIF_MULTVALS)},
185 {"Content-Length", MIME_SLOTID_CONTENT_LENGTH, MIME_PRESENCE_CONTENT_LENGTH, HTIF_NONE},
186 {"Content-Location", MIME_SLOTID_NONE, MIME_PRESENCE_CONTENT_LOCATION, HTIF_NONE},
187 {"Content-MD5", MIME_SLOTID_NONE, MIME_PRESENCE_CONTENT_MD5, HTIF_NONE},
188 {"Content-Range", MIME_SLOTID_NONE, MIME_PRESENCE_CONTENT_RANGE, HTIF_NONE},
189 {"Content-Type", MIME_SLOTID_CONTENT_TYPE, MIME_PRESENCE_CONTENT_TYPE, HTIF_NONE},
190 {"Control", MIME_SLOTID_NONE, MIME_PRESENCE_NONE, HTIF_NONE},
191 {"Cookie", MIME_SLOTID_COOKIE, MIME_PRESENCE_COOKIE, (HTIF_MULTVALS)},
192 {"Date", MIME_SLOTID_DATE, MIME_PRESENCE_DATE, HTIF_NONE},
193 {"Distribution", MIME_SLOTID_NONE, MIME_PRESENCE_NONE, HTIF_NONE},
194 {"Etag", MIME_SLOTID_NONE, MIME_PRESENCE_ETAG, HTIF_NONE},
195 {"Expires", MIME_SLOTID_EXPIRES, MIME_PRESENCE_EXPIRES, HTIF_NONE},
196 {"Followup-To", MIME_SLOTID_NONE, MIME_PRESENCE_NONE, HTIF_NONE},
197 {"From", MIME_SLOTID_NONE, MIME_PRESENCE_FROM, HTIF_NONE},
198 {"Host", MIME_SLOTID_NONE, MIME_PRESENCE_HOST, HTIF_NONE},
199 {"If-Match", MIME_SLOTID_IF_MATCH, MIME_PRESENCE_IF_MATCH, (HTIF_COMMAS | HTIF_MULTVALS)},
200 {"If-Modified-Since", MIME_SLOTID_IF_MODIFIED_SINCE, MIME_PRESENCE_IF_MODIFIED_SINCE, HTIF_NONE},
201 {"If-None-Match", MIME_SLOTID_IF_NONE_MATCH, MIME_PRESENCE_IF_NONE_MATCH, (HTIF_COMMAS | HTIF_MULTVALS)},
202 {"If-Range", MIME_SLOTID_IF_RANGE, MIME_PRESENCE_IF_RANGE, HTIF_NONE},
203 {"If-Unmodified-Since", MIME_SLOTID_IF_UNMODIFIED_SINCE, MIME_PRESENCE_IF_UNMODIFIED_SINCE, HTIF_NONE},
204 {"Keep-Alive", MIME_SLOTID_NONE, MIME_PRESENCE_KEEP_ALIVE, (HTIF_HOPBYHOP)},
205 {"Keywords", MIME_SLOTID_NONE, MIME_PRESENCE_KEYWORDS, HTIF_NONE},
206 {"Last-Modified", MIME_SLOTID_LAST_MODIFIED, MIME_PRESENCE_LAST_MODIFIED, HTIF_NONE},
207 {"Lines", MIME_SLOTID_NONE, MIME_PRESENCE_LINES, HTIF_NONE},
208 {"Location", MIME_SLOTID_NONE, MIME_PRESENCE_LOCATION, (HTIF_MULTVALS)},
209 {"Max-Forwards", MIME_SLOTID_NONE, MIME_PRESENCE_MAX_FORWARDS, HTIF_NONE},
210 {"Message-ID", MIME_SLOTID_NONE, MIME_PRESENCE_NONE, HTIF_NONE},
211 {"Newsgroups", MIME_SLOTID_NONE, MIME_PRESENCE_NONE, HTIF_NONE},
212 {"Organization", MIME_SLOTID_NONE, MIME_PRESENCE_NONE, HTIF_NONE},
213 {"Path", MIME_SLOTID_NONE, MIME_PRESENCE_PATH, HTIF_NONE},
214 {"Pragma", MIME_SLOTID_PRAGMA, MIME_PRESENCE_PRAGMA, (HTIF_COMMAS | HTIF_MULTVALS)},
215 {"Proxy-Authenticate", MIME_SLOTID_NONE, MIME_PRESENCE_PROXY_AUTHENTICATE, (HTIF_HOPBYHOP | HTIF_PROXYAUTH)},
216 {"Proxy-Authorization", MIME_SLOTID_NONE, MIME_PRESENCE_PROXY_AUTHORIZATION, (HTIF_HOPBYHOP | HTIF_PROXYAUTH)},
217 {"Proxy-Connection", MIME_SLOTID_PROXY_CONNECTION, MIME_PRESENCE_PROXY_CONNECTION, (HTIF_COMMAS | HTIF_MULTVALS | HTIF_HOPBYHOP)},
218 {"Public", MIME_SLOTID_NONE, MIME_PRESENCE_PUBLIC, (HTIF_COMMAS | HTIF_MULTVALS)},
219 {"Range", MIME_SLOTID_RANGE, MIME_PRESENCE_RANGE, (HTIF_COMMAS | HTIF_MULTVALS)},
220 {"References", MIME_SLOTID_NONE, MIME_PRESENCE_NONE, HTIF_NONE},
221 {"Referer", MIME_SLOTID_NONE, MIME_PRESENCE_REFERER, HTIF_NONE},
222 {"Reply-To", MIME_SLOTID_NONE, MIME_PRESENCE_NONE, HTIF_NONE},
223 {"Retry-After", MIME_SLOTID_NONE, MIME_PRESENCE_NONE, HTIF_NONE},
224 {"Sender", MIME_SLOTID_NONE, MIME_PRESENCE_NONE, HTIF_NONE},
225 {"Server", MIME_SLOTID_NONE, MIME_PRESENCE_SERVER, HTIF_NONE},
226 {"Set-Cookie", MIME_SLOTID_SET_COOKIE, MIME_PRESENCE_SET_COOKIE, (HTIF_MULTVALS)},
227 {"Strict-Transport-Security", MIME_SLOTID_NONE, MIME_PRESENCE_NONE, (HTIF_MULTVALS)},
228 {"Subject", MIME_SLOTID_NONE, MIME_PRESENCE_SUBJECT, HTIF_NONE},
229 {"Summary", MIME_SLOTID_NONE, MIME_PRESENCE_SUMMARY, HTIF_NONE},
230 {"TE", MIME_SLOTID_TE, MIME_PRESENCE_TE, (HTIF_COMMAS | HTIF_MULTVALS | HTIF_HOPBYHOP)},
231 {"Transfer-Encoding", MIME_SLOTID_TRANSFER_ENCODING, MIME_PRESENCE_TRANSFER_ENCODING,
232 (HTIF_COMMAS | HTIF_MULTVALS | HTIF_HOPBYHOP)},
233 {"Upgrade", MIME_SLOTID_NONE, MIME_PRESENCE_UPGRADE, (HTIF_COMMAS | HTIF_MULTVALS | HTIF_HOPBYHOP)},
234 {"User-Agent", MIME_SLOTID_USER_AGENT, MIME_PRESENCE_USER_AGENT, HTIF_NONE},
235 {"Vary", MIME_SLOTID_VARY, MIME_PRESENCE_VARY, (HTIF_COMMAS | HTIF_MULTVALS)},
236 {"Via", MIME_SLOTID_VIA, MIME_PRESENCE_VIA, (HTIF_COMMAS | HTIF_MULTVALS)},
237 {"Warning", MIME_SLOTID_NONE, MIME_PRESENCE_WARNING, (HTIF_COMMAS | HTIF_MULTVALS)},
238 {"Www-Authenticate", MIME_SLOTID_WWW_AUTHENTICATE, MIME_PRESENCE_WWW_AUTHENTICATE, HTIF_NONE},
239 {"Xref", MIME_SLOTID_NONE, MIME_PRESENCE_XREF, HTIF_NONE},
240 {"X-ID", MIME_SLOTID_NONE, MIME_PRESENCE_NONE, (HTIF_COMMAS | HTIF_MULTVALS | HTIF_HOPBYHOP)},
241 {"X-Forwarded-For", MIME_SLOTID_NONE, MIME_PRESENCE_NONE, (HTIF_COMMAS | HTIF_MULTVALS)},
242 {"Forwarded", MIME_SLOTID_NONE, MIME_PRESENCE_NONE, (HTIF_COMMAS | HTIF_MULTVALS)},
243 {"Sec-WebSocket-Key", MIME_SLOTID_NONE, MIME_PRESENCE_NONE, HTIF_NONE},
244 {"Sec-WebSocket-Version", MIME_SLOTID_NONE, MIME_PRESENCE_NONE, HTIF_NONE},
245 {nullptr, 0, 0, 0},
246 };
247
248 const char *_hdrtoken_strs_heap_f = nullptr; // storage first byte
249 const char *_hdrtoken_strs_heap_l = nullptr; // storage last byte
250
251 int hdrtoken_num_wks = SIZEOF(_hdrtoken_strs); // # of well-known strings
252
253 const char *hdrtoken_strs[SIZEOF(_hdrtoken_strs)]; // wks_idx -> heap ptr
254 int hdrtoken_str_lengths[SIZEOF(_hdrtoken_strs)]; // wks_idx -> length
255 HdrTokenType hdrtoken_str_token_types[SIZEOF(_hdrtoken_strs)]; // wks_idx -> token type
256 int32_t hdrtoken_str_slotids[SIZEOF(_hdrtoken_strs)]; // wks_idx -> slot id
257 uint64_t hdrtoken_str_masks[SIZEOF(_hdrtoken_strs)]; // wks_idx -> presence mask
258 uint32_t hdrtoken_str_flags[SIZEOF(_hdrtoken_strs)]; // wks_idx -> flags
259
260 DFA *hdrtoken_strs_dfa = nullptr;
261
/***********************************************************************
 *                                                                     *
 *                        H A S H   T A B L E                          *
 *                                                                     *
 ***********************************************************************/

// Number of buckets allocated for hdrtoken_hash_table.
#define HDRTOKEN_HASH_TABLE_SIZE 65536

// One bucket of the well-known-string hash table. An empty bucket has wks == nullptr
// (the whole table is zeroed by hdrtoken_hash_init() before it is filled).
struct HdrTokenHashBucket {
  const char *wks; // well-known string stored in this bucket
  uint32_t hash;   // full hash of that string, kept to confirm a lookup hit
};

// Direct-mapped table: one string per bucket, no chaining.
HdrTokenHashBucket hdrtoken_hash_table[HDRTOKEN_HASH_TABLE_SIZE];
276
/** Bit mask with the low @a x bits set. */
#define TINY_MASK(x) (((uint32_t)1 << (x)) - 1)

/**
  Reduce a 32-bit hash to a bucket index of hdrtoken_hash_table.

  The hash is XOR-ed with itself shifted right by 15 bits and the low 15 bits of the result
  are kept, so the returned index is always in the range [0, 32767].

  @param hash full 32-bit hash value.
  @return bucket index derived from @a hash.
*/
inline uint32_t
hash_to_slot(uint32_t hash)
{
  const uint32_t folded = hash ^ (hash >> 15);
  return folded & TINY_MASK(15);
}
287
288 inline uint32_t
hdrtoken_hash(const unsigned char * string,unsigned int length)289 hdrtoken_hash(const unsigned char *string, unsigned int length)
290 {
291 ATSHash32FNV1a fnv;
292 fnv.update(string, length, ATSHash::nocase());
293 fnv.final();
294 return fnv.get();
295 }
296
297 /*-------------------------------------------------------------------------
298 -------------------------------------------------------------------------*/
299
300 // WARNING: Indexes into this array are stored on disk for cached objects. New strings must be added at the end of the array to
301 // avoid changing the indexes of pre-existing entries, unless the cache format version number is increased.
302 //
303 static const char *_hdrtoken_commonly_tokenized_strs[] = {
304 // MIME Field names
305 "Accept-Charset", "Accept-Encoding", "Accept-Language", "Accept-Ranges", "Accept", "Age", "Allow",
306 "Approved", // NNTP
307 "Authorization",
308 "Bytes", // NNTP
309 "Cache-Control", "Client-ip", "Connection", "Content-Base", "Content-Encoding", "Content-Language", "Content-Length",
310 "Content-Location", "Content-MD5", "Content-Range", "Content-Type",
311 "Control", // NNTP
312 "Cookie", "Date",
313 "Distribution", // NNTP
314 "Etag", "Expect", "Expires",
315 "Followup-To", // NNTP
316 "From", "Host", "If-Match", "If-Modified-Since", "If-None-Match", "If-Range", "If-Unmodified-Since", "Keep-Alive",
317 "Keywords", // NNTP
318 "Last-Modified",
319 "Lines", // NNTP
320 "Location", "Max-Forwards",
321 "Message-ID", // NNTP
322 "MIME-Version",
323 "Newsgroups", // NNTP
324 "Organization", // NNTP
325 "Path", // NNTP
326 "Pragma", "Proxy-Authenticate", "Proxy-Authorization", "Proxy-Connection", "Public", "Range",
327 "References", // NNTP
328 "Referer",
329 "Reply-To", // NNTP
330 "Retry-After",
331 "Sender", // NNTP
332 "Server", "Set-Cookie",
333 "Subject", // NNTP
334 "Summary", // NNTP
335 "Transfer-Encoding", "Upgrade", "User-Agent", "Vary", "Via", "Warning", "Www-Authenticate",
336 "Xref", // NNTP
337 "@Ats-Internal", // Internal Hack
338
339 // Accept-Encoding
340 "compress", "deflate", "gzip", "identity",
341
342 // Cache-Control flags
343 "max-age", "max-stale", "min-fresh", "must-revalidate", "no-cache", "no-store", "no-transform", "only-if-cached", "private",
344 "proxy-revalidate", "s-maxage", "need-revalidate-once",
345
346 // HTTP miscellaneous
347 "none", "chunked", "close",
348
349 // WS
350 "websocket", "Sec-WebSocket-Key", "Sec-WebSocket-Version",
351
352 // HTTP/2 cleartext
353 MIME_UPGRADE_H2C_TOKEN, "HTTP2-Settings",
354
355 // URL schemes
356 "file", "ftp", "gopher", "https", "http", "mailto", "news", "nntp", "prospero", "telnet", "tunnel", "wais", "pnm", "rtspu",
357 "rtsp", "mmsu", "mmst", "mms", "wss", "ws",
358
359 // HTTP methods
360 "CONNECT", "DELETE", "GET", "POST", "HEAD", "OPTIONS", "PURGE", "PUT", "TRACE", "PUSH",
361
362 // Header extensions
363 "X-ID", "X-Forwarded-For", "TE", "Strict-Transport-Security", "100-continue",
364
365 // RFC-2739
366 "Forwarded",
367
368 // RFC-8470
369 "Early-Data"};
370
371 /*-------------------------------------------------------------------------
372 -------------------------------------------------------------------------*/
373
374 void
hdrtoken_hash_init()375 hdrtoken_hash_init()
376 {
377 uint32_t i;
378 int num_collisions;
379
380 memset(hdrtoken_hash_table, 0, sizeof(hdrtoken_hash_table));
381 num_collisions = 0;
382
383 for (i = 0; i < static_cast<int> SIZEOF(_hdrtoken_commonly_tokenized_strs); i++) {
384 // convert the common string to the well-known token
385 unsigned const char *wks;
386 int wks_idx =
387 hdrtoken_tokenize_dfa(_hdrtoken_commonly_tokenized_strs[i], static_cast<int>(strlen(_hdrtoken_commonly_tokenized_strs[i])),
388 reinterpret_cast<const char **>(&wks));
389 ink_release_assert(wks_idx >= 0);
390
391 uint32_t hash = hdrtoken_hash(wks, hdrtoken_str_lengths[wks_idx]);
392 uint32_t slot = hash_to_slot(hash);
393
394 if (hdrtoken_hash_table[slot].wks) {
395 printf("ERROR: hdrtoken_hash_table[%u] collision: '%s' replacing '%s'\n", slot, reinterpret_cast<const char *>(wks),
396 hdrtoken_hash_table[slot].wks);
397 ++num_collisions;
398 }
399 hdrtoken_hash_table[slot].wks = reinterpret_cast<const char *>(wks);
400 hdrtoken_hash_table[slot].hash = hash;
401 }
402
403 if (num_collisions > 0) {
404 abort();
405 }
406 }
407
408 /***********************************************************************
409 * *
410 * M A I N H D R T O K E N C O D E *
411 * *
412 ***********************************************************************/
413
/**
  Round @a n up to a multiple of @a unit.

  @param n    value to round up.
  @param unit rounding granularity; must not be 0 (it is used as a divisor).
  @return the smallest multiple of @a unit that is not less than @a n; 0 when @a n is 0.
*/

static inline unsigned int
snap_up_to_multiple(unsigned int n, unsigned int unit)
{
  const unsigned int bumped = n + (unit - 1);

  // Dropping the remainder is the same as dividing and multiplying back.
  return bumped - (bumped % unit);
}
423
424 /**
425 */
426 void
hdrtoken_init()427 hdrtoken_init()
428 {
429 static int inited = 0;
430
431 int i;
432
433 if (!inited) {
434 inited = 1;
435
436 hdrtoken_strs_dfa = new DFA;
437 hdrtoken_strs_dfa->compile(_hdrtoken_strs, SIZEOF(_hdrtoken_strs), (RE_CASE_INSENSITIVE));
438
439 // all the tokenized hdrtoken strings are placed in a special heap,
440 // and each string is prepended with a HdrTokenHeapPrefix ---
441 // this makes it easy to tell that a string is a tokenized
442 // string (because its address is within the heap), and
443 // makes it easy to find the length, index, flags, mask, and
444 // other info from the prefix.
445
446 int heap_size = 0;
447 for (i = 0; i < static_cast<int> SIZEOF(_hdrtoken_strs); i++) {
448 hdrtoken_str_lengths[i] = static_cast<int>(strlen(_hdrtoken_strs[i]));
449 int sstr_len = snap_up_to_multiple(hdrtoken_str_lengths[i] + 1, sizeof(HdrTokenHeapPrefix));
450 int packed_prefix_str_len = sizeof(HdrTokenHeapPrefix) + sstr_len;
451 heap_size += packed_prefix_str_len;
452 }
453
454 _hdrtoken_strs_heap_f = static_cast<const char *>(ats_malloc(heap_size));
455 _hdrtoken_strs_heap_l = _hdrtoken_strs_heap_f + heap_size - 1;
456
457 char *heap_ptr = const_cast<char *>(_hdrtoken_strs_heap_f);
458
459 for (i = 0; i < static_cast<int> SIZEOF(_hdrtoken_strs); i++) {
460 HdrTokenHeapPrefix prefix;
461
462 memset(&prefix, 0, sizeof(HdrTokenHeapPrefix));
463
464 prefix.wks_idx = i;
465 prefix.wks_length = hdrtoken_str_lengths[i];
466 prefix.wks_token_type = HDRTOKEN_TYPE_OTHER; // default, can override later
467 prefix.wks_info.name = nullptr; // default, can override later
468 prefix.wks_info.slotid = MIME_SLOTID_NONE; // default, can override later
469 prefix.wks_info.mask = TOK_64_CONST(0); // default, can override later
470 prefix.wks_info.flags = HTIF_MULTVALS; // default, can override later
471
472 int sstr_len = snap_up_to_multiple(hdrtoken_str_lengths[i] + 1, sizeof(HdrTokenHeapPrefix));
473
474 *reinterpret_cast<HdrTokenHeapPrefix *>(heap_ptr) = prefix; // set string prefix
475 heap_ptr += sizeof(HdrTokenHeapPrefix); // advance heap ptr past index
476 hdrtoken_strs[i] = heap_ptr; // record string pointer
477 // coverity[secure_coding]
478 ink_strlcpy(const_cast<char *>(hdrtoken_strs[i]), _hdrtoken_strs[i],
479 heap_size - sizeof(HdrTokenHeapPrefix)); // copy string into heap
480 heap_ptr += sstr_len; // advance heap ptr past string
481 heap_size -= sstr_len;
482 }
483
484 // Set the token types for certain tokens
485 for (i = 0; _hdrtoken_strs_type_initializers[i].name != nullptr; i++) {
486 int wks_idx;
487 HdrTokenHeapPrefix *prefix;
488
489 wks_idx = hdrtoken_tokenize_dfa(_hdrtoken_strs_type_initializers[i].name,
490 static_cast<int>(strlen(_hdrtoken_strs_type_initializers[i].name)));
491
492 ink_assert((wks_idx >= 0) && (wks_idx < (int)SIZEOF(hdrtoken_strs)));
493 // coverity[negative_returns]
494 prefix = hdrtoken_index_to_prefix(wks_idx);
495 prefix->wks_token_type = _hdrtoken_strs_type_initializers[i].type;
496 }
497
498 // Set special data for field names
499 for (i = 0; _hdrtoken_strs_field_initializers[i].name != nullptr; i++) {
500 int wks_idx;
501 HdrTokenHeapPrefix *prefix;
502
503 wks_idx = hdrtoken_tokenize_dfa(_hdrtoken_strs_field_initializers[i].name,
504 static_cast<int>(strlen(_hdrtoken_strs_field_initializers[i].name)));
505
506 ink_assert((wks_idx >= 0) && (wks_idx < (int)SIZEOF(hdrtoken_strs)));
507 prefix = hdrtoken_index_to_prefix(wks_idx);
508 prefix->wks_info.slotid = _hdrtoken_strs_field_initializers[i].slotid;
509 prefix->wks_info.flags = _hdrtoken_strs_field_initializers[i].flags;
510 prefix->wks_info.mask = _hdrtoken_strs_field_initializers[i].mask;
511 }
512
513 for (i = 0; i < static_cast<int> SIZEOF(_hdrtoken_strs); i++) {
514 HdrTokenHeapPrefix *prefix = hdrtoken_index_to_prefix(i);
515 prefix->wks_info.name = hdrtoken_strs[i];
516 hdrtoken_str_token_types[i] = prefix->wks_token_type; // parallel array for speed
517 hdrtoken_str_slotids[i] = prefix->wks_info.slotid; // parallel array for speed
518 hdrtoken_str_masks[i] = prefix->wks_info.mask; // parallel array for speed
519 hdrtoken_str_flags[i] = prefix->wks_info.flags; // parallel array for speed
520 }
521
522 hdrtoken_hash_init();
523 }
524 }
525
526 /*-------------------------------------------------------------------------
527 -------------------------------------------------------------------------*/
528
529 int
hdrtoken_tokenize_dfa(const char * string,int string_len,const char ** wks_string_out)530 hdrtoken_tokenize_dfa(const char *string, int string_len, const char **wks_string_out)
531 {
532 int wks_idx;
533
534 wks_idx = hdrtoken_strs_dfa->match({string, size_t(string_len)});
535
536 if (wks_idx < 0) {
537 wks_idx = -1;
538 }
539 if (wks_string_out) {
540 if (wks_idx >= 0) {
541 *wks_string_out = hdrtoken_index_to_wks(wks_idx);
542 } else {
543 *wks_string_out = nullptr;
544 }
545 }
546 // printf("hdrtoken_tokenize_dfa(%d,*s) - return %d\n",string_len,string,wks_idx);
547
548 return wks_idx;
549 }
550
551 /*-------------------------------------------------------------------------
552 Have to work around that methods are case insensitive while the DFA is
553 case insensitive.
554 -------------------------------------------------------------------------*/
555
556 int
hdrtoken_method_tokenize(const char * string,int string_len)557 hdrtoken_method_tokenize(const char *string, int string_len)
558 {
559 const char *string_out;
560 int retval = -1;
561 if (hdrtoken_is_wks(string)) {
562 retval = hdrtoken_wks_to_index(string);
563 return retval;
564 }
565 retval = hdrtoken_tokenize(string, string_len, &string_out);
566 if (retval >= 0) {
567 if (strncmp(string, string_out, string_len) != 0) {
568 // Not a case match
569 retval = -1;
570 }
571 }
572 return retval;
573 }
574
575 /*-------------------------------------------------------------------------
576 -------------------------------------------------------------------------*/
577
578 int
hdrtoken_tokenize(const char * string,int string_len,const char ** wks_string_out)579 hdrtoken_tokenize(const char *string, int string_len, const char **wks_string_out)
580 {
581 int wks_idx;
582 HdrTokenHashBucket *bucket;
583
584 ink_assert(string != nullptr);
585
586 if (hdrtoken_is_wks(string)) {
587 wks_idx = hdrtoken_wks_to_index(string);
588 if (wks_string_out) {
589 *wks_string_out = string;
590 }
591 return wks_idx;
592 }
593
594 uint32_t hash = hdrtoken_hash(reinterpret_cast<const unsigned char *>(string), static_cast<unsigned int>(string_len));
595 uint32_t slot = hash_to_slot(hash);
596
597 bucket = &(hdrtoken_hash_table[slot]);
598 if ((bucket->wks != nullptr) && (bucket->hash == hash) && (hdrtoken_wks_to_length(bucket->wks) == string_len)) {
599 wks_idx = hdrtoken_wks_to_index(bucket->wks);
600 if (wks_string_out) {
601 *wks_string_out = bucket->wks;
602 }
603 return wks_idx;
604 }
605
606 Debug("hdr_token", "Did not find a WKS for '%.*s'", string_len, string);
607 return -1;
608 }
609
610 /*-------------------------------------------------------------------------
611 -------------------------------------------------------------------------*/
612
613 const char *
hdrtoken_string_to_wks(const char * string)614 hdrtoken_string_to_wks(const char *string)
615 {
616 const char *wks = nullptr;
617 hdrtoken_tokenize(string, static_cast<int>(strlen(string)), &wks);
618 return wks;
619 }
620
621 /*-------------------------------------------------------------------------
622 -------------------------------------------------------------------------*/
623
624 const char *
hdrtoken_string_to_wks(const char * string,int length)625 hdrtoken_string_to_wks(const char *string, int length)
626 {
627 const char *wks = nullptr;
628 hdrtoken_tokenize(string, length, &wks);
629 return wks;
630 }
631