1 /** @file
2 
  Well-known string (WKS) tokenization for header field names, field values, URL schemes and HTTP methods.
4 
5   @section license License
6 
7   Licensed to the Apache Software Foundation (ASF) under one
8   or more contributor license agreements.  See the NOTICE file
9   distributed with this work for additional information
10   regarding copyright ownership.  The ASF licenses this file
11   to you under the Apache License, Version 2.0 (the
12   "License"); you may not use this file except in compliance
13   with the License.  You may obtain a copy of the License at
14 
15       http://www.apache.org/licenses/LICENSE-2.0
16 
17   Unless required by applicable law or agreed to in writing, software
18   distributed under the License is distributed on an "AS IS" BASIS,
19   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
20   See the License for the specific language governing permissions and
21   limitations under the License.
22  */
23 
#include "tscore/ink_platform.h"
#include "tscore/HashFNV.h"
#include "tscore/Diags.h"
#include "tscore/ink_memory.h"
#include <cstdio>
#include <strings.h>
#include "tscore/Allocator.h"
#include "HTTP.h"
#include "HdrToken.h"
#include "MIME.h"
#include "tscore/Regex.h"
#include "URL.h"
35 
36 /*
37  You SHOULD add to _hdrtoken_commonly_tokenized_strs, with the same ordering
38  ** important, ordering matters **
39 
 You want a regexp like 'Accept' after "greedier" choices so it doesn't match 'Accept-Ranges' earlier than
 it should. The regexps are anchored (^Accept), but I don't see a way with the current system to
 match the word ONLY without making _hdrtoken_strs a real PCRE, and that would break the hashing:
 hdrtoken_hash("^Accept$") != hdrtoken_hash("Accept")
44 
45  So, the current hack is to have "Accept" follow "Accept-.*", lame, I know
46 
47   /ericb
48 */
49 
/**
  Master list of well-known strings (WKS).

  hdrtoken_init() compiles this array into hdrtoken_strs_dfa (case-insensitively) and copies
  every entry into the token heap; an entry's position in this array becomes its wks_idx.

  Ordering matters: a string that is a prefix of another entry (e.g. "Accept" vs "Accept-Ranges",
  "http" vs "https", "ws" vs "wss") is listed after the longer one.
*/
static const char *_hdrtoken_strs[] = {
  // MIME Field names
  "Accept-Charset", "Accept-Encoding", "Accept-Language", "Accept-Ranges", "Accept", "Age", "Allow",
  "Approved", // NNTP
  "Authorization",
  "Bytes", // NNTP
  "Cache-Control", "Client-ip", "Connection", "Content-Base", "Content-Encoding", "Content-Language", "Content-Length",
  "Content-Location", "Content-MD5", "Content-Range", "Content-Type",
  "Control", // NNTP
  "Cookie", "Date",
  "Distribution", // NNTP
  "Etag", "Expect", "Expires",
  "Followup-To", // NNTP
  "From", "Host", "If-Match", "If-Modified-Since", "If-None-Match", "If-Range", "If-Unmodified-Since", "Keep-Alive",
  "Keywords", // NNTP
  "Last-Modified",
  "Lines", // NNTP
  "Location", "Max-Forwards",
  "Message-ID", // NNTP
  "MIME-Version",
  "Newsgroups",   // NNTP
  "Organization", // NNTP
  "Path",         // NNTP
  "Pragma", "Proxy-Authenticate", "Proxy-Authorization", "Proxy-Connection", "Public", "Range",
  "References", // NNTP
  "Referer",
  "Reply-To", // NNTP
  "Retry-After",
  "Sender", // NNTP
  "Server", "Set-Cookie",
  "Subject", // NNTP
  "Summary", // NNTP
  "Transfer-Encoding", "Upgrade", "User-Agent", "Vary", "Via", "Warning", "Www-Authenticate",
  "Xref",          // NNTP
  "@Ats-Internal", // Internal Hack

  // Accept-Encoding
  "compress", "deflate", "gzip", "identity",

  // Cache-Control flags
  "max-age", "max-stale", "min-fresh", "must-revalidate", "no-cache", "no-store", "no-transform", "only-if-cached", "private",
  "proxy-revalidate", "s-maxage", "need-revalidate-once",

  // HTTP miscellaneous
  "none", "chunked", "close",

  // WS
  "websocket", "Sec-WebSocket-Key", "Sec-WebSocket-Version",

  // HTTP/2 cleartext
  MIME_UPGRADE_H2C_TOKEN, "HTTP2-Settings",

  // URL schemes
  "file", "ftp", "gopher", "https", "http", "mailto", "news", "nntp", "prospero", "telnet", "tunnel", "wais", "pnm", "rtspu",
  "rtsp", "mmsu", "mmst", "mms", "wss", "ws",

  // HTTP methods
  "CONNECT", "DELETE", "GET", "POST", "HEAD", "OPTIONS", "PURGE", "PUT", "TRACE", "PUSH",

  // Header extensions
  "X-ID", "X-Forwarded-For", "TE", "Strict-Transport-Security", "100-continue",

  // RFC 7239 (Forwarded HTTP Extension)
  "Forwarded",

  // RFC 8470 (Using Early Data in HTTP)
  "Early-Data"};
117 
/**
  Token-type overrides applied by hdrtoken_init().

  Each name is resolved with hdrtoken_tokenize_dfa() and the wks_token_type of the matching
  heap prefix is set to the paired type. Strings not listed here keep HDRTOKEN_TYPE_OTHER.
  The list is terminated by an entry whose name is nullptr.
*/
static HdrTokenTypeBinding _hdrtoken_strs_type_initializers[] = {
  // URL schemes
  {"file", HDRTOKEN_TYPE_SCHEME},
  {"ftp", HDRTOKEN_TYPE_SCHEME},
  {"gopher", HDRTOKEN_TYPE_SCHEME},
  {"http", HDRTOKEN_TYPE_SCHEME},
  {"https", HDRTOKEN_TYPE_SCHEME},
  {"mailto", HDRTOKEN_TYPE_SCHEME},
  {"news", HDRTOKEN_TYPE_SCHEME},
  {"nntp", HDRTOKEN_TYPE_SCHEME},
  {"prospero", HDRTOKEN_TYPE_SCHEME},
  {"telnet", HDRTOKEN_TYPE_SCHEME},
  {"tunnel", HDRTOKEN_TYPE_SCHEME},
  {"wais", HDRTOKEN_TYPE_SCHEME},
  {"pnm", HDRTOKEN_TYPE_SCHEME},
  {"rtsp", HDRTOKEN_TYPE_SCHEME},
  {"rtspu", HDRTOKEN_TYPE_SCHEME},
  {"mms", HDRTOKEN_TYPE_SCHEME},
  {"mmsu", HDRTOKEN_TYPE_SCHEME},
  {"mmst", HDRTOKEN_TYPE_SCHEME},
  {"wss", HDRTOKEN_TYPE_SCHEME},
  {"ws", HDRTOKEN_TYPE_SCHEME},

  // HTTP methods
  {"CONNECT", HDRTOKEN_TYPE_METHOD},
  {"DELETE", HDRTOKEN_TYPE_METHOD},
  {"GET", HDRTOKEN_TYPE_METHOD},
  {"HEAD", HDRTOKEN_TYPE_METHOD},
  {"OPTIONS", HDRTOKEN_TYPE_METHOD},
  {"POST", HDRTOKEN_TYPE_METHOD},
  {"PURGE", HDRTOKEN_TYPE_METHOD},
  {"PUT", HDRTOKEN_TYPE_METHOD},
  {"TRACE", HDRTOKEN_TYPE_METHOD},
  {"PUSH", HDRTOKEN_TYPE_METHOD},

  // Cache-Control directives
  {"max-age", HDRTOKEN_TYPE_CACHE_CONTROL},
  {"max-stale", HDRTOKEN_TYPE_CACHE_CONTROL},
  {"min-fresh", HDRTOKEN_TYPE_CACHE_CONTROL},
  {"must-revalidate", HDRTOKEN_TYPE_CACHE_CONTROL},
  {"no-cache", HDRTOKEN_TYPE_CACHE_CONTROL},
  {"no-store", HDRTOKEN_TYPE_CACHE_CONTROL},
  {"no-transform", HDRTOKEN_TYPE_CACHE_CONTROL},
  {"only-if-cached", HDRTOKEN_TYPE_CACHE_CONTROL},
  {"private", HDRTOKEN_TYPE_CACHE_CONTROL},
  {"proxy-revalidate", HDRTOKEN_TYPE_CACHE_CONTROL},
  // NOTE(review): _hdrtoken_strs has no lowercase "public" entry; presumably this resolves to the
  // "Public" field-name entry through the case-insensitive DFA -- confirm that is intended.
  {"public", HDRTOKEN_TYPE_CACHE_CONTROL},
  {"s-maxage", HDRTOKEN_TYPE_CACHE_CONTROL},
  {"need-revalidate-once", HDRTOKEN_TYPE_CACHE_CONTROL},

  // end-of-list sentinel
  {(char *)nullptr, static_cast<HdrTokenType>(0)},
};
167 
/**
  Per-field metadata applied by hdrtoken_init().

  Each name is resolved with hdrtoken_tokenize_dfa() and the slotid, mask and flags of the
  matching heap prefix are overwritten from the entry. Strings without an entry keep the
  defaults set in hdrtoken_init(): MIME_SLOTID_NONE, a zero mask and HTIF_MULTVALS.
  Entries appear to be laid out as {name, slot id, presence mask, flags}.
  The list is terminated by an entry whose name is nullptr.
*/
static HdrTokenFieldInfo _hdrtoken_strs_field_initializers[] = {
  {"Accept", MIME_SLOTID_ACCEPT, MIME_PRESENCE_ACCEPT, (HTIF_COMMAS | HTIF_MULTVALS)},
  {"Accept-Charset", MIME_SLOTID_ACCEPT_CHARSET, MIME_PRESENCE_ACCEPT_CHARSET, (HTIF_COMMAS | HTIF_MULTVALS)},
  {"Accept-Encoding", MIME_SLOTID_ACCEPT_ENCODING, MIME_PRESENCE_ACCEPT_ENCODING, (HTIF_COMMAS | HTIF_MULTVALS)},
  {"Accept-Language", MIME_SLOTID_ACCEPT_LANGUAGE, MIME_PRESENCE_ACCEPT_LANGUAGE, (HTIF_COMMAS | HTIF_MULTVALS)},
  {"Accept-Ranges", MIME_SLOTID_NONE, MIME_PRESENCE_ACCEPT_RANGES, (HTIF_COMMAS | HTIF_MULTVALS)},
  {"Age", MIME_SLOTID_AGE, MIME_PRESENCE_AGE, HTIF_NONE},
  {"Allow", MIME_SLOTID_NONE, MIME_PRESENCE_ALLOW, (HTIF_COMMAS | HTIF_MULTVALS)},
  {"Approved", MIME_SLOTID_NONE, MIME_PRESENCE_NONE, HTIF_NONE},
  {"Authorization", MIME_SLOTID_AUTHORIZATION, MIME_PRESENCE_AUTHORIZATION, HTIF_NONE},
  {"Bytes", MIME_SLOTID_NONE, MIME_PRESENCE_BYTES, HTIF_NONE},
  {"Cache-Control", MIME_SLOTID_CACHE_CONTROL, MIME_PRESENCE_CACHE_CONTROL, (HTIF_COMMAS | HTIF_MULTVALS)},
  {"Client-ip", MIME_SLOTID_CLIENT_IP, MIME_PRESENCE_CLIENT_IP, HTIF_NONE},
  {"Connection", MIME_SLOTID_CONNECTION, MIME_PRESENCE_CONNECTION, (HTIF_COMMAS | HTIF_MULTVALS | HTIF_HOPBYHOP)},
  {"Content-Base", MIME_SLOTID_NONE, MIME_PRESENCE_NONE, HTIF_NONE},
  {"Content-Encoding", MIME_SLOTID_CONTENT_ENCODING, MIME_PRESENCE_CONTENT_ENCODING, (HTIF_COMMAS | HTIF_MULTVALS)},
  {"Content-Language", MIME_SLOTID_CONTENT_LANGUAGE, MIME_PRESENCE_CONTENT_LANGUAGE, (HTIF_COMMAS | HTIF_MULTVALS)},
  {"Content-Length", MIME_SLOTID_CONTENT_LENGTH, MIME_PRESENCE_CONTENT_LENGTH, HTIF_NONE},
  {"Content-Location", MIME_SLOTID_NONE, MIME_PRESENCE_CONTENT_LOCATION, HTIF_NONE},
  {"Content-MD5", MIME_SLOTID_NONE, MIME_PRESENCE_CONTENT_MD5, HTIF_NONE},
  {"Content-Range", MIME_SLOTID_NONE, MIME_PRESENCE_CONTENT_RANGE, HTIF_NONE},
  {"Content-Type", MIME_SLOTID_CONTENT_TYPE, MIME_PRESENCE_CONTENT_TYPE, HTIF_NONE},
  {"Control", MIME_SLOTID_NONE, MIME_PRESENCE_NONE, HTIF_NONE},
  {"Cookie", MIME_SLOTID_COOKIE, MIME_PRESENCE_COOKIE, (HTIF_MULTVALS)},
  {"Date", MIME_SLOTID_DATE, MIME_PRESENCE_DATE, HTIF_NONE},
  {"Distribution", MIME_SLOTID_NONE, MIME_PRESENCE_NONE, HTIF_NONE},
  {"Etag", MIME_SLOTID_NONE, MIME_PRESENCE_ETAG, HTIF_NONE},
  {"Expires", MIME_SLOTID_EXPIRES, MIME_PRESENCE_EXPIRES, HTIF_NONE},
  {"Followup-To", MIME_SLOTID_NONE, MIME_PRESENCE_NONE, HTIF_NONE},
  {"From", MIME_SLOTID_NONE, MIME_PRESENCE_FROM, HTIF_NONE},
  {"Host", MIME_SLOTID_NONE, MIME_PRESENCE_HOST, HTIF_NONE},
  {"If-Match", MIME_SLOTID_IF_MATCH, MIME_PRESENCE_IF_MATCH, (HTIF_COMMAS | HTIF_MULTVALS)},
  {"If-Modified-Since", MIME_SLOTID_IF_MODIFIED_SINCE, MIME_PRESENCE_IF_MODIFIED_SINCE, HTIF_NONE},
  {"If-None-Match", MIME_SLOTID_IF_NONE_MATCH, MIME_PRESENCE_IF_NONE_MATCH, (HTIF_COMMAS | HTIF_MULTVALS)},
  {"If-Range", MIME_SLOTID_IF_RANGE, MIME_PRESENCE_IF_RANGE, HTIF_NONE},
  {"If-Unmodified-Since", MIME_SLOTID_IF_UNMODIFIED_SINCE, MIME_PRESENCE_IF_UNMODIFIED_SINCE, HTIF_NONE},
  {"Keep-Alive", MIME_SLOTID_NONE, MIME_PRESENCE_KEEP_ALIVE, (HTIF_HOPBYHOP)},
  {"Keywords", MIME_SLOTID_NONE, MIME_PRESENCE_KEYWORDS, HTIF_NONE},
  {"Last-Modified", MIME_SLOTID_LAST_MODIFIED, MIME_PRESENCE_LAST_MODIFIED, HTIF_NONE},
  {"Lines", MIME_SLOTID_NONE, MIME_PRESENCE_LINES, HTIF_NONE},
  {"Location", MIME_SLOTID_NONE, MIME_PRESENCE_LOCATION, (HTIF_MULTVALS)},
  {"Max-Forwards", MIME_SLOTID_NONE, MIME_PRESENCE_MAX_FORWARDS, HTIF_NONE},
  {"Message-ID", MIME_SLOTID_NONE, MIME_PRESENCE_NONE, HTIF_NONE},
  {"Newsgroups", MIME_SLOTID_NONE, MIME_PRESENCE_NONE, HTIF_NONE},
  {"Organization", MIME_SLOTID_NONE, MIME_PRESENCE_NONE, HTIF_NONE},
  {"Path", MIME_SLOTID_NONE, MIME_PRESENCE_PATH, HTIF_NONE},
  {"Pragma", MIME_SLOTID_PRAGMA, MIME_PRESENCE_PRAGMA, (HTIF_COMMAS | HTIF_MULTVALS)},
  {"Proxy-Authenticate", MIME_SLOTID_NONE, MIME_PRESENCE_PROXY_AUTHENTICATE, (HTIF_HOPBYHOP | HTIF_PROXYAUTH)},
  {"Proxy-Authorization", MIME_SLOTID_NONE, MIME_PRESENCE_PROXY_AUTHORIZATION, (HTIF_HOPBYHOP | HTIF_PROXYAUTH)},
  {"Proxy-Connection", MIME_SLOTID_PROXY_CONNECTION, MIME_PRESENCE_PROXY_CONNECTION, (HTIF_COMMAS | HTIF_MULTVALS | HTIF_HOPBYHOP)},
  {"Public", MIME_SLOTID_NONE, MIME_PRESENCE_PUBLIC, (HTIF_COMMAS | HTIF_MULTVALS)},
  {"Range", MIME_SLOTID_RANGE, MIME_PRESENCE_RANGE, (HTIF_COMMAS | HTIF_MULTVALS)},
  {"References", MIME_SLOTID_NONE, MIME_PRESENCE_NONE, HTIF_NONE},
  {"Referer", MIME_SLOTID_NONE, MIME_PRESENCE_REFERER, HTIF_NONE},
  {"Reply-To", MIME_SLOTID_NONE, MIME_PRESENCE_NONE, HTIF_NONE},
  {"Retry-After", MIME_SLOTID_NONE, MIME_PRESENCE_NONE, HTIF_NONE},
  {"Sender", MIME_SLOTID_NONE, MIME_PRESENCE_NONE, HTIF_NONE},
  {"Server", MIME_SLOTID_NONE, MIME_PRESENCE_SERVER, HTIF_NONE},
  {"Set-Cookie", MIME_SLOTID_SET_COOKIE, MIME_PRESENCE_SET_COOKIE, (HTIF_MULTVALS)},
  {"Strict-Transport-Security", MIME_SLOTID_NONE, MIME_PRESENCE_NONE, (HTIF_MULTVALS)},
  {"Subject", MIME_SLOTID_NONE, MIME_PRESENCE_SUBJECT, HTIF_NONE},
  {"Summary", MIME_SLOTID_NONE, MIME_PRESENCE_SUMMARY, HTIF_NONE},
  {"TE", MIME_SLOTID_TE, MIME_PRESENCE_TE, (HTIF_COMMAS | HTIF_MULTVALS | HTIF_HOPBYHOP)},
  {"Transfer-Encoding", MIME_SLOTID_TRANSFER_ENCODING, MIME_PRESENCE_TRANSFER_ENCODING,
   (HTIF_COMMAS | HTIF_MULTVALS | HTIF_HOPBYHOP)},
  {"Upgrade", MIME_SLOTID_NONE, MIME_PRESENCE_UPGRADE, (HTIF_COMMAS | HTIF_MULTVALS | HTIF_HOPBYHOP)},
  {"User-Agent", MIME_SLOTID_USER_AGENT, MIME_PRESENCE_USER_AGENT, HTIF_NONE},
  {"Vary", MIME_SLOTID_VARY, MIME_PRESENCE_VARY, (HTIF_COMMAS | HTIF_MULTVALS)},
  {"Via", MIME_SLOTID_VIA, MIME_PRESENCE_VIA, (HTIF_COMMAS | HTIF_MULTVALS)},
  {"Warning", MIME_SLOTID_NONE, MIME_PRESENCE_WARNING, (HTIF_COMMAS | HTIF_MULTVALS)},
  {"Www-Authenticate", MIME_SLOTID_WWW_AUTHENTICATE, MIME_PRESENCE_WWW_AUTHENTICATE, HTIF_NONE},
  {"Xref", MIME_SLOTID_NONE, MIME_PRESENCE_XREF, HTIF_NONE},
  {"X-ID", MIME_SLOTID_NONE, MIME_PRESENCE_NONE, (HTIF_COMMAS | HTIF_MULTVALS | HTIF_HOPBYHOP)},
  {"X-Forwarded-For", MIME_SLOTID_NONE, MIME_PRESENCE_NONE, (HTIF_COMMAS | HTIF_MULTVALS)},
  {"Forwarded", MIME_SLOTID_NONE, MIME_PRESENCE_NONE, (HTIF_COMMAS | HTIF_MULTVALS)},
  {"Sec-WebSocket-Key", MIME_SLOTID_NONE, MIME_PRESENCE_NONE, HTIF_NONE},
  {"Sec-WebSocket-Version", MIME_SLOTID_NONE, MIME_PRESENCE_NONE, HTIF_NONE},
  // end-of-list sentinel
  {nullptr, 0, 0, 0},
};
247 
// Bounds of the token heap allocated in hdrtoken_init(); both stay nullptr until it runs.
const char *_hdrtoken_strs_heap_f = nullptr; // storage first byte
const char *_hdrtoken_strs_heap_l = nullptr; // storage last byte

int hdrtoken_num_wks = SIZEOF(_hdrtoken_strs); // # of well-known strings

// Parallel arrays indexed by wks_idx (the position of a string in _hdrtoken_strs).
// All of them are filled in by hdrtoken_init().
const char *hdrtoken_strs[SIZEOF(_hdrtoken_strs)];             // wks_idx -> heap ptr
int hdrtoken_str_lengths[SIZEOF(_hdrtoken_strs)];              // wks_idx -> length
HdrTokenType hdrtoken_str_token_types[SIZEOF(_hdrtoken_strs)]; // wks_idx -> token type
int32_t hdrtoken_str_slotids[SIZEOF(_hdrtoken_strs)];          // wks_idx -> slot id
uint64_t hdrtoken_str_masks[SIZEOF(_hdrtoken_strs)];           // wks_idx -> presence mask
uint32_t hdrtoken_str_flags[SIZEOF(_hdrtoken_strs)];           // wks_idx -> flags

// DFA compiled from _hdrtoken_strs by hdrtoken_init(); nullptr until then.
DFA *hdrtoken_strs_dfa = nullptr;
261 
262 /***********************************************************************
263  *                                                                     *
264  *                        H A S H    T A B L E                         *
265  *                                                                     *
266  ***********************************************************************/
267 
// Number of buckets in hdrtoken_hash_table.
// NOTE(review): hash_to_slot() masks to 15 bits, so only slots 0..32767 of the 65536 are ever
// addressed by the code in this file -- confirm whether the larger size is intentional.
#define HDRTOKEN_HASH_TABLE_SIZE 65536

// One slot of the well-known-string hash table.
struct HdrTokenHashBucket {
  const char *wks; // heap pointer of the well-known string stored in this slot, nullptr if empty
  uint32_t hash;   // full 32-bit hash of that string, as returned by hdrtoken_hash()
};

// Direct-mapped table (no chaining or probing) populated by hdrtoken_hash_init().
HdrTokenHashBucket hdrtoken_hash_table[HDRTOKEN_HASH_TABLE_SIZE];
276 
/**
  Low-order bit mask: TINY_MASK(x) has the x least significant bits set.
**/
#define TINY_MASK(x) (((uint32_t)1 << (x)) - 1)

/**
  Reduce a 32-bit hash to a hdrtoken_hash_table slot number.

  The hash is XOR-folded with itself shifted right by 15 bits and the low
  15 bits of the result are kept, so the slot is always in [0, 32767].

  @param hash full 32-bit hash value.
  @return slot index derived from @a hash.
**/
inline uint32_t
hash_to_slot(uint32_t hash)
{
  const uint32_t folded = hash ^ (hash >> 15);

  return folded & TINY_MASK(15);
}
287 
288 inline uint32_t
hdrtoken_hash(const unsigned char * string,unsigned int length)289 hdrtoken_hash(const unsigned char *string, unsigned int length)
290 {
291   ATSHash32FNV1a fnv;
292   fnv.update(string, length, ATSHash::nocase());
293   fnv.final();
294   return fnv.get();
295 }
296 
297 /*-------------------------------------------------------------------------
298   -------------------------------------------------------------------------*/
299 
// WARNING:  Indexes into this array are stored on disk for cached objects.  New strings must be added at the end of the array to
// avoid changing the indexes of pre-existing entries, unless the cache format version number is increased.
//
// hdrtoken_hash_init() resolves every string in this array to its well-known token and enters
// that token into hdrtoken_hash_table, which is what hdrtoken_tokenize() consults.
//
static const char *_hdrtoken_commonly_tokenized_strs[] = {
  // MIME Field names
  "Accept-Charset", "Accept-Encoding", "Accept-Language", "Accept-Ranges", "Accept", "Age", "Allow",
  "Approved", // NNTP
  "Authorization",
  "Bytes", // NNTP
  "Cache-Control", "Client-ip", "Connection", "Content-Base", "Content-Encoding", "Content-Language", "Content-Length",
  "Content-Location", "Content-MD5", "Content-Range", "Content-Type",
  "Control", // NNTP
  "Cookie", "Date",
  "Distribution", // NNTP
  "Etag", "Expect", "Expires",
  "Followup-To", // NNTP
  "From", "Host", "If-Match", "If-Modified-Since", "If-None-Match", "If-Range", "If-Unmodified-Since", "Keep-Alive",
  "Keywords", // NNTP
  "Last-Modified",
  "Lines", // NNTP
  "Location", "Max-Forwards",
  "Message-ID", // NNTP
  "MIME-Version",
  "Newsgroups",   // NNTP
  "Organization", // NNTP
  "Path",         // NNTP
  "Pragma", "Proxy-Authenticate", "Proxy-Authorization", "Proxy-Connection", "Public", "Range",
  "References", // NNTP
  "Referer",
  "Reply-To", // NNTP
  "Retry-After",
  "Sender", // NNTP
  "Server", "Set-Cookie",
  "Subject", // NNTP
  "Summary", // NNTP
  "Transfer-Encoding", "Upgrade", "User-Agent", "Vary", "Via", "Warning", "Www-Authenticate",
  "Xref",          // NNTP
  "@Ats-Internal", // Internal Hack

  // Accept-Encoding
  "compress", "deflate", "gzip", "identity",

  // Cache-Control flags
  "max-age", "max-stale", "min-fresh", "must-revalidate", "no-cache", "no-store", "no-transform", "only-if-cached", "private",
  "proxy-revalidate", "s-maxage", "need-revalidate-once",

  // HTTP miscellaneous
  "none", "chunked", "close",

  // WS
  "websocket", "Sec-WebSocket-Key", "Sec-WebSocket-Version",

  // HTTP/2 cleartext
  MIME_UPGRADE_H2C_TOKEN, "HTTP2-Settings",

  // URL schemes
  "file", "ftp", "gopher", "https", "http", "mailto", "news", "nntp", "prospero", "telnet", "tunnel", "wais", "pnm", "rtspu",
  "rtsp", "mmsu", "mmst", "mms", "wss", "ws",

  // HTTP methods
  "CONNECT", "DELETE", "GET", "POST", "HEAD", "OPTIONS", "PURGE", "PUT", "TRACE", "PUSH",

  // Header extensions
  "X-ID", "X-Forwarded-For", "TE", "Strict-Transport-Security", "100-continue",

  // RFC 7239 (Forwarded HTTP Extension)
  "Forwarded",

  // RFC 8470 (Using Early Data in HTTP)
  "Early-Data"};
370 
371 /*-------------------------------------------------------------------------
372   -------------------------------------------------------------------------*/
373 
374 void
hdrtoken_hash_init()375 hdrtoken_hash_init()
376 {
377   uint32_t i;
378   int num_collisions;
379 
380   memset(hdrtoken_hash_table, 0, sizeof(hdrtoken_hash_table));
381   num_collisions = 0;
382 
383   for (i = 0; i < static_cast<int> SIZEOF(_hdrtoken_commonly_tokenized_strs); i++) {
384     // convert the common string to the well-known token
385     unsigned const char *wks;
386     int wks_idx =
387       hdrtoken_tokenize_dfa(_hdrtoken_commonly_tokenized_strs[i], static_cast<int>(strlen(_hdrtoken_commonly_tokenized_strs[i])),
388                             reinterpret_cast<const char **>(&wks));
389     ink_release_assert(wks_idx >= 0);
390 
391     uint32_t hash = hdrtoken_hash(wks, hdrtoken_str_lengths[wks_idx]);
392     uint32_t slot = hash_to_slot(hash);
393 
394     if (hdrtoken_hash_table[slot].wks) {
395       printf("ERROR: hdrtoken_hash_table[%u] collision: '%s' replacing '%s'\n", slot, reinterpret_cast<const char *>(wks),
396              hdrtoken_hash_table[slot].wks);
397       ++num_collisions;
398     }
399     hdrtoken_hash_table[slot].wks  = reinterpret_cast<const char *>(wks);
400     hdrtoken_hash_table[slot].hash = hash;
401   }
402 
403   if (num_collisions > 0) {
404     abort();
405   }
406 }
407 
408 /***********************************************************************
409  *                                                                     *
410  *                 M A I N    H D R T O K E N    C O D E               *
411  *                                                                     *
412  ***********************************************************************/
413 
/**
  Round @a n up to the nearest multiple of @a unit.

  @param n    value to round up.
  @param unit rounding granularity; must be non-zero.
  @return the smallest multiple of @a unit that is >= @a n (0 when @a n is 0).
          The result wraps only if that multiple itself does not fit in an unsigned int.
*/

static inline unsigned int
snap_up_to_multiple(unsigned int n, unsigned int unit)
{
  // Work from the remainder instead of computing n + (unit - 1): that intermediate
  // sum can wrap around even when the rounded result is representable.
  const unsigned int remainder = n % unit;

  return (remainder == 0) ? n : n + (unit - remainder);
}
423 
424 /**
425  */
426 void
hdrtoken_init()427 hdrtoken_init()
428 {
429   static int inited = 0;
430 
431   int i;
432 
433   if (!inited) {
434     inited = 1;
435 
436     hdrtoken_strs_dfa = new DFA;
437     hdrtoken_strs_dfa->compile(_hdrtoken_strs, SIZEOF(_hdrtoken_strs), (RE_CASE_INSENSITIVE));
438 
439     // all the tokenized hdrtoken strings are placed in a special heap,
440     // and each string is prepended with a HdrTokenHeapPrefix ---
441     // this makes it easy to tell that a string is a tokenized
442     // string (because its address is within the heap), and
443     // makes it easy to find the length, index, flags, mask, and
444     // other info from the prefix.
445 
446     int heap_size = 0;
447     for (i = 0; i < static_cast<int> SIZEOF(_hdrtoken_strs); i++) {
448       hdrtoken_str_lengths[i]   = static_cast<int>(strlen(_hdrtoken_strs[i]));
449       int sstr_len              = snap_up_to_multiple(hdrtoken_str_lengths[i] + 1, sizeof(HdrTokenHeapPrefix));
450       int packed_prefix_str_len = sizeof(HdrTokenHeapPrefix) + sstr_len;
451       heap_size += packed_prefix_str_len;
452     }
453 
454     _hdrtoken_strs_heap_f = static_cast<const char *>(ats_malloc(heap_size));
455     _hdrtoken_strs_heap_l = _hdrtoken_strs_heap_f + heap_size - 1;
456 
457     char *heap_ptr = const_cast<char *>(_hdrtoken_strs_heap_f);
458 
459     for (i = 0; i < static_cast<int> SIZEOF(_hdrtoken_strs); i++) {
460       HdrTokenHeapPrefix prefix;
461 
462       memset(&prefix, 0, sizeof(HdrTokenHeapPrefix));
463 
464       prefix.wks_idx         = i;
465       prefix.wks_length      = hdrtoken_str_lengths[i];
466       prefix.wks_token_type  = HDRTOKEN_TYPE_OTHER; // default, can override later
467       prefix.wks_info.name   = nullptr;             // default, can override later
468       prefix.wks_info.slotid = MIME_SLOTID_NONE;    // default, can override later
469       prefix.wks_info.mask   = TOK_64_CONST(0);     // default, can override later
470       prefix.wks_info.flags  = HTIF_MULTVALS;       // default, can override later
471 
472       int sstr_len = snap_up_to_multiple(hdrtoken_str_lengths[i] + 1, sizeof(HdrTokenHeapPrefix));
473 
474       *reinterpret_cast<HdrTokenHeapPrefix *>(heap_ptr) = prefix; // set string prefix
475       heap_ptr += sizeof(HdrTokenHeapPrefix);                     // advance heap ptr past index
476       hdrtoken_strs[i] = heap_ptr;                                // record string pointer
477       // coverity[secure_coding]
478       ink_strlcpy(const_cast<char *>(hdrtoken_strs[i]), _hdrtoken_strs[i],
479                   heap_size - sizeof(HdrTokenHeapPrefix)); // copy string into heap
480       heap_ptr += sstr_len;                                // advance heap ptr past string
481       heap_size -= sstr_len;
482     }
483 
484     // Set the token types for certain tokens
485     for (i = 0; _hdrtoken_strs_type_initializers[i].name != nullptr; i++) {
486       int wks_idx;
487       HdrTokenHeapPrefix *prefix;
488 
489       wks_idx = hdrtoken_tokenize_dfa(_hdrtoken_strs_type_initializers[i].name,
490                                       static_cast<int>(strlen(_hdrtoken_strs_type_initializers[i].name)));
491 
492       ink_assert((wks_idx >= 0) && (wks_idx < (int)SIZEOF(hdrtoken_strs)));
493       // coverity[negative_returns]
494       prefix                 = hdrtoken_index_to_prefix(wks_idx);
495       prefix->wks_token_type = _hdrtoken_strs_type_initializers[i].type;
496     }
497 
498     // Set special data for field names
499     for (i = 0; _hdrtoken_strs_field_initializers[i].name != nullptr; i++) {
500       int wks_idx;
501       HdrTokenHeapPrefix *prefix;
502 
503       wks_idx = hdrtoken_tokenize_dfa(_hdrtoken_strs_field_initializers[i].name,
504                                       static_cast<int>(strlen(_hdrtoken_strs_field_initializers[i].name)));
505 
506       ink_assert((wks_idx >= 0) && (wks_idx < (int)SIZEOF(hdrtoken_strs)));
507       prefix                  = hdrtoken_index_to_prefix(wks_idx);
508       prefix->wks_info.slotid = _hdrtoken_strs_field_initializers[i].slotid;
509       prefix->wks_info.flags  = _hdrtoken_strs_field_initializers[i].flags;
510       prefix->wks_info.mask   = _hdrtoken_strs_field_initializers[i].mask;
511     }
512 
513     for (i = 0; i < static_cast<int> SIZEOF(_hdrtoken_strs); i++) {
514       HdrTokenHeapPrefix *prefix  = hdrtoken_index_to_prefix(i);
515       prefix->wks_info.name       = hdrtoken_strs[i];
516       hdrtoken_str_token_types[i] = prefix->wks_token_type;  // parallel array for speed
517       hdrtoken_str_slotids[i]     = prefix->wks_info.slotid; // parallel array for speed
518       hdrtoken_str_masks[i]       = prefix->wks_info.mask;   // parallel array for speed
519       hdrtoken_str_flags[i]       = prefix->wks_info.flags;  // parallel array for speed
520     }
521 
522     hdrtoken_hash_init();
523   }
524 }
525 
526 /*-------------------------------------------------------------------------
527   -------------------------------------------------------------------------*/
528 
529 int
hdrtoken_tokenize_dfa(const char * string,int string_len,const char ** wks_string_out)530 hdrtoken_tokenize_dfa(const char *string, int string_len, const char **wks_string_out)
531 {
532   int wks_idx;
533 
534   wks_idx = hdrtoken_strs_dfa->match({string, size_t(string_len)});
535 
536   if (wks_idx < 0) {
537     wks_idx = -1;
538   }
539   if (wks_string_out) {
540     if (wks_idx >= 0) {
541       *wks_string_out = hdrtoken_index_to_wks(wks_idx);
542     } else {
543       *wks_string_out = nullptr;
544     }
545   }
546   // printf("hdrtoken_tokenize_dfa(%d,*s) - return %d\n",string_len,string,wks_idx);
547 
548   return wks_idx;
549 }
550 
551 /*-------------------------------------------------------------------------
552   Have to work around that methods are case insensitive while the DFA is
553   case insensitive.
554   -------------------------------------------------------------------------*/
555 
556 int
hdrtoken_method_tokenize(const char * string,int string_len)557 hdrtoken_method_tokenize(const char *string, int string_len)
558 {
559   const char *string_out;
560   int retval = -1;
561   if (hdrtoken_is_wks(string)) {
562     retval = hdrtoken_wks_to_index(string);
563     return retval;
564   }
565   retval = hdrtoken_tokenize(string, string_len, &string_out);
566   if (retval >= 0) {
567     if (strncmp(string, string_out, string_len) != 0) {
568       // Not a case match
569       retval = -1;
570     }
571   }
572   return retval;
573 }
574 
575 /*-------------------------------------------------------------------------
576   -------------------------------------------------------------------------*/
577 
578 int
hdrtoken_tokenize(const char * string,int string_len,const char ** wks_string_out)579 hdrtoken_tokenize(const char *string, int string_len, const char **wks_string_out)
580 {
581   int wks_idx;
582   HdrTokenHashBucket *bucket;
583 
584   ink_assert(string != nullptr);
585 
586   if (hdrtoken_is_wks(string)) {
587     wks_idx = hdrtoken_wks_to_index(string);
588     if (wks_string_out) {
589       *wks_string_out = string;
590     }
591     return wks_idx;
592   }
593 
594   uint32_t hash = hdrtoken_hash(reinterpret_cast<const unsigned char *>(string), static_cast<unsigned int>(string_len));
595   uint32_t slot = hash_to_slot(hash);
596 
597   bucket = &(hdrtoken_hash_table[slot]);
598   if ((bucket->wks != nullptr) && (bucket->hash == hash) && (hdrtoken_wks_to_length(bucket->wks) == string_len)) {
599     wks_idx = hdrtoken_wks_to_index(bucket->wks);
600     if (wks_string_out) {
601       *wks_string_out = bucket->wks;
602     }
603     return wks_idx;
604   }
605 
606   Debug("hdr_token", "Did not find a WKS for '%.*s'", string_len, string);
607   return -1;
608 }
609 
610 /*-------------------------------------------------------------------------
611   -------------------------------------------------------------------------*/
612 
613 const char *
hdrtoken_string_to_wks(const char * string)614 hdrtoken_string_to_wks(const char *string)
615 {
616   const char *wks = nullptr;
617   hdrtoken_tokenize(string, static_cast<int>(strlen(string)), &wks);
618   return wks;
619 }
620 
621 /*-------------------------------------------------------------------------
622   -------------------------------------------------------------------------*/
623 
624 const char *
hdrtoken_string_to_wks(const char * string,int length)625 hdrtoken_string_to_wks(const char *string, int length)
626 {
627   const char *wks = nullptr;
628   hdrtoken_tokenize(string, length, &wks);
629   return wks;
630 }
631