1 /** @file
2 
3   A brief file description
4 
5   @section license License
6 
7   Licensed to the Apache Software Foundation (ASF) under one
8   or more contributor license agreements.  See the NOTICE file
9   distributed with this work for additional information
10   regarding copyright ownership.  The ASF licenses this file
11   to you under the Apache License, Version 2.0 (the
12   "License"); you may not use this file except in compliance
13   with the License.  You may obtain a copy of the License at
14 
15       http://www.apache.org/licenses/LICENSE-2.0
16 
17   Unless required by applicable law or agreed to in writing, software
18   distributed under the License is distributed on an "AS IS" BASIS,
19   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
20   See the License for the specific language governing permissions and
21   limitations under the License.
22  */
23 
24 #include <cassert>
25 #include <new>
26 #include "tscore/ink_platform.h"
27 #include "tscore/ink_memory.h"
28 #include "tscore/TsBuffer.h"
29 #include "URL.h"
30 #include "MIME.h"
31 #include "HTTP.h"
32 #include "tscore/Diags.h"
33 
// Tokenized ("well-known string") pointers for each URL scheme this module
// knows about. They are defined here without an initializer and are assigned
// in url_init() from hdrtoken_string_to_wks(). Other code in this file compares
// scheme pointers by identity against these values (see url_scheme_set()).
const char *URL_SCHEME_FILE;
const char *URL_SCHEME_FTP;
const char *URL_SCHEME_GOPHER;
const char *URL_SCHEME_HTTP;
const char *URL_SCHEME_HTTPS;
const char *URL_SCHEME_WSS;
const char *URL_SCHEME_WS;
const char *URL_SCHEME_MAILTO;
const char *URL_SCHEME_NEWS;
const char *URL_SCHEME_NNTP;
const char *URL_SCHEME_PROSPERO;
const char *URL_SCHEME_TELNET;
const char *URL_SCHEME_TUNNEL;
const char *URL_SCHEME_WAIS;
const char *URL_SCHEME_PNM;
const char *URL_SCHEME_RTSP;
const char *URL_SCHEME_RTSPU;
const char *URL_SCHEME_MMS;
const char *URL_SCHEME_MMSU;
const char *URL_SCHEME_MMST;
54 
// Well-known-string index of each scheme token. Assigned in url_init() by
// passing the matching URL_SCHEME_* pointer to hdrtoken_wks_to_index().
int URL_WKSIDX_FILE;
int URL_WKSIDX_FTP;
int URL_WKSIDX_GOPHER;
int URL_WKSIDX_HTTP;
int URL_WKSIDX_HTTPS;
int URL_WKSIDX_WS;
int URL_WKSIDX_WSS;
int URL_WKSIDX_MAILTO;
int URL_WKSIDX_NEWS;
int URL_WKSIDX_NNTP;
int URL_WKSIDX_PROSPERO;
int URL_WKSIDX_TELNET;
int URL_WKSIDX_TUNNEL;
int URL_WKSIDX_WAIS;
int URL_WKSIDX_PNM;
int URL_WKSIDX_RTSP;
int URL_WKSIDX_RTSPU;
int URL_WKSIDX_MMS;
int URL_WKSIDX_MMSU;
int URL_WKSIDX_MMST;
75 
// Length of each scheme token. Assigned in url_init() by passing the matching
// URL_SCHEME_* pointer to hdrtoken_wks_to_length(). url_length_get() uses
// URL_LEN_HTTP / URL_LEN_HTTPS when sizing an implied scheme.
int URL_LEN_FILE;
int URL_LEN_FTP;
int URL_LEN_GOPHER;
int URL_LEN_HTTP;
int URL_LEN_HTTPS;
int URL_LEN_WS;
int URL_LEN_WSS;
int URL_LEN_MAILTO;
int URL_LEN_NEWS;
int URL_LEN_NNTP;
int URL_LEN_PROSPERO;
int URL_LEN_TELNET;
int URL_LEN_TUNNEL;
int URL_LEN_WAIS;
int URL_LEN_PNM;
int URL_LEN_RTSP;
int URL_LEN_RTSPU;
int URL_LEN_MMS;
int URL_LEN_MMSU;
int URL_LEN_MMST;
96 
// Selects how url_CryptoHash_get() is implemented: whether it should use
// url_CryptoHash_get_fast(). Note that url_CryptoHash_get_fast() does NOT
// produce the same result as url_CryptoHash_get_general(), so the two methods
// are not interchangeable for already-computed hashes.
// NOTE(review): presumably 0 (the initial value) selects the general method and
// nonzero the fast one -- confirm at the point where this variable is read.
static int url_hash_method = 0;
100 
101 // test to see if a character is a valid character for a host in a URI according to
102 // RFC 3986 and RFC 1034
103 inline static int
is_host_char(char c)104 is_host_char(char c)
105 {
106   return (ParseRules::is_alnum(c) || (c == '-') || (c == '.') || (c == '[') || (c == ']') || (c == '_') || (c == ':') ||
107           (c == '~') || (c == '%'));
108 }
109 
110 // Checks if `addr` is a valid FQDN string
111 bool
validate_host_name(std::string_view addr)112 validate_host_name(std::string_view addr)
113 {
114   return std::all_of(addr.begin(), addr.end(), &is_host_char);
115 }
116 
117 /*-------------------------------------------------------------------------
118   -------------------------------------------------------------------------*/
119 
120 void
url_init()121 url_init()
122 {
123   static int init = 1;
124 
125   if (init) {
126     init = 0;
127 
128     hdrtoken_init();
129 
130     URL_SCHEME_FILE     = hdrtoken_string_to_wks("file");
131     URL_SCHEME_FTP      = hdrtoken_string_to_wks("ftp");
132     URL_SCHEME_GOPHER   = hdrtoken_string_to_wks("gopher");
133     URL_SCHEME_HTTP     = hdrtoken_string_to_wks("http");
134     URL_SCHEME_HTTPS    = hdrtoken_string_to_wks("https");
135     URL_SCHEME_WSS      = hdrtoken_string_to_wks("wss");
136     URL_SCHEME_WS       = hdrtoken_string_to_wks("ws");
137     URL_SCHEME_MAILTO   = hdrtoken_string_to_wks("mailto");
138     URL_SCHEME_NEWS     = hdrtoken_string_to_wks("news");
139     URL_SCHEME_NNTP     = hdrtoken_string_to_wks("nntp");
140     URL_SCHEME_PROSPERO = hdrtoken_string_to_wks("prospero");
141     URL_SCHEME_TELNET   = hdrtoken_string_to_wks("telnet");
142     URL_SCHEME_TUNNEL   = hdrtoken_string_to_wks("tunnel");
143     URL_SCHEME_WAIS     = hdrtoken_string_to_wks("wais");
144     URL_SCHEME_PNM      = hdrtoken_string_to_wks("pnm");
145     URL_SCHEME_RTSP     = hdrtoken_string_to_wks("rtsp");
146     URL_SCHEME_RTSPU    = hdrtoken_string_to_wks("rtspu");
147     URL_SCHEME_MMS      = hdrtoken_string_to_wks("mms");
148     URL_SCHEME_MMSU     = hdrtoken_string_to_wks("mmsu");
149     URL_SCHEME_MMST     = hdrtoken_string_to_wks("mmst");
150 
151     ink_assert(URL_SCHEME_FILE && URL_SCHEME_FTP && URL_SCHEME_GOPHER && URL_SCHEME_HTTP && URL_SCHEME_HTTPS && URL_SCHEME_WS &&
152                URL_SCHEME_WSS && URL_SCHEME_MAILTO && URL_SCHEME_NEWS && URL_SCHEME_NNTP && URL_SCHEME_PROSPERO &&
153                URL_SCHEME_TELNET && URL_SCHEME_TUNNEL && URL_SCHEME_WAIS && URL_SCHEME_PNM && URL_SCHEME_RTSP && URL_SCHEME_RTSPU &&
154                URL_SCHEME_MMS && URL_SCHEME_MMSU && URL_SCHEME_MMST);
155 
156     URL_WKSIDX_FILE     = hdrtoken_wks_to_index(URL_SCHEME_FILE);
157     URL_WKSIDX_FTP      = hdrtoken_wks_to_index(URL_SCHEME_FTP);
158     URL_WKSIDX_GOPHER   = hdrtoken_wks_to_index(URL_SCHEME_GOPHER);
159     URL_WKSIDX_HTTP     = hdrtoken_wks_to_index(URL_SCHEME_HTTP);
160     URL_WKSIDX_HTTPS    = hdrtoken_wks_to_index(URL_SCHEME_HTTPS);
161     URL_WKSIDX_WS       = hdrtoken_wks_to_index(URL_SCHEME_WS);
162     URL_WKSIDX_WSS      = hdrtoken_wks_to_index(URL_SCHEME_WSS);
163     URL_WKSIDX_MAILTO   = hdrtoken_wks_to_index(URL_SCHEME_MAILTO);
164     URL_WKSIDX_NEWS     = hdrtoken_wks_to_index(URL_SCHEME_NEWS);
165     URL_WKSIDX_NNTP     = hdrtoken_wks_to_index(URL_SCHEME_NNTP);
166     URL_WKSIDX_PROSPERO = hdrtoken_wks_to_index(URL_SCHEME_PROSPERO);
167     URL_WKSIDX_TELNET   = hdrtoken_wks_to_index(URL_SCHEME_TELNET);
168     URL_WKSIDX_TUNNEL   = hdrtoken_wks_to_index(URL_SCHEME_TUNNEL);
169     URL_WKSIDX_WAIS     = hdrtoken_wks_to_index(URL_SCHEME_WAIS);
170     URL_WKSIDX_PNM      = hdrtoken_wks_to_index(URL_SCHEME_PNM);
171     URL_WKSIDX_RTSP     = hdrtoken_wks_to_index(URL_SCHEME_RTSP);
172     URL_WKSIDX_RTSPU    = hdrtoken_wks_to_index(URL_SCHEME_RTSPU);
173     URL_WKSIDX_MMS      = hdrtoken_wks_to_index(URL_SCHEME_MMS);
174     URL_WKSIDX_MMSU     = hdrtoken_wks_to_index(URL_SCHEME_MMSU);
175     URL_WKSIDX_MMST     = hdrtoken_wks_to_index(URL_SCHEME_MMST);
176 
177     URL_LEN_FILE     = hdrtoken_wks_to_length(URL_SCHEME_FILE);
178     URL_LEN_FTP      = hdrtoken_wks_to_length(URL_SCHEME_FTP);
179     URL_LEN_GOPHER   = hdrtoken_wks_to_length(URL_SCHEME_GOPHER);
180     URL_LEN_HTTP     = hdrtoken_wks_to_length(URL_SCHEME_HTTP);
181     URL_LEN_HTTPS    = hdrtoken_wks_to_length(URL_SCHEME_HTTPS);
182     URL_LEN_WS       = hdrtoken_wks_to_length(URL_SCHEME_WS);
183     URL_LEN_WSS      = hdrtoken_wks_to_length(URL_SCHEME_WSS);
184     URL_LEN_MAILTO   = hdrtoken_wks_to_length(URL_SCHEME_MAILTO);
185     URL_LEN_NEWS     = hdrtoken_wks_to_length(URL_SCHEME_NEWS);
186     URL_LEN_NNTP     = hdrtoken_wks_to_length(URL_SCHEME_NNTP);
187     URL_LEN_PROSPERO = hdrtoken_wks_to_length(URL_SCHEME_PROSPERO);
188     URL_LEN_TELNET   = hdrtoken_wks_to_length(URL_SCHEME_TELNET);
189     URL_LEN_TUNNEL   = hdrtoken_wks_to_length(URL_SCHEME_TUNNEL);
190     URL_LEN_WAIS     = hdrtoken_wks_to_length(URL_SCHEME_WAIS);
191     URL_LEN_PNM      = hdrtoken_wks_to_length(URL_SCHEME_PNM);
192     URL_LEN_RTSP     = hdrtoken_wks_to_length(URL_SCHEME_RTSP);
193     URL_LEN_RTSPU    = hdrtoken_wks_to_length(URL_SCHEME_RTSPU);
194     URL_LEN_MMS      = hdrtoken_wks_to_length(URL_SCHEME_MMS);
195     URL_LEN_MMSU     = hdrtoken_wks_to_length(URL_SCHEME_MMSU);
196     URL_LEN_MMST     = hdrtoken_wks_to_length(URL_SCHEME_MMST);
197   }
198 }
199 
200 /*-------------------------------------------------------------------------
201   -------------------------------------------------------------------------*/
202 
203 /***********************************************************************
204  *                                                                     *
205  *             U R L    C R E A T I O N    A N D    C O P Y            *
206  *                                                                     *
207  ***********************************************************************/
208 
209 URLImpl *
url_create(HdrHeap * heap)210 url_create(HdrHeap *heap)
211 {
212   URLImpl *url;
213 
214   url = (URLImpl *)heap->allocate_obj(sizeof(URLImpl), HDR_HEAP_OBJ_URL);
215   obj_clear_data((HdrHeapObjImpl *)url);
216   url->m_url_type       = URL_TYPE_NONE;
217   url->m_scheme_wks_idx = -1;
218   url_clear_string_ref(url);
219   return url;
220 }
221 
222 /*-------------------------------------------------------------------------
223   -------------------------------------------------------------------------*/
224 
225 void
url_clear(URLImpl * url_impl)226 url_clear(URLImpl *url_impl)
227 {
228   obj_clear_data((HdrHeapObjImpl *)url_impl);
229   url_impl->m_url_type       = URL_TYPE_NONE;
230   url_impl->m_scheme_wks_idx = -1;
231 }
232 
233 /*-------------------------------------------------------------------------
234   -------------------------------------------------------------------------*/
235 
236 URLImpl *
url_copy(URLImpl * s_url,HdrHeap * s_heap,HdrHeap * d_heap,bool inherit_strs)237 url_copy(URLImpl *s_url, HdrHeap *s_heap, HdrHeap *d_heap, bool inherit_strs)
238 {
239   URLImpl *d_url = url_create(d_heap);
240   url_copy_onto(s_url, s_heap, d_url, d_heap, inherit_strs);
241   return d_url;
242 }
243 
244 /*-------------------------------------------------------------------------
245   -------------------------------------------------------------------------*/
246 
247 void
url_copy_onto(URLImpl * s_url,HdrHeap * s_heap,URLImpl * d_url,HdrHeap * d_heap,bool inherit_strs)248 url_copy_onto(URLImpl *s_url, HdrHeap *s_heap, URLImpl *d_url, HdrHeap *d_heap, bool inherit_strs)
249 {
250   if (s_url != d_url) {
251     obj_copy_data((HdrHeapObjImpl *)s_url, (HdrHeapObjImpl *)d_url);
252     if (inherit_strs && (s_heap != d_heap)) {
253       d_heap->inherit_string_heaps(s_heap);
254     }
255   }
256 }
257 
258 /*-------------------------------------------------------------------------
259   -------------------------------------------------------------------------*/
260 
261 void
url_nuke_proxy_stuff(URLImpl * d_url)262 url_nuke_proxy_stuff(URLImpl *d_url)
263 {
264   d_url->m_len_scheme   = 0;
265   d_url->m_len_user     = 0;
266   d_url->m_len_password = 0;
267   d_url->m_len_host     = 0;
268   d_url->m_len_port     = 0;
269 
270   d_url->m_ptr_scheme   = nullptr;
271   d_url->m_ptr_user     = nullptr;
272   d_url->m_ptr_password = nullptr;
273   d_url->m_ptr_host     = nullptr;
274   d_url->m_ptr_port     = nullptr;
275 
276   d_url->m_scheme_wks_idx = -1;
277   d_url->m_port           = 0;
278 }
279 
280 /*-------------------------------------------------------------------------
281   -------------------------------------------------------------------------*/
282 
283 /**
284   This routine is like url_copy_onto, but clears the
285   scheme/host/user/pass/port components, resulting in a server-style URL.
286 
287 */
288 void
url_copy_onto_as_server_url(URLImpl * s_url,HdrHeap * s_heap,URLImpl * d_url,HdrHeap * d_heap,bool inherit_strs)289 url_copy_onto_as_server_url(URLImpl *s_url, HdrHeap *s_heap, URLImpl *d_url, HdrHeap *d_heap, bool inherit_strs)
290 {
291   url_nuke_proxy_stuff(d_url);
292 
293   d_url->m_ptr_path      = s_url->m_ptr_path;
294   d_url->m_path_is_empty = s_url->m_path_is_empty;
295   d_url->m_ptr_params    = s_url->m_ptr_params;
296   d_url->m_ptr_query     = s_url->m_ptr_query;
297   d_url->m_ptr_fragment  = s_url->m_ptr_fragment;
298   url_clear_string_ref(d_url);
299 
300   d_url->m_len_path     = s_url->m_len_path;
301   d_url->m_len_params   = s_url->m_len_params;
302   d_url->m_len_query    = s_url->m_len_query;
303   d_url->m_len_fragment = s_url->m_len_fragment;
304 
305   d_url->m_url_type  = s_url->m_url_type;
306   d_url->m_type_code = s_url->m_type_code;
307 
308   if (inherit_strs && (s_heap != d_heap)) {
309     d_heap->inherit_string_heaps(s_heap);
310   }
311 }
312 
313 /*-------------------------------------------------------------------------
314   -------------------------------------------------------------------------*/
315 
316 /***********************************************************************
317  *                                                                     *
318  *                        M A R S H A L I N G                          *
319  *                                                                     *
320  ***********************************************************************/
// Applies HDR_MARSHAL_STR to each of the nine component string pointers
// (scheme, user, password, host, port, path, params, query, fragment), passing
// along the translation table `str_xlate` and its entry count `num_xlate`.
// The printed-string pointer is intentionally not marshalled (its line is
// commented out). Returns 0 when the end of the function is reached.
// NOTE(review): HDR_MARSHAL_STR is a macro defined elsewhere; it may contain an
// early return on failure -- confirm before relying on the return value.
int
URLImpl::marshal(MarshalXlate *str_xlate, int num_xlate)
{
  HDR_MARSHAL_STR(m_ptr_scheme, str_xlate, num_xlate);
  HDR_MARSHAL_STR(m_ptr_user, str_xlate, num_xlate);
  HDR_MARSHAL_STR(m_ptr_password, str_xlate, num_xlate);
  HDR_MARSHAL_STR(m_ptr_host, str_xlate, num_xlate);
  HDR_MARSHAL_STR(m_ptr_port, str_xlate, num_xlate);
  HDR_MARSHAL_STR(m_ptr_path, str_xlate, num_xlate);
  HDR_MARSHAL_STR(m_ptr_params, str_xlate, num_xlate);
  HDR_MARSHAL_STR(m_ptr_query, str_xlate, num_xlate);
  HDR_MARSHAL_STR(m_ptr_fragment, str_xlate, num_xlate);
  //    HDR_MARSHAL_STR(m_ptr_printed_string, str_xlate, num_xlate);
  return 0;
}
336 
// Counterpart of marshal(): applies HDR_UNMARSHAL_STR with `offset` to the
// same nine component string pointers. The printed-string pointer is again
// skipped (its line is commented out).
// NOTE(review): presumably the macro rebases each non-null pointer by `offset`
// -- confirm against the macro definition.
void
URLImpl::unmarshal(intptr_t offset)
{
  HDR_UNMARSHAL_STR(m_ptr_scheme, offset);
  HDR_UNMARSHAL_STR(m_ptr_user, offset);
  HDR_UNMARSHAL_STR(m_ptr_password, offset);
  HDR_UNMARSHAL_STR(m_ptr_host, offset);
  HDR_UNMARSHAL_STR(m_ptr_port, offset);
  HDR_UNMARSHAL_STR(m_ptr_path, offset);
  HDR_UNMARSHAL_STR(m_ptr_params, offset);
  HDR_UNMARSHAL_STR(m_ptr_query, offset);
  HDR_UNMARSHAL_STR(m_ptr_fragment, offset);
  //    HDR_UNMARSHAL_STR(m_ptr_printed_string, offset);
}
351 
352 void
rehome_strings(HdrHeap * new_heap)353 URLImpl::rehome_strings(HdrHeap *new_heap)
354 {
355   m_ptr_scheme         = new_heap->localize({m_ptr_scheme, m_len_scheme}).data();
356   m_ptr_user           = new_heap->localize({m_ptr_user, m_len_user}).data();
357   m_ptr_password       = new_heap->localize({m_ptr_password, m_len_password}).data();
358   m_ptr_host           = new_heap->localize({m_ptr_host, m_len_host}).data();
359   m_ptr_port           = new_heap->localize({m_ptr_port, m_len_port}).data();
360   m_ptr_path           = new_heap->localize({m_ptr_path, m_len_path}).data();
361   m_ptr_params         = new_heap->localize({m_ptr_params, m_len_params}).data();
362   m_ptr_query          = new_heap->localize({m_ptr_query, m_len_query}).data();
363   m_ptr_fragment       = new_heap->localize({m_ptr_fragment, m_len_fragment}).data();
364   m_ptr_printed_string = new_heap->localize({m_ptr_printed_string, m_len_printed_string}).data();
365 }
366 
// Applies HDR_MOVE_STR to every string held by this URL: the nine components
// and, unlike marshal()/unmarshal(), also the printed string.
// NOTE(review): `new_heap` is not named in this body; presumably HDR_MOVE_STR
// expands to code that uses it as the destination -- confirm against the macro
// definition.
void
URLImpl::move_strings(HdrStrHeap *new_heap)
{
  HDR_MOVE_STR(m_ptr_scheme, m_len_scheme);
  HDR_MOVE_STR(m_ptr_user, m_len_user);
  HDR_MOVE_STR(m_ptr_password, m_len_password);
  HDR_MOVE_STR(m_ptr_host, m_len_host);
  HDR_MOVE_STR(m_ptr_port, m_len_port);
  HDR_MOVE_STR(m_ptr_path, m_len_path);
  HDR_MOVE_STR(m_ptr_params, m_len_params);
  HDR_MOVE_STR(m_ptr_query, m_len_query);
  HDR_MOVE_STR(m_ptr_fragment, m_len_fragment);
  HDR_MOVE_STR(m_ptr_printed_string, m_len_printed_string);
}
381 
382 size_t
strings_length()383 URLImpl::strings_length()
384 {
385   size_t ret = 0;
386 
387   ret += m_len_scheme;
388   ret += m_len_user;
389   ret += m_len_password;
390   ret += m_len_host;
391   ret += m_len_port;
392   ret += m_len_path;
393   ret += m_len_params;
394   ret += m_len_query;
395   ret += m_len_fragment;
396   ret += m_len_printed_string;
397   return ret;
398 }
399 
// Applies CHECK_STR to each of the nine component strings (pointer and
// length), passing along `heaps` and `num_heaps`. The printed string is not
// checked (its line is commented out).
// NOTE(review): presumably CHECK_STR verifies that each string lies within one
// of the given heaps -- confirm against the macro definition.
void
URLImpl::check_strings(HeapCheck *heaps, int num_heaps)
{
  CHECK_STR(m_ptr_scheme, m_len_scheme, heaps, num_heaps);
  CHECK_STR(m_ptr_user, m_len_user, heaps, num_heaps);
  CHECK_STR(m_ptr_password, m_len_password, heaps, num_heaps);
  CHECK_STR(m_ptr_host, m_len_host, heaps, num_heaps);
  CHECK_STR(m_ptr_port, m_len_port, heaps, num_heaps);
  CHECK_STR(m_ptr_path, m_len_path, heaps, num_heaps);
  CHECK_STR(m_ptr_params, m_len_params, heaps, num_heaps);
  CHECK_STR(m_ptr_query, m_len_query, heaps, num_heaps);
  CHECK_STR(m_ptr_fragment, m_len_fragment, heaps, num_heaps);
  //    CHECK_STR(m_ptr_printed_string, m_len_printed_string, heaps, num_heaps);
}
414 
415 /***********************************************************************
416  *                                                                     *
417  *                               S E T                                 *
418  *                                                                     *
419  ***********************************************************************/
420 
421 const char *
url_scheme_set(HdrHeap * heap,URLImpl * url,const char * scheme_str,int scheme_wks_idx,int length,bool copy_string)422 url_scheme_set(HdrHeap *heap, URLImpl *url, const char *scheme_str, int scheme_wks_idx, int length, bool copy_string)
423 {
424   const char *scheme_wks;
425   url_called_set(url);
426   if (length == 0) {
427     scheme_str = nullptr;
428   }
429 
430   mime_str_u16_set(heap, scheme_str, length, &(url->m_ptr_scheme), &(url->m_len_scheme), copy_string);
431 
432   url->m_scheme_wks_idx = scheme_wks_idx;
433   if (scheme_wks_idx >= 0) {
434     scheme_wks = hdrtoken_index_to_wks(scheme_wks_idx);
435   } else {
436     scheme_wks = nullptr;
437   }
438 
439   if (scheme_wks == URL_SCHEME_HTTP || scheme_wks == URL_SCHEME_WS) {
440     url->m_url_type = URL_TYPE_HTTP;
441   } else if (scheme_wks == URL_SCHEME_HTTPS || scheme_wks == URL_SCHEME_WSS) {
442     url->m_url_type = URL_TYPE_HTTPS;
443   } else {
444     url->m_url_type = URL_TYPE_HTTP;
445   }
446 
447   return scheme_wks; // tokenized string or NULL if not well known
448 }
449 
450 /*-------------------------------------------------------------------------
451   -------------------------------------------------------------------------*/
452 
453 void
url_user_set(HdrHeap * heap,URLImpl * url,const char * value,int length,bool copy_string)454 url_user_set(HdrHeap *heap, URLImpl *url, const char *value, int length, bool copy_string)
455 {
456   url_called_set(url);
457   if (length == 0) {
458     value = nullptr;
459   }
460   mime_str_u16_set(heap, value, length, &(url->m_ptr_user), &(url->m_len_user), copy_string);
461 }
462 
463 /*-------------------------------------------------------------------------
464   -------------------------------------------------------------------------*/
465 
466 void
url_password_set(HdrHeap * heap,URLImpl * url,const char * value,int length,bool copy_string)467 url_password_set(HdrHeap *heap, URLImpl *url, const char *value, int length, bool copy_string)
468 {
469   url_called_set(url);
470   if (length == 0) {
471     value = nullptr;
472   }
473   mime_str_u16_set(heap, value, length, &(url->m_ptr_password), &(url->m_len_password), copy_string);
474 }
475 
476 /*-------------------------------------------------------------------------
477   -------------------------------------------------------------------------*/
478 
479 void
url_host_set(HdrHeap * heap,URLImpl * url,const char * value,int length,bool copy_string)480 url_host_set(HdrHeap *heap, URLImpl *url, const char *value, int length, bool copy_string)
481 {
482   url_called_set(url);
483   if (length == 0) {
484     value = nullptr;
485   }
486   mime_str_u16_set(heap, value, length, &(url->m_ptr_host), &(url->m_len_host), copy_string);
487 }
488 
489 /*-------------------------------------------------------------------------
490   -------------------------------------------------------------------------*/
491 
492 void
url_port_set(HdrHeap * heap,URLImpl * url,const char * value,int length,bool copy_string)493 url_port_set(HdrHeap *heap, URLImpl *url, const char *value, int length, bool copy_string)
494 {
495   url_called_set(url);
496   if (length == 0) {
497     value = nullptr;
498   }
499   mime_str_u16_set(heap, value, length, &(url->m_ptr_port), &(url->m_len_port), copy_string);
500 
501   url->m_port = 0;
502   for (int i = 0; i < length; i++) {
503     if (!ParseRules::is_digit(value[i])) {
504       break;
505     }
506     url->m_port = url->m_port * 10 + (value[i] - '0');
507   }
508 }
509 
510 /*-------------------------------------------------------------------------
511   -------------------------------------------------------------------------*/
512 
513 void
url_port_set(HdrHeap * heap,URLImpl * url,unsigned int port)514 url_port_set(HdrHeap *heap, URLImpl *url, unsigned int port)
515 {
516   url_called_set(url);
517   if (port > 0) {
518     char value[6];
519     int length;
520 
521     length = ink_fast_itoa(port, value, sizeof(value));
522     mime_str_u16_set(heap, value, length, &(url->m_ptr_port), &(url->m_len_port), true);
523   } else {
524     mime_str_u16_set(heap, nullptr, 0, &(url->m_ptr_port), &(url->m_len_port), true);
525   }
526   url->m_port = port;
527 }
528 
529 /*-------------------------------------------------------------------------
530   -------------------------------------------------------------------------*/
531 
532 void
url_path_set(HdrHeap * heap,URLImpl * url,const char * value,int length,bool copy_string)533 url_path_set(HdrHeap *heap, URLImpl *url, const char *value, int length, bool copy_string)
534 {
535   url_called_set(url);
536   if (length == 0) {
537     value = nullptr;
538   }
539   mime_str_u16_set(heap, value, length, &(url->m_ptr_path), &(url->m_len_path), copy_string);
540 }
541 
542 /*-------------------------------------------------------------------------
543   -------------------------------------------------------------------------*/
544 
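// url_{params|query|fragment}_set(): unlike the setters above, these forward
// `value` unchanged even when `length` is 0, so an empty params/query/fragment
// component is not turned into a null pointer here.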
547 
548 void
url_params_set(HdrHeap * heap,URLImpl * url,const char * value,int length,bool copy_string)549 url_params_set(HdrHeap *heap, URLImpl *url, const char *value, int length, bool copy_string)
550 {
551   url_called_set(url);
552   mime_str_u16_set(heap, value, length, &(url->m_ptr_params), &(url->m_len_params), copy_string);
553 }
554 
555 /*-------------------------------------------------------------------------
556   -------------------------------------------------------------------------*/
557 
558 void
url_query_set(HdrHeap * heap,URLImpl * url,const char * value,int length,bool copy_string)559 url_query_set(HdrHeap *heap, URLImpl *url, const char *value, int length, bool copy_string)
560 {
561   url_called_set(url);
562   mime_str_u16_set(heap, value, length, &(url->m_ptr_query), &(url->m_len_query), copy_string);
563 }
564 
565 /*-------------------------------------------------------------------------
566   -------------------------------------------------------------------------*/
567 
568 void
url_fragment_set(HdrHeap * heap,URLImpl * url,const char * value,int length,bool copy_string)569 url_fragment_set(HdrHeap *heap, URLImpl *url, const char *value, int length, bool copy_string)
570 {
571   url_called_set(url);
572   mime_str_u16_set(heap, value, length, &(url->m_ptr_fragment), &(url->m_len_fragment), copy_string);
573 }
574 
575 /*-------------------------------------------------------------------------
576   -------------------------------------------------------------------------*/
577 
578 void
url_type_set(URLImpl * url,unsigned int typecode)579 url_type_set(URLImpl *url, unsigned int typecode)
580 {
581   url_called_set(url);
582   url->m_type_code = typecode;
583 }
584 
585 /*-------------------------------------------------------------------------
586   -------------------------------------------------------------------------*/
587 
588 /***********************************************************************
589  *                                                                     *
590  *                               G E T                                 *
591  *                                                                     *
592  ***********************************************************************/
593 
594 /*-------------------------------------------------------------------------
595   -------------------------------------------------------------------------*/
596 
597 void
url_called_set(URLImpl * url)598 url_called_set(URLImpl *url)
599 {
600   url->m_clean = !url->m_ptr_printed_string;
601 }
602 
603 void
url_clear_string_ref(URLImpl * url)604 url_clear_string_ref(URLImpl *url)
605 {
606   if (url->m_ptr_printed_string) {
607     url->m_len_printed_string = 0;
608     url->m_ptr_printed_string = nullptr;
609     url->m_clean              = true;
610   }
611   return;
612 }
613 
614 char *
url_string_get_ref(HdrHeap * heap,URLImpl * url,int * length,unsigned normalization_flags)615 url_string_get_ref(HdrHeap *heap, URLImpl *url, int *length, unsigned normalization_flags)
616 {
617   if (!url) {
618     return nullptr;
619   }
620 
621   if (url->m_ptr_printed_string && url->m_clean && (normalization_flags == url->m_normalization_flags)) {
622     if (length) {
623       *length = url->m_len_printed_string;
624     }
625     return const_cast<char *>(url->m_ptr_printed_string);
626   } else { // either not clean or never printed
627     int len = url_length_get(url, normalization_flags);
628     char *buf;
629     int index  = 0;
630     int offset = 0;
631 
632     /* stuff alloc'd here gets gc'd on HdrHeap::destroy() */
633     buf = heap->allocate_str(len + 1);
634     url_print(url, buf, len, &index, &offset, normalization_flags);
635     buf[len] = '\0';
636 
637     if (length) {
638       *length = len;
639     }
640     url->m_clean               = true; // reset since we have url_print()'ed again
641     url->m_len_printed_string  = len;
642     url->m_ptr_printed_string  = buf;
643     url->m_normalization_flags = normalization_flags;
644     return buf;
645   }
646 }
647 
648 char *
url_string_get(URLImpl * url,Arena * arena,int * length,HdrHeap * heap)649 url_string_get(URLImpl *url, Arena *arena, int *length, HdrHeap *heap)
650 {
651   int len = url_length_get(url);
652   char *buf;
653   char *buf2;
654   int index  = 0;
655   int offset = 0;
656 
657   buf = arena ? arena->str_alloc(len) : static_cast<char *>(ats_malloc(len + 1));
658 
659   url_print(url, buf, len, &index, &offset);
660   buf[len] = '\0';
661 
662   /* see string_get_ref() */
663   if (heap) {
664     buf2 = heap->allocate_str(len + 1);
665     memcpy(buf2, buf, len);
666     buf2[len]                 = '\0';
667     url->m_clean              = true; // reset since we have url_print()'ed again
668     url->m_len_printed_string = len;
669     url->m_ptr_printed_string = buf2;
670   }
671 
672   if (length) {
673     *length = len;
674   }
675   return buf;
676 }
677 
678 /*-------------------------------------------------------------------------
679   -------------------------------------------------------------------------*/
680 
681 char *
url_string_get_buf(URLImpl * url,char * dstbuf,int dstbuf_size,int * length)682 url_string_get_buf(URLImpl *url, char *dstbuf, int dstbuf_size, int *length)
683 {
684   int len    = url_length_get(url);
685   int index  = 0;
686   int offset = 0;
687   char *buf  = nullptr;
688 
689   if (dstbuf && dstbuf_size > 0) {
690     buf = dstbuf;
691     if (len >= dstbuf_size) {
692       len = dstbuf_size - 1;
693     }
694     url_print(url, dstbuf, len, &index, &offset);
695     buf[len] = 0;
696 
697     if (length) {
698       *length = len;
699     }
700   }
701   return buf;
702 }
703 
704 /*-------------------------------------------------------------------------
705   -------------------------------------------------------------------------*/
706 
707 const char *
url_user_get(URLImpl * url,int * length)708 url_user_get(URLImpl *url, int *length)
709 {
710   *length = url->m_len_user;
711   return url->m_ptr_user;
712 }
713 
714 /*-------------------------------------------------------------------------
715   -------------------------------------------------------------------------*/
716 
717 const char *
url_password_get(URLImpl * url,int * length)718 url_password_get(URLImpl *url, int *length)
719 {
720   *length = url->m_len_password;
721   return url->m_ptr_password;
722 }
723 
724 /*-------------------------------------------------------------------------
725   -------------------------------------------------------------------------*/
726 
727 const char *
url_host_get(URLImpl * url,int * length)728 url_host_get(URLImpl *url, int *length)
729 {
730   *length = url->m_len_host;
731   return url->m_ptr_host;
732 }
733 
734 /*-------------------------------------------------------------------------
735   -------------------------------------------------------------------------*/
736 
737 int
url_port_get(URLImpl * url)738 url_port_get(URLImpl *url)
739 {
740   return url->m_port;
741 }
742 
743 /*-------------------------------------------------------------------------
744   -------------------------------------------------------------------------*/
745 
746 const char *
url_path_get(URLImpl * url,int * length)747 url_path_get(URLImpl *url, int *length)
748 {
749   *length = url->m_len_path;
750   return url->m_ptr_path;
751 }
752 
753 /*-------------------------------------------------------------------------
754   -------------------------------------------------------------------------*/
755 
756 const char *
url_params_get(URLImpl * url,int * length)757 url_params_get(URLImpl *url, int *length)
758 {
759   *length = url->m_len_params;
760   return url->m_ptr_params;
761 }
762 
763 /*-------------------------------------------------------------------------
764   -------------------------------------------------------------------------*/
765 
766 const char *
url_query_get(URLImpl * url,int * length)767 url_query_get(URLImpl *url, int *length)
768 {
769   *length = url->m_len_query;
770   return url->m_ptr_query;
771 }
772 
773 /*-------------------------------------------------------------------------
774   -------------------------------------------------------------------------*/
775 
776 const char *
url_fragment_get(URLImpl * url,int * length)777 url_fragment_get(URLImpl *url, int *length)
778 {
779   *length = url->m_len_fragment;
780   return url->m_ptr_fragment;
781 }
782 
783 /*-------------------------------------------------------------------------
784   -------------------------------------------------------------------------*/
785 
786 int
url_type_get(URLImpl * url)787 url_type_get(URLImpl *url)
788 {
789   return url->m_type_code;
790 }
791 
792 /*-------------------------------------------------------------------------
793   -------------------------------------------------------------------------*/
794 
795 /***********************************************************************
796  *                                                                     *
797  *               U R L    S T R I N G    F U N C T I O N S             *
798  *                                                                     *
799  ***********************************************************************/
800 
801 /*-------------------------------------------------------------------------
802   -------------------------------------------------------------------------*/
803 
804 int
url_length_get(URLImpl * url,unsigned normalization_flags)805 url_length_get(URLImpl *url, unsigned normalization_flags)
806 {
807   int length = 0;
808 
809   if (url->m_ptr_scheme) {
810     length += url->m_len_scheme + 3; // +3 for "://"
811 
812   } else if (normalization_flags & URLNormalize::IMPLIED_SCHEME) {
813     if (URL_TYPE_HTTP == url->m_url_type) {
814       length += URL_LEN_HTTP + 3;
815 
816     } else if (URL_TYPE_HTTPS == url->m_url_type) {
817       length += URL_LEN_HTTPS + 3;
818     }
819   }
820 
821   if (url->m_ptr_user) {
822     length += url->m_len_user + 1; // +1 for "@"
823     if (url->m_ptr_password) {
824       length += url->m_len_password + 1; // +1 for ":"
825     }
826   }
827 
828   if (url->m_ptr_host) {
829     length += url->m_len_host;
830     if (url->m_ptr_port && url->m_port) {
831       length += url->m_len_port + 1; // +1 for ":"
832     }
833   }
834 
835   if (url->m_ptr_path) {
836     length += url->m_len_path;
837   }
838 
839   if (!url->m_path_is_empty) {
840     // m_ptr_path does not contain the initial "/" and thus m_len_path does not
841     // count it. We account for it here.
842     length += 1; // +1 for "/"
843   }
844 
845   if (url->m_ptr_params && url->m_len_params > 0) {
846     length += url->m_len_params + 1; // +1 for ";"
847   }
848 
849   if (url->m_ptr_query && url->m_len_query > 0) {
850     length += url->m_len_query + 1; // +1 for "?"
851   }
852 
853   if (url->m_ptr_fragment && url->m_len_fragment > 0) {
854     length += url->m_len_fragment + 1; // +1 for "#"
855   }
856 
857   return length;
858 }
859 
860 /*-------------------------------------------------------------------------
861   -------------------------------------------------------------------------*/
862 
863 char *
url_to_string(URLImpl * url,Arena * arena,int * length)864 url_to_string(URLImpl *url, Arena *arena, int *length)
865 {
866   int len;
867   int idx;
868   char *str;
869 
870   len = url_length_get(url) + 1;
871 
872   if (length) {
873     *length = len;
874   }
875 
876   if (arena) {
877     str = arena->str_alloc(len);
878   } else {
879     str = static_cast<char *>(ats_malloc(len + 1));
880   }
881 
882   idx = 0;
883 
884   if (url->m_ptr_scheme) {
885     memcpy(&str[idx], url->m_ptr_scheme, url->m_len_scheme);
886     idx += url->m_len_scheme;
887     if ((url->m_scheme_wks_idx >= 0) && (hdrtoken_index_to_wks(url->m_scheme_wks_idx) == URL_SCHEME_FILE)) {
888       str[idx++] = ':';
889     } else {
890       str[idx++] = ':';
891       str[idx++] = '/';
892       str[idx++] = '/';
893     }
894   }
895 
896   if (url->m_ptr_user) {
897     memcpy(&str[idx], url->m_ptr_user, url->m_len_user);
898     idx += url->m_len_user;
899     if (url->m_ptr_password) {
900       str[idx++] = ':';
901       memcpy(&str[idx], url->m_ptr_password, url->m_len_password);
902       idx += url->m_len_password;
903     }
904     str[idx++] = '@';
905   }
906 
907   if (url->m_ptr_host) {
908     memcpy(&str[idx], url->m_ptr_host, url->m_len_host);
909     idx += url->m_len_host;
910     if (url->m_ptr_port != nullptr) {
911       str[idx++] = ':';
912       memcpy(&str[idx], url->m_ptr_port, url->m_len_port);
913       idx += url->m_len_port;
914     }
915   }
916 
917   memcpy(&str[idx], url->m_ptr_path, url->m_len_path);
918   idx += url->m_len_path;
919 
920   if (url->m_ptr_params && url->m_len_params > 0) {
921     str[idx++] = ';';
922     memcpy(&str[idx], url->m_ptr_params, url->m_len_params);
923     idx += url->m_len_params;
924   }
925 
926   if (url->m_ptr_query && url->m_len_query > 0) {
927     str[idx++] = '?';
928     memcpy(&str[idx], url->m_ptr_query, url->m_len_query);
929     idx += url->m_len_query;
930   }
931 
932   if (url->m_ptr_fragment && url->m_len_fragment > 0) {
933     str[idx++] = '#';
934     memcpy(&str[idx], url->m_ptr_fragment, url->m_len_fragment);
935     idx += url->m_len_fragment;
936   }
937 
938   str[idx++] = '\0';
939 
940   ink_release_assert(idx == len);
941 
942   return str;
943 }
944 
945 /*-------------------------------------------------------------------------
946   -------------------------------------------------------------------------*/
947 
948 /***********************************************************************
949  *                                                                     *
950  *                     E S C A P E - H A N D L I N G                   *
951  *                                                                     *
952  ***********************************************************************/
953 
954 void
unescape_str(char * & buf,char * buf_e,const char * & str,const char * str_e,int & state)955 unescape_str(char *&buf, char *buf_e, const char *&str, const char *str_e, int &state)
956 {
957   int copy_len;
958   char *first_pct;
959   int buf_len = static_cast<int>(buf_e - buf);
960   int str_len = static_cast<int>(str_e - str);
961   int min_len = (str_len < buf_len ? str_len : buf_len);
962 
963   first_pct = ink_memcpy_until_char(buf, const_cast<char *>(str), min_len, '%');
964   copy_len  = static_cast<int>(first_pct - str);
965   str += copy_len;
966   buf += copy_len;
967   if (copy_len == min_len) {
968     return;
969   }
970 
971   while (str < str_e && (buf != buf_e)) {
972     switch (state) {
973     case 0:
974       if (str[0] == '%') {
975         str += 1;
976         state = 1;
977       } else {
978         *buf++ = str[0];
979         str += 1;
980       }
981       break;
982     case 1:
983       if (ParseRules::is_hex(str[0])) {
984         str += 1;
985         state = 2;
986       } else {
987         *buf++ = str[-1];
988         state  = 0;
989       }
990       break;
991     case 2:
992       if (ParseRules::is_hex(str[0])) {
993         int tmp;
994 
995         if (ParseRules::is_alpha(str[-1])) {
996           tmp = (ParseRules::ink_toupper(str[-1]) - 'A' + 10) * 16;
997         } else {
998           tmp = (str[-1] - '0') * 16;
999         }
1000         if (ParseRules::is_alpha(str[0])) {
1001           tmp += (ParseRules::ink_toupper(str[0]) - 'A' + 10);
1002         } else {
1003           tmp += str[0] - '0';
1004         }
1005 
1006         *buf++ = tmp;
1007         str += 1;
1008         state = 0;
1009       } else {
1010         *buf++ = str[-2];
1011         state  = 3;
1012       }
1013       break;
1014     case 3:
1015       *buf++ = str[-1];
1016       state  = 0;
1017       break;
1018     }
1019   }
1020 }
1021 
1022 /*-------------------------------------------------------------------------
1023   -------------------------------------------------------------------------*/
1024 
1025 void
unescape_str_tolower(char * & buf,char * end,const char * & str,const char * str_e,int & state)1026 unescape_str_tolower(char *&buf, char *end, const char *&str, const char *str_e, int &state)
1027 {
1028   while (str < str_e && (buf != end)) {
1029     switch (state) {
1030     case 0:
1031       if (str[0] == '%') {
1032         str += 1;
1033         state = 1;
1034       } else {
1035         *buf++ = ParseRules::ink_tolower(str[0]);
1036         str += 1;
1037       }
1038       break;
1039     case 1:
1040       if (ParseRules::is_hex(str[0])) {
1041         str += 1;
1042         state = 2;
1043       } else {
1044         *buf++ = ParseRules::ink_tolower(str[-1]);
1045         state  = 0;
1046       }
1047       break;
1048     case 2:
1049       if (ParseRules::is_hex(str[0])) {
1050         int tmp;
1051 
1052         if (ParseRules::is_alpha(str[-1])) {
1053           tmp = (ParseRules::ink_toupper(str[-1]) - 'A' + 10) * 16;
1054         } else {
1055           tmp = (str[-1] - '0') * 16;
1056         }
1057         if (ParseRules::is_alpha(str[0])) {
1058           tmp += (ParseRules::ink_toupper(str[0]) - 'A' + 10);
1059         } else {
1060           tmp += str[0] - '0';
1061         }
1062 
1063         *buf++ = tmp;
1064         str += 1;
1065         state = 0;
1066       } else {
1067         *buf++ = ParseRules::ink_tolower(str[-2]);
1068         state  = 3;
1069       }
1070       break;
1071     case 3:
1072       *buf++ = ParseRules::ink_tolower(str[-1]);
1073       state  = 0;
1074       break;
1075     }
1076   }
1077 }
1078 
1079 /*-------------------------------------------------------------------------
1080   -------------------------------------------------------------------------*/
1081 
1082 char *
url_unescapify(Arena * arena,const char * str,int length)1083 url_unescapify(Arena *arena, const char *str, int length)
1084 {
1085   char *buffer;
1086   char *t, *e;
1087   int s;
1088 
1089   if (length == -1) {
1090     length = static_cast<int>(strlen(str));
1091   }
1092 
1093   buffer = arena->str_alloc(length);
1094   t      = buffer;
1095   e      = buffer + length;
1096   s      = 0;
1097 
1098   unescape_str(t, e, str, str + length, s);
1099   *t = '\0';
1100 
1101   return buffer;
1102 }
1103 
1104 /*-------------------------------------------------------------------------
1105   -------------------------------------------------------------------------*/
1106 
1107 /***********************************************************************
1108  *                                                                     *
1109  *                            P A R S I N G                            *
1110  *                                                                     *
1111  ***********************************************************************/
1112 
// Advance the parse cursor by one character and jump to @a label once the input is
// exhausted. Relies on variables named `cur` and `end` being in scope at the point of use.
#define GETNEXT(label) \
  {                    \
    cur += 1;          \
    if (cur >= end) {  \
      goto label;      \
    }                  \
  }
1120 
1121 ParseResult
url_parse_scheme(HdrHeap * heap,URLImpl * url,const char ** start,const char * end,bool copy_strings_p)1122 url_parse_scheme(HdrHeap *heap, URLImpl *url, const char **start, const char *end, bool copy_strings_p)
1123 {
1124   const char *cur = *start;
1125   const char *scheme_wks;
1126   const char *scheme_start = nullptr;
1127   const char *scheme_end   = nullptr;
1128   int scheme_wks_idx;
1129 
1130   // Skip over spaces
1131   while (' ' == *cur && ++cur < end) {
1132   }
1133 
1134   if (cur < end) {
1135     scheme_start = scheme_end = cur;
1136 
1137     // If the URL is more complex then a path, parse to see if there is a scheme
1138     if ('/' != *cur) {
1139       // Search for a : it could be part of a scheme or a username:password
1140       while (':' != *cur && ++cur < end) {
1141       }
1142 
1143       // If there is a :// then there is a scheme
1144       if (cur + 2 < end && cur[1] == '/' && cur[2] == '/') { // found "://"
1145         scheme_end     = cur;
1146         scheme_wks_idx = hdrtoken_tokenize(scheme_start, scheme_end - scheme_start, &scheme_wks);
1147 
1148         if (!(scheme_wks_idx > 0 && hdrtoken_wks_to_token_type(scheme_wks) == HDRTOKEN_TYPE_SCHEME)) {
1149           // Unknown scheme, validate the scheme
1150 
1151           // RFC 3986 Section 3.1
1152           // These are the valid characters in a scheme:
1153           //   scheme      = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
1154           // return an error if there is another character in the scheme
1155           if (!ParseRules::is_alpha(*scheme_start)) {
1156             return PARSE_RESULT_ERROR;
1157           }
1158           for (cur = scheme_start + 1; cur < scheme_end; ++cur) {
1159             if (!(ParseRules::is_alnum(*cur) != 0 || *cur == '+' || *cur == '-' || *cur == '.')) {
1160               return PARSE_RESULT_ERROR;
1161             }
1162           }
1163         }
1164         url_scheme_set(heap, url, scheme_start, scheme_wks_idx, scheme_end - scheme_start, copy_strings_p);
1165       }
1166     }
1167     *start = scheme_end;
1168     return PARSE_RESULT_CONT;
1169   }
1170   return PARSE_RESULT_ERROR; // no non-whitespace found
1171 }
1172 
1173 // This implementation namespace is necessary because this function is tested by a Catch unit test
1174 // in another source file.
1175 //
1176 namespace UrlImpl
1177 {
1178 /**
1179  *  This method will return TRUE if the uri is strictly compliant with
1180  *  RFC 3986 and it will return FALSE if not.
1181  */
1182 bool
url_is_strictly_compliant(const char * start,const char * end)1183 url_is_strictly_compliant(const char *start, const char *end)
1184 {
1185   for (const char *i = start; i < end; ++i) {
1186     if (!ParseRules::is_uri(*i)) {
1187       Debug("http", "Non-RFC compliant character [0x%.2X] found in URL", (unsigned char)*i);
1188       return false;
1189     }
1190   }
1191   return true;
1192 }
1193 
1194 } // namespace UrlImpl
1195 using namespace UrlImpl;
1196 
1197 ParseResult
url_parse(HdrHeap * heap,URLImpl * url,const char ** start,const char * end,bool copy_strings_p,bool strict_uri_parsing,bool verify_host_characters)1198 url_parse(HdrHeap *heap, URLImpl *url, const char **start, const char *end, bool copy_strings_p, bool strict_uri_parsing,
1199           bool verify_host_characters)
1200 {
1201   if (strict_uri_parsing && !url_is_strictly_compliant(*start, end)) {
1202     return PARSE_RESULT_ERROR;
1203   }
1204 
1205   ParseResult zret = url_parse_scheme(heap, url, start, end, copy_strings_p);
1206   return PARSE_RESULT_CONT == zret ? url_parse_http(heap, url, start, end, copy_strings_p, verify_host_characters) : zret;
1207 }
1208 
1209 ParseResult
url_parse_regex(HdrHeap * heap,URLImpl * url,const char ** start,const char * end,bool copy_strings_p)1210 url_parse_regex(HdrHeap *heap, URLImpl *url, const char **start, const char *end, bool copy_strings_p)
1211 {
1212   ParseResult zret = url_parse_scheme(heap, url, start, end, copy_strings_p);
1213   return PARSE_RESULT_CONT == zret ? url_parse_http_regex(heap, url, start, end, copy_strings_p) : zret;
1214 }
1215 
/**
  Parse the authority part of an internet URL.

  After this function completes successfully, @a start will point to the first character
  after the authority (the '/', '?' or '#' that terminated it), or to @a end if there are
  no characters after it.

  @verbatim
  [://][user[:password]@]host[:port]

  some.place/
  some.place:80/
  foo@some.place:80/
  foo:bar@some.place:80/
  foo:bar@some.place/
  foo:42@some.place/
  @endverbatim

  @param heap                   Heap passed through to the url_*_set() calls.
  @param url                    URL object that receives user, password, host and port.
  @param start                  In/out parse position; only updated on success.
  @param end                    End of the text.
  @param copy_strings_p         Passed through to the url_*_set() calls.
  @param verify_host_characters If set, a non-empty host must pass validate_host_name().
  @return PARSE_RESULT_DONE on success, PARSE_RESULT_ERROR on malformed input.
*/

ParseResult
url_parse_internet(HdrHeap *heap, URLImpl *url, const char **start, char const *end, bool copy_strings_p,
                   bool verify_host_characters)
{
  const char *cur = *start;
  const char *base;              // Base for host/port field.
  const char *bracket = nullptr; // marker for open bracket, if any.
  ts::ConstBuffer user, passw, host, port;
  static size_t const MAX_COLON = 8; // max # of valid colons.
  size_t n_colon                = 0;
  const char *last_colon        = nullptr; // pointer to last colon seen.

  // Do a quick check for "://"
  if (end - cur > 3 && (((':' ^ *cur) | ('/' ^ cur[1]) | ('/' ^ cur[2])) == 0)) {
    cur += 3;
  } else if (':' == *cur && (++cur >= end || ('/' == *cur && (++cur >= end || ('/' == *cur && ++cur >= end))))) {
    // A ":", ":/" or "://" that runs into the end of the input is an error. Otherwise
    // @a cur is left after whatever part of that prefix was consumed.
    return PARSE_RESULT_ERROR;
  }

  base = cur;
  // skipped leading stuff, start real parsing.
  while (cur < end) {
    // Note: Each case is responsible for incrementing @a cur if
    // appropriate!
    switch (*cur) {
    case ']': // address close
      if (nullptr == bracket || n_colon >= MAX_COLON) {
        return PARSE_RESULT_ERROR;
      }
      ++cur;
      /* We keep the brackets because there are too many other places
         that depend on them and it's too painful to keep track if
         they should be used. I thought about being clever with
         stripping brackets from non-IPv6 content but that gets ugly
         as well. Just not worth it.
       */
      host.set(bracket, cur);
      // The spec requires this to constitute the entire host, so the next
      // character must be missing (EOS), slash, or colon.
      if (cur >= end || '/' == *cur) { // done which is OK
        last_colon = nullptr;
        break;
      } else if (':' != *cur) { // otherwise it must be a colon
        return PARSE_RESULT_ERROR;
      }
      /* We want to prevent more than 1 colon following so we set @a
         n_colon appropriately.
      */
      n_colon = MAX_COLON - 1;
    // FALL THROUGH
    case ':': // track colons, fail if too many.
      if (++n_colon > MAX_COLON) {
        return PARSE_RESULT_ERROR;
      }
      last_colon = cur;
      ++cur;
      break;
    case '@': // user/password marker.
      if (user || n_colon > 1) {
        return PARSE_RESULT_ERROR; // we already got one, or too many colons.
      }
      if (n_colon) {
        // Exactly one colon seen so far: it separates user from password.
        user.set(base, last_colon);
        passw.set(last_colon + 1, cur);
        n_colon    = 0;
        last_colon = nullptr;
      } else {
        user.set(base, cur);
      }
      ++cur;
      base = cur; // the host/port field starts after the '@'.
      break;
    case '[':                       // address open
      if (bracket || base != cur) { // must be first char in field
        return PARSE_RESULT_ERROR;
      }
      bracket = cur; // location and flag.
      ++cur;
      break;
    // RFC 3986, section 3.2:
    // The authority component is ...  terminated by the next slash ("/"),
    // question mark ("?"), or number sign ("#") character, or by the end of
    // the URI.
    case '/':
    case '?':
    case '#':
      end = cur; // We're done parsing authority, cause loop exit.
      break;
    default:
      ++cur;
      break;
    };
  }
  // Time to pick up the pieces. At this point @a cur is the first
  // character past the parse area.

  if (user) {
    url_user_set(heap, url, user._ptr, user._size, copy_strings_p);
    if (passw) {
      url_password_set(heap, url, passw._ptr, passw._size, copy_strings_p);
    }
  }

  // @a host not set means no brackets to mark explicit host.
  if (!host) {
    if (1 == n_colon || MAX_COLON == n_colon) { // presume port.
      host.set(base, last_colon);
    } else { // it's all host.
      host.set(base, cur);
      last_colon = nullptr; // prevent port setting.
    }
  }
  if (host._size) {
    // The host is stored unless verification was requested and it fails.
    if (!verify_host_characters || validate_host_name(std::string_view(host._ptr, host._size))) {
      url_host_set(heap, url, host._ptr, host._size, copy_strings_p);
    } else {
      return PARSE_RESULT_ERROR;
    }
  }

  // A surviving @a last_colon marks the start of the port text.
  if (last_colon) {
    ink_assert(n_colon);
    port.set(last_colon + 1, cur);
    if (!port._size) {
      return PARSE_RESULT_ERROR; // colon w/o port value.
    }
    url_port_set(heap, url, port._ptr, port._size, copy_strings_p);
  }
  *start = cur;
  return PARSE_RESULT_DONE;
}
1366 
1367 /*-------------------------------------------------------------------------
1368   -------------------------------------------------------------------------*/
1369 
1370 // empties params/query/fragment component
1371 
/**
  Parse the part of a URL that follows the scheme: the authority (via
  url_parse_internet()) and then the path, params (';'), query ('?') and fragment ('#').

  The scan is a goto driven state machine. Each `parse_*1` label records where a component
  starts (one past its delimiter) and each `parse_*2` label scans for the delimiter that
  ends it. Running out of input at any point jumps to `done`, where the components that
  were found are stored in @a url.

  @param heap                   Heap passed through to the url_*_set() calls.
  @param url                    URL object to fill in.
  @param start                  In/out parse position.
  @param end                    End of the text.
  @param copy_strings           Passed through to the url_*_set() calls.
  @param verify_host_characters Passed through to url_parse_internet().
  @return A negative result from url_parse_internet() if it reports one, otherwise
          PARSE_RESULT_DONE.
*/
ParseResult
url_parse_http(HdrHeap *heap, URLImpl *url, const char **start, const char *end, bool copy_strings, bool verify_host_characters)
{
  ParseResult err;
  const char *cur;
  const char *path_start     = nullptr;
  const char *path_end       = nullptr;
  const char *params_start   = nullptr;
  const char *params_end     = nullptr;
  const char *query_start    = nullptr;
  const char *query_end      = nullptr;
  const char *fragment_start = nullptr;
  const char *fragment_end   = nullptr;
  char mask;

  err = url_parse_internet(heap, url, start, end, copy_strings, verify_host_characters);
  if (err < 0) {
    return err;
  }

  // url_parse_internet() left *start at the end of the authority.
  cur                     = *start;
  bool nothing_after_host = false;
  if (*start == end) {
    nothing_after_host = true;
    goto done;
  }

  if (*cur == '/') {
    path_start = cur;
  }
  // Bits common to all three delimiters (';' 0x3B, '?' 0x3F, '#' 0x23 -> 0x23). A character
  // lacking any of these bits cannot be a delimiter, so one mask test filters most
  // characters before the exact comparisons.
  mask = ';' & '?' & '#';
parse_path2:
  if ((*cur & mask) == mask) {
    if (*cur == ';') {
      path_end = cur;
      goto parse_params1;
    }
    if (*cur == '?') {
      path_end = cur;
      goto parse_query1;
    }
    if (*cur == '#') {
      path_end = cur;
      goto parse_fragment1;
    }
  } else {
    ink_assert((*cur != ';') && (*cur != '?') && (*cur != '#'));
  }
  GETNEXT(done);
  goto parse_path2;

parse_params1:
  params_start = cur + 1;
  GETNEXT(done);
parse_params2:
  if (*cur == '?') {
    params_end = cur;
    goto parse_query1;
  }
  if (*cur == '#') {
    params_end = cur;
    goto parse_fragment1;
  }
  GETNEXT(done);
  goto parse_params2;

parse_query1:
  query_start = cur + 1;
  GETNEXT(done);
parse_query2:
  if (*cur == '#') {
    query_end = cur;
    goto parse_fragment1;
  }
  GETNEXT(done);
  goto parse_query2;

parse_fragment1:
  fragment_start = cur + 1;
  GETNEXT(done);
  // The fragment runs to the end of the input, so it is not scanned.
  // NOTE(review): @a cur is not advanced to @a end here, so for a non-empty fragment
  // *start is left at the first fragment character on return - confirm callers do not
  // rely on *start == end.
  fragment_end = end;

done:
  // Any component whose end was not recorded ran until the scan stopped at @a cur.
  if (path_start) {
    // There was an explicit path set with '/'.
    if (!path_end) {
      path_end = cur;
    }
    if (path_start == path_end) {
      url->m_path_is_empty = true;
    } else {
      url->m_path_is_empty = false;
      // Per RFC 3986 section 3, the query string does not contain the initial
      // '?' nor does the fragment contain the initial '#'. The path however
      // does contain the initial '/' and a path can be empty, containing no
      // characters at all, not even the initial '/'. Our path_get interface,
      // however, has long not behaved accordingly, returning only the
      // characters after the first '/'. This does not allow users to tell
      // whether the path was absolutely empty. Further, callers have to
      // account for the missing first '/' character themselves, either in URL
      // length calculations or when piecing together their own URL. There are
      // various examples of this in core and in the plugins shipped with Traffic
      // Server.
      //
      // Correcting this behavior by having path_get return the entire path,
      // (inclusive of any first '/') and an empty string if there were no
      // characters specified in the path would break existing functionality,
      // including various plugins that expect this behavior. Rather than
      // correcting this behavior, therefore, we maintain the current
      // functionality but add state to determine whether the path was
      // absolutely empty so we can reconstruct such URLs.
      ++path_start;
    }
    url_path_set(heap, url, path_start, path_end - path_start, copy_strings);
  } else if (!nothing_after_host) {
    // There was no path set via '/': it is absolutely empty. However, if there
    // is no path, query, or fragment after the host, we by convention add a
    // slash after the authority.  Users of URL expect this behavior. Thus the
    // nothing_after_host check.
    url->m_path_is_empty = true;
  }
  if (params_start) {
    if (!params_end) {
      params_end = cur;
    }
    url_params_set(heap, url, params_start, params_end - params_start, copy_strings);
  }
  if (query_start) {
    // There was a query string marked by '?'.
    if (!query_end) {
      query_end = cur;
    }
    url_query_set(heap, url, query_start, query_end - query_start, copy_strings);
  }
  if (fragment_start) {
    // There was a fragment string marked by '#'.
    if (!fragment_end) {
      fragment_end = cur;
    }
    url_fragment_set(heap, url, fragment_start, fragment_end - fragment_start, copy_strings);
  }

  *start = cur;
  return PARSE_RESULT_DONE;
}
1517 
1518 ParseResult
url_parse_http_regex(HdrHeap * heap,URLImpl * url,const char ** start,const char * end,bool copy_strings)1519 url_parse_http_regex(HdrHeap *heap, URLImpl *url, const char **start, const char *end, bool copy_strings)
1520 {
1521   const char *cur = *start;
1522   const char *host_end;
1523 
1524   // Do a quick check for "://" - our only format check.
1525   if (end - cur > 3 && (((':' ^ *cur) | ('/' ^ cur[1]) | ('/' ^ cur[2])) == 0)) {
1526     cur += 3;
1527   } else if (':' == *cur && (++cur >= end || ('/' == *cur && (++cur >= end || ('/' == *cur && ++cur >= end))))) {
1528     return PARSE_RESULT_ERROR;
1529   }
1530 
1531   // Grab everything until EOS or slash.
1532   const char *base = cur;
1533   cur              = static_cast<const char *>(memchr(cur, '/', end - cur));
1534   if (cur) {
1535     host_end = cur;
1536     ++cur;
1537   } else {
1538     host_end = cur = end;
1539   }
1540 
1541   // Did we find something for the host?
1542   if (base != host_end) {
1543     const char *port = nullptr;
1544     int port_len     = 0;
1545 
1546     // Check for port. Search from the end stopping on the first non-digit
1547     // or more than 5 digits and a delimiter.
1548     port                   = host_end - 1;
1549     const char *port_limit = host_end - 6;
1550     if (port_limit < base) {
1551       port_limit = base; // don't go past start.
1552     }
1553 
1554     while (port >= port_limit && isdigit(*port)) {
1555       --port;
1556     }
1557 
1558     // A port if we're still in the host area and we found a ':' as
1559     // the immediately preceeding character.
1560     if (port >= base && ':' == *port) {
1561       port_len = host_end - port - 1; // must compute this first.
1562       host_end = port;                // then point at colon.
1563       ++port;                         // drop colon from port.
1564       url_port_set(heap, url, port, port_len, copy_strings);
1565     }
1566 
1567     // Now we can set the host.
1568     url_host_set(heap, url, base, host_end - base, copy_strings);
1569   }
1570 
1571   // path is anything that's left.
1572   if (cur < end) {
1573     url_path_set(heap, url, cur, end - cur, copy_strings);
1574     cur = end;
1575   }
1576   *start = cur;
1577   return PARSE_RESULT_DONE;
1578 }
1579 
1580 /*-------------------------------------------------------------------------
1581   -------------------------------------------------------------------------*/
1582 
1583 /***********************************************************************
1584  *                                                                     *
1585  *                           P R I N T I N G                           *
1586  *                                                                     *
1587  ***********************************************************************/
1588 
1589 int
url_print(URLImpl * url,char * buf_start,int buf_length,int * buf_index_inout,int * buf_chars_to_skip_inout,unsigned normalization_flags)1590 url_print(URLImpl *url, char *buf_start, int buf_length, int *buf_index_inout, int *buf_chars_to_skip_inout,
1591           unsigned normalization_flags)
1592 {
1593 #define TRY(x) \
1594   if (!x)      \
1595   return 0
1596 
1597   bool scheme_added = false;
1598   if (url->m_ptr_scheme) {
1599     TRY(((normalization_flags & URLNormalize::LC_SCHEME_HOST) ? mime_mem_print_lc : mime_mem_print)(
1600       url->m_ptr_scheme, url->m_len_scheme, buf_start, buf_length, buf_index_inout, buf_chars_to_skip_inout));
1601     scheme_added = true;
1602 
1603   } else if (normalization_flags & URLNormalize::IMPLIED_SCHEME) {
1604     if (URL_TYPE_HTTP == url->m_url_type) {
1605       TRY(mime_mem_print(URL_SCHEME_HTTP, URL_LEN_HTTP, buf_start, buf_length, buf_index_inout, buf_chars_to_skip_inout));
1606       scheme_added = true;
1607 
1608     } else if (URL_TYPE_HTTPS == url->m_url_type) {
1609       TRY(mime_mem_print(URL_SCHEME_HTTPS, URL_LEN_HTTPS, buf_start, buf_length, buf_index_inout, buf_chars_to_skip_inout));
1610       scheme_added = true;
1611     }
1612   }
1613   if (scheme_added) {
1614     TRY(mime_mem_print("://", 3, buf_start, buf_length, buf_index_inout, buf_chars_to_skip_inout));
1615   }
1616 
1617   if (url->m_ptr_user) {
1618     TRY(mime_mem_print(url->m_ptr_user, url->m_len_user, buf_start, buf_length, buf_index_inout, buf_chars_to_skip_inout));
1619     if (url->m_ptr_password) {
1620       TRY(mime_mem_print(":", 1, buf_start, buf_length, buf_index_inout, buf_chars_to_skip_inout));
1621       TRY(
1622         mime_mem_print(url->m_ptr_password, url->m_len_password, buf_start, buf_length, buf_index_inout, buf_chars_to_skip_inout));
1623     }
1624     TRY(mime_mem_print("@", 1, buf_start, buf_length, buf_index_inout, buf_chars_to_skip_inout));
1625   }
1626 
1627   if (url->m_ptr_host) {
1628     // Force brackets for IPv6. Note colon must occur in first 5 characters.
1629     // But it can be less (e.g. "::1").
1630     int n          = url->m_len_host;
1631     bool bracket_p = '[' != *url->m_ptr_host && (nullptr != memchr(url->m_ptr_host, ':', n > 5 ? 5 : n));
1632     if (bracket_p) {
1633       TRY(mime_mem_print("[", 1, buf_start, buf_length, buf_index_inout, buf_chars_to_skip_inout));
1634     }
1635     TRY(((normalization_flags & URLNormalize::LC_SCHEME_HOST) ? mime_mem_print_lc : mime_mem_print)(
1636       url->m_ptr_host, url->m_len_host, buf_start, buf_length, buf_index_inout, buf_chars_to_skip_inout));
1637     if (bracket_p) {
1638       TRY(mime_mem_print("]", 1, buf_start, buf_length, buf_index_inout, buf_chars_to_skip_inout));
1639     }
1640     if (url->m_ptr_port && url->m_port) {
1641       TRY(mime_mem_print(":", 1, buf_start, buf_length, buf_index_inout, buf_chars_to_skip_inout));
1642       TRY(mime_mem_print(url->m_ptr_port, url->m_len_port, buf_start, buf_length, buf_index_inout, buf_chars_to_skip_inout));
1643     }
1644   }
1645 
1646   if (!url->m_path_is_empty) {
1647     TRY(mime_mem_print("/", 1, buf_start, buf_length, buf_index_inout, buf_chars_to_skip_inout));
1648   }
1649   if (url->m_ptr_path) {
1650     TRY(mime_mem_print(url->m_ptr_path, url->m_len_path, buf_start, buf_length, buf_index_inout, buf_chars_to_skip_inout));
1651   }
1652 
1653   if (url->m_ptr_params && url->m_len_params > 0) {
1654     TRY(mime_mem_print(";", 1, buf_start, buf_length, buf_index_inout, buf_chars_to_skip_inout));
1655     TRY(mime_mem_print(url->m_ptr_params, url->m_len_params, buf_start, buf_length, buf_index_inout, buf_chars_to_skip_inout));
1656   }
1657 
1658   if (url->m_ptr_query && url->m_len_query > 0) {
1659     TRY(mime_mem_print("?", 1, buf_start, buf_length, buf_index_inout, buf_chars_to_skip_inout));
1660     TRY(mime_mem_print(url->m_ptr_query, url->m_len_query, buf_start, buf_length, buf_index_inout, buf_chars_to_skip_inout));
1661   }
1662 
1663   if (url->m_ptr_fragment && url->m_len_fragment > 0) {
1664     TRY(mime_mem_print("#", 1, buf_start, buf_length, buf_index_inout, buf_chars_to_skip_inout));
1665     TRY(mime_mem_print(url->m_ptr_fragment, url->m_len_fragment, buf_start, buf_length, buf_index_inout, buf_chars_to_skip_inout));
1666   }
1667 
1668   return 1;
1669 
1670 #undef TRY
1671 }
1672 
1673 void
url_describe(HdrHeapObjImpl * raw,bool)1674 url_describe(HdrHeapObjImpl *raw, bool /* recurse ATS_UNUSED */)
1675 {
1676   URLImpl *obj = (URLImpl *)raw;
1677 
1678   Debug("http", "[URLTYPE: %d, SWKSIDX: %d,", obj->m_url_type, obj->m_scheme_wks_idx);
1679   Debug("http", "\tSCHEME: \"%.*s\", SCHEME_LEN: %d,", obj->m_len_scheme, (obj->m_ptr_scheme ? obj->m_ptr_scheme : "NULL"),
1680         obj->m_len_scheme);
1681   Debug("http", "\tUSER: \"%.*s\", USER_LEN: %d,", obj->m_len_user, (obj->m_ptr_user ? obj->m_ptr_user : "NULL"), obj->m_len_user);
1682   Debug("http", "\tPASSWORD: \"%.*s\", PASSWORD_LEN: %d,", obj->m_len_password,
1683         (obj->m_ptr_password ? obj->m_ptr_password : "NULL"), obj->m_len_password);
1684   Debug("http", "\tHOST: \"%.*s\", HOST_LEN: %d,", obj->m_len_host, (obj->m_ptr_host ? obj->m_ptr_host : "NULL"), obj->m_len_host);
1685   Debug("http", "\tPORT: \"%.*s\", PORT_LEN: %d, PORT_NUM: %d", obj->m_len_port, (obj->m_ptr_port ? obj->m_ptr_port : "NULL"),
1686         obj->m_len_port, obj->m_port);
1687   Debug("http", "\tPATH: \"%.*s\", PATH_LEN: %d,", obj->m_len_path, (obj->m_ptr_path ? obj->m_ptr_path : "NULL"), obj->m_len_path);
1688   Debug("http", "\tPARAMS: \"%.*s\", PARAMS_LEN: %d,", obj->m_len_params, (obj->m_ptr_params ? obj->m_ptr_params : "NULL"),
1689         obj->m_len_params);
1690   Debug("http", "\tQUERY: \"%.*s\", QUERY_LEN: %d,", obj->m_len_query, (obj->m_ptr_query ? obj->m_ptr_query : "NULL"),
1691         obj->m_len_query);
1692   Debug("http", "\tFRAGMENT: \"%.*s\", FRAGMENT_LEN: %d]", obj->m_len_fragment,
1693         (obj->m_ptr_fragment ? obj->m_ptr_fragment : "NULL"), obj->m_len_fragment);
1694 }
1695 
1696 /*-------------------------------------------------------------------------
1697   -------------------------------------------------------------------------*/
1698 
1699 /***********************************************************************
1700  *                                                                     *
1701  *                        U R L    D I G E S T S                       *
1702  *                                                                     *
1703  ***********************************************************************/
1704 
1705 static inline void
memcpy_tolower(char * d,const char * s,int n)1706 memcpy_tolower(char *d, const char *s, int n)
1707 {
1708   while (n--) {
1709     *d = ParseRules::ink_tolower(*s);
1710     s++;
1711     d++;
1712   }
1713 }
1714 
// Size, in bytes, of the on-stack staging buffers (char buffer[BUFSIZE]) that the
// URL hashing helpers below fill before feeding the hash context.
#define BUFSIZE 512
1716 
1717 // fast path for CryptoHash, HTTP, no user/password/params/query,
1718 // no buffer overflow, no unescaping needed
1719 
1720 static inline void
url_CryptoHash_get_fast(const URLImpl * url,CryptoContext & ctx,CryptoHash * hash,cache_generation_t generation)1721 url_CryptoHash_get_fast(const URLImpl *url, CryptoContext &ctx, CryptoHash *hash, cache_generation_t generation)
1722 {
1723   char buffer[BUFSIZE];
1724   char *p;
1725 
1726   p = buffer;
1727   memcpy_tolower(p, url->m_ptr_scheme, url->m_len_scheme);
1728   p += url->m_len_scheme;
1729   *p++ = ':';
1730   *p++ = '/';
1731   *p++ = '/';
1732   // no user
1733   *p++ = ':';
1734   // no password
1735   *p++ = '@';
1736   memcpy_tolower(p, url->m_ptr_host, url->m_len_host);
1737   p += url->m_len_host;
1738   *p++ = '/';
1739   memcpy(p, url->m_ptr_path, url->m_len_path);
1740   p += url->m_len_path;
1741   *p++ = ';';
1742   // no params
1743   *p++ = '?';
1744   // no query
1745 
1746   ink_assert(sizeof(url->m_port) == 2);
1747   uint16_t port = static_cast<uint16_t>(url_canonicalize_port(url->m_url_type, url->m_port));
1748   *p++          = (reinterpret_cast<char *>(&port))[0];
1749   *p++          = (reinterpret_cast<char *>(&port))[1];
1750 
1751   ctx.update(buffer, p - buffer);
1752   if (generation != -1) {
1753     ctx.update(&generation, sizeof(generation));
1754   }
1755 
1756   ctx.finalize(*hash);
1757 }
1758 
1759 static inline void
url_CryptoHash_get_general(const URLImpl * url,CryptoContext & ctx,CryptoHash & hash,cache_generation_t generation)1760 url_CryptoHash_get_general(const URLImpl *url, CryptoContext &ctx, CryptoHash &hash, cache_generation_t generation)
1761 {
1762   char buffer[BUFSIZE];
1763   char *p, *e;
1764   const char *strs[13], *ends[13];
1765   const char *t;
1766   in_port_t port;
1767   int i, s;
1768 
1769   strs[0] = url->m_ptr_scheme;
1770   strs[1] = "://";
1771   strs[2] = url->m_ptr_user;
1772   strs[3] = ":";
1773   strs[4] = url->m_ptr_password;
1774   strs[5] = "@";
1775   strs[6] = url->m_ptr_host;
1776   strs[7] = "/";
1777   strs[8] = url->m_ptr_path;
1778 
1779   ends[0] = strs[0] + url->m_len_scheme;
1780   ends[1] = strs[1] + 3;
1781   ends[2] = strs[2] + url->m_len_user;
1782   ends[3] = strs[3] + 1;
1783   ends[4] = strs[4] + url->m_len_password;
1784   ends[5] = strs[5] + 1;
1785   ends[6] = strs[6] + url->m_len_host;
1786   ends[7] = strs[7] + 1;
1787   ends[8] = strs[8] + url->m_len_path;
1788 
1789   strs[9]  = ";";
1790   strs[10] = url->m_ptr_params;
1791   strs[11] = "?";
1792   strs[12] = url->m_ptr_query;
1793   ends[9]  = strs[9] + 1;
1794   ends[10] = strs[10] + url->m_len_params;
1795   ends[11] = strs[11] + 1;
1796   ends[12] = strs[12] + url->m_len_query;
1797 
1798   p = buffer;
1799   e = buffer + BUFSIZE;
1800 
1801   for (i = 0; i < 13; i++) {
1802     if (strs[i]) {
1803       t = strs[i];
1804       s = 0;
1805 
1806       while (t < ends[i]) {
1807         if ((i == 0) || (i == 6)) { // scheme and host
1808           unescape_str_tolower(p, e, t, ends[i], s);
1809         } else {
1810           unescape_str(p, e, t, ends[i], s);
1811         }
1812 
1813         if (p == e) {
1814           ctx.update(buffer, BUFSIZE);
1815           p = buffer;
1816         }
1817       }
1818     }
1819   }
1820 
1821   if (p != buffer) {
1822     ctx.update(buffer, p - buffer);
1823   }
1824   int buffer_len = static_cast<int>(p - buffer);
1825   port           = url_canonicalize_port(url->m_url_type, url->m_port);
1826 
1827   ctx.update(&port, sizeof(port));
1828   if (generation != -1) {
1829     ctx.update(&generation, sizeof(generation));
1830     Debug("url_cachekey", "Final url string for cache hash key %.*s%d%d", buffer_len, buffer, port, static_cast<int>(generation));
1831   } else {
1832     Debug("url_cachekey", "Final url string for cache hash key %.*s%d", buffer_len, buffer, port);
1833   }
1834   ctx.finalize(hash);
1835 }
1836 
1837 void
url_CryptoHash_get(const URLImpl * url,CryptoHash * hash,cache_generation_t generation)1838 url_CryptoHash_get(const URLImpl *url, CryptoHash *hash, cache_generation_t generation)
1839 {
1840   URLHashContext ctx;
1841   if ((url_hash_method != 0) && (url->m_url_type == URL_TYPE_HTTP) &&
1842       ((url->m_len_user + url->m_len_password + url->m_len_params + url->m_len_query) == 0) &&
1843       (3 + 1 + 1 + 1 + 1 + 1 + 2 + url->m_len_scheme + url->m_len_host + url->m_len_path < BUFSIZE) &&
1844       (memchr(url->m_ptr_host, '%', url->m_len_host) == nullptr) && (memchr(url->m_ptr_path, '%', url->m_len_path) == nullptr)) {
1845     url_CryptoHash_get_fast(url, ctx, hash, generation);
1846 #ifdef DEBUG
1847     CryptoHash hash_general;
1848     url_CryptoHash_get_general(url, ctx, hash_general, generation);
1849     ink_assert(*hash == hash_general);
1850 #endif
1851   } else {
1852     url_CryptoHash_get_general(url, ctx, *hash, generation);
1853   }
1854 }
1855 
// BUFSIZE is only meant for the URL hashing helpers above; remove it so the
// macro does not stay defined for the rest of the file.
#undef BUFSIZE
1857 
1858 /*-------------------------------------------------------------------------
1859   -------------------------------------------------------------------------*/
1860 
1861 void
url_host_CryptoHash_get(URLImpl * url,CryptoHash * hash)1862 url_host_CryptoHash_get(URLImpl *url, CryptoHash *hash)
1863 {
1864   CryptoContext ctx;
1865 
1866   if (url->m_ptr_scheme) {
1867     ctx.update(url->m_ptr_scheme, url->m_len_scheme);
1868   }
1869 
1870   ctx.update("://", 3);
1871 
1872   if (url->m_ptr_host) {
1873     ctx.update(url->m_ptr_host, url->m_len_host);
1874   }
1875 
1876   ctx.update(":", 1);
1877 
1878   // [amc] Why is this <int> and not <in_port_t>?
1879   // Especially since it's in_port_t for url_CryptoHash_get.
1880   int port = url_canonicalize_port(url->m_url_type, url->m_port);
1881   ctx.update(&port, sizeof(port));
1882   ctx.finalize(*hash);
1883 }
1884