1 /** @file
2
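  URL implementation: well-known scheme tokens, URL object creation and copying,
  marshaling, component setters and getters, string rendering and escape handling.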
4
5 @section license License
6
7 Licensed to the Apache Software Foundation (ASF) under one
8 or more contributor license agreements. See the NOTICE file
9 distributed with this work for additional information
10 regarding copyright ownership. The ASF licenses this file
11 to you under the Apache License, Version 2.0 (the
12 "License"); you may not use this file except in compliance
13 with the License. You may obtain a copy of the License at
14
15 http://www.apache.org/licenses/LICENSE-2.0
16
17 Unless required by applicable law or agreed to in writing, software
18 distributed under the License is distributed on an "AS IS" BASIS,
19 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
20 See the License for the specific language governing permissions and
21 limitations under the License.
22 */
23
24 #include <cassert>
25 #include <new>
26 #include "tscore/ink_platform.h"
27 #include "tscore/ink_memory.h"
28 #include "tscore/TsBuffer.h"
29 #include "URL.h"
30 #include "MIME.h"
31 #include "HTTP.h"
32 #include "tscore/Diags.h"
33
// Well-known-string tokens for the URL schemes handled by this file. Every
// pointer is assigned in url_init() from hdrtoken_string_to_wks() applied to
// the lower-case scheme name; url_scheme_set() compares scheme tokens against
// these by pointer value.
const char *URL_SCHEME_FILE;
const char *URL_SCHEME_FTP;
const char *URL_SCHEME_GOPHER;
const char *URL_SCHEME_HTTP;
const char *URL_SCHEME_HTTPS;
const char *URL_SCHEME_WSS;
const char *URL_SCHEME_WS;
const char *URL_SCHEME_MAILTO;
const char *URL_SCHEME_NEWS;
const char *URL_SCHEME_NNTP;
const char *URL_SCHEME_PROSPERO;
const char *URL_SCHEME_TELNET;
const char *URL_SCHEME_TUNNEL;
const char *URL_SCHEME_WAIS;
const char *URL_SCHEME_PNM;
const char *URL_SCHEME_RTSP;
const char *URL_SCHEME_RTSPU;
const char *URL_SCHEME_MMS;
const char *URL_SCHEME_MMSU;
const char *URL_SCHEME_MMST;
54
// Token index of each scheme. url_init() fills these in by passing the
// matching URL_SCHEME_* token to hdrtoken_wks_to_index().
int URL_WKSIDX_FILE;
int URL_WKSIDX_FTP;
int URL_WKSIDX_GOPHER;
int URL_WKSIDX_HTTP;
int URL_WKSIDX_HTTPS;
int URL_WKSIDX_WS;
int URL_WKSIDX_WSS;
int URL_WKSIDX_MAILTO;
int URL_WKSIDX_NEWS;
int URL_WKSIDX_NNTP;
int URL_WKSIDX_PROSPERO;
int URL_WKSIDX_TELNET;
int URL_WKSIDX_TUNNEL;
int URL_WKSIDX_WAIS;
int URL_WKSIDX_PNM;
int URL_WKSIDX_RTSP;
int URL_WKSIDX_RTSPU;
int URL_WKSIDX_MMS;
int URL_WKSIDX_MMSU;
int URL_WKSIDX_MMST;
75
// Length of each scheme token. url_init() fills these in by passing the
// matching URL_SCHEME_* token to hdrtoken_wks_to_length(). url_length_get()
// uses URL_LEN_HTTP and URL_LEN_HTTPS when it accounts for an implied scheme.
int URL_LEN_FILE;
int URL_LEN_FTP;
int URL_LEN_GOPHER;
int URL_LEN_HTTP;
int URL_LEN_HTTPS;
int URL_LEN_WS;
int URL_LEN_WSS;
int URL_LEN_MAILTO;
int URL_LEN_NEWS;
int URL_LEN_NNTP;
int URL_LEN_PROSPERO;
int URL_LEN_TELNET;
int URL_LEN_TUNNEL;
int URL_LEN_WAIS;
int URL_LEN_PNM;
int URL_LEN_RTSP;
int URL_LEN_RTSPU;
int URL_LEN_MMS;
int URL_LEN_MMSU;
int URL_LEN_MMST;
96
// Whether we should implement url_CryptoHash_get() using url_CryptoHash_get_fast(). Note that
// url_CryptoHash_get_fast() does NOT produce the same result as url_CryptoHash_get_general().
// Starts out as 0.
// NOTE(review): nothing in the visible part of this file reads or writes this
// flag; presumably 0 selects the general method -- confirm where it is used.
static int url_hash_method = 0;
100
101 // test to see if a character is a valid character for a host in a URI according to
102 // RFC 3986 and RFC 1034
103 inline static int
is_host_char(char c)104 is_host_char(char c)
105 {
106 return (ParseRules::is_alnum(c) || (c == '-') || (c == '.') || (c == '[') || (c == ']') || (c == '_') || (c == ':') ||
107 (c == '~') || (c == '%'));
108 }
109
110 // Checks if `addr` is a valid FQDN string
111 bool
validate_host_name(std::string_view addr)112 validate_host_name(std::string_view addr)
113 {
114 return std::all_of(addr.begin(), addr.end(), &is_host_char);
115 }
116
117 /*-------------------------------------------------------------------------
118 -------------------------------------------------------------------------*/
119
120 void
url_init()121 url_init()
122 {
123 static int init = 1;
124
125 if (init) {
126 init = 0;
127
128 hdrtoken_init();
129
130 URL_SCHEME_FILE = hdrtoken_string_to_wks("file");
131 URL_SCHEME_FTP = hdrtoken_string_to_wks("ftp");
132 URL_SCHEME_GOPHER = hdrtoken_string_to_wks("gopher");
133 URL_SCHEME_HTTP = hdrtoken_string_to_wks("http");
134 URL_SCHEME_HTTPS = hdrtoken_string_to_wks("https");
135 URL_SCHEME_WSS = hdrtoken_string_to_wks("wss");
136 URL_SCHEME_WS = hdrtoken_string_to_wks("ws");
137 URL_SCHEME_MAILTO = hdrtoken_string_to_wks("mailto");
138 URL_SCHEME_NEWS = hdrtoken_string_to_wks("news");
139 URL_SCHEME_NNTP = hdrtoken_string_to_wks("nntp");
140 URL_SCHEME_PROSPERO = hdrtoken_string_to_wks("prospero");
141 URL_SCHEME_TELNET = hdrtoken_string_to_wks("telnet");
142 URL_SCHEME_TUNNEL = hdrtoken_string_to_wks("tunnel");
143 URL_SCHEME_WAIS = hdrtoken_string_to_wks("wais");
144 URL_SCHEME_PNM = hdrtoken_string_to_wks("pnm");
145 URL_SCHEME_RTSP = hdrtoken_string_to_wks("rtsp");
146 URL_SCHEME_RTSPU = hdrtoken_string_to_wks("rtspu");
147 URL_SCHEME_MMS = hdrtoken_string_to_wks("mms");
148 URL_SCHEME_MMSU = hdrtoken_string_to_wks("mmsu");
149 URL_SCHEME_MMST = hdrtoken_string_to_wks("mmst");
150
151 ink_assert(URL_SCHEME_FILE && URL_SCHEME_FTP && URL_SCHEME_GOPHER && URL_SCHEME_HTTP && URL_SCHEME_HTTPS && URL_SCHEME_WS &&
152 URL_SCHEME_WSS && URL_SCHEME_MAILTO && URL_SCHEME_NEWS && URL_SCHEME_NNTP && URL_SCHEME_PROSPERO &&
153 URL_SCHEME_TELNET && URL_SCHEME_TUNNEL && URL_SCHEME_WAIS && URL_SCHEME_PNM && URL_SCHEME_RTSP && URL_SCHEME_RTSPU &&
154 URL_SCHEME_MMS && URL_SCHEME_MMSU && URL_SCHEME_MMST);
155
156 URL_WKSIDX_FILE = hdrtoken_wks_to_index(URL_SCHEME_FILE);
157 URL_WKSIDX_FTP = hdrtoken_wks_to_index(URL_SCHEME_FTP);
158 URL_WKSIDX_GOPHER = hdrtoken_wks_to_index(URL_SCHEME_GOPHER);
159 URL_WKSIDX_HTTP = hdrtoken_wks_to_index(URL_SCHEME_HTTP);
160 URL_WKSIDX_HTTPS = hdrtoken_wks_to_index(URL_SCHEME_HTTPS);
161 URL_WKSIDX_WS = hdrtoken_wks_to_index(URL_SCHEME_WS);
162 URL_WKSIDX_WSS = hdrtoken_wks_to_index(URL_SCHEME_WSS);
163 URL_WKSIDX_MAILTO = hdrtoken_wks_to_index(URL_SCHEME_MAILTO);
164 URL_WKSIDX_NEWS = hdrtoken_wks_to_index(URL_SCHEME_NEWS);
165 URL_WKSIDX_NNTP = hdrtoken_wks_to_index(URL_SCHEME_NNTP);
166 URL_WKSIDX_PROSPERO = hdrtoken_wks_to_index(URL_SCHEME_PROSPERO);
167 URL_WKSIDX_TELNET = hdrtoken_wks_to_index(URL_SCHEME_TELNET);
168 URL_WKSIDX_TUNNEL = hdrtoken_wks_to_index(URL_SCHEME_TUNNEL);
169 URL_WKSIDX_WAIS = hdrtoken_wks_to_index(URL_SCHEME_WAIS);
170 URL_WKSIDX_PNM = hdrtoken_wks_to_index(URL_SCHEME_PNM);
171 URL_WKSIDX_RTSP = hdrtoken_wks_to_index(URL_SCHEME_RTSP);
172 URL_WKSIDX_RTSPU = hdrtoken_wks_to_index(URL_SCHEME_RTSPU);
173 URL_WKSIDX_MMS = hdrtoken_wks_to_index(URL_SCHEME_MMS);
174 URL_WKSIDX_MMSU = hdrtoken_wks_to_index(URL_SCHEME_MMSU);
175 URL_WKSIDX_MMST = hdrtoken_wks_to_index(URL_SCHEME_MMST);
176
177 URL_LEN_FILE = hdrtoken_wks_to_length(URL_SCHEME_FILE);
178 URL_LEN_FTP = hdrtoken_wks_to_length(URL_SCHEME_FTP);
179 URL_LEN_GOPHER = hdrtoken_wks_to_length(URL_SCHEME_GOPHER);
180 URL_LEN_HTTP = hdrtoken_wks_to_length(URL_SCHEME_HTTP);
181 URL_LEN_HTTPS = hdrtoken_wks_to_length(URL_SCHEME_HTTPS);
182 URL_LEN_WS = hdrtoken_wks_to_length(URL_SCHEME_WS);
183 URL_LEN_WSS = hdrtoken_wks_to_length(URL_SCHEME_WSS);
184 URL_LEN_MAILTO = hdrtoken_wks_to_length(URL_SCHEME_MAILTO);
185 URL_LEN_NEWS = hdrtoken_wks_to_length(URL_SCHEME_NEWS);
186 URL_LEN_NNTP = hdrtoken_wks_to_length(URL_SCHEME_NNTP);
187 URL_LEN_PROSPERO = hdrtoken_wks_to_length(URL_SCHEME_PROSPERO);
188 URL_LEN_TELNET = hdrtoken_wks_to_length(URL_SCHEME_TELNET);
189 URL_LEN_TUNNEL = hdrtoken_wks_to_length(URL_SCHEME_TUNNEL);
190 URL_LEN_WAIS = hdrtoken_wks_to_length(URL_SCHEME_WAIS);
191 URL_LEN_PNM = hdrtoken_wks_to_length(URL_SCHEME_PNM);
192 URL_LEN_RTSP = hdrtoken_wks_to_length(URL_SCHEME_RTSP);
193 URL_LEN_RTSPU = hdrtoken_wks_to_length(URL_SCHEME_RTSPU);
194 URL_LEN_MMS = hdrtoken_wks_to_length(URL_SCHEME_MMS);
195 URL_LEN_MMSU = hdrtoken_wks_to_length(URL_SCHEME_MMSU);
196 URL_LEN_MMST = hdrtoken_wks_to_length(URL_SCHEME_MMST);
197 }
198 }
199
200 /*-------------------------------------------------------------------------
201 -------------------------------------------------------------------------*/
202
203 /***********************************************************************
204 * *
205 * U R L C R E A T I O N A N D C O P Y *
206 * *
207 ***********************************************************************/
208
209 URLImpl *
url_create(HdrHeap * heap)210 url_create(HdrHeap *heap)
211 {
212 URLImpl *url;
213
214 url = (URLImpl *)heap->allocate_obj(sizeof(URLImpl), HDR_HEAP_OBJ_URL);
215 obj_clear_data((HdrHeapObjImpl *)url);
216 url->m_url_type = URL_TYPE_NONE;
217 url->m_scheme_wks_idx = -1;
218 url_clear_string_ref(url);
219 return url;
220 }
221
222 /*-------------------------------------------------------------------------
223 -------------------------------------------------------------------------*/
224
225 void
url_clear(URLImpl * url_impl)226 url_clear(URLImpl *url_impl)
227 {
228 obj_clear_data((HdrHeapObjImpl *)url_impl);
229 url_impl->m_url_type = URL_TYPE_NONE;
230 url_impl->m_scheme_wks_idx = -1;
231 }
232
233 /*-------------------------------------------------------------------------
234 -------------------------------------------------------------------------*/
235
236 URLImpl *
url_copy(URLImpl * s_url,HdrHeap * s_heap,HdrHeap * d_heap,bool inherit_strs)237 url_copy(URLImpl *s_url, HdrHeap *s_heap, HdrHeap *d_heap, bool inherit_strs)
238 {
239 URLImpl *d_url = url_create(d_heap);
240 url_copy_onto(s_url, s_heap, d_url, d_heap, inherit_strs);
241 return d_url;
242 }
243
244 /*-------------------------------------------------------------------------
245 -------------------------------------------------------------------------*/
246
247 void
url_copy_onto(URLImpl * s_url,HdrHeap * s_heap,URLImpl * d_url,HdrHeap * d_heap,bool inherit_strs)248 url_copy_onto(URLImpl *s_url, HdrHeap *s_heap, URLImpl *d_url, HdrHeap *d_heap, bool inherit_strs)
249 {
250 if (s_url != d_url) {
251 obj_copy_data((HdrHeapObjImpl *)s_url, (HdrHeapObjImpl *)d_url);
252 if (inherit_strs && (s_heap != d_heap)) {
253 d_heap->inherit_string_heaps(s_heap);
254 }
255 }
256 }
257
258 /*-------------------------------------------------------------------------
259 -------------------------------------------------------------------------*/
260
261 void
url_nuke_proxy_stuff(URLImpl * d_url)262 url_nuke_proxy_stuff(URLImpl *d_url)
263 {
264 d_url->m_len_scheme = 0;
265 d_url->m_len_user = 0;
266 d_url->m_len_password = 0;
267 d_url->m_len_host = 0;
268 d_url->m_len_port = 0;
269
270 d_url->m_ptr_scheme = nullptr;
271 d_url->m_ptr_user = nullptr;
272 d_url->m_ptr_password = nullptr;
273 d_url->m_ptr_host = nullptr;
274 d_url->m_ptr_port = nullptr;
275
276 d_url->m_scheme_wks_idx = -1;
277 d_url->m_port = 0;
278 }
279
280 /*-------------------------------------------------------------------------
281 -------------------------------------------------------------------------*/
282
283 /**
284 This routine is like url_copy_onto, but clears the
285 scheme/host/user/pass/port components, resulting in a server-style URL.
286
287 */
288 void
url_copy_onto_as_server_url(URLImpl * s_url,HdrHeap * s_heap,URLImpl * d_url,HdrHeap * d_heap,bool inherit_strs)289 url_copy_onto_as_server_url(URLImpl *s_url, HdrHeap *s_heap, URLImpl *d_url, HdrHeap *d_heap, bool inherit_strs)
290 {
291 url_nuke_proxy_stuff(d_url);
292
293 d_url->m_ptr_path = s_url->m_ptr_path;
294 d_url->m_path_is_empty = s_url->m_path_is_empty;
295 d_url->m_ptr_params = s_url->m_ptr_params;
296 d_url->m_ptr_query = s_url->m_ptr_query;
297 d_url->m_ptr_fragment = s_url->m_ptr_fragment;
298 url_clear_string_ref(d_url);
299
300 d_url->m_len_path = s_url->m_len_path;
301 d_url->m_len_params = s_url->m_len_params;
302 d_url->m_len_query = s_url->m_len_query;
303 d_url->m_len_fragment = s_url->m_len_fragment;
304
305 d_url->m_url_type = s_url->m_url_type;
306 d_url->m_type_code = s_url->m_type_code;
307
308 if (inherit_strs && (s_heap != d_heap)) {
309 d_heap->inherit_string_heaps(s_heap);
310 }
311 }
312
313 /*-------------------------------------------------------------------------
314 -------------------------------------------------------------------------*/
315
316 /***********************************************************************
317 * *
318 * M A R S H A L I N G *
319 * *
320 ***********************************************************************/
/**
  Marshal the component string pointers of this URL.

  HDR_MARSHAL_STR is applied, in order, to the scheme, user, password, host,
  port, path, params, query and fragment pointers together with the
  translation table. The cached printed string is not marshaled (its call is
  commented out).

  NOTE(review): HDR_MARSHAL_STR is defined outside this file; presumably it
  translates each pointer through the table and may return early on failure --
  confirm against the macro definition.

  @param str_xlate translation table handed to HDR_MARSHAL_STR.
  @param num_xlate handed to HDR_MARSHAL_STR alongside @a str_xlate;
                   presumably the number of table entries -- TODO confirm.
  @return 0 when control reaches the end of the function.
*/
int
URLImpl::marshal(MarshalXlate *str_xlate, int num_xlate)
{
  HDR_MARSHAL_STR(m_ptr_scheme, str_xlate, num_xlate);
  HDR_MARSHAL_STR(m_ptr_user, str_xlate, num_xlate);
  HDR_MARSHAL_STR(m_ptr_password, str_xlate, num_xlate);
  HDR_MARSHAL_STR(m_ptr_host, str_xlate, num_xlate);
  HDR_MARSHAL_STR(m_ptr_port, str_xlate, num_xlate);
  HDR_MARSHAL_STR(m_ptr_path, str_xlate, num_xlate);
  HDR_MARSHAL_STR(m_ptr_params, str_xlate, num_xlate);
  HDR_MARSHAL_STR(m_ptr_query, str_xlate, num_xlate);
  HDR_MARSHAL_STR(m_ptr_fragment, str_xlate, num_xlate);
  //    HDR_MARSHAL_STR(m_ptr_printed_string, str_xlate, num_xlate);
  return 0;
}
336
/**
  Unmarshal the component string pointers of this URL.

  HDR_UNMARSHAL_STR is applied with @a offset to the scheme, user, password,
  host, port, path, params, query and fragment pointers. The cached printed
  string is not touched (its call is commented out).

  NOTE(review): HDR_UNMARSHAL_STR is defined outside this file; presumably it
  rebases each non-null pointer by @a offset -- confirm against the macro.

  @param offset value handed to HDR_UNMARSHAL_STR for every pointer.
*/
void
URLImpl::unmarshal(intptr_t offset)
{
  HDR_UNMARSHAL_STR(m_ptr_scheme, offset);
  HDR_UNMARSHAL_STR(m_ptr_user, offset);
  HDR_UNMARSHAL_STR(m_ptr_password, offset);
  HDR_UNMARSHAL_STR(m_ptr_host, offset);
  HDR_UNMARSHAL_STR(m_ptr_port, offset);
  HDR_UNMARSHAL_STR(m_ptr_path, offset);
  HDR_UNMARSHAL_STR(m_ptr_params, offset);
  HDR_UNMARSHAL_STR(m_ptr_query, offset);
  HDR_UNMARSHAL_STR(m_ptr_fragment, offset);
  //    HDR_UNMARSHAL_STR(m_ptr_printed_string, offset);
}
351
352 void
rehome_strings(HdrHeap * new_heap)353 URLImpl::rehome_strings(HdrHeap *new_heap)
354 {
355 m_ptr_scheme = new_heap->localize({m_ptr_scheme, m_len_scheme}).data();
356 m_ptr_user = new_heap->localize({m_ptr_user, m_len_user}).data();
357 m_ptr_password = new_heap->localize({m_ptr_password, m_len_password}).data();
358 m_ptr_host = new_heap->localize({m_ptr_host, m_len_host}).data();
359 m_ptr_port = new_heap->localize({m_ptr_port, m_len_port}).data();
360 m_ptr_path = new_heap->localize({m_ptr_path, m_len_path}).data();
361 m_ptr_params = new_heap->localize({m_ptr_params, m_len_params}).data();
362 m_ptr_query = new_heap->localize({m_ptr_query, m_len_query}).data();
363 m_ptr_fragment = new_heap->localize({m_ptr_fragment, m_len_fragment}).data();
364 m_ptr_printed_string = new_heap->localize({m_ptr_printed_string, m_len_printed_string}).data();
365 }
366
/**
  Move every string of this URL, including the cached printed string.

  HDR_MOVE_STR is applied to each (pointer, length) member pair.

  NOTE(review): HDR_MOVE_STR is defined outside this file and receives no
  heap argument here; presumably it refers to the parameter by its name
  @a new_heap, so that name must not change -- confirm against the macro.

  @param new_heap destination string heap.
*/
void
URLImpl::move_strings(HdrStrHeap *new_heap)
{
  HDR_MOVE_STR(m_ptr_scheme, m_len_scheme);
  HDR_MOVE_STR(m_ptr_user, m_len_user);
  HDR_MOVE_STR(m_ptr_password, m_len_password);
  HDR_MOVE_STR(m_ptr_host, m_len_host);
  HDR_MOVE_STR(m_ptr_port, m_len_port);
  HDR_MOVE_STR(m_ptr_path, m_len_path);
  HDR_MOVE_STR(m_ptr_params, m_len_params);
  HDR_MOVE_STR(m_ptr_query, m_len_query);
  HDR_MOVE_STR(m_ptr_fragment, m_len_fragment);
  HDR_MOVE_STR(m_ptr_printed_string, m_len_printed_string);
}
381
382 size_t
strings_length()383 URLImpl::strings_length()
384 {
385 size_t ret = 0;
386
387 ret += m_len_scheme;
388 ret += m_len_user;
389 ret += m_len_password;
390 ret += m_len_host;
391 ret += m_len_port;
392 ret += m_len_path;
393 ret += m_len_params;
394 ret += m_len_query;
395 ret += m_len_fragment;
396 ret += m_len_printed_string;
397 return ret;
398 }
399
/**
  Check the component strings of this URL against a set of heaps.

  CHECK_STR is applied to the scheme, user, password, host, port, path,
  params, query and fragment (pointer, length) pairs. The cached printed
  string is not checked (its call is commented out).

  NOTE(review): CHECK_STR is defined outside this file; presumably it verifies
  that each string lies inside one of the given heaps -- confirm against the
  macro definition.

  @param heaps     heap descriptions handed to CHECK_STR.
  @param num_heaps handed to CHECK_STR alongside @a heaps; presumably the
                   number of entries -- TODO confirm.
*/
void
URLImpl::check_strings(HeapCheck *heaps, int num_heaps)
{
  CHECK_STR(m_ptr_scheme, m_len_scheme, heaps, num_heaps);
  CHECK_STR(m_ptr_user, m_len_user, heaps, num_heaps);
  CHECK_STR(m_ptr_password, m_len_password, heaps, num_heaps);
  CHECK_STR(m_ptr_host, m_len_host, heaps, num_heaps);
  CHECK_STR(m_ptr_port, m_len_port, heaps, num_heaps);
  CHECK_STR(m_ptr_path, m_len_path, heaps, num_heaps);
  CHECK_STR(m_ptr_params, m_len_params, heaps, num_heaps);
  CHECK_STR(m_ptr_query, m_len_query, heaps, num_heaps);
  CHECK_STR(m_ptr_fragment, m_len_fragment, heaps, num_heaps);
  //    CHECK_STR(m_ptr_printed_string, m_len_printed_string, heaps, num_heaps);
}
414
415 /***********************************************************************
416 * *
417 * S E T *
418 * *
419 ***********************************************************************/
420
421 const char *
url_scheme_set(HdrHeap * heap,URLImpl * url,const char * scheme_str,int scheme_wks_idx,int length,bool copy_string)422 url_scheme_set(HdrHeap *heap, URLImpl *url, const char *scheme_str, int scheme_wks_idx, int length, bool copy_string)
423 {
424 const char *scheme_wks;
425 url_called_set(url);
426 if (length == 0) {
427 scheme_str = nullptr;
428 }
429
430 mime_str_u16_set(heap, scheme_str, length, &(url->m_ptr_scheme), &(url->m_len_scheme), copy_string);
431
432 url->m_scheme_wks_idx = scheme_wks_idx;
433 if (scheme_wks_idx >= 0) {
434 scheme_wks = hdrtoken_index_to_wks(scheme_wks_idx);
435 } else {
436 scheme_wks = nullptr;
437 }
438
439 if (scheme_wks == URL_SCHEME_HTTP || scheme_wks == URL_SCHEME_WS) {
440 url->m_url_type = URL_TYPE_HTTP;
441 } else if (scheme_wks == URL_SCHEME_HTTPS || scheme_wks == URL_SCHEME_WSS) {
442 url->m_url_type = URL_TYPE_HTTPS;
443 } else {
444 url->m_url_type = URL_TYPE_HTTP;
445 }
446
447 return scheme_wks; // tokenized string or NULL if not well known
448 }
449
450 /*-------------------------------------------------------------------------
451 -------------------------------------------------------------------------*/
452
453 void
url_user_set(HdrHeap * heap,URLImpl * url,const char * value,int length,bool copy_string)454 url_user_set(HdrHeap *heap, URLImpl *url, const char *value, int length, bool copy_string)
455 {
456 url_called_set(url);
457 if (length == 0) {
458 value = nullptr;
459 }
460 mime_str_u16_set(heap, value, length, &(url->m_ptr_user), &(url->m_len_user), copy_string);
461 }
462
463 /*-------------------------------------------------------------------------
464 -------------------------------------------------------------------------*/
465
466 void
url_password_set(HdrHeap * heap,URLImpl * url,const char * value,int length,bool copy_string)467 url_password_set(HdrHeap *heap, URLImpl *url, const char *value, int length, bool copy_string)
468 {
469 url_called_set(url);
470 if (length == 0) {
471 value = nullptr;
472 }
473 mime_str_u16_set(heap, value, length, &(url->m_ptr_password), &(url->m_len_password), copy_string);
474 }
475
476 /*-------------------------------------------------------------------------
477 -------------------------------------------------------------------------*/
478
479 void
url_host_set(HdrHeap * heap,URLImpl * url,const char * value,int length,bool copy_string)480 url_host_set(HdrHeap *heap, URLImpl *url, const char *value, int length, bool copy_string)
481 {
482 url_called_set(url);
483 if (length == 0) {
484 value = nullptr;
485 }
486 mime_str_u16_set(heap, value, length, &(url->m_ptr_host), &(url->m_len_host), copy_string);
487 }
488
489 /*-------------------------------------------------------------------------
490 -------------------------------------------------------------------------*/
491
492 void
url_port_set(HdrHeap * heap,URLImpl * url,const char * value,int length,bool copy_string)493 url_port_set(HdrHeap *heap, URLImpl *url, const char *value, int length, bool copy_string)
494 {
495 url_called_set(url);
496 if (length == 0) {
497 value = nullptr;
498 }
499 mime_str_u16_set(heap, value, length, &(url->m_ptr_port), &(url->m_len_port), copy_string);
500
501 url->m_port = 0;
502 for (int i = 0; i < length; i++) {
503 if (!ParseRules::is_digit(value[i])) {
504 break;
505 }
506 url->m_port = url->m_port * 10 + (value[i] - '0');
507 }
508 }
509
510 /*-------------------------------------------------------------------------
511 -------------------------------------------------------------------------*/
512
513 void
url_port_set(HdrHeap * heap,URLImpl * url,unsigned int port)514 url_port_set(HdrHeap *heap, URLImpl *url, unsigned int port)
515 {
516 url_called_set(url);
517 if (port > 0) {
518 char value[6];
519 int length;
520
521 length = ink_fast_itoa(port, value, sizeof(value));
522 mime_str_u16_set(heap, value, length, &(url->m_ptr_port), &(url->m_len_port), true);
523 } else {
524 mime_str_u16_set(heap, nullptr, 0, &(url->m_ptr_port), &(url->m_len_port), true);
525 }
526 url->m_port = port;
527 }
528
529 /*-------------------------------------------------------------------------
530 -------------------------------------------------------------------------*/
531
532 void
url_path_set(HdrHeap * heap,URLImpl * url,const char * value,int length,bool copy_string)533 url_path_set(HdrHeap *heap, URLImpl *url, const char *value, int length, bool copy_string)
534 {
535 url_called_set(url);
536 if (length == 0) {
537 value = nullptr;
538 }
539 mime_str_u16_set(heap, value, length, &(url->m_ptr_path), &(url->m_len_path), copy_string);
540 }
541
542 /*-------------------------------------------------------------------------
543 -------------------------------------------------------------------------*/
544
545 // empties params/query/fragment component
546 // url_{params|query|fragment}_set()
547
548 void
url_params_set(HdrHeap * heap,URLImpl * url,const char * value,int length,bool copy_string)549 url_params_set(HdrHeap *heap, URLImpl *url, const char *value, int length, bool copy_string)
550 {
551 url_called_set(url);
552 mime_str_u16_set(heap, value, length, &(url->m_ptr_params), &(url->m_len_params), copy_string);
553 }
554
555 /*-------------------------------------------------------------------------
556 -------------------------------------------------------------------------*/
557
558 void
url_query_set(HdrHeap * heap,URLImpl * url,const char * value,int length,bool copy_string)559 url_query_set(HdrHeap *heap, URLImpl *url, const char *value, int length, bool copy_string)
560 {
561 url_called_set(url);
562 mime_str_u16_set(heap, value, length, &(url->m_ptr_query), &(url->m_len_query), copy_string);
563 }
564
565 /*-------------------------------------------------------------------------
566 -------------------------------------------------------------------------*/
567
568 void
url_fragment_set(HdrHeap * heap,URLImpl * url,const char * value,int length,bool copy_string)569 url_fragment_set(HdrHeap *heap, URLImpl *url, const char *value, int length, bool copy_string)
570 {
571 url_called_set(url);
572 mime_str_u16_set(heap, value, length, &(url->m_ptr_fragment), &(url->m_len_fragment), copy_string);
573 }
574
575 /*-------------------------------------------------------------------------
576 -------------------------------------------------------------------------*/
577
578 void
url_type_set(URLImpl * url,unsigned int typecode)579 url_type_set(URLImpl *url, unsigned int typecode)
580 {
581 url_called_set(url);
582 url->m_type_code = typecode;
583 }
584
585 /*-------------------------------------------------------------------------
586 -------------------------------------------------------------------------*/
587
588 /***********************************************************************
589 * *
590 * G E T *
591 * *
592 ***********************************************************************/
593
594 /*-------------------------------------------------------------------------
595 -------------------------------------------------------------------------*/
596
597 void
url_called_set(URLImpl * url)598 url_called_set(URLImpl *url)
599 {
600 url->m_clean = !url->m_ptr_printed_string;
601 }
602
603 void
url_clear_string_ref(URLImpl * url)604 url_clear_string_ref(URLImpl *url)
605 {
606 if (url->m_ptr_printed_string) {
607 url->m_len_printed_string = 0;
608 url->m_ptr_printed_string = nullptr;
609 url->m_clean = true;
610 }
611 return;
612 }
613
614 char *
url_string_get_ref(HdrHeap * heap,URLImpl * url,int * length,unsigned normalization_flags)615 url_string_get_ref(HdrHeap *heap, URLImpl *url, int *length, unsigned normalization_flags)
616 {
617 if (!url) {
618 return nullptr;
619 }
620
621 if (url->m_ptr_printed_string && url->m_clean && (normalization_flags == url->m_normalization_flags)) {
622 if (length) {
623 *length = url->m_len_printed_string;
624 }
625 return const_cast<char *>(url->m_ptr_printed_string);
626 } else { // either not clean or never printed
627 int len = url_length_get(url, normalization_flags);
628 char *buf;
629 int index = 0;
630 int offset = 0;
631
632 /* stuff alloc'd here gets gc'd on HdrHeap::destroy() */
633 buf = heap->allocate_str(len + 1);
634 url_print(url, buf, len, &index, &offset, normalization_flags);
635 buf[len] = '\0';
636
637 if (length) {
638 *length = len;
639 }
640 url->m_clean = true; // reset since we have url_print()'ed again
641 url->m_len_printed_string = len;
642 url->m_ptr_printed_string = buf;
643 url->m_normalization_flags = normalization_flags;
644 return buf;
645 }
646 }
647
648 char *
url_string_get(URLImpl * url,Arena * arena,int * length,HdrHeap * heap)649 url_string_get(URLImpl *url, Arena *arena, int *length, HdrHeap *heap)
650 {
651 int len = url_length_get(url);
652 char *buf;
653 char *buf2;
654 int index = 0;
655 int offset = 0;
656
657 buf = arena ? arena->str_alloc(len) : static_cast<char *>(ats_malloc(len + 1));
658
659 url_print(url, buf, len, &index, &offset);
660 buf[len] = '\0';
661
662 /* see string_get_ref() */
663 if (heap) {
664 buf2 = heap->allocate_str(len + 1);
665 memcpy(buf2, buf, len);
666 buf2[len] = '\0';
667 url->m_clean = true; // reset since we have url_print()'ed again
668 url->m_len_printed_string = len;
669 url->m_ptr_printed_string = buf2;
670 }
671
672 if (length) {
673 *length = len;
674 }
675 return buf;
676 }
677
678 /*-------------------------------------------------------------------------
679 -------------------------------------------------------------------------*/
680
681 char *
url_string_get_buf(URLImpl * url,char * dstbuf,int dstbuf_size,int * length)682 url_string_get_buf(URLImpl *url, char *dstbuf, int dstbuf_size, int *length)
683 {
684 int len = url_length_get(url);
685 int index = 0;
686 int offset = 0;
687 char *buf = nullptr;
688
689 if (dstbuf && dstbuf_size > 0) {
690 buf = dstbuf;
691 if (len >= dstbuf_size) {
692 len = dstbuf_size - 1;
693 }
694 url_print(url, dstbuf, len, &index, &offset);
695 buf[len] = 0;
696
697 if (length) {
698 *length = len;
699 }
700 }
701 return buf;
702 }
703
704 /*-------------------------------------------------------------------------
705 -------------------------------------------------------------------------*/
706
707 const char *
url_user_get(URLImpl * url,int * length)708 url_user_get(URLImpl *url, int *length)
709 {
710 *length = url->m_len_user;
711 return url->m_ptr_user;
712 }
713
714 /*-------------------------------------------------------------------------
715 -------------------------------------------------------------------------*/
716
717 const char *
url_password_get(URLImpl * url,int * length)718 url_password_get(URLImpl *url, int *length)
719 {
720 *length = url->m_len_password;
721 return url->m_ptr_password;
722 }
723
724 /*-------------------------------------------------------------------------
725 -------------------------------------------------------------------------*/
726
727 const char *
url_host_get(URLImpl * url,int * length)728 url_host_get(URLImpl *url, int *length)
729 {
730 *length = url->m_len_host;
731 return url->m_ptr_host;
732 }
733
734 /*-------------------------------------------------------------------------
735 -------------------------------------------------------------------------*/
736
737 int
url_port_get(URLImpl * url)738 url_port_get(URLImpl *url)
739 {
740 return url->m_port;
741 }
742
743 /*-------------------------------------------------------------------------
744 -------------------------------------------------------------------------*/
745
746 const char *
url_path_get(URLImpl * url,int * length)747 url_path_get(URLImpl *url, int *length)
748 {
749 *length = url->m_len_path;
750 return url->m_ptr_path;
751 }
752
753 /*-------------------------------------------------------------------------
754 -------------------------------------------------------------------------*/
755
756 const char *
url_params_get(URLImpl * url,int * length)757 url_params_get(URLImpl *url, int *length)
758 {
759 *length = url->m_len_params;
760 return url->m_ptr_params;
761 }
762
763 /*-------------------------------------------------------------------------
764 -------------------------------------------------------------------------*/
765
766 const char *
url_query_get(URLImpl * url,int * length)767 url_query_get(URLImpl *url, int *length)
768 {
769 *length = url->m_len_query;
770 return url->m_ptr_query;
771 }
772
773 /*-------------------------------------------------------------------------
774 -------------------------------------------------------------------------*/
775
776 const char *
url_fragment_get(URLImpl * url,int * length)777 url_fragment_get(URLImpl *url, int *length)
778 {
779 *length = url->m_len_fragment;
780 return url->m_ptr_fragment;
781 }
782
783 /*-------------------------------------------------------------------------
784 -------------------------------------------------------------------------*/
785
786 int
url_type_get(URLImpl * url)787 url_type_get(URLImpl *url)
788 {
789 return url->m_type_code;
790 }
791
792 /*-------------------------------------------------------------------------
793 -------------------------------------------------------------------------*/
794
795 /***********************************************************************
796 * *
797 * U R L S T R I N G F U N C T I O N S *
798 * *
799 ***********************************************************************/
800
801 /*-------------------------------------------------------------------------
802 -------------------------------------------------------------------------*/
803
804 int
url_length_get(URLImpl * url,unsigned normalization_flags)805 url_length_get(URLImpl *url, unsigned normalization_flags)
806 {
807 int length = 0;
808
809 if (url->m_ptr_scheme) {
810 length += url->m_len_scheme + 3; // +3 for "://"
811
812 } else if (normalization_flags & URLNormalize::IMPLIED_SCHEME) {
813 if (URL_TYPE_HTTP == url->m_url_type) {
814 length += URL_LEN_HTTP + 3;
815
816 } else if (URL_TYPE_HTTPS == url->m_url_type) {
817 length += URL_LEN_HTTPS + 3;
818 }
819 }
820
821 if (url->m_ptr_user) {
822 length += url->m_len_user + 1; // +1 for "@"
823 if (url->m_ptr_password) {
824 length += url->m_len_password + 1; // +1 for ":"
825 }
826 }
827
828 if (url->m_ptr_host) {
829 length += url->m_len_host;
830 if (url->m_ptr_port && url->m_port) {
831 length += url->m_len_port + 1; // +1 for ":"
832 }
833 }
834
835 if (url->m_ptr_path) {
836 length += url->m_len_path;
837 }
838
839 if (!url->m_path_is_empty) {
840 // m_ptr_path does not contain the initial "/" and thus m_len_path does not
841 // count it. We account for it here.
842 length += 1; // +1 for "/"
843 }
844
845 if (url->m_ptr_params && url->m_len_params > 0) {
846 length += url->m_len_params + 1; // +1 for ";"
847 }
848
849 if (url->m_ptr_query && url->m_len_query > 0) {
850 length += url->m_len_query + 1; // +1 for "?"
851 }
852
853 if (url->m_ptr_fragment && url->m_len_fragment > 0) {
854 length += url->m_len_fragment + 1; // +1 for "#"
855 }
856
857 return length;
858 }
859
860 /*-------------------------------------------------------------------------
861 -------------------------------------------------------------------------*/
862
863 char *
url_to_string(URLImpl * url,Arena * arena,int * length)864 url_to_string(URLImpl *url, Arena *arena, int *length)
865 {
866 int len;
867 int idx;
868 char *str;
869
870 len = url_length_get(url) + 1;
871
872 if (length) {
873 *length = len;
874 }
875
876 if (arena) {
877 str = arena->str_alloc(len);
878 } else {
879 str = static_cast<char *>(ats_malloc(len + 1));
880 }
881
882 idx = 0;
883
884 if (url->m_ptr_scheme) {
885 memcpy(&str[idx], url->m_ptr_scheme, url->m_len_scheme);
886 idx += url->m_len_scheme;
887 if ((url->m_scheme_wks_idx >= 0) && (hdrtoken_index_to_wks(url->m_scheme_wks_idx) == URL_SCHEME_FILE)) {
888 str[idx++] = ':';
889 } else {
890 str[idx++] = ':';
891 str[idx++] = '/';
892 str[idx++] = '/';
893 }
894 }
895
896 if (url->m_ptr_user) {
897 memcpy(&str[idx], url->m_ptr_user, url->m_len_user);
898 idx += url->m_len_user;
899 if (url->m_ptr_password) {
900 str[idx++] = ':';
901 memcpy(&str[idx], url->m_ptr_password, url->m_len_password);
902 idx += url->m_len_password;
903 }
904 str[idx++] = '@';
905 }
906
907 if (url->m_ptr_host) {
908 memcpy(&str[idx], url->m_ptr_host, url->m_len_host);
909 idx += url->m_len_host;
910 if (url->m_ptr_port != nullptr) {
911 str[idx++] = ':';
912 memcpy(&str[idx], url->m_ptr_port, url->m_len_port);
913 idx += url->m_len_port;
914 }
915 }
916
917 memcpy(&str[idx], url->m_ptr_path, url->m_len_path);
918 idx += url->m_len_path;
919
920 if (url->m_ptr_params && url->m_len_params > 0) {
921 str[idx++] = ';';
922 memcpy(&str[idx], url->m_ptr_params, url->m_len_params);
923 idx += url->m_len_params;
924 }
925
926 if (url->m_ptr_query && url->m_len_query > 0) {
927 str[idx++] = '?';
928 memcpy(&str[idx], url->m_ptr_query, url->m_len_query);
929 idx += url->m_len_query;
930 }
931
932 if (url->m_ptr_fragment && url->m_len_fragment > 0) {
933 str[idx++] = '#';
934 memcpy(&str[idx], url->m_ptr_fragment, url->m_len_fragment);
935 idx += url->m_len_fragment;
936 }
937
938 str[idx++] = '\0';
939
940 ink_release_assert(idx == len);
941
942 return str;
943 }
944
945 /*-------------------------------------------------------------------------
946 -------------------------------------------------------------------------*/
947
948 /***********************************************************************
949 * *
950 * E S C A P E - H A N D L I N G *
951 * *
952 ***********************************************************************/
953
954 void
unescape_str(char * & buf,char * buf_e,const char * & str,const char * str_e,int & state)955 unescape_str(char *&buf, char *buf_e, const char *&str, const char *str_e, int &state)
956 {
957 int copy_len;
958 char *first_pct;
959 int buf_len = static_cast<int>(buf_e - buf);
960 int str_len = static_cast<int>(str_e - str);
961 int min_len = (str_len < buf_len ? str_len : buf_len);
962
963 first_pct = ink_memcpy_until_char(buf, const_cast<char *>(str), min_len, '%');
964 copy_len = static_cast<int>(first_pct - str);
965 str += copy_len;
966 buf += copy_len;
967 if (copy_len == min_len) {
968 return;
969 }
970
971 while (str < str_e && (buf != buf_e)) {
972 switch (state) {
973 case 0:
974 if (str[0] == '%') {
975 str += 1;
976 state = 1;
977 } else {
978 *buf++ = str[0];
979 str += 1;
980 }
981 break;
982 case 1:
983 if (ParseRules::is_hex(str[0])) {
984 str += 1;
985 state = 2;
986 } else {
987 *buf++ = str[-1];
988 state = 0;
989 }
990 break;
991 case 2:
992 if (ParseRules::is_hex(str[0])) {
993 int tmp;
994
995 if (ParseRules::is_alpha(str[-1])) {
996 tmp = (ParseRules::ink_toupper(str[-1]) - 'A' + 10) * 16;
997 } else {
998 tmp = (str[-1] - '0') * 16;
999 }
1000 if (ParseRules::is_alpha(str[0])) {
1001 tmp += (ParseRules::ink_toupper(str[0]) - 'A' + 10);
1002 } else {
1003 tmp += str[0] - '0';
1004 }
1005
1006 *buf++ = tmp;
1007 str += 1;
1008 state = 0;
1009 } else {
1010 *buf++ = str[-2];
1011 state = 3;
1012 }
1013 break;
1014 case 3:
1015 *buf++ = str[-1];
1016 state = 0;
1017 break;
1018 }
1019 }
1020 }
1021
1022 /*-------------------------------------------------------------------------
1023 -------------------------------------------------------------------------*/
1024
1025 void
unescape_str_tolower(char * & buf,char * end,const char * & str,const char * str_e,int & state)1026 unescape_str_tolower(char *&buf, char *end, const char *&str, const char *str_e, int &state)
1027 {
1028 while (str < str_e && (buf != end)) {
1029 switch (state) {
1030 case 0:
1031 if (str[0] == '%') {
1032 str += 1;
1033 state = 1;
1034 } else {
1035 *buf++ = ParseRules::ink_tolower(str[0]);
1036 str += 1;
1037 }
1038 break;
1039 case 1:
1040 if (ParseRules::is_hex(str[0])) {
1041 str += 1;
1042 state = 2;
1043 } else {
1044 *buf++ = ParseRules::ink_tolower(str[-1]);
1045 state = 0;
1046 }
1047 break;
1048 case 2:
1049 if (ParseRules::is_hex(str[0])) {
1050 int tmp;
1051
1052 if (ParseRules::is_alpha(str[-1])) {
1053 tmp = (ParseRules::ink_toupper(str[-1]) - 'A' + 10) * 16;
1054 } else {
1055 tmp = (str[-1] - '0') * 16;
1056 }
1057 if (ParseRules::is_alpha(str[0])) {
1058 tmp += (ParseRules::ink_toupper(str[0]) - 'A' + 10);
1059 } else {
1060 tmp += str[0] - '0';
1061 }
1062
1063 *buf++ = tmp;
1064 str += 1;
1065 state = 0;
1066 } else {
1067 *buf++ = ParseRules::ink_tolower(str[-2]);
1068 state = 3;
1069 }
1070 break;
1071 case 3:
1072 *buf++ = ParseRules::ink_tolower(str[-1]);
1073 state = 0;
1074 break;
1075 }
1076 }
1077 }
1078
1079 /*-------------------------------------------------------------------------
1080 -------------------------------------------------------------------------*/
1081
1082 char *
url_unescapify(Arena * arena,const char * str,int length)1083 url_unescapify(Arena *arena, const char *str, int length)
1084 {
1085 char *buffer;
1086 char *t, *e;
1087 int s;
1088
1089 if (length == -1) {
1090 length = static_cast<int>(strlen(str));
1091 }
1092
1093 buffer = arena->str_alloc(length);
1094 t = buffer;
1095 e = buffer + length;
1096 s = 0;
1097
1098 unescape_str(t, e, str, str + length, s);
1099 *t = '\0';
1100
1101 return buffer;
1102 }
1103
1104 /*-------------------------------------------------------------------------
1105 -------------------------------------------------------------------------*/
1106
1107 /***********************************************************************
1108 * *
1109 * P A R S I N G *
1110 * *
1111 ***********************************************************************/
1112
// Advance `cur` by one character and jump to `label` if that reaches or
// passes `end`. Both `cur` and `end` must be in scope where the macro is
// expanded.
#define GETNEXT(label) \
  {                    \
    cur += 1;          \
    if (cur >= end) {  \
      goto label;      \
    }                  \
  }
1120
1121 ParseResult
url_parse_scheme(HdrHeap * heap,URLImpl * url,const char ** start,const char * end,bool copy_strings_p)1122 url_parse_scheme(HdrHeap *heap, URLImpl *url, const char **start, const char *end, bool copy_strings_p)
1123 {
1124 const char *cur = *start;
1125 const char *scheme_wks;
1126 const char *scheme_start = nullptr;
1127 const char *scheme_end = nullptr;
1128 int scheme_wks_idx;
1129
1130 // Skip over spaces
1131 while (' ' == *cur && ++cur < end) {
1132 }
1133
1134 if (cur < end) {
1135 scheme_start = scheme_end = cur;
1136
1137 // If the URL is more complex then a path, parse to see if there is a scheme
1138 if ('/' != *cur) {
1139 // Search for a : it could be part of a scheme or a username:password
1140 while (':' != *cur && ++cur < end) {
1141 }
1142
1143 // If there is a :// then there is a scheme
1144 if (cur + 2 < end && cur[1] == '/' && cur[2] == '/') { // found "://"
1145 scheme_end = cur;
1146 scheme_wks_idx = hdrtoken_tokenize(scheme_start, scheme_end - scheme_start, &scheme_wks);
1147
1148 if (!(scheme_wks_idx > 0 && hdrtoken_wks_to_token_type(scheme_wks) == HDRTOKEN_TYPE_SCHEME)) {
1149 // Unknown scheme, validate the scheme
1150
1151 // RFC 3986 Section 3.1
1152 // These are the valid characters in a scheme:
1153 // scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
1154 // return an error if there is another character in the scheme
1155 if (!ParseRules::is_alpha(*scheme_start)) {
1156 return PARSE_RESULT_ERROR;
1157 }
1158 for (cur = scheme_start + 1; cur < scheme_end; ++cur) {
1159 if (!(ParseRules::is_alnum(*cur) != 0 || *cur == '+' || *cur == '-' || *cur == '.')) {
1160 return PARSE_RESULT_ERROR;
1161 }
1162 }
1163 }
1164 url_scheme_set(heap, url, scheme_start, scheme_wks_idx, scheme_end - scheme_start, copy_strings_p);
1165 }
1166 }
1167 *start = scheme_end;
1168 return PARSE_RESULT_CONT;
1169 }
1170 return PARSE_RESULT_ERROR; // no non-whitespace found
1171 }
1172
1173 // This implementation namespace is necessary because this function is tested by a Catch unit test
1174 // in another source file.
1175 //
1176 namespace UrlImpl
1177 {
1178 /**
1179 * This method will return TRUE if the uri is strictly compliant with
1180 * RFC 3986 and it will return FALSE if not.
1181 */
1182 bool
url_is_strictly_compliant(const char * start,const char * end)1183 url_is_strictly_compliant(const char *start, const char *end)
1184 {
1185 for (const char *i = start; i < end; ++i) {
1186 if (!ParseRules::is_uri(*i)) {
1187 Debug("http", "Non-RFC compliant character [0x%.2X] found in URL", (unsigned char)*i);
1188 return false;
1189 }
1190 }
1191 return true;
1192 }
1193
1194 } // namespace UrlImpl
1195 using namespace UrlImpl;
1196
1197 ParseResult
url_parse(HdrHeap * heap,URLImpl * url,const char ** start,const char * end,bool copy_strings_p,bool strict_uri_parsing,bool verify_host_characters)1198 url_parse(HdrHeap *heap, URLImpl *url, const char **start, const char *end, bool copy_strings_p, bool strict_uri_parsing,
1199 bool verify_host_characters)
1200 {
1201 if (strict_uri_parsing && !url_is_strictly_compliant(*start, end)) {
1202 return PARSE_RESULT_ERROR;
1203 }
1204
1205 ParseResult zret = url_parse_scheme(heap, url, start, end, copy_strings_p);
1206 return PARSE_RESULT_CONT == zret ? url_parse_http(heap, url, start, end, copy_strings_p, verify_host_characters) : zret;
1207 }
1208
1209 ParseResult
url_parse_regex(HdrHeap * heap,URLImpl * url,const char ** start,const char * end,bool copy_strings_p)1210 url_parse_regex(HdrHeap *heap, URLImpl *url, const char **start, const char *end, bool copy_strings_p)
1211 {
1212 ParseResult zret = url_parse_scheme(heap, url, start, end, copy_strings_p);
1213 return PARSE_RESULT_CONT == zret ? url_parse_http_regex(heap, url, start, end, copy_strings_p) : zret;
1214 }
1215
1216 /**
1217 Parse internet URL.
1218
1219 After this function completes, start will point to the first character after the
1220 host or @a end if there are not characters after it.
1221
1222 @verbatim
1223 [://][user[:password]@]host[:port]
1224
1225 some.place/
1226 some.place:80/
1227 foo@some.place:80/
1228 foo:bar@some.place:80/
1229 foo:bar@some.place/
1230 foo:42@some.place/
1231 @endverbatim
1232
1233 */
1234
1235 ParseResult
url_parse_internet(HdrHeap * heap,URLImpl * url,const char ** start,char const * end,bool copy_strings_p,bool verify_host_characters)1236 url_parse_internet(HdrHeap *heap, URLImpl *url, const char **start, char const *end, bool copy_strings_p,
1237 bool verify_host_characters)
1238 {
1239 const char *cur = *start;
1240 const char *base; // Base for host/port field.
1241 const char *bracket = nullptr; // marker for open bracket, if any.
1242 ts::ConstBuffer user, passw, host, port;
1243 static size_t const MAX_COLON = 8; // max # of valid colons.
1244 size_t n_colon = 0;
1245 const char *last_colon = nullptr; // pointer to last colon seen.
1246
1247 // Do a quick check for "://"
1248 if (end - cur > 3 && (((':' ^ *cur) | ('/' ^ cur[1]) | ('/' ^ cur[2])) == 0)) {
1249 cur += 3;
1250 } else if (':' == *cur && (++cur >= end || ('/' == *cur && (++cur >= end || ('/' == *cur && ++cur >= end))))) {
1251 return PARSE_RESULT_ERROR;
1252 }
1253
1254 base = cur;
1255 // skipped leading stuff, start real parsing.
1256 while (cur < end) {
1257 // Note: Each case is responsible for incrementing @a cur if
1258 // appropriate!
1259 switch (*cur) {
1260 case ']': // address close
1261 if (nullptr == bracket || n_colon >= MAX_COLON) {
1262 return PARSE_RESULT_ERROR;
1263 }
1264 ++cur;
1265 /* We keep the brackets because there are too many other places
1266 that depend on them and it's too painful to keep track if
1267 they should be used. I thought about being clever with
1268 stripping brackets from non-IPv6 content but that gets ugly
1269 as well. Just not worth it.
1270 */
1271 host.set(bracket, cur);
1272 // Spec requires This constitute the entire host so the next
1273 // character must be missing (EOS), slash, or colon.
1274 if (cur >= end || '/' == *cur) { // done which is OK
1275 last_colon = nullptr;
1276 break;
1277 } else if (':' != *cur) { // otherwise it must be a colon
1278 return PARSE_RESULT_ERROR;
1279 }
1280 /* We want to prevent more than 1 colon following so we set @a
1281 n_colon appropriately.
1282 */
1283 n_colon = MAX_COLON - 1;
1284 // FALL THROUGH
1285 case ':': // track colons, fail if too many.
1286 if (++n_colon > MAX_COLON) {
1287 return PARSE_RESULT_ERROR;
1288 }
1289 last_colon = cur;
1290 ++cur;
1291 break;
1292 case '@': // user/password marker.
1293 if (user || n_colon > 1) {
1294 return PARSE_RESULT_ERROR; // we already got one, or too many colons.
1295 }
1296 if (n_colon) {
1297 user.set(base, last_colon);
1298 passw.set(last_colon + 1, cur);
1299 n_colon = 0;
1300 last_colon = nullptr;
1301 } else {
1302 user.set(base, cur);
1303 }
1304 ++cur;
1305 base = cur;
1306 break;
1307 case '[': // address open
1308 if (bracket || base != cur) { // must be first char in field
1309 return PARSE_RESULT_ERROR;
1310 }
1311 bracket = cur; // location and flag.
1312 ++cur;
1313 break;
1314 // RFC 3986, section 3.2:
1315 // The authority component is ... terminated by the next slash ("/"),
1316 // question mark ("?"), or number sign ("#") character, or by the end of
1317 // the URI.
1318 case '/':
1319 case '?':
1320 case '#':
1321 end = cur; // We're done parsing authority, cause loop exit.
1322 break;
1323 default:
1324 ++cur;
1325 break;
1326 };
1327 }
1328 // Time to pick up the pieces. At this pointer cur._ptr is the first
1329 // character past the parse area.
1330
1331 if (user) {
1332 url_user_set(heap, url, user._ptr, user._size, copy_strings_p);
1333 if (passw) {
1334 url_password_set(heap, url, passw._ptr, passw._size, copy_strings_p);
1335 }
1336 }
1337
1338 // @a host not set means no brackets to mark explicit host.
1339 if (!host) {
1340 if (1 == n_colon || MAX_COLON == n_colon) { // presume port.
1341 host.set(base, last_colon);
1342 } else { // it's all host.
1343 host.set(base, cur);
1344 last_colon = nullptr; // prevent port setting.
1345 }
1346 }
1347 if (host._size) {
1348 if (!verify_host_characters || validate_host_name(std::string_view(host._ptr, host._size))) {
1349 url_host_set(heap, url, host._ptr, host._size, copy_strings_p);
1350 } else {
1351 return PARSE_RESULT_ERROR;
1352 }
1353 }
1354
1355 if (last_colon) {
1356 ink_assert(n_colon);
1357 port.set(last_colon + 1, cur);
1358 if (!port._size) {
1359 return PARSE_RESULT_ERROR; // colon w/o port value.
1360 }
1361 url_port_set(heap, url, port._ptr, port._size, copy_strings_p);
1362 }
1363 *start = cur;
1364 return PARSE_RESULT_DONE;
1365 }
1366
1367 /*-------------------------------------------------------------------------
1368 -------------------------------------------------------------------------*/
1369
/**
  Parse everything after the scheme of an HTTP-style URL.

  The authority is handled by url_parse_internet(); what follows is split
  into path, params (after ';'), query (after '?') and fragment (after '#'),
  and each component that was found is stored in @a url. The delimiters
  themselves are not part of the stored components.

  @param heap                    Heap passed through to the url_*_set() calls.
  @param url                     URL object that receives the components.
  @param start                   In: start of the text. Out: position where scanning stopped.
  @param end                     End of the text.
  @param copy_strings            Passed through to the url_*_set() calls.
  @param verify_host_characters  Passed through to url_parse_internet().
  @return The (negative) result of url_parse_internet() if it fails, otherwise
          PARSE_RESULT_DONE.
 */
ParseResult
url_parse_http(HdrHeap *heap, URLImpl *url, const char **start, const char *end, bool copy_strings, bool verify_host_characters)
{
  ParseResult err;
  const char *cur;
  const char *path_start     = nullptr;
  const char *path_end       = nullptr;
  const char *params_start   = nullptr;
  const char *params_end     = nullptr;
  const char *query_start    = nullptr;
  const char *query_end      = nullptr;
  const char *fragment_start = nullptr;
  const char *fragment_end   = nullptr;
  char mask;

  err = url_parse_internet(heap, url, start, end, copy_strings, verify_host_characters);
  if (err < 0) {
    return err;
  }

  // url_parse_internet() left *start just past the authority.
  cur                     = *start;
  bool nothing_after_host = false;
  if (*start == end) {
    nothing_after_host = true;
    goto done;
  }

  if (*cur == '/') {
    path_start = cur;
  }
  // Bits common to all three delimiters (';' 0x3B, '?' 0x3F, '#' 0x23 -> 0x23).
  // A character missing any of them cannot be a delimiter, which lets the
  // loop below skip the three comparisons for most characters.
  mask = ';' & '?' & '#';
parse_path2:
  if ((*cur & mask) == mask) {
    if (*cur == ';') {
      path_end = cur;
      goto parse_params1;
    }
    if (*cur == '?') {
      path_end = cur;
      goto parse_query1;
    }
    if (*cur == '#') {
      path_end = cur;
      goto parse_fragment1;
    }
  } else {
    ink_assert((*cur != ';') && (*cur != '?') && (*cur != '#'));
  }
  GETNEXT(done);
  goto parse_path2;

  // Each *1 label is entered with cur on the delimiter; the component starts
  // one past it. If the text ends before a later delimiter is seen, the
  // matching *_end stays null and is filled in from cur at `done`.
parse_params1:
  params_start = cur + 1;
  GETNEXT(done);
parse_params2:
  if (*cur == '?') {
    params_end = cur;
    goto parse_query1;
  }
  if (*cur == '#') {
    params_end = cur;
    goto parse_fragment1;
  }
  GETNEXT(done);
  goto parse_params2;

parse_query1:
  query_start = cur + 1;
  GETNEXT(done);
parse_query2:
  if (*cur == '#') {
    query_end = cur;
    goto parse_fragment1;
  }
  GETNEXT(done);
  goto parse_query2;

parse_fragment1:
  fragment_start = cur + 1;
  GETNEXT(done);
  // The fragment runs to the end of the text, so there is nothing to scan.
  // NOTE(review): cur is left on the first fragment character here, so for a
  // non-empty fragment *start is set to that position rather than to end -
  // confirm that callers do not rely on *start == end.
  fragment_end = end;

done:
  if (path_start) {
    // There was an explicit path set with '/'.
    if (!path_end) {
      path_end = cur;
    }
    if (path_start == path_end) {
      url->m_path_is_empty = true;
    } else {
      url->m_path_is_empty = false;
      // Per RFC 3986 section 3, the query string does not contain the initial
      // '?' nor does the fragment contain the initial '#'. The path however
      // does contain the initial '/' and a path can be empty, containing no
      // characters at all, not even the initial '/'. Our path_get interface,
      // however, has long not behaved accordingly, returning only the
      // characters after the first '/'. This does not allow users to tell
      // whether the path was absolutely empty. Further, callers have to
      // account for the missing first '/' character themselves, either in URL
      // length calculations or when piecing together their own URL. There are
      // various examples of this in core and in the plugins shipped with Traffic
      // Server.
      //
      // Correcting this behavior by having path_get return the entire path,
      // (inclusive of any first '/') and an empty string if there were no
      // characters specified in the path would break existing functionality,
      // including various plugins that expect this behavior. Rather than
      // correcting this behavior, therefore, we maintain the current
      // functionality but add state to determine whether the path was
      // absolutely empty so we can reconstruct such URLs.
      ++path_start;
    }
    url_path_set(heap, url, path_start, path_end - path_start, copy_strings);
  } else if (!nothing_after_host) {
    // There was no path set via '/': it is absolutely empty. However, if there
    // is no path, query, or fragment after the host, we by convention add a
    // slash after the authority. Users of URL expect this behavior. Thus the
    // nothing_after_host check.
    url->m_path_is_empty = true;
  }
  if (params_start) {
    // There was a params string marked by ';'.
    if (!params_end) {
      params_end = cur;
    }
    url_params_set(heap, url, params_start, params_end - params_start, copy_strings);
  }
  if (query_start) {
    // There was a query string marked by '?'.
    if (!query_end) {
      query_end = cur;
    }
    url_query_set(heap, url, query_start, query_end - query_start, copy_strings);
  }
  if (fragment_start) {
    // There was a fragment string marked by '#'.
    if (!fragment_end) {
      fragment_end = cur;
    }
    url_fragment_set(heap, url, fragment_start, fragment_end - fragment_start, copy_strings);
  }

  *start = cur;
  return PARSE_RESULT_DONE;
}
1517
1518 ParseResult
url_parse_http_regex(HdrHeap * heap,URLImpl * url,const char ** start,const char * end,bool copy_strings)1519 url_parse_http_regex(HdrHeap *heap, URLImpl *url, const char **start, const char *end, bool copy_strings)
1520 {
1521 const char *cur = *start;
1522 const char *host_end;
1523
1524 // Do a quick check for "://" - our only format check.
1525 if (end - cur > 3 && (((':' ^ *cur) | ('/' ^ cur[1]) | ('/' ^ cur[2])) == 0)) {
1526 cur += 3;
1527 } else if (':' == *cur && (++cur >= end || ('/' == *cur && (++cur >= end || ('/' == *cur && ++cur >= end))))) {
1528 return PARSE_RESULT_ERROR;
1529 }
1530
1531 // Grab everything until EOS or slash.
1532 const char *base = cur;
1533 cur = static_cast<const char *>(memchr(cur, '/', end - cur));
1534 if (cur) {
1535 host_end = cur;
1536 ++cur;
1537 } else {
1538 host_end = cur = end;
1539 }
1540
1541 // Did we find something for the host?
1542 if (base != host_end) {
1543 const char *port = nullptr;
1544 int port_len = 0;
1545
1546 // Check for port. Search from the end stopping on the first non-digit
1547 // or more than 5 digits and a delimiter.
1548 port = host_end - 1;
1549 const char *port_limit = host_end - 6;
1550 if (port_limit < base) {
1551 port_limit = base; // don't go past start.
1552 }
1553
1554 while (port >= port_limit && isdigit(*port)) {
1555 --port;
1556 }
1557
1558 // A port if we're still in the host area and we found a ':' as
1559 // the immediately preceeding character.
1560 if (port >= base && ':' == *port) {
1561 port_len = host_end - port - 1; // must compute this first.
1562 host_end = port; // then point at colon.
1563 ++port; // drop colon from port.
1564 url_port_set(heap, url, port, port_len, copy_strings);
1565 }
1566
1567 // Now we can set the host.
1568 url_host_set(heap, url, base, host_end - base, copy_strings);
1569 }
1570
1571 // path is anything that's left.
1572 if (cur < end) {
1573 url_path_set(heap, url, cur, end - cur, copy_strings);
1574 cur = end;
1575 }
1576 *start = cur;
1577 return PARSE_RESULT_DONE;
1578 }
1579
1580 /*-------------------------------------------------------------------------
1581 -------------------------------------------------------------------------*/
1582
1583 /***********************************************************************
1584 * *
1585 * P R I N T I N G *
1586 * *
1587 ***********************************************************************/
1588
1589 int
url_print(URLImpl * url,char * buf_start,int buf_length,int * buf_index_inout,int * buf_chars_to_skip_inout,unsigned normalization_flags)1590 url_print(URLImpl *url, char *buf_start, int buf_length, int *buf_index_inout, int *buf_chars_to_skip_inout,
1591 unsigned normalization_flags)
1592 {
1593 #define TRY(x) \
1594 if (!x) \
1595 return 0
1596
1597 bool scheme_added = false;
1598 if (url->m_ptr_scheme) {
1599 TRY(((normalization_flags & URLNormalize::LC_SCHEME_HOST) ? mime_mem_print_lc : mime_mem_print)(
1600 url->m_ptr_scheme, url->m_len_scheme, buf_start, buf_length, buf_index_inout, buf_chars_to_skip_inout));
1601 scheme_added = true;
1602
1603 } else if (normalization_flags & URLNormalize::IMPLIED_SCHEME) {
1604 if (URL_TYPE_HTTP == url->m_url_type) {
1605 TRY(mime_mem_print(URL_SCHEME_HTTP, URL_LEN_HTTP, buf_start, buf_length, buf_index_inout, buf_chars_to_skip_inout));
1606 scheme_added = true;
1607
1608 } else if (URL_TYPE_HTTPS == url->m_url_type) {
1609 TRY(mime_mem_print(URL_SCHEME_HTTPS, URL_LEN_HTTPS, buf_start, buf_length, buf_index_inout, buf_chars_to_skip_inout));
1610 scheme_added = true;
1611 }
1612 }
1613 if (scheme_added) {
1614 TRY(mime_mem_print("://", 3, buf_start, buf_length, buf_index_inout, buf_chars_to_skip_inout));
1615 }
1616
1617 if (url->m_ptr_user) {
1618 TRY(mime_mem_print(url->m_ptr_user, url->m_len_user, buf_start, buf_length, buf_index_inout, buf_chars_to_skip_inout));
1619 if (url->m_ptr_password) {
1620 TRY(mime_mem_print(":", 1, buf_start, buf_length, buf_index_inout, buf_chars_to_skip_inout));
1621 TRY(
1622 mime_mem_print(url->m_ptr_password, url->m_len_password, buf_start, buf_length, buf_index_inout, buf_chars_to_skip_inout));
1623 }
1624 TRY(mime_mem_print("@", 1, buf_start, buf_length, buf_index_inout, buf_chars_to_skip_inout));
1625 }
1626
1627 if (url->m_ptr_host) {
1628 // Force brackets for IPv6. Note colon must occur in first 5 characters.
1629 // But it can be less (e.g. "::1").
1630 int n = url->m_len_host;
1631 bool bracket_p = '[' != *url->m_ptr_host && (nullptr != memchr(url->m_ptr_host, ':', n > 5 ? 5 : n));
1632 if (bracket_p) {
1633 TRY(mime_mem_print("[", 1, buf_start, buf_length, buf_index_inout, buf_chars_to_skip_inout));
1634 }
1635 TRY(((normalization_flags & URLNormalize::LC_SCHEME_HOST) ? mime_mem_print_lc : mime_mem_print)(
1636 url->m_ptr_host, url->m_len_host, buf_start, buf_length, buf_index_inout, buf_chars_to_skip_inout));
1637 if (bracket_p) {
1638 TRY(mime_mem_print("]", 1, buf_start, buf_length, buf_index_inout, buf_chars_to_skip_inout));
1639 }
1640 if (url->m_ptr_port && url->m_port) {
1641 TRY(mime_mem_print(":", 1, buf_start, buf_length, buf_index_inout, buf_chars_to_skip_inout));
1642 TRY(mime_mem_print(url->m_ptr_port, url->m_len_port, buf_start, buf_length, buf_index_inout, buf_chars_to_skip_inout));
1643 }
1644 }
1645
1646 if (!url->m_path_is_empty) {
1647 TRY(mime_mem_print("/", 1, buf_start, buf_length, buf_index_inout, buf_chars_to_skip_inout));
1648 }
1649 if (url->m_ptr_path) {
1650 TRY(mime_mem_print(url->m_ptr_path, url->m_len_path, buf_start, buf_length, buf_index_inout, buf_chars_to_skip_inout));
1651 }
1652
1653 if (url->m_ptr_params && url->m_len_params > 0) {
1654 TRY(mime_mem_print(";", 1, buf_start, buf_length, buf_index_inout, buf_chars_to_skip_inout));
1655 TRY(mime_mem_print(url->m_ptr_params, url->m_len_params, buf_start, buf_length, buf_index_inout, buf_chars_to_skip_inout));
1656 }
1657
1658 if (url->m_ptr_query && url->m_len_query > 0) {
1659 TRY(mime_mem_print("?", 1, buf_start, buf_length, buf_index_inout, buf_chars_to_skip_inout));
1660 TRY(mime_mem_print(url->m_ptr_query, url->m_len_query, buf_start, buf_length, buf_index_inout, buf_chars_to_skip_inout));
1661 }
1662
1663 if (url->m_ptr_fragment && url->m_len_fragment > 0) {
1664 TRY(mime_mem_print("#", 1, buf_start, buf_length, buf_index_inout, buf_chars_to_skip_inout));
1665 TRY(mime_mem_print(url->m_ptr_fragment, url->m_len_fragment, buf_start, buf_length, buf_index_inout, buf_chars_to_skip_inout));
1666 }
1667
1668 return 1;
1669
1670 #undef TRY
1671 }
1672
1673 void
url_describe(HdrHeapObjImpl * raw,bool)1674 url_describe(HdrHeapObjImpl *raw, bool /* recurse ATS_UNUSED */)
1675 {
1676 URLImpl *obj = (URLImpl *)raw;
1677
1678 Debug("http", "[URLTYPE: %d, SWKSIDX: %d,", obj->m_url_type, obj->m_scheme_wks_idx);
1679 Debug("http", "\tSCHEME: \"%.*s\", SCHEME_LEN: %d,", obj->m_len_scheme, (obj->m_ptr_scheme ? obj->m_ptr_scheme : "NULL"),
1680 obj->m_len_scheme);
1681 Debug("http", "\tUSER: \"%.*s\", USER_LEN: %d,", obj->m_len_user, (obj->m_ptr_user ? obj->m_ptr_user : "NULL"), obj->m_len_user);
1682 Debug("http", "\tPASSWORD: \"%.*s\", PASSWORD_LEN: %d,", obj->m_len_password,
1683 (obj->m_ptr_password ? obj->m_ptr_password : "NULL"), obj->m_len_password);
1684 Debug("http", "\tHOST: \"%.*s\", HOST_LEN: %d,", obj->m_len_host, (obj->m_ptr_host ? obj->m_ptr_host : "NULL"), obj->m_len_host);
1685 Debug("http", "\tPORT: \"%.*s\", PORT_LEN: %d, PORT_NUM: %d", obj->m_len_port, (obj->m_ptr_port ? obj->m_ptr_port : "NULL"),
1686 obj->m_len_port, obj->m_port);
1687 Debug("http", "\tPATH: \"%.*s\", PATH_LEN: %d,", obj->m_len_path, (obj->m_ptr_path ? obj->m_ptr_path : "NULL"), obj->m_len_path);
1688 Debug("http", "\tPARAMS: \"%.*s\", PARAMS_LEN: %d,", obj->m_len_params, (obj->m_ptr_params ? obj->m_ptr_params : "NULL"),
1689 obj->m_len_params);
1690 Debug("http", "\tQUERY: \"%.*s\", QUERY_LEN: %d,", obj->m_len_query, (obj->m_ptr_query ? obj->m_ptr_query : "NULL"),
1691 obj->m_len_query);
1692 Debug("http", "\tFRAGMENT: \"%.*s\", FRAGMENT_LEN: %d]", obj->m_len_fragment,
1693 (obj->m_ptr_fragment ? obj->m_ptr_fragment : "NULL"), obj->m_len_fragment);
1694 }
1695
1696 /*-------------------------------------------------------------------------
1697 -------------------------------------------------------------------------*/
1698
1699 /***********************************************************************
1700 * *
1701 * U R L D I G E S T S *
1702 * *
1703 ***********************************************************************/
1704
1705 static inline void
memcpy_tolower(char * d,const char * s,int n)1706 memcpy_tolower(char *d, const char *s, int n)
1707 {
1708 while (n--) {
1709 *d = ParseRules::ink_tolower(*s);
1710 s++;
1711 d++;
1712 }
1713 }
1714
1715 #define BUFSIZE 512
1716
1717 // fast path for CryptoHash, HTTP, no user/password/params/query,
1718 // no buffer overflow, no unescaping needed
1719
1720 static inline void
url_CryptoHash_get_fast(const URLImpl * url,CryptoContext & ctx,CryptoHash * hash,cache_generation_t generation)1721 url_CryptoHash_get_fast(const URLImpl *url, CryptoContext &ctx, CryptoHash *hash, cache_generation_t generation)
1722 {
1723 char buffer[BUFSIZE];
1724 char *p;
1725
1726 p = buffer;
1727 memcpy_tolower(p, url->m_ptr_scheme, url->m_len_scheme);
1728 p += url->m_len_scheme;
1729 *p++ = ':';
1730 *p++ = '/';
1731 *p++ = '/';
1732 // no user
1733 *p++ = ':';
1734 // no password
1735 *p++ = '@';
1736 memcpy_tolower(p, url->m_ptr_host, url->m_len_host);
1737 p += url->m_len_host;
1738 *p++ = '/';
1739 memcpy(p, url->m_ptr_path, url->m_len_path);
1740 p += url->m_len_path;
1741 *p++ = ';';
1742 // no params
1743 *p++ = '?';
1744 // no query
1745
1746 ink_assert(sizeof(url->m_port) == 2);
1747 uint16_t port = static_cast<uint16_t>(url_canonicalize_port(url->m_url_type, url->m_port));
1748 *p++ = (reinterpret_cast<char *>(&port))[0];
1749 *p++ = (reinterpret_cast<char *>(&port))[1];
1750
1751 ctx.update(buffer, p - buffer);
1752 if (generation != -1) {
1753 ctx.update(&generation, sizeof(generation));
1754 }
1755
1756 ctx.finalize(*hash);
1757 }
1758
1759 static inline void
url_CryptoHash_get_general(const URLImpl * url,CryptoContext & ctx,CryptoHash & hash,cache_generation_t generation)1760 url_CryptoHash_get_general(const URLImpl *url, CryptoContext &ctx, CryptoHash &hash, cache_generation_t generation)
1761 {
1762 char buffer[BUFSIZE];
1763 char *p, *e;
1764 const char *strs[13], *ends[13];
1765 const char *t;
1766 in_port_t port;
1767 int i, s;
1768
1769 strs[0] = url->m_ptr_scheme;
1770 strs[1] = "://";
1771 strs[2] = url->m_ptr_user;
1772 strs[3] = ":";
1773 strs[4] = url->m_ptr_password;
1774 strs[5] = "@";
1775 strs[6] = url->m_ptr_host;
1776 strs[7] = "/";
1777 strs[8] = url->m_ptr_path;
1778
1779 ends[0] = strs[0] + url->m_len_scheme;
1780 ends[1] = strs[1] + 3;
1781 ends[2] = strs[2] + url->m_len_user;
1782 ends[3] = strs[3] + 1;
1783 ends[4] = strs[4] + url->m_len_password;
1784 ends[5] = strs[5] + 1;
1785 ends[6] = strs[6] + url->m_len_host;
1786 ends[7] = strs[7] + 1;
1787 ends[8] = strs[8] + url->m_len_path;
1788
1789 strs[9] = ";";
1790 strs[10] = url->m_ptr_params;
1791 strs[11] = "?";
1792 strs[12] = url->m_ptr_query;
1793 ends[9] = strs[9] + 1;
1794 ends[10] = strs[10] + url->m_len_params;
1795 ends[11] = strs[11] + 1;
1796 ends[12] = strs[12] + url->m_len_query;
1797
1798 p = buffer;
1799 e = buffer + BUFSIZE;
1800
1801 for (i = 0; i < 13; i++) {
1802 if (strs[i]) {
1803 t = strs[i];
1804 s = 0;
1805
1806 while (t < ends[i]) {
1807 if ((i == 0) || (i == 6)) { // scheme and host
1808 unescape_str_tolower(p, e, t, ends[i], s);
1809 } else {
1810 unescape_str(p, e, t, ends[i], s);
1811 }
1812
1813 if (p == e) {
1814 ctx.update(buffer, BUFSIZE);
1815 p = buffer;
1816 }
1817 }
1818 }
1819 }
1820
1821 if (p != buffer) {
1822 ctx.update(buffer, p - buffer);
1823 }
1824 int buffer_len = static_cast<int>(p - buffer);
1825 port = url_canonicalize_port(url->m_url_type, url->m_port);
1826
1827 ctx.update(&port, sizeof(port));
1828 if (generation != -1) {
1829 ctx.update(&generation, sizeof(generation));
1830 Debug("url_cachekey", "Final url string for cache hash key %.*s%d%d", buffer_len, buffer, port, static_cast<int>(generation));
1831 } else {
1832 Debug("url_cachekey", "Final url string for cache hash key %.*s%d", buffer_len, buffer, port);
1833 }
1834 ctx.finalize(hash);
1835 }
1836
1837 void
url_CryptoHash_get(const URLImpl * url,CryptoHash * hash,cache_generation_t generation)1838 url_CryptoHash_get(const URLImpl *url, CryptoHash *hash, cache_generation_t generation)
1839 {
1840 URLHashContext ctx;
1841 if ((url_hash_method != 0) && (url->m_url_type == URL_TYPE_HTTP) &&
1842 ((url->m_len_user + url->m_len_password + url->m_len_params + url->m_len_query) == 0) &&
1843 (3 + 1 + 1 + 1 + 1 + 1 + 2 + url->m_len_scheme + url->m_len_host + url->m_len_path < BUFSIZE) &&
1844 (memchr(url->m_ptr_host, '%', url->m_len_host) == nullptr) && (memchr(url->m_ptr_path, '%', url->m_len_path) == nullptr)) {
1845 url_CryptoHash_get_fast(url, ctx, hash, generation);
1846 #ifdef DEBUG
1847 CryptoHash hash_general;
1848 url_CryptoHash_get_general(url, ctx, hash_general, generation);
1849 ink_assert(*hash == hash_general);
1850 #endif
1851 } else {
1852 url_CryptoHash_get_general(url, ctx, *hash, generation);
1853 }
1854 }
1855
1856 #undef BUFSIZE
1857
1858 /*-------------------------------------------------------------------------
1859 -------------------------------------------------------------------------*/
1860
1861 void
url_host_CryptoHash_get(URLImpl * url,CryptoHash * hash)1862 url_host_CryptoHash_get(URLImpl *url, CryptoHash *hash)
1863 {
1864 CryptoContext ctx;
1865
1866 if (url->m_ptr_scheme) {
1867 ctx.update(url->m_ptr_scheme, url->m_len_scheme);
1868 }
1869
1870 ctx.update("://", 3);
1871
1872 if (url->m_ptr_host) {
1873 ctx.update(url->m_ptr_host, url->m_len_host);
1874 }
1875
1876 ctx.update(":", 1);
1877
1878 // [amc] Why is this <int> and not <in_port_t>?
1879 // Especially since it's in_port_t for url_CryptoHash_get.
1880 int port = url_canonicalize_port(url->m_url_type, url->m_port);
1881 ctx.update(&port, sizeof(port));
1882 ctx.finalize(*hash);
1883 }
1884