1 /** @file
2 
3   URL rewriting.
4 
5   @section license License
6 
7   Licensed to the Apache Software Foundation (ASF) under one
8   or more contributor license agreements.  See the NOTICE file
9   distributed with this work for additional information
10   regarding copyright ownership.  The ASF licenses this file
11   to you under the Apache License, Version 2.0 (the
12   "License"); you may not use this file except in compliance
13   with the License.  You may obtain a copy of the License at
14 
15       http://www.apache.org/licenses/LICENSE-2.0
16 
17   Unless required by applicable law or agreed to in writing, software
18   distributed under the License is distributed on an "AS IS" BASIS,
19   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
20   See the License for the specific language governing permissions and
21   limitations under the License.
22 
23  */
24 
25 #include "UrlRewrite.h"
26 #include "ProxyConfig.h"
27 #include "ReverseProxy.h"
28 #include "RemapConfig.h"
29 #include "tscore/I_Layout.h"
30 #include "tscore/Filenames.h"
31 #include "HttpSM.h"
32 
// Tag passed as the leading "%s" argument of the warnings logged by UrlRewrite::load().
#define modulePrefix "[ReverseProxy]"
34 
35 /**
36   Determines where we are in a situation where a virtual path is
37   being mapped to a server home page. If it is, we set a special flag
38   instructing us to be on the lookout for the need to send a redirect
39   to if the request URL is a object, opposed to a directory. We need
40   the redirect for an object so that the browser is aware that it is
41   real accessing a directory (albeit a virtual one).
42 
43 */
44 static void
SetHomePageRedirectFlag(url_mapping * new_mapping,URL & new_to_url)45 SetHomePageRedirectFlag(url_mapping *new_mapping, URL &new_to_url)
46 {
47   int fromLen, toLen;
48   const char *from_path = new_mapping->fromURL.path_get(&fromLen);
49   const char *to_path   = new_to_url.path_get(&toLen);
50 
51   new_mapping->homePageRedirect = (from_path && !to_path) ? true : false;
52 }
53 
54 bool
load()55 UrlRewrite::load()
56 {
57   ats_scoped_str config_file_path;
58 
59   config_file_path = RecConfigReadConfigPath("proxy.config.url_remap.filename", ts::filename::REMAP);
60   if (!config_file_path) {
61     pmgmt->signalManager(MGMT_SIGNAL_CONFIG_ERROR, "Unable to find proxy.config.url_remap.filename");
62     Warning("%s Unable to locate %s. No remappings in effect", modulePrefix, ts::filename::REMAP);
63     return false;
64   }
65 
66   this->ts_name = nullptr;
67   REC_ReadConfigStringAlloc(this->ts_name, "proxy.config.proxy_name");
68   if (this->ts_name == nullptr) {
69     pmgmt->signalManager(MGMT_SIGNAL_CONFIG_ERROR, "Unable to read proxy.config.proxy_name");
70     Warning("%s Unable to determine proxy name.  Incorrect redirects could be generated", modulePrefix);
71     this->ts_name = ats_strdup("");
72   }
73 
74   this->http_default_redirect_url = nullptr;
75   REC_ReadConfigStringAlloc(this->http_default_redirect_url, "proxy.config.http.referer_default_redirect");
76   if (this->http_default_redirect_url == nullptr) {
77     pmgmt->signalManager(MGMT_SIGNAL_CONFIG_ERROR, "Unable to read proxy.config.http.referer_default_redirect");
78     Warning("%s Unable to determine default redirect url for \"referer\" filter.", modulePrefix);
79     this->http_default_redirect_url = ats_strdup("http://www.apache.org");
80   }
81 
82   REC_ReadConfigInteger(reverse_proxy, "proxy.config.reverse_proxy.enabled");
83 
84   /* Initialize the plugin factory */
85   pluginFactory.setRuntimeDir(RecConfigReadRuntimeDir()).addSearchDir(RecConfigReadPluginDir());
86 
87   /* Initialize the next hop strategy factory */
88   std::string sf = RecConfigReadConfigPath("proxy.config.url_remap.strategies.filename", "strategies.yaml");
89   Debug("url_rewrite_regex", "strategyFactory file: %s", sf.c_str());
90   strategyFactory = new NextHopStrategyFactory(sf.c_str());
91 
92   if (0 == this->BuildTable(config_file_path)) {
93     _valid = true;
94     if (is_debug_tag_set("url_rewrite")) {
95       Print();
96     }
97   } else {
98     Warning("something failed during BuildTable() -- check your remap plugins!");
99   }
100   return _valid;
101 }
102 
~UrlRewrite()103 UrlRewrite::~UrlRewrite()
104 {
105   ats_free(this->ts_name);
106   ats_free(this->http_default_redirect_url);
107 
108   DestroyStore(forward_mappings);
109   DestroyStore(reverse_mappings);
110   DestroyStore(permanent_redirects);
111   DestroyStore(temporary_redirects);
112   DestroyStore(forward_mappings_with_recv_port);
113   _valid = false;
114 
115   /* Deactivate the factory when all SM are gone for sure. */
116   pluginFactory.deactivate();
117   delete strategyFactory;
118 }
119 
120 /** Sets the reverse proxy flag. */
121 void
SetReverseFlag(int flag)122 UrlRewrite::SetReverseFlag(int flag)
123 {
124   reverse_proxy = flag;
125   if (is_debug_tag_set("url_rewrite")) {
126     Print();
127   }
128 }
129 
130 /** Deallocated a hash table and all the url_mappings in it. */
131 void
_destroyTable(std::unique_ptr<URLTable> & h_table)132 UrlRewrite::_destroyTable(std::unique_ptr<URLTable> &h_table)
133 {
134   if (h_table) {
135     for (auto &it : *h_table) {
136       delete it.second;
137     }
138   }
139 }
140 
141 /** Debugging Method. */
142 void
Print() const143 UrlRewrite::Print() const
144 {
145   printf("URL Rewrite table with %d entries\n", num_rules_forward + num_rules_reverse + num_rules_redirect_temporary +
146                                                   num_rules_redirect_permanent + num_rules_forward_with_recv_port);
147   printf("  Reverse Proxy is %s\n", (reverse_proxy == 0) ? "Off" : "On");
148 
149   printf("  Forward Mapping Table with %d entries\n", num_rules_forward);
150   PrintStore(forward_mappings);
151 
152   printf("  Reverse Mapping Table with %d entries\n", num_rules_reverse);
153   PrintStore(reverse_mappings);
154 
155   printf("  Permanent Redirect Mapping Table with %d entries\n", num_rules_redirect_permanent);
156   PrintStore(permanent_redirects);
157 
158   printf("  Temporary Redirect Mapping Table with %d entries\n", num_rules_redirect_temporary);
159   PrintStore(temporary_redirects);
160 
161   printf("  Forward Mapping With Recv Port Table with %d entries\n", num_rules_forward_with_recv_port);
162   PrintStore(forward_mappings_with_recv_port);
163 
164   if (http_default_redirect_url != nullptr) {
165     printf("  Referer filter default redirect URL: \"%s\"\n", http_default_redirect_url);
166   }
167 }
168 
169 /** Debugging method. */
170 void
PrintStore(const MappingsStore & store) const171 UrlRewrite::PrintStore(const MappingsStore &store) const
172 {
173   if (store.hash_lookup) {
174     for (auto &it : *store.hash_lookup) {
175       it.second->Print();
176     }
177   }
178 
179   if (!store.regex_list.empty()) {
180     printf("    Regex mappings:\n");
181     forl_LL(RegexMapping, list_iter, store.regex_list) { list_iter->url_map->Print(); }
182   }
183 }
184 
185 /**
186   If a remapping is found, returns a pointer to it otherwise NULL is
187   returned.
188 
189 */
190 url_mapping *
_tableLookup(std::unique_ptr<URLTable> & h_table,URL * request_url,int request_port,char * request_host,int request_host_len)191 UrlRewrite::_tableLookup(std::unique_ptr<URLTable> &h_table, URL *request_url, int request_port, char *request_host,
192                          int request_host_len)
193 {
194   if (!h_table) {
195     h_table.reset(new URLTable);
196   }
197   UrlMappingPathIndex *ht_entry = nullptr;
198   url_mapping *um               = nullptr;
199   int ht_result                 = 0;
200 
201   if (auto it = h_table->find(request_host); it != h_table->end()) {
202     ht_result = 1;
203     ht_entry  = it->second;
204   }
205 
206   if (likely(ht_result && ht_entry)) {
207     // for empty host don't do a normal search, get a mapping arbitrarily
208     um = ht_entry->Search(request_url, request_port, request_host_len ? true : false);
209   }
210   return um;
211 }
212 
213 // This is only used for redirects and reverse rules, and the homepageredirect flag
214 // can never be set. The end result is that request_url is modified per remap container.
215 void
url_rewrite_remap_request(const UrlMappingContainer & mapping_container,URL * request_url,int method)216 url_rewrite_remap_request(const UrlMappingContainer &mapping_container, URL *request_url, int method)
217 {
218   URL *map_to   = mapping_container.getToURL();
219   URL *map_from = mapping_container.getFromURL();
220   const char *toHost;
221   int toHostLen;
222 
223   toHost = map_to->host_get(&toHostLen);
224 
225   Debug("url_rewrite", "%s: Remapping rule id: %d matched", __func__, mapping_container.getMapping()->map_id);
226 
227   request_url->host_set(toHost, toHostLen);
228   request_url->port_set(map_to->port_get_raw());
229 
230   // With the CONNECT method, we have to avoid messing with the scheme and path, because it's not part of
231   // the CONNECT request (only host and port is).
232   if (HTTP_WKSIDX_CONNECT != method) {
233     const char *toScheme;
234     int toSchemeLen;
235     const char *requestPath;
236     int requestPathLen = 0;
237     int fromPathLen    = 0;
238     const char *toPath;
239     int toPathLen;
240 
241     toScheme = map_to->scheme_get(&toSchemeLen);
242     request_url->scheme_set(toScheme, toSchemeLen);
243 
244     map_from->path_get(&fromPathLen);
245     toPath      = map_to->path_get(&toPathLen);
246     requestPath = request_url->path_get(&requestPathLen);
247 
248     // Should be +3, little extra padding won't hurt.
249     char newPath[(requestPathLen - fromPathLen) + toPathLen + 8];
250     int newPathLen = 0;
251 
252     *newPath = 0;
253     if (toPath) {
254       memcpy(newPath, toPath, toPathLen);
255       newPathLen += toPathLen;
256     }
257 
258     // We might need to insert a trailing slash in the new portion of the path
259     // if more will be added and none is present and one will be needed.
260     if (!fromPathLen && requestPathLen && newPathLen && toPathLen && *(newPath + newPathLen - 1) != '/') {
261       *(newPath + newPathLen) = '/';
262       newPathLen++;
263     }
264 
265     if (requestPath) {
266       // avoid adding another trailing slash if the requestPath already had one and so does the toPath
267       if (requestPathLen < fromPathLen) {
268         if (toPath && requestPath[requestPathLen - 1] == '/' && toPath[toPathLen - 1] == '/') {
269           fromPathLen++;
270         }
271       } else {
272         if (toPath && requestPath[fromPathLen] == '/' && toPath[toPathLen - 1] == '/') {
273           fromPathLen++;
274         }
275       }
276 
277       // copy the end of the path past what has been mapped
278       if ((requestPathLen - fromPathLen) > 0) {
279         memcpy(newPath + newPathLen, requestPath + fromPathLen, requestPathLen - fromPathLen);
280         newPathLen += (requestPathLen - fromPathLen);
281       }
282     }
283 
284     // Skip any leading / in the path when setting the new URL path
285     if (*newPath == '/') {
286       request_url->path_set(newPath + 1, newPathLen - 1);
287     } else {
288       request_url->path_set(newPath, newPathLen);
289     }
290   }
291 }
292 
293 /** Used to do the backwards lookups. */
294 #define N_URL_HEADERS 4
295 bool
ReverseMap(HTTPHdr * response_header)296 UrlRewrite::ReverseMap(HTTPHdr *response_header)
297 {
298   const char *location_hdr;
299   URL location_url;
300   int loc_length;
301   bool remap_found = false;
302   const char *host;
303   int host_len;
304   char *new_loc_hdr;
305   int new_loc_length;
306   int i;
307   const struct {
308     const char *const field;
309     const int len;
310   } url_headers[N_URL_HEADERS] = {{MIME_FIELD_LOCATION, MIME_LEN_LOCATION},
311                                   {MIME_FIELD_CONTENT_LOCATION, MIME_LEN_CONTENT_LOCATION},
312                                   {"URI", 3},
313                                   {"Destination", 11}};
314 
315   if (unlikely(num_rules_reverse == 0)) {
316     ink_assert(reverse_mappings.empty());
317     return false;
318   }
319 
320   for (i = 0; i < N_URL_HEADERS; ++i) {
321     location_hdr = response_header->value_get(url_headers[i].field, url_headers[i].len, &loc_length);
322 
323     if (location_hdr == nullptr) {
324       continue;
325     }
326 
327     location_url.create(nullptr);
328     location_url.parse(location_hdr, loc_length);
329 
330     host = location_url.host_get(&host_len);
331 
332     UrlMappingContainer reverse_mapping(response_header->m_heap);
333 
334     if (reverseMappingLookup(&location_url, location_url.port_get(), host, host_len, reverse_mapping)) {
335       if (i == 0) {
336         remap_found = true;
337       }
338       url_rewrite_remap_request(reverse_mapping, &location_url);
339       new_loc_hdr = location_url.string_get_ref(&new_loc_length);
340       response_header->value_set(url_headers[i].field, url_headers[i].len, new_loc_hdr, new_loc_length);
341     }
342 
343     location_url.destroy();
344   }
345   return remap_found;
346 }
347 
348 /** Perform fast ACL filtering. */
349 void
PerformACLFiltering(HttpTransact::State * s,url_mapping * map)350 UrlRewrite::PerformACLFiltering(HttpTransact::State *s, url_mapping *map)
351 {
352   if (unlikely(!s || s->acl_filtering_performed || !s->client_connection_enabled)) {
353     return;
354   }
355 
356   s->acl_filtering_performed = true; // small protection against reverse mapping
357 
358   if (map->filter) {
359     int method               = s->hdr_info.client_request.method_get_wksidx();
360     int method_wksidx        = (method != -1) ? (method - HTTP_WKSIDX_CONNECT) : -1;
361     bool client_enabled_flag = true;
362 
363     ink_release_assert(ats_is_ip(&s->client_info.src_addr));
364 
365     for (acl_filter_rule *rp = map->filter; rp && client_enabled_flag; rp = rp->next) {
366       bool match = true;
367 
368       if (rp->method_restriction_enabled) {
369         if (method_wksidx >= 0 && method_wksidx < HTTP_WKSIDX_METHODS_CNT) {
370           match = rp->standard_method_lookup[method_wksidx];
371         } else if (!rp->nonstandard_methods.empty()) {
372           match = false;
373         } else {
374           int method_str_len;
375           const char *method_str = s->hdr_info.client_request.method_get(&method_str_len);
376           match                  = rp->nonstandard_methods.count(std::string(method_str, method_str_len));
377         }
378       }
379 
380       if (match && rp->src_ip_valid) {
381         match = false;
382         for (int j = 0; j < rp->src_ip_cnt && !match; j++) {
383           bool in_range = rp->src_ip_array[j].contains(s->client_info.src_addr);
384           if (rp->src_ip_array[j].invert) {
385             if (!in_range) {
386               match = true;
387             }
388           } else {
389             if (in_range) {
390               match = true;
391             }
392           }
393         }
394       }
395 
396       if (match && rp->in_ip_valid) {
397         Debug("url_rewrite", "match was true and we have specified a in_ip field");
398         match = false;
399         for (int j = 0; j < rp->in_ip_cnt && !match; j++) {
400           IpEndpoint incoming_addr;
401           incoming_addr.assign(s->state_machine->ua_txn->get_netvc()->get_local_addr());
402           if (is_debug_tag_set("url_rewrite")) {
403             char buf1[128], buf2[128], buf3[128];
404             ats_ip_ntop(incoming_addr, buf1, sizeof(buf1));
405             rp->in_ip_array[j].start.toString(buf2, sizeof(buf2));
406             rp->in_ip_array[j].end.toString(buf3, sizeof(buf3));
407             Debug("url_rewrite", "Trying to match incoming address %s in range %s - %s.", buf1, buf2, buf3);
408           }
409           bool in_range = rp->in_ip_array[j].contains(incoming_addr);
410           if (rp->in_ip_array[j].invert) {
411             if (!in_range) {
412               match = true;
413             }
414           } else {
415             if (in_range) {
416               match = true;
417             }
418           }
419         }
420       }
421 
422       if (rp->internal) {
423         match = s->state_machine->ua_txn->get_netvc()->get_is_internal_request();
424         Debug("url_rewrite", "%s an internal request", match ? "matched" : "didn't match");
425       }
426 
427       if (match && client_enabled_flag) { // make sure that a previous filter did not DENY
428         Debug("url_rewrite", "matched ACL filter rule, %s request", rp->allow_flag ? "allowing" : "denying");
429         client_enabled_flag = rp->allow_flag ? true : false;
430       } else {
431         if (!client_enabled_flag) {
432           Debug("url_rewrite", "Previous ACL filter rule denied request, continuing to deny it");
433         } else {
434           Debug("url_rewrite", "did NOT match ACL filter rule, %s request", rp->allow_flag ? "denying" : "allowing");
435           client_enabled_flag = rp->allow_flag ? false : true;
436         }
437       }
438 
439     } /* end of for(rp = map->filter;rp;rp = rp->next) */
440 
441     s->client_connection_enabled = client_enabled_flag;
442   }
443 }
444 
445 /**
446    Determines if a redirect is to occur and if so, figures out what the
447    redirect is. This was plaguiarized from UrlRewrite::Remap. redirect_url
448    ought to point to the new, mapped URL when the function exits.
449 */
450 mapping_type
Remap_redirect(HTTPHdr * request_header,URL * redirect_url)451 UrlRewrite::Remap_redirect(HTTPHdr *request_header, URL *redirect_url)
452 {
453   URL *request_url;
454   mapping_type mappingType;
455   const char *host = nullptr;
456   int host_len = 0, request_port = 0;
457   bool prt, trt; // existence of permanent and temporary redirect tables, respectively
458 
459   prt = (num_rules_redirect_permanent != 0);
460   trt = (num_rules_redirect_temporary != 0);
461 
462   if (prt + trt == 0) {
463     return NONE;
464   }
465 
466   // Since are called before request validity checking
467   //  occurs, make sure that we have both a valid request
468   //  header and a valid URL
469   //
470   if (request_header == nullptr) {
471     Debug("url_rewrite", "request_header was invalid.  UrlRewrite::Remap_redirect bailing out.");
472     return NONE;
473   }
474   request_url = request_header->url_get();
475   if (!request_url->valid()) {
476     Debug("url_rewrite", "request_url was invalid.  UrlRewrite::Remap_redirect bailing out.");
477     return NONE;
478   }
479 
480   host         = request_url->host_get(&host_len);
481   request_port = request_url->port_get();
482 
483   if (host_len == 0 && reverse_proxy != 0) { // Server request.  Use the host header to figure out where
484                                              // it goes.  Host header parsing is same as in ::Remap
485     int host_hdr_len;
486     const char *host_hdr = request_header->value_get(MIME_FIELD_HOST, MIME_LEN_HOST, &host_hdr_len);
487 
488     if (!host_hdr) {
489       host_hdr     = "";
490       host_hdr_len = 0;
491     }
492 
493     const char *tmp = static_cast<const char *>(memchr(host_hdr, ':', host_hdr_len));
494 
495     if (tmp == nullptr) {
496       host_len = host_hdr_len;
497     } else {
498       host_len     = tmp - host_hdr;
499       request_port = ink_atoi(tmp + 1, host_hdr_len - host_len);
500 
501       // If atoi fails, try the default for the
502       //   protocol
503       if (request_port == 0) {
504         request_port = request_url->port_get();
505       }
506     }
507 
508     host = host_hdr;
509   }
510   // Temporary Redirects have precedence over Permanent Redirects
511   // the rationale behind this is that network administrators might
512   // want quick redirects and not want to worry about all the existing
513   // permanent rules
514   mappingType = NONE;
515 
516   UrlMappingContainer redirect_mapping(request_header->m_heap);
517 
518   if (trt) {
519     if (temporaryRedirectLookup(request_url, request_port, host, host_len, redirect_mapping)) {
520       mappingType = TEMPORARY_REDIRECT;
521     }
522   }
523   if ((mappingType == NONE) && prt) {
524     if (permanentRedirectLookup(request_url, request_port, host, host_len, redirect_mapping)) {
525       mappingType = PERMANENT_REDIRECT;
526     }
527   }
528 
529   if (mappingType != NONE) {
530     ink_assert((mappingType == PERMANENT_REDIRECT) || (mappingType == TEMPORARY_REDIRECT));
531 
532     // Make a copy of the request url so that we can munge it
533     //   for the redirect
534     redirect_url->create(nullptr);
535     redirect_url->copy(request_url);
536 
537     // Perform the actual URL rewrite
538     url_rewrite_remap_request(redirect_mapping, redirect_url);
539 
540     return mappingType;
541   }
542   ink_assert(mappingType == NONE);
543 
544   return NONE;
545 }
546 
547 bool
_addToStore(MappingsStore & store,url_mapping * new_mapping,RegexMapping * reg_map,const char * src_host,bool is_cur_mapping_regex,int & count)548 UrlRewrite::_addToStore(MappingsStore &store, url_mapping *new_mapping, RegexMapping *reg_map, const char *src_host,
549                         bool is_cur_mapping_regex, int &count)
550 {
551   bool retval;
552 
553   new_mapping->setRank(count); // Use the mapping rules number count for rank
554   if (is_cur_mapping_regex) {
555     store.regex_list.enqueue(reg_map);
556     retval = true;
557   } else {
558     retval = TableInsert(store.hash_lookup, new_mapping, src_host);
559   }
560   if (retval) {
561     ++count;
562   }
563   return retval;
564 }
565 
566 bool
InsertMapping(mapping_type maptype,url_mapping * new_mapping,RegexMapping * reg_map,const char * src_host,bool is_cur_mapping_regex)567 UrlRewrite::InsertMapping(mapping_type maptype, url_mapping *new_mapping, RegexMapping *reg_map, const char *src_host,
568                           bool is_cur_mapping_regex)
569 {
570   bool success = false;
571 
572   // Now add the mapping to appropriate container
573   switch (maptype) {
574   case FORWARD_MAP:
575   case FORWARD_MAP_REFERER:
576     success = _addToStore(forward_mappings, new_mapping, reg_map, src_host, is_cur_mapping_regex, num_rules_forward);
577     if (success) {
578       // @todo: is this applicable to regex mapping too?
579       SetHomePageRedirectFlag(new_mapping, new_mapping->toURL);
580     }
581     break;
582   case REVERSE_MAP:
583     success = _addToStore(reverse_mappings, new_mapping, reg_map, src_host, is_cur_mapping_regex, num_rules_reverse);
584     new_mapping->homePageRedirect = false;
585     break;
586   case PERMANENT_REDIRECT:
587     success = _addToStore(permanent_redirects, new_mapping, reg_map, src_host, is_cur_mapping_regex, num_rules_redirect_permanent);
588     break;
589   case TEMPORARY_REDIRECT:
590     success = _addToStore(temporary_redirects, new_mapping, reg_map, src_host, is_cur_mapping_regex, num_rules_redirect_temporary);
591     break;
592   case FORWARD_MAP_WITH_RECV_PORT:
593     success = _addToStore(forward_mappings_with_recv_port, new_mapping, reg_map, src_host, is_cur_mapping_regex,
594                           num_rules_forward_with_recv_port);
595     break;
596   default:
597     // 'default' required to avoid compiler warning; unsupported map
598     // type would have been dealt with much before this
599     return false;
600   }
601 
602   return success;
603 }
604 
605 bool
InsertForwardMapping(mapping_type maptype,url_mapping * mapping,const char * src_host)606 UrlRewrite::InsertForwardMapping(mapping_type maptype, url_mapping *mapping, const char *src_host)
607 {
608   bool success;
609 
610   if (maptype == FORWARD_MAP_WITH_RECV_PORT) {
611     success = TableInsert(forward_mappings_with_recv_port.hash_lookup, mapping, src_host);
612   } else {
613     success = TableInsert(forward_mappings.hash_lookup, mapping, src_host);
614   }
615 
616   if (success) {
617     switch (maptype) {
618     case FORWARD_MAP:
619     case FORWARD_MAP_REFERER:
620     case FORWARD_MAP_WITH_RECV_PORT:
621       SetHomePageRedirectFlag(mapping, mapping->toURL);
622       break;
623     default:
624       break;
625     }
626 
627     (maptype != FORWARD_MAP_WITH_RECV_PORT) ? ++num_rules_forward : ++num_rules_forward_with_recv_port;
628   }
629 
630   return success;
631 }
632 
633 /**
634   Reads the configuration file and creates a new hash table.
635 
636   @return zero on success and non-zero on failure.
637 
638 */
639 int
BuildTable(const char * path)640 UrlRewrite::BuildTable(const char *path)
641 {
642   ink_assert(forward_mappings.empty());
643   ink_assert(reverse_mappings.empty());
644   ink_assert(permanent_redirects.empty());
645   ink_assert(temporary_redirects.empty());
646   ink_assert(forward_mappings_with_recv_port.empty());
647   ink_assert(num_rules_forward == 0);
648   ink_assert(num_rules_reverse == 0);
649   ink_assert(num_rules_redirect_permanent == 0);
650   ink_assert(num_rules_redirect_temporary == 0);
651   ink_assert(num_rules_forward_with_recv_port == 0);
652 
653   forward_mappings.hash_lookup.reset(new URLTable);
654   reverse_mappings.hash_lookup.reset(new URLTable);
655   permanent_redirects.hash_lookup.reset(new URLTable);
656   temporary_redirects.hash_lookup.reset(new URLTable);
657   forward_mappings_with_recv_port.hash_lookup.reset(new URLTable);
658 
659   if (!remap_parse_config(path, this)) {
660     // XXX handle file reload error
661     return 3;
662   }
663 
664   // Destroy unused tables
665   if (num_rules_forward == 0) {
666     forward_mappings.hash_lookup.reset(nullptr);
667   } else {
668     if (forward_mappings.hash_lookup->find("") != forward_mappings.hash_lookup->end()) {
669       nohost_rules = 1;
670     }
671   }
672 
673   if (num_rules_reverse == 0) {
674     reverse_mappings.hash_lookup.reset(nullptr);
675   }
676 
677   if (num_rules_redirect_permanent == 0) {
678     permanent_redirects.hash_lookup.reset(nullptr);
679   }
680 
681   if (num_rules_redirect_temporary == 0) {
682     temporary_redirects.hash_lookup.reset(nullptr);
683   }
684 
685   if (num_rules_forward_with_recv_port == 0) {
686     forward_mappings_with_recv_port.hash_lookup.reset(nullptr);
687   }
688 
689   return 0;
690 }
691 
692 /**
693   Inserts arg mapping in h_table with key src_host chaining the mapping
694   of existing entries bound to src_host if necessary.
695 
696 */
697 bool
TableInsert(std::unique_ptr<URLTable> & h_table,url_mapping * mapping,const char * src_host)698 UrlRewrite::TableInsert(std::unique_ptr<URLTable> &h_table, url_mapping *mapping, const char *src_host)
699 {
700   if (!h_table) {
701     h_table.reset(new URLTable);
702   }
703   char src_host_tmp_buf[1];
704   UrlMappingPathIndex *ht_contents;
705 
706   if (!src_host) {
707     src_host            = &src_host_tmp_buf[0];
708     src_host_tmp_buf[0] = 0;
709   }
710   // Insert the new_mapping into hash table
711   if (auto it = h_table->find(src_host); it != h_table->end()) {
712     ht_contents = it->second;
713     // There is already a path index for this host
714     if (it->second == nullptr) {
715       // why should this happen?
716       Warning("Found entry cannot be null!");
717       return false;
718     }
719   } else {
720     ht_contents = new UrlMappingPathIndex();
721     h_table->emplace(src_host, ht_contents);
722   }
723   if (!ht_contents->Insert(mapping)) {
724     Warning("Could not insert new mapping");
725     return false;
726   }
727   return true;
728 }
729 
730 /**  First looks up the hash table for "simple" mappings and then the
731      regex mappings.  Only higher-ranked regex mappings are examined if
732      a hash mapping is found; or else all regex mappings are examined
733 
734      Returns highest-ranked mapping on success, NULL on failure
735 */
736 bool
_mappingLookup(MappingsStore & mappings,URL * request_url,int request_port,const char * request_host,int request_host_len,UrlMappingContainer & mapping_container)737 UrlRewrite::_mappingLookup(MappingsStore &mappings, URL *request_url, int request_port, const char *request_host,
738                            int request_host_len, UrlMappingContainer &mapping_container)
739 {
740   char request_host_lower[TS_MAX_HOST_NAME_LEN];
741 
742   if (!request_host || !request_url || (request_host_len < 0) || (request_host_len >= TS_MAX_HOST_NAME_LEN)) {
743     Debug("url_rewrite", "Invalid arguments!");
744     return false;
745   }
746 
747   // lowercase
748   for (int i = 0; i < request_host_len; ++i) {
749     request_host_lower[i] = tolower(request_host[i]);
750   }
751   request_host_lower[request_host_len] = 0;
752 
753   bool retval          = false;
754   int rank_ceiling     = -1;
755   url_mapping *mapping = _tableLookup(mappings.hash_lookup, request_url, request_port, request_host_lower, request_host_len);
756   if (mapping != nullptr) {
757     rank_ceiling = mapping->getRank();
758     Debug("url_rewrite", "Found 'simple' mapping with rank %d", rank_ceiling);
759     mapping_container.set(mapping);
760     retval = true;
761   }
762   if (_regexMappingLookup(mappings.regex_list, request_url, request_port, request_host_lower, request_host_len, rank_ceiling,
763                           mapping_container)) {
764     Debug("url_rewrite", "Using regex mapping with rank %d", (mapping_container.getMapping())->getRank());
765     retval = true;
766   }
767   return retval;
768 }
769 
770 // does not null terminate return string
771 int
_expandSubstitutions(int * matches_info,const RegexMapping * reg_map,const char * matched_string,char * dest_buf,int dest_buf_size)772 UrlRewrite::_expandSubstitutions(int *matches_info, const RegexMapping *reg_map, const char *matched_string, char *dest_buf,
773                                  int dest_buf_size)
774 {
775   int cur_buf_size = 0;
776   int token_start  = 0;
777   int n_bytes_needed;
778   int match_index;
779   for (int i = 0; i < reg_map->n_substitutions; ++i) {
780     // first copy preceding bytes
781     n_bytes_needed = reg_map->substitution_markers[i] - token_start;
782     if ((cur_buf_size + n_bytes_needed) > dest_buf_size) {
783       goto lOverFlow;
784     }
785     memcpy(dest_buf + cur_buf_size, reg_map->to_url_host_template + token_start, n_bytes_needed);
786     cur_buf_size += n_bytes_needed;
787 
788     // then copy the sub pattern match
789     match_index    = reg_map->substitution_ids[i] * 2;
790     n_bytes_needed = matches_info[match_index + 1] - matches_info[match_index];
791     if ((cur_buf_size + n_bytes_needed) > dest_buf_size) {
792       goto lOverFlow;
793     }
794     memcpy(dest_buf + cur_buf_size, matched_string + matches_info[match_index], n_bytes_needed);
795     cur_buf_size += n_bytes_needed;
796 
797     token_start = reg_map->substitution_markers[i] + 2; // skip the place holder
798   }
799 
800   // copy last few bytes (if any)
801   if (token_start < reg_map->to_url_host_template_len) {
802     n_bytes_needed = reg_map->to_url_host_template_len - token_start;
803     if ((cur_buf_size + n_bytes_needed) > dest_buf_size) {
804       goto lOverFlow;
805     }
806     memcpy(dest_buf + cur_buf_size, reg_map->to_url_host_template + token_start, n_bytes_needed);
807     cur_buf_size += n_bytes_needed;
808   }
809   Debug("url_rewrite_regex", "Expanded substitutions and returning string [%.*s] with length %d", cur_buf_size, dest_buf,
810         cur_buf_size);
811   return cur_buf_size;
812 
813 lOverFlow:
814   Warning("Overflow while expanding substitutions");
815   return 0;
816 }
817 
818 bool
_regexMappingLookup(RegexMappingList & regex_mappings,URL * request_url,int request_port,const char * request_host,int request_host_len,int rank_ceiling,UrlMappingContainer & mapping_container)819 UrlRewrite::_regexMappingLookup(RegexMappingList &regex_mappings, URL *request_url, int request_port, const char *request_host,
820                                 int request_host_len, int rank_ceiling, UrlMappingContainer &mapping_container)
821 {
822   bool retval = false;
823 
824   if (rank_ceiling == -1) { // we will now look at all regex mappings
825     rank_ceiling = INT_MAX;
826     Debug("url_rewrite_regex", "Going to match all regexes");
827   } else {
828     Debug("url_rewrite_regex", "Going to match regexes with rank <= %d", rank_ceiling);
829   }
830 
831   int request_scheme_len, reg_map_scheme_len;
832   const char *request_scheme = request_url->scheme_get(&request_scheme_len), *reg_map_scheme;
833 
834   int request_path_len, reg_map_path_len;
835   const char *request_path = request_url->path_get(&request_path_len), *reg_map_path;
836 
837   // If the scheme is empty (e.g. because of a CONNECT method), guess it based on port
838   // This is equivalent to the logic in UrlMappingPathIndex::_GetTrie().
839   if (request_scheme_len == 0) {
840     request_scheme     = request_port == 80 ? URL_SCHEME_HTTP : URL_SCHEME_HTTPS;
841     request_scheme_len = hdrtoken_wks_to_length(request_scheme);
842   }
843 
844   // Loop over the entire linked list, or until we're satisfied
845   forl_LL(RegexMapping, list_iter, regex_mappings)
846   {
847     int reg_map_rank = list_iter->url_map->getRank();
848 
849     if (reg_map_rank > rank_ceiling) {
850       break;
851     }
852 
853     reg_map_scheme = list_iter->url_map->fromURL.scheme_get(&reg_map_scheme_len);
854     if ((request_scheme_len != reg_map_scheme_len) || strncmp(request_scheme, reg_map_scheme, request_scheme_len)) {
855       Debug("url_rewrite_regex", "Skipping regex with rank %d as scheme does not match request scheme", reg_map_rank);
856       continue;
857     }
858 
859     if (list_iter->url_map->fromURL.port_get() != request_port) {
860       Debug("url_rewrite_regex",
861             "Skipping regex with rank %d as regex map port does not match request port. "
862             "regex map port: %d, request port %d",
863             reg_map_rank, list_iter->url_map->fromURL.port_get(), request_port);
864       continue;
865     }
866 
867     reg_map_path = list_iter->url_map->fromURL.path_get(&reg_map_path_len);
868     if ((request_path_len < reg_map_path_len) ||
869         strncmp(reg_map_path, request_path, reg_map_path_len)) { // use the shorter path length here
870       Debug("url_rewrite_regex", "Skipping regex with rank %d as path does not cover request path", reg_map_rank);
871       continue;
872     }
873 
874     int matches_info[MAX_REGEX_SUBS * 3];
875     bool match_result =
876       list_iter->regular_expression.exec(std::string_view(request_host, request_host_len), matches_info, countof(matches_info));
877 
878     if (match_result == true) {
879       Debug("url_rewrite_regex",
880             "Request URL host [%.*s] matched regex in mapping of rank %d "
881             "with %d possible substitutions",
882             request_host_len, request_host, reg_map_rank, match_result);
883 
884       mapping_container.set(list_iter->url_map);
885 
886       char buf[4096];
887       int buf_len;
888 
889       // Expand substitutions in the host field from the stored template
890       buf_len           = _expandSubstitutions(matches_info, list_iter, request_host, buf, sizeof(buf));
891       URL *expanded_url = mapping_container.createNewToURL();
892       expanded_url->copy(&((list_iter->url_map)->toURL));
893       expanded_url->host_set(buf, buf_len);
894 
895       Debug("url_rewrite_regex", "Expanded toURL to [%.*s]", expanded_url->length_get(), expanded_url->string_get_ref());
896       retval = true;
897       break;
898     } else {
899       Debug("url_rewrite_regex", "Request URL host [%.*s] did NOT match regex in mapping of rank %d", request_host_len,
900             request_host, reg_map_rank);
901     }
902   }
903 
904   return retval;
905 }
906 
907 void
_destroyList(RegexMappingList & mappings)908 UrlRewrite::_destroyList(RegexMappingList &mappings)
909 {
910   RegexMapping *list_iter;
911   while ((list_iter = mappings.pop()) != nullptr) {
912     delete list_iter->url_map;
913     if (list_iter->to_url_host_template) {
914       ats_free(list_iter->to_url_host_template);
915     }
916     delete list_iter;
917   }
918   mappings.clear();
919 }
920