/** @file

  A brief file description

  @section license License

  Licensed to the Apache Software Foundation (ASF) under one
  or more contributor license agreements.  See the NOTICE file
  distributed with this work for additional information
  regarding copyright ownership.  The ASF licenses this file
  to you under the Apache License, Version 2.0 (the
  "License"); you may not use this file except in compliance
  with the License.  You may obtain a copy of the License at

      http://www.apache.org/licenses/LICENSE-2.0

  Unless required by applicable law or agreed to in writing, software
  distributed under the License is distributed on an "AS IS" BASIS,
  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  See the License for the specific language governing permissions and
  limitations under the License.
 */

/*****************************************************************************
 *
 *  ControlMatcher.h - Interface to general purpose matcher
 *
 *
 *
 *
 *  Description:
 *
 *     The control matcher module provides the ability to look up arbitrary
 *  information specific to a URL and IP address.  The outside
 *  world only sees the ControlMatcher class which parses the relevant
 *  configuration file and builds the lookup table
 *
 *     Four types of matches are supported: hostname, domain name, ip address
 *  and URL regex.  For these four types, three lookup tables are used.  Regex and
 *  ip lookups have their own tables and host and domain lookups share a single
 *  table
 *
 *  Module Purpose & Specifications
 *  -------------------------------
 *   -  to provide a generic mechanism for matching configuration data
 *       against hostname, domain name, ip address and URL regex
 *   -  the generic mechanism should require minimum effort to apply it
 *       to new features that require per request matching
 *   -  for the mechanism to be efficient such that lookups against
 *       the tables are not a performance problem when they are both done
 *       for every request through the proxy and the set of matching
 *       data is very large
 *
 *  Lookup Table Descriptions
 *  -------------------------
 *
 *   regex table - implemented as a linear list of regular expressions to
 *       match against
 *
 *   host/domain table - The host domain table is logically implemented as
 *       a tree, broken up at each partition in a hostname.  Three mechanisms
 *       are used to move from one level to the next: a hash table, a fixed
 *       sized array and a constant time index (class charIndex).  The constant
 *       time index is only used to move from the root domain to the first
 *       level partition (ie: .com).  The fixed array is used for subsequent
 *       partitions until the fan out exceeds the array's fixed size at which
 *       time, the fixed array is converted to a hash table
 *
 *   ip table - supports ip ranges.  A single ip address is treated as
 *       a range with the same beginning and end address.  The table
 *       is divided up into a fixed number of levels, indexed on 8 bit
 *       boundaries, starting at the high bit of the address.  Subsequent
 *       levels are allocated only when needed.
74 * 75 ****************************************************************************/ 76 77 // 78 // IMPORTANT: Instantiating these templates 79 // 80 // The Implementation for these templates appears in 81 // ControlMatcher.cc To get the templates instantiated 82 // correctly on all compilers new uses MUST explicitly 83 // instantiate the new instance at the bottom of 84 // ControlMatcher.cc 85 // 86 87 #pragma once 88 89 #include "tscore/IpMap.h" 90 #include "tscore/Result.h" 91 #include "tscore/MatcherUtils.h" 92 93 #include "tscore/ink_apidefs.h" 94 #include "tscore/ink_defs.h" 95 #include "HTTP.h" 96 #include "tscore/Regex.h" 97 #include "URL.h" 98 99 #include <unordered_map> 100 101 #ifdef HAVE_CTYPE_H 102 #include <cctype> 103 #endif 104 105 #define SignalError(_buf, _already) \ 106 { \ 107 if (_already == false) \ 108 pmgmt->signalManager(MGMT_SIGNAL_CONFIG_ERROR, _buf); \ 109 _already = true; \ 110 Error("%s", _buf); \ 111 } 112 113 class HostLookup; 114 struct HttpApiInfo; 115 struct matcher_line; 116 struct matcher_tags; 117 118 struct RequestData { 119 public: 120 // First three are the lookup keys to the tables 121 // get_ip() can be either client_ip or server_ip 122 // depending on how the module user wants to key 123 // the table ~RequestDataRequestData124 virtual ~RequestData() {} 125 virtual char *get_string() = 0; 126 virtual const char *get_host() = 0; 127 virtual sockaddr const *get_ip() = 0; 128 129 virtual sockaddr const *get_client_ip() = 0; 130 }; 131 132 class HttpRequestData : public RequestData 133 { 134 public: 135 inkcoreapi char *get_string() override; 136 inkcoreapi const char *get_host() override; 137 inkcoreapi sockaddr const *get_ip() override; 138 inkcoreapi sockaddr const *get_client_ip() override; 139 HttpRequestData()140 HttpRequestData() 141 142 { 143 ink_zero(src_ip); 144 ink_zero(dest_ip); 145 } 146 147 HTTPHdr *hdr = nullptr; 148 char *hostname_str = nullptr; 149 HttpApiInfo *api_info = nullptr; 150 time_t xact_start = 0; 
151 IpEndpoint src_ip; 152 IpEndpoint dest_ip; 153 uint16_t incoming_port = 0; 154 char *tag = nullptr; 155 bool internal_txn = false; 156 URL **cache_info_lookup_url = nullptr; 157 URL **cache_info_parent_selection_url = nullptr; 158 }; 159 160 // Mixin class for shared info across all templates. This just wraps the 161 // shared members such that we don't have to duplicate all these initialixers 162 // etc. If someone wants to rewrite all this code to use setters and getters, 163 // by all means, please do so. The plumbing is in place :). 164 template <class Data> class BaseMatcher 165 { 166 public: BaseMatcher(const char * name,const char * filename)167 BaseMatcher(const char *name, const char *filename) : matcher_name(name), file_name(filename) {} 168 ~BaseMatcher()169 ~BaseMatcher() { delete[] data_array; } 170 171 protected: 172 int num_el = -1; // number of elements in the table 173 const char *matcher_name = "unknown"; // Used for Debug/Warning/Error messages 174 const char *file_name = nullptr; // Used for Debug/Warning/Error messages 175 Data *data_array = nullptr; // Array with the Data elements 176 int array_len = -1; // length of the arrays (all three are the same length) 177 }; 178 179 template <class Data, class MatchResult> class UrlMatcher : protected BaseMatcher<Data> 180 { 181 typedef BaseMatcher<Data> super; 182 183 public: 184 UrlMatcher(const char *name, const char *filename); 185 ~UrlMatcher(); 186 187 void AllocateSpace(int num_entries); 188 Result NewEntry(matcher_line *line_info); 189 190 void Match(RequestData *rdata, MatchResult *result) const; 191 void Print() const; 192 193 using super::num_el; 194 using super::matcher_name; 195 using super::file_name; 196 using super::data_array; 197 using super::array_len; 198 199 private: 200 std::unordered_map<std::string, int> url_ht; 201 char **url_str = nullptr; // array of url strings 202 int *url_value = nullptr; // array of posion of url strings 203 }; 204 205 template <class Data, class 
MatchResult> class RegexMatcher : protected BaseMatcher<Data> 206 { 207 typedef BaseMatcher<Data> super; 208 209 public: 210 RegexMatcher(const char *name, const char *filename); 211 ~RegexMatcher(); 212 213 void AllocateSpace(int num_entries); 214 Result NewEntry(matcher_line *line_info); 215 216 void Match(RequestData *rdata, MatchResult *result) const; 217 void Print() const; 218 219 using super::num_el; 220 using super::matcher_name; 221 using super::file_name; 222 using super::data_array; 223 using super::array_len; 224 225 protected: 226 pcre **re_array = nullptr; // array of compiled regexs 227 char **re_str = nullptr; // array of uncompiled regex strings 228 }; 229 230 template <class Data, class MatchResult> class HostRegexMatcher : public RegexMatcher<Data, MatchResult> 231 { 232 typedef BaseMatcher<Data> super; 233 234 public: 235 HostRegexMatcher(const char *name, const char *filename); 236 void Match(RequestData *rdata, MatchResult *result) const; 237 238 using super::num_el; 239 using super::matcher_name; 240 using super::file_name; 241 using super::data_array; 242 using super::array_len; 243 }; 244 245 template <class Data, class MatchResult> class HostMatcher : protected BaseMatcher<Data> 246 { 247 typedef BaseMatcher<Data> super; 248 249 public: 250 HostMatcher(const char *name, const char *filename); 251 ~HostMatcher(); 252 253 void AllocateSpace(int num_entries); 254 Result NewEntry(matcher_line *line_info); 255 256 void Match(RequestData *rdata, MatchResult *result) const; 257 void Print() const; 258 259 using super::num_el; 260 using super::matcher_name; 261 using super::file_name; 262 using super::data_array; 263 using super::array_len; 264 265 HostLookup * getHLookup()266 getHLookup() 267 { 268 return host_lookup; 269 } 270 271 private: 272 static void PrintFunc(void *opaque_data); 273 HostLookup *host_lookup = nullptr; // Data structure to do the lookups 274 }; 275 276 template <class Data, class MatchResult> class IpMatcher : protected 
BaseMatcher<Data> 277 { 278 typedef BaseMatcher<Data> super; 279 280 public: 281 IpMatcher(const char *name, const char *filename); 282 283 void AllocateSpace(int num_entries); 284 Result NewEntry(matcher_line *line_info); 285 286 void Match(sockaddr const *ip_addr, RequestData *rdata, MatchResult *result) const; 287 void Print() const; 288 289 using super::num_el; 290 using super::matcher_name; 291 using super::file_name; 292 using super::data_array; 293 using super::array_len; 294 295 private: 296 static void PrintFunc(void *opaque_data); 297 IpMap ip_map; // Data structure to do lookups 298 }; 299 300 #define ALLOW_HOST_TABLE 1 << 0 301 #define ALLOW_IP_TABLE 1 << 1 302 #define ALLOW_REGEX_TABLE 1 << 2 303 #define ALLOW_HOST_REGEX_TABLE 1 << 3 304 #define ALLOW_URL_TABLE 1 << 4 305 #define DONT_BUILD_TABLE 1 << 5 // for testing 306 307 template <class Data, class MatchResult> class ControlMatcher 308 { 309 public: 310 // Parameter name must not be deallocated before this object is 311 ControlMatcher(const char *file_var, const char *name, const matcher_tags *tags, 312 int flags_in = (ALLOW_HOST_TABLE | ALLOW_IP_TABLE | ALLOW_REGEX_TABLE | ALLOW_HOST_REGEX_TABLE | ALLOW_URL_TABLE)); 313 ~ControlMatcher(); 314 315 int BuildTable(); 316 int BuildTableFromString(char *str); 317 318 void Match(RequestData *rdata, MatchResult *result) const; 319 void Print() const; 320 321 int getEntryCount()322 getEntryCount() const 323 { 324 return m_numEntries; 325 } 326 327 HostMatcher<Data, MatchResult> * getHostMatcher()328 getHostMatcher() 329 { 330 return hostMatch; 331 } 332 333 RegexMatcher<Data, MatchResult> * getReMatcher()334 getReMatcher() 335 { 336 return reMatch; 337 } 338 339 IpMatcher<Data, MatchResult> * getIPMatcher()340 getIPMatcher() 341 { 342 return ipMatch; 343 } 344 345 // private 346 RegexMatcher<Data, MatchResult> *reMatch; 347 UrlMatcher<Data, MatchResult> *urlMatch; 348 HostMatcher<Data, MatchResult> *hostMatch; 349 IpMatcher<Data, MatchResult> *ipMatch; 
350 HostRegexMatcher<Data, MatchResult> *hrMatch; 351 352 const matcher_tags *config_tags = nullptr; 353 char config_file_path[PATH_NAME_MAX]; 354 int flags = 0; 355 int m_numEntries = 0; 356 const char *matcher_name = "unknown"; // Used for Debug/Warning/Error messages 357 }; 358