1 /** @file
2 
3   A brief file description
4 
5   @section license License
6 
7   Licensed to the Apache Software Foundation (ASF) under one
8   or more contributor license agreements.  See the NOTICE file
9   distributed with this work for additional information
10   regarding copyright ownership.  The ASF licenses this file
11   to you under the Apache License, Version 2.0 (the
12   "License"); you may not use this file except in compliance
13   with the License.  You may obtain a copy of the License at
14 
15       http://www.apache.org/licenses/LICENSE-2.0
16 
17   Unless required by applicable law or agreed to in writing, software
18   distributed under the License is distributed on an "AS IS" BASIS,
19   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
20   See the License for the specific language governing permissions and
21   limitations under the License.
22  */
23 
24 /*****************************************************************************
25  *
26  *  ControlMatcher.h - Interface to general purpose matcher
27  *
28  *
29  *
30  *
31  *  Description:
32  *
33  *     The control matcher module provides the ability to lookup arbitrary
34  *  information specific to a URL and IP address.  The outside
35  *  world only sees the ControlMatcher class which parses the relevant
36  *  configuration file and builds the lookup table
37  *
 *     Four types of matches are supported: hostname, domain name, ip address
 *  and URL regex.  For these four types, three lookup tables are used.  Regex and
 *  ip lookups have their own tables and host and domain lookups share a single
 *  table
42  *
43  *  Module Purpose & Specifications
44  *  -------------------------------
45  *   -  to provide a generic mechanism for matching configuration data
46  *       against hostname, domain name, ip address and URL regex
47  *   -  the generic mechanism should require minimum effort to apply it
48  *       to new features that require per request matching
49  *   -  for the mechanism to be efficient such that lookups against
50  *       the tables are not a performance problem when they are both done
51  *       for every request through the proxy and set of matching
52  *       is very large
53  *
54  *  Lookup Table Descriptions
55  *  -------------------------
56  *
57  *   regex table - implemented as a linear list of regular expressions to
58  *       match against
59  *
 *   host/domain table - The host domain table is logically implemented as
 *       a tree, broken up at each partition in a hostname.  Three mechanisms
 *       are used to move from one level to the next: a hash table, a fixed
 *       sized array and a constant time index (class charIndex).  The constant
 *       time index is only used to move from the root domain to the first
 *       level partition (ie: .com). The fixed array is used for subsequent
 *       partitions until the fan out exceeds the array's fixed size, at which
 *       time the fixed array is converted to a hash table
68  *
 *   ip table - supports ip ranges.  A single ip address is treated as
 *       a range with the same beginning and end address.  The table
 *       is divided up into a fixed number of levels, indexed on 8 bit
 *       boundaries, starting at the high bit of the address.  Subsequent
 *       levels are allocated only when needed.
74  *
75  ****************************************************************************/
76 
77 //
78 // IMPORTANT: Instantiating these templates
79 //
80 //    The Implementation for these templates appears in
81 //     ControlMatcher.cc   To get the templates instantiated
82 //     correctly on all compilers new uses MUST explicitly
83 //     instantiate the new instance at the bottom of
84 //     ControlMatcher.cc
85 //
86 
87 #pragma once
88 
89 #include "tscore/IpMap.h"
90 #include "tscore/Result.h"
91 #include "tscore/MatcherUtils.h"
92 
93 #include "tscore/ink_apidefs.h"
94 #include "tscore/ink_defs.h"
95 #include "HTTP.h"
96 #include "tscore/Regex.h"
97 #include "URL.h"
98 
99 #include <unordered_map>
100 
101 #ifdef HAVE_CTYPE_H
102 #include <cctype>
103 #endif
104 
// Report a configuration error.
//
//   _buf     - message text; it is passed to the manager signal and to Error().
//              NOTE: it is expanded twice, so it must not have side effects.
//   _already - modifiable bool lvalue. The manager is signalled only while it
//              is still false; it is set to true on every invocation, so the
//              same flag suppresses repeated signals. Error() is logged every
//              time regardless of the flag.
//
// Both arguments are parenthesized in the expansion so that expression
// arguments (e.g. `cond ? a : b`) keep their meaning.
//
// The expansion is a brace block, not a do { } while (0) statement: do not use
// `SignalError(...);` as the unbraced body of an `if` that has an `else`.
#define SignalError(_buf, _already)                           \
  {                                                           \
    if ((_already) == false)                                  \
      pmgmt->signalManager(MGMT_SIGNAL_CONFIG_ERROR, (_buf)); \
    (_already) = true;                                        \
    Error("%s", (_buf));                                      \
  }
112 
113 class HostLookup;
114 struct HttpApiInfo;
115 struct matcher_line;
116 struct matcher_tags;
117 
// Abstract view of one request as seen by the matchers. Concrete request
// types implement the accessors; the matcher templates only ever work through
// this interface.
struct RequestData {
  virtual ~RequestData() = default;

  // The first three accessors are the lookup keys for the tables.
  // get_ip() may report either the client or the server address, depending on
  // how the user of the module wants the table to be keyed.
  virtual char *get_string()       = 0;
  virtual const char *get_host()   = 0;
  virtual sockaddr const *get_ip() = 0;

  // Always the client address, independent of what get_ip() is keyed on.
  virtual sockaddr const *get_client_ip() = 0;
};
131 
132 class HttpRequestData : public RequestData
133 {
134 public:
135   inkcoreapi char *get_string() override;
136   inkcoreapi const char *get_host() override;
137   inkcoreapi sockaddr const *get_ip() override;
138   inkcoreapi sockaddr const *get_client_ip() override;
139 
HttpRequestData()140   HttpRequestData()
141 
142   {
143     ink_zero(src_ip);
144     ink_zero(dest_ip);
145   }
146 
147   HTTPHdr *hdr          = nullptr;
148   char *hostname_str    = nullptr;
149   HttpApiInfo *api_info = nullptr;
150   time_t xact_start     = 0;
151   IpEndpoint src_ip;
152   IpEndpoint dest_ip;
153   uint16_t incoming_port                = 0;
154   char *tag                             = nullptr;
155   bool internal_txn                     = false;
156   URL **cache_info_lookup_url           = nullptr;
157   URL **cache_info_parent_selection_url = nullptr;
158 };
159 
// Mixin class for the state shared by all matcher templates. It only wraps
// the common members so that their initializers do not have to be duplicated
// in every matcher. If someone wants to rewrite all this code to use setters
// and getters, by all means, please do so. The plumbing is in place :).
//
// Ownership: data_array is an owning raw pointer that the destructor releases
// with delete[]; whatever a derived matcher stores there must come from new[]
// (or be nullptr). The name and filename strings are NOT copied, only the
// pointers are kept, so the caller must keep them alive.
template <class Data> class BaseMatcher
{
public:
  // name     - matcher name used in diagnostics
  // filename - configuration file name used in diagnostics
  BaseMatcher(const char *name, const char *filename) : matcher_name(name), file_name(filename) {}

  // A memberwise copy would leave two objects calling delete[] on the same
  // data_array, so copying is disabled.
  BaseMatcher(const BaseMatcher &) = delete;
  BaseMatcher &operator=(const BaseMatcher &) = delete;

  ~BaseMatcher() { delete[] data_array; }

protected:
  int num_el               = -1;        // number of elements in the table
  const char *matcher_name = "unknown"; // Used for Debug/Warning/Error messages
  const char *file_name    = nullptr;   // Used for Debug/Warning/Error messages
  Data *data_array         = nullptr;   // Array with the Data elements (owned, released with delete[])
  int array_len            = -1;        // allocated length of the matcher's arrays (they all share this length)
};
178 
179 template <class Data, class MatchResult> class UrlMatcher : protected BaseMatcher<Data>
180 {
181   typedef BaseMatcher<Data> super;
182 
183 public:
184   UrlMatcher(const char *name, const char *filename);
185   ~UrlMatcher();
186 
187   void AllocateSpace(int num_entries);
188   Result NewEntry(matcher_line *line_info);
189 
190   void Match(RequestData *rdata, MatchResult *result) const;
191   void Print() const;
192 
193   using super::num_el;
194   using super::matcher_name;
195   using super::file_name;
196   using super::data_array;
197   using super::array_len;
198 
199 private:
200   std::unordered_map<std::string, int> url_ht;
201   char **url_str = nullptr; // array of url strings
202   int *url_value = nullptr; // array of posion of url strings
203 };
204 
205 template <class Data, class MatchResult> class RegexMatcher : protected BaseMatcher<Data>
206 {
207   typedef BaseMatcher<Data> super;
208 
209 public:
210   RegexMatcher(const char *name, const char *filename);
211   ~RegexMatcher();
212 
213   void AllocateSpace(int num_entries);
214   Result NewEntry(matcher_line *line_info);
215 
216   void Match(RequestData *rdata, MatchResult *result) const;
217   void Print() const;
218 
219   using super::num_el;
220   using super::matcher_name;
221   using super::file_name;
222   using super::data_array;
223   using super::array_len;
224 
225 protected:
226   pcre **re_array = nullptr; // array of compiled regexs
227   char **re_str   = nullptr; // array of uncompiled regex strings
228 };
229 
230 template <class Data, class MatchResult> class HostRegexMatcher : public RegexMatcher<Data, MatchResult>
231 {
232   typedef BaseMatcher<Data> super;
233 
234 public:
235   HostRegexMatcher(const char *name, const char *filename);
236   void Match(RequestData *rdata, MatchResult *result) const;
237 
238   using super::num_el;
239   using super::matcher_name;
240   using super::file_name;
241   using super::data_array;
242   using super::array_len;
243 };
244 
245 template <class Data, class MatchResult> class HostMatcher : protected BaseMatcher<Data>
246 {
247   typedef BaseMatcher<Data> super;
248 
249 public:
250   HostMatcher(const char *name, const char *filename);
251   ~HostMatcher();
252 
253   void AllocateSpace(int num_entries);
254   Result NewEntry(matcher_line *line_info);
255 
256   void Match(RequestData *rdata, MatchResult *result) const;
257   void Print() const;
258 
259   using super::num_el;
260   using super::matcher_name;
261   using super::file_name;
262   using super::data_array;
263   using super::array_len;
264 
265   HostLookup *
getHLookup()266   getHLookup()
267   {
268     return host_lookup;
269   }
270 
271 private:
272   static void PrintFunc(void *opaque_data);
273   HostLookup *host_lookup = nullptr; // Data structure to do the lookups
274 };
275 
276 template <class Data, class MatchResult> class IpMatcher : protected BaseMatcher<Data>
277 {
278   typedef BaseMatcher<Data> super;
279 
280 public:
281   IpMatcher(const char *name, const char *filename);
282 
283   void AllocateSpace(int num_entries);
284   Result NewEntry(matcher_line *line_info);
285 
286   void Match(sockaddr const *ip_addr, RequestData *rdata, MatchResult *result) const;
287   void Print() const;
288 
289   using super::num_el;
290   using super::matcher_name;
291   using super::file_name;
292   using super::data_array;
293   using super::array_len;
294 
295 private:
296   static void PrintFunc(void *opaque_data);
297   IpMap ip_map; // Data structure to do lookups
298 };
299 
// Bit flags selecting which lookup tables a ControlMatcher may use. They are
// meant to be OR-ed together and passed as the `flags_in` constructor
// argument. Each replacement list is parenthesized so that a flag keeps its
// value inside any surrounding expression (e.g. `ALLOW_IP_TABLE + 1`,
// `~ALLOW_IP_TABLE`).
#define ALLOW_HOST_TABLE (1 << 0)
#define ALLOW_IP_TABLE (1 << 1)
#define ALLOW_REGEX_TABLE (1 << 2)
#define ALLOW_HOST_REGEX_TABLE (1 << 3)
#define ALLOW_URL_TABLE (1 << 4)
#define DONT_BUILD_TABLE (1 << 5) // for testing
306 
307 template <class Data, class MatchResult> class ControlMatcher
308 {
309 public:
310   // Parameter name must not be deallocated before this object is
311   ControlMatcher(const char *file_var, const char *name, const matcher_tags *tags,
312                  int flags_in = (ALLOW_HOST_TABLE | ALLOW_IP_TABLE | ALLOW_REGEX_TABLE | ALLOW_HOST_REGEX_TABLE | ALLOW_URL_TABLE));
313   ~ControlMatcher();
314 
315   int BuildTable();
316   int BuildTableFromString(char *str);
317 
318   void Match(RequestData *rdata, MatchResult *result) const;
319   void Print() const;
320 
321   int
getEntryCount()322   getEntryCount() const
323   {
324     return m_numEntries;
325   }
326 
327   HostMatcher<Data, MatchResult> *
getHostMatcher()328   getHostMatcher()
329   {
330     return hostMatch;
331   }
332 
333   RegexMatcher<Data, MatchResult> *
getReMatcher()334   getReMatcher()
335   {
336     return reMatch;
337   }
338 
339   IpMatcher<Data, MatchResult> *
getIPMatcher()340   getIPMatcher()
341   {
342     return ipMatch;
343   }
344 
345   // private
346   RegexMatcher<Data, MatchResult> *reMatch;
347   UrlMatcher<Data, MatchResult> *urlMatch;
348   HostMatcher<Data, MatchResult> *hostMatch;
349   IpMatcher<Data, MatchResult> *ipMatch;
350   HostRegexMatcher<Data, MatchResult> *hrMatch;
351 
352   const matcher_tags *config_tags = nullptr;
353   char config_file_path[PATH_NAME_MAX];
354   int flags                = 0;
355   int m_numEntries         = 0;
356   const char *matcher_name = "unknown"; // Used for Debug/Warning/Error messages
357 };
358