1 /*
2   Licensed to the Apache Software Foundation (ASF) under one
3   or more contributor license agreements.  See the NOTICE file
4   distributed with this work for additional information
5   regarding copyright ownership.  The ASF licenses this file
6   to you under the Apache License, Version 2.0 (the
7   "License"); you may not use this file except in compliance
8   with the License.  You may obtain a copy of the License at
9 
10   http://www.apache.org/licenses/LICENSE-2.0
11 
12   Unless required by applicable law or agreed to in writing, software
13   distributed under the License is distributed on an "AS IS" BASIS,
14   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   See the License for the specific language governing permissions and
16   limitations under the License.
17 */
18 ////////////////////////////////////////////////////////////////////////////////
19 // cookie_remap: ATS plugin to do (simple) cookie based remap rules
20 // To use this plugin, configure a remap.config rule like
21 //   map http://foo.com http://bar.com @plugin=.../libexec/cookie_remap.so
22 //   @pparam=maps.reg
23 
#include "cookiejar.h"
#include <ts/ts.h>

#include <pcre.h>
#include <ts/remap.h>
#include <yaml-cpp/yaml.h>

#include <exception>
#include <memory>
#include <string>
#include <vector>
#include <string_view>
#include <cstddef>
#include "hash.h"
36 
// Expands a std::string_view into the two arguments (int length, then the
// character pointer) that a printf-style "%.*s" conversion consumes.
#undef FMT_SV
#define FMT_SV(SV) static_cast<int>((SV).size()), (SV).data()

using namespace std;

// Debug tag handed to TSDebug(); also used as the prefix of error messages.
#define MY_NAME "cookie_remap"

// Number of ints in the output vector handed to pcre_exec(). Rewrite strings
// can only reference single-digit groups ($1 - $9), and PCRE wants three ints
// per reported substring, hence 3 x 10.
constexpr int OVECCOUNT = 30;
45 
46 class UrlComponents
47 {
48 public:
UrlComponents(TSRemapRequestInfo * rri,TSHttpTxn txn)49   UrlComponents(TSRemapRequestInfo *rri, TSHttpTxn txn) : _rri(rri), _txn(txn) {}
50 
51   std::string const &
path(bool pre_remap)52   path(bool pre_remap)
53   {
54     if (_d[pre_remap].path_str.empty()) {
55       auto urlh = _get_url(pre_remap);
56       // based on RFC2396, matrix params are part of path segments so
57       // we will just
58       // append them to the path
59       _d[pre_remap].path_str = _get_url_comp(urlh, TSUrlPathGet);
60       auto matrix            = _get_url_comp(urlh, TSUrlHttpParamsGet);
61       if (!matrix.empty()) {
62         _d[pre_remap].path_str.append(";", 1).append(matrix);
63       }
64     }
65     return _d[pre_remap].path_str;
66   }
67 
68   std::string_view
query(bool pre_remap)69   query(bool pre_remap)
70   {
71     if (_d[pre_remap].query.empty()) {
72       _d[pre_remap].query = _get_url_comp(_get_url(pre_remap), TSUrlHttpQueryGet);
73     }
74     return _d[pre_remap].query;
75   }
76 
77   std::string_view
from_path()78   from_path()
79   {
80     if (_from_path.empty()) {
81       _UrlHandle urlh{_rri->requestBufp, _rri->mapFromUrl};
82       _from_path = _get_url_comp(urlh, TSUrlPathGet);
83     }
84     return _from_path;
85   }
86 
87   std::string_view
url(bool pre_remap)88   url(bool pre_remap)
89   {
90     if (_d[pre_remap].url.empty()) {
91       auto urlh = _get_url(pre_remap);
92       int length;
93       auto data         = TSUrlStringGet(urlh.bufp, urlh.urlp, &length);
94       _d[pre_remap].url = std::string_view(data, length);
95     }
96     return _d[pre_remap].url;
97   }
98 
99   // No copying/moving.
100   //
101   UrlComponents(UrlComponents const &) = delete;
102   UrlComponents &operator=(UrlComponents const &) = delete;
103 
~UrlComponents()104   ~UrlComponents()
105   {
106     // Not calling TSHandleMLocRelease() for the URL TSMLoc pointers because it doesn't do anything.
107 
108     if (_d[0].url.data() != nullptr) {
109       TSfree(const_cast<char *>(_d[0].url.data()));
110     }
111     if (_d[1].url.data() != nullptr) {
112       TSfree(const_cast<char *>(_d[1].url.data()));
113     }
114   }
115 
116 private:
117   TSRemapRequestInfo *_rri;
118   TSHttpTxn _txn;
119 
120   struct _UrlHandle {
121     TSMBuffer bufp = nullptr;
122     TSMLoc urlp;
123   };
124 
125   // Buffer any data that's likely to be used more than once.
126 
127   struct _Data {
128     _UrlHandle urlh;
129     std::string path_str;
130     std::string_view url;
131     std::string_view query;
132   };
133 
134   // index 0 - remapped
135   // index 1 - pre-remap
136   //
137   _Data _d[2];
138 
139   std::string_view _from_path;
140 
141   _UrlHandle
_get_url(bool pre_remap)142   _get_url(bool pre_remap)
143   {
144     _UrlHandle h = _d[pre_remap].urlh;
145 
146     if (!h.bufp) {
147       if (pre_remap) {
148         if (TSHttpTxnPristineUrlGet(_txn, &h.bufp, &h.urlp) != TS_SUCCESS) {
149           TSError("%s: Plugin is unable to get pristine url", MY_NAME);
150           return _UrlHandle();
151         }
152       } else {
153         h.bufp = _rri->requestBufp;
154         h.urlp = _rri->requestUrl;
155       }
156       _d[pre_remap].urlh = h;
157     }
158     return h;
159   }
160 
161   static std::string_view
_get_url_comp(_UrlHandle urlh,char const * (* comp_func)(TSMBuffer,TSMLoc,int *))162   _get_url_comp(_UrlHandle urlh, char const *(*comp_func)(TSMBuffer, TSMLoc, int *))
163   {
164     int length;
165     auto data = comp_func(urlh.bufp, urlh.urlp, &length);
166     return std::string_view(data, length);
167   }
168 };
169 
170 enum operation_type { UNKNOWN = -1, EXISTS = 1, NOTEXISTS, REGEXP, STRING, BUCKET };
171 
172 enum target_type {
173   COOKIE = 1,
174   URI, // URI = PATH + QUERY
175   PRE_REMAP_URI,
176   UNKNOWN_TARGET
177 };
178 
/***************************************************************************************
                              Byte to two-digit hex converter

Writes the two upper-case hexadecimal digits of the low-order byte of a number
into a caller supplied buffer. It accepts signed and unsigned char, short and
integer (long) arguments.

Negative values are converted from their byte representation (a char holding
0xE9 yields "E9"), which is what percent-encoding needs. Earlier versions
negated the value first and so produced the wrong digits for bytes >= 0x80.

Arguments:
  * _num is the number to convert to hex; only its low 8 bits are used
  * hdigits two-byte character array, will be populated with the hex digits
    (most significant first); no terminating NUL is written

***************************************************************************************/

template <class type> // template usage to allow multiple types of parameters
void
dec_to_hex(type _num, char *hdigits)
{
  // lookup table stores the hex digits at their corresponding index
  static constexpr char hlookup[] = "0123456789ABCDEF";

  // Reduce to an unsigned byte first so that shifting and masking never
  // operate on a negative value.
  const auto byte = static_cast<unsigned char>(_num);

  hdigits[0] = hlookup[byte >> 4];
  hdigits[1] = hlookup[byte & 0x0f];
}
217 
218 void
urlencode(std::string & str)219 urlencode(std::string &str)
220 {
221   auto orig = str.size();
222   auto enc  = orig;
223   for (auto c : str) {
224     if (!isalnum(c)) {
225       enc += 2;
226     }
227   }
228   if (enc == orig) {
229     // No changes needed.
230     return;
231   }
232   str.resize(enc);
233   while (orig--) {
234     if (!isalnum(str[orig])) {
235       enc -= 3;
236       dec_to_hex(str[orig], &(str[enc + 1]));
237       str[enc] = '%';
238     } else {
239       str[--enc] = str[orig];
240     }
241   }
242 }
243 
244 //----------------------------------------------------------------------------
245 class subop
246 {
247 public:
subop()248   subop()
249     : cookie(""),
250       operation(""),
251 
252       str_match(""),
253 
254       bucket("")
255 
256   {
257     TSDebug(MY_NAME, "subop constructor called");
258   }
259 
~subop()260   ~subop()
261   {
262     TSDebug(MY_NAME, "subop destructor called");
263     if (regex) {
264       pcre_free(regex);
265     }
266 
267     if (regex_extra) {
268       pcre_free(regex_extra);
269     }
270   }
271 
272   bool
empty() const273   empty() const
274   {
275     return (cookie == "" && operation == "" && op_type == UNKNOWN);
276   }
277 
278   void
setCookieName(const std::string & s)279   setCookieName(const std::string &s)
280   {
281     cookie = s;
282   }
283 
284   const std::string &
getCookieName() const285   getCookieName() const
286   {
287     return cookie;
288   }
289 
290   const std::string &
getOperation() const291   getOperation() const
292   {
293     return operation;
294   }
295 
296   operation_type
getOpType() const297   getOpType() const
298   {
299     return op_type;
300   }
301 
302   target_type
getTargetType() const303   getTargetType() const
304   {
305     return target;
306   }
307 
308   void
setOperation(const std::string & s)309   setOperation(const std::string &s)
310   {
311     operation = s;
312 
313     if (operation == "string") {
314       op_type = STRING;
315     }
316     if (operation == "regex") {
317       op_type = REGEXP;
318     }
319     if (operation == "exists") {
320       op_type = EXISTS;
321     }
322     if (operation == "not exists") {
323       op_type = NOTEXISTS;
324     }
325     if (operation == "bucket") {
326       op_type = BUCKET;
327     }
328   }
329 
330   void
setTarget(const std::string & s)331   setTarget(const std::string &s)
332   {
333     if (s == "uri") {
334       target = URI;
335     } else if (s == "puri") {
336       target = PRE_REMAP_URI;
337     } else {
338       target = COOKIE;
339     }
340   }
341 
342   void
setStringMatch(const std::string & s)343   setStringMatch(const std::string &s)
344   {
345     op_type   = STRING;
346     str_match = s;
347   }
348 
349   const std::string &
getStringMatch() const350   getStringMatch() const
351   {
352     return str_match;
353   }
354 
355   void
setBucket(const std::string & s)356   setBucket(const std::string &s)
357   {
358     int start_pos = s.find('/');
359 
360     op_type  = BUCKET;
361     bucket   = s;
362     how_many = atoi(bucket.substr(0, start_pos).c_str());
363     out_of   = atoi(bucket.substr(start_pos + 1).c_str());
364   }
365 
366   int
bucketGetTaking() const367   bucketGetTaking() const
368   {
369     return how_many;
370   }
371 
372   int
bucketOutOf() const373   bucketOutOf() const
374   {
375     return out_of;
376   }
377 
378   bool
setRegexMatch(const std::string & s)379   setRegexMatch(const std::string &s)
380   {
381     const char *error_comp  = nullptr;
382     const char *error_study = nullptr;
383     int erroffset;
384 
385     op_type      = REGEXP;
386     regex_string = s;
387     regex        = pcre_compile(regex_string.c_str(), 0, &error_comp, &erroffset, nullptr);
388 
389     if (regex == nullptr) {
390       return false;
391     }
392     regex_extra = pcre_study(regex, 0, &error_study);
393     if ((regex_extra == nullptr) && (error_study != nullptr)) {
394       return false;
395     }
396 
397     if (pcre_fullinfo(regex, regex_extra, PCRE_INFO_CAPTURECOUNT, &regex_ccount) != 0) {
398       return false;
399     }
400 
401     return true;
402   }
403 
404   const std::string &
getRegexString() const405   getRegexString() const
406   {
407     return regex_string;
408   }
409 
410   int
getRegexCcount() const411   getRegexCcount() const
412   {
413     return regex_ccount;
414   }
415 
416   int
regexMatch(const char * str,int len,int ovector[]) const417   regexMatch(const char *str, int len, int ovector[]) const
418   {
419     return pcre_exec(regex,       // the compiled pattern
420                      regex_extra, // Extra data from study (maybe)
421                      str,         // the subject std::string
422                      len,         // the length of the subject
423                      0,           // start at offset 0 in the subject
424                      0,           // default options
425                      ovector,     // output vector for substring information
426                      OVECCOUNT);  // number of elements in the output vector
427   };
428 
429   void
printSubOp() const430   printSubOp() const
431   {
432     TSDebug(MY_NAME, "\t+++subop+++");
433     TSDebug(MY_NAME, "\t\tcookie: %s", cookie.c_str());
434     TSDebug(MY_NAME, "\t\toperation: %s", operation.c_str());
435     if (str_match.size() > 0) {
436       TSDebug(MY_NAME, "\t\tmatching: %s", str_match.c_str());
437     }
438     if (regex) {
439       TSDebug(MY_NAME, "\t\tregex: %s", regex_string.c_str());
440     }
441     if (bucket.size() > 0) {
442       TSDebug(MY_NAME, "\t\tbucket: %s", bucket.c_str());
443       TSDebug(MY_NAME, "\t\ttaking: %d", how_many);
444       TSDebug(MY_NAME, "\t\tout of: %d", out_of);
445     }
446   }
447 
448 private:
449   std::string cookie;
450   std::string operation;
451   enum operation_type op_type = UNKNOWN;
452   enum target_type target     = UNKNOWN_TARGET;
453 
454   std::string str_match;
455 
456   pcre *regex             = nullptr;
457   pcre_extra *regex_extra = nullptr;
458   std::string regex_string;
459   int regex_ccount = 0;
460 
461   std::string bucket;
462   unsigned int how_many = 0;
463   unsigned int out_of   = 0;
464 };
465 
466 using SubOpQueue = std::vector<const subop *>;
467 
468 //----------------------------------------------------------------------------
469 class op
470 {
471 public:
op()472   op() { TSDebug(MY_NAME, "op constructor called"); }
473 
~op()474   ~op()
475   {
476     TSDebug(MY_NAME, "op destructor called");
477     for (auto &subop : subops) {
478       delete subop;
479     }
480   }
481 
482   void
addSubOp(const subop * s)483   addSubOp(const subop *s)
484   {
485     subops.push_back(s);
486   }
487 
488   void
setSendTo(const std::string & s)489   setSendTo(const std::string &s)
490   {
491     sendto = s;
492   }
493 
494   const std::string &
getSendTo() const495   getSendTo() const
496   {
497     return sendto;
498   }
499 
500   void
setElseSendTo(const std::string & s)501   setElseSendTo(const std::string &s)
502   {
503     else_sendto = s;
504   }
505 
506   void
setStatus(const std::string & s)507   setStatus(const std::string &s)
508   {
509     if (else_sendto.size() > 0) {
510       else_status = static_cast<TSHttpStatus>(atoi(s.c_str()));
511     } else {
512       status = static_cast<TSHttpStatus>(atoi(s.c_str()));
513     }
514   }
515 
516   void
setElseStatus(const std::string & s)517   setElseStatus(const std::string &s)
518   {
519     else_status = static_cast<TSHttpStatus>(atoi(s.c_str()));
520   }
521 
522   void
printOp() const523   printOp() const
524   {
525     TSDebug(MY_NAME, "++++operation++++");
526     TSDebug(MY_NAME, "sending to: %s", sendto.c_str());
527     TSDebug(MY_NAME, "if these operations match: ");
528 
529     for (auto subop : subops) {
530       subop->printSubOp();
531     }
532     if (else_sendto.size() > 0) {
533       TSDebug(MY_NAME, "else: %s", else_sendto.c_str());
534     }
535   }
536 
537   bool
process(CookieJar & jar,std::string & dest,TSHttpStatus & retstat,TSRemapRequestInfo * rri,UrlComponents & req_url) const538   process(CookieJar &jar, std::string &dest, TSHttpStatus &retstat, TSRemapRequestInfo *rri, UrlComponents &req_url) const
539   {
540     if (sendto == "") {
541       return false; // guessing every operation must have a
542                     // sendto url???
543     }
544 
545     int retval        = 1;
546     bool cookie_found = false;
547     std::string c;
548     std::string cookie_data;
549     std::string object_name; // name of the thing being processed,
550                              // cookie, or
551                              // request url
552 
553     TSDebug(MY_NAME, "starting to process a new operation");
554 
555     for (auto subop : subops) {
556       // subop* s = *it;
557       int subop_type     = subop->getOpType();
558       target_type target = subop->getTargetType();
559 
560       c = subop->getCookieName();
561       if (c.length()) {
562         TSDebug(MY_NAME, "processing cookie: %s", c.c_str());
563 
564         size_t period_pos = c.find_first_of('.');
565 
566         if (period_pos == std::string::npos) { // not a sublevel
567                                                // cookie name
568           TSDebug(MY_NAME, "processing non-sublevel cookie");
569 
570           cookie_found = jar.get_full(c, cookie_data);
571           TSDebug(MY_NAME, "full cookie: %s", cookie_data.c_str());
572           object_name = c;
573         } else { // is in the format FOO.BAR
574           std::string cookie_main   = c.substr(0, period_pos);
575           std::string cookie_subkey = c.substr(period_pos + 1);
576 
577           TSDebug(MY_NAME, "processing sublevel cookie");
578           TSDebug(MY_NAME, "c key: %s", cookie_main.c_str());
579           TSDebug(MY_NAME, "c subkey: %s", cookie_subkey.c_str());
580 
581           cookie_found = jar.get_part(cookie_main, cookie_subkey, cookie_data);
582           object_name  = cookie_main + " . " + cookie_subkey;
583         }
584         // invariant:  cookie name is in object_name and
585         // cookie data (if any) is
586         // in cookie_data
587 
588         if (cookie_found == false) { // cookie name or sub-key not found
589                                      // inside cookies
590           if (subop_type == NOTEXISTS) {
591             TSDebug(MY_NAME,
592                     "cookie %s was not "
593                     "found (and we wanted "
594                     "that)",
595                     object_name.c_str());
596             continue; // we can short
597                       // circuit more
598                       // testing
599           }
600           TSDebug(MY_NAME, "cookie %s was not found", object_name.c_str());
601           retval &= 0;
602           break;
603         } else {
604           // cookie exists
605           if (subop_type == NOTEXISTS) { // we found the cookie
606                                          // but are asking
607             // for non existence
608             TSDebug(MY_NAME,
609                     "cookie %s was found, "
610                     "but operation "
611                     "requires "
612                     "non-existence",
613                     object_name.c_str());
614             retval &= 0;
615             break;
616           }
617 
618           if (subop_type == EXISTS) {
619             TSDebug(MY_NAME, "cookie %s was found", object_name.c_str()); // got what
620                                                                           // we were
621                                                                           // looking
622                                                                           // for
623             continue;                                                     // we can short
624                                                                           // circuit more
625                                                                           // testing
626           }
627         } // handled EXISTS / NOTEXISTS subops
628 
629         TSDebug(MY_NAME, "processing cookie data: \"%s\"", cookie_data.c_str());
630       } else if (target != PRE_REMAP_URI) {
631         target = URI;
632       }
633 
634       // INVARIANT: we now have the data from the cookie (if
635       // any) inside
636       // cookie_data and we are here because we need
637       // to continue processing this suboperation in some way
638 
639       if (!rri) { // too dangerous to continue without the
640                   // rri; hopefully that
641         // never happens
642         TSDebug(MY_NAME, "request info structure is "
643                          "empty; can't continue "
644                          "processing this subop");
645         retval &= 0;
646         break;
647       }
648 
649       // If the user has specified a cookie in his
650       // suboperation, use the cookie
651       // data for matching;
652       //  otherwise, use the request uri (path + query)
653       std::string request_uri; // only set the value if we
654                                // need it; we might
655       // match the cookie data instead
656       bool use_url = (target == URI) || (target == PRE_REMAP_URI);
657       const std::string &string_to_match(use_url ? request_uri : cookie_data);
658       if (use_url) {
659         request_uri = req_url.path(target == PRE_REMAP_URI);
660         TSDebug(MY_NAME, "process req_url.path = %s", request_uri.c_str());
661         if (request_uri.length() && request_uri[0] != '/') {
662           request_uri.insert(0, 1, '/');
663         }
664         auto query = req_url.query(target == PRE_REMAP_URI);
665         if (query.size() > 0) {
666           request_uri += '?';
667           request_uri += query;
668         }
669         object_name = "request uri";
670       }
671 
672       // invariant:  we've decided at this point what string
673       // we'll match, if we
674       // do matching
675 
676       // OPERATION::string matching
677       if (subop_type == STRING) {
678         if (string_to_match == subop->getStringMatch()) {
679           TSDebug(MY_NAME, "string match succeeded");
680           continue;
681         } else {
682           TSDebug(MY_NAME, "string match failed");
683           retval &= 0;
684           break;
685         }
686       }
687 
688       // OPERATION::regex matching
689       if (subop_type == REGEXP) {
690         int ovector[OVECCOUNT];
691         int ret = subop->regexMatch(string_to_match.c_str(), string_to_match.length(), ovector);
692 
693         if (ret >= 0) {
694           std::string::size_type pos  = sendto.find('$');
695           std::string::size_type ppos = 0;
696 
697           dest.erase();                    // we only reset dest if
698                                            // there is a successful
699                                            // regex
700                                            // match
701           dest.reserve(sendto.size() * 2); // Wild guess at this
702                                            // time ... is
703           // sucks we can't precalculate this
704           // like regex_remap.
705 
706           TSDebug(MY_NAME, "found %d matches", ret);
707           TSDebug(MY_NAME,
708                   "successful regex "
709                   "match of: %s with %s "
710                   "rewriting string: %s",
711                   string_to_match.c_str(), subop->getRegexString().c_str(), sendto.c_str());
712 
713           // replace the $(1-9) in the sendto url
714           // as necessary
715           const size_t LAST_IDX_TO_SEARCH(sendto.length() - 2); // otherwise the below loop can
716                                                                 // access "sendto" out of range
717           while (pos <= LAST_IDX_TO_SEARCH) {
718             if (isdigit(sendto[pos + 1])) {
719               int ix = sendto[pos + 1] - '0';
720 
721               if (ix <= subop->getRegexCcount()) { // Just skip an illegal regex group
722                 dest += sendto.substr(ppos, pos - ppos);
723                 dest += string_to_match.substr(ovector[ix * 2], ovector[ix * 2 + 1] - ovector[ix * 2]);
724                 ppos = pos + 2;
725               } else {
726                 TSDebug(MY_NAME,
727                         "bad "
728                         "rewriting "
729                         "string, "
730                         "for group "
731                         "%d: %s",
732                         ix, sendto.c_str());
733               }
734             }
735             pos = sendto.find('$', pos + 1);
736           }
737           dest += sendto.substr(ppos);
738           continue; // next subop, please
739         } else {
740           TSDebug(MY_NAME,
741                   "could not match "
742                   "regular expression "
743                   "%s to %s",
744                   subop->getRegexString().c_str(), string_to_match.c_str());
745           retval &= 0;
746           break;
747         }
748       }
749 
750       // OPERATION::bucket ranges
751       if (subop_type == BUCKET) {
752         unsigned int taking = subop->bucketGetTaking();
753         unsigned int out_of = subop->bucketOutOf();
754 
755         uint32_t hash;
756 
757         if (taking == 0 || out_of == 0) {
758           TSDebug(MY_NAME,
759                   "taking %d out of %d "
760                   "makes no sense?!",
761                   taking, out_of);
762           retval &= 0;
763           break;
764         }
765 
766         hash = hash_fnv32_buckets(cookie_data.c_str(), cookie_data.size(), out_of);
767         TSDebug(MY_NAME,
768                 "we hashed this to bucket: %u "
769                 "taking: %u out of: %u",
770                 hash, taking, out_of);
771 
772         if (hash < taking) {
773           TSDebug(MY_NAME, "we hashed in the range, yay!");
774           continue; // we hashed in the range
775         } else {
776           TSDebug(MY_NAME, "we didn't hash in the "
777                            "range requested, so "
778                            "sad");
779           retval &= 0;
780           break;
781         }
782       }
783     }
784 
785     if (retval == 1) {
786       if (dest.size() == 0) { // Unless already set by one of
787                               // the operators (e.g. regex)
788         dest = sendto;
789       }
790       if (status > 0) {
791         retstat = status;
792       }
793       return true;
794     } else if (else_sendto.size() > 0 && retval == 0) {
795       dest = else_sendto;
796       if (else_status > 0) {
797         retstat = else_status;
798       }
799       return true;
800     } else {
801       dest = "";
802       return false;
803     }
804   }
805 
806 private:
807   SubOpQueue subops{};
808   std::string sendto{""};
809   std::string else_sendto{""};
810   TSHttpStatus status      = TS_HTTP_STATUS_NONE;
811   TSHttpStatus else_status = TS_HTTP_STATUS_NONE;
812 };
813 
814 using StringPair = std::pair<std::string, std::string>;
815 using OpMap      = std::vector<StringPair>;
816 
817 //----------------------------------------------------------------------------
818 static bool
build_op(op & o,OpMap const & q)819 build_op(op &o, OpMap const &q)
820 {
821   subop *sub = new subop();
822 
823   // loop through the array of key->value pairs
824   for (auto const &pr : q) {
825     std::string const &key = pr.first;
826     std::string const &val = pr.second;
827 
828     TSDebug(MY_NAME, "build_op: key=%s val=%s", key.c_str(), val.c_str());
829 
830     if (key == "cookie") {
831       if (!sub->empty()) {
832         TSDebug(MY_NAME, "ERROR: you need to define a connector");
833         goto error;
834       }
835       sub->setCookieName(val);
836     }
837 
838     if (key == "sendto" || key == "url") {
839       o.setSendTo(val);
840     }
841 
842     if (key == "else") {
843       o.setElseSendTo(val);
844     }
845 
846     if (key == "status") {
847       o.setStatus(val);
848     }
849 
850     if (key == "operation") {
851       sub->setOperation(val);
852     }
853 
854     if (key == "target") {
855       sub->setTarget(val);
856     }
857 
858     if (key == "match") {
859       sub->setStringMatch(val);
860     }
861 
862     if (key == "regex") {
863       bool ret = sub->setRegexMatch(val);
864 
865       if (!ret) {
866         goto error;
867       }
868     }
869 
870     if (key == "bucket" || key == "hash") {
871       sub->setBucket(val);
872     }
873 
874     if (key == "connector") {
875       o.addSubOp(sub);
876       sub = new subop();
877     }
878   }
879 
880   o.addSubOp(sub);
881   return true;
882 
883 error:
884   TSDebug(MY_NAME, "error building operation");
885   return false;
886 }
887 
888 using OpsQueue = std::vector<const op *>;
889 
890 //----------------------------------------------------------------------------
891 // init
892 TSReturnCode
TSRemapInit(TSRemapInterface * api_info,char * errbuf,int errbuf_size)893 TSRemapInit(TSRemapInterface *api_info, char *errbuf, int errbuf_size)
894 {
895   return TS_SUCCESS;
896 }
897 
898 //----------------------------------------------------------------------------
899 // initialization of structures from config parameters
900 TSReturnCode
TSRemapNewInstance(int argc,char * argv[],void ** ih,char * errbuf,int errbuf_size)901 TSRemapNewInstance(int argc, char *argv[], void **ih, char *errbuf, int errbuf_size)
902 {
903   if (argc != 3) {
904     TSError("arguments not equal to 3: %d", argc);
905     TSDebug(MY_NAME, "arguments not equal to 3: %d", argc);
906     return TS_ERROR;
907   }
908 
909   std::string filename(argv[2]);
910   try {
911     YAML::Node config = YAML::LoadFile(filename);
912 
913     std::unique_ptr<OpsQueue> ops(new OpsQueue);
914     OpMap op_data;
915 
916     for (YAML::const_iterator it = config.begin(); it != config.end(); ++it) {
917       const string &name         = it->first.as<std::string>();
918       YAML::NodeType::value type = it->second.Type();
919 
920       if (name != "op" || type != YAML::NodeType::Map) {
921         const string reason = "Top level nodes must be named op and be of type map";
922         TSError("Invalid YAML Configuration format for cookie_remap: %s, reason: %s", filename.c_str(), reason.c_str());
923         return TS_ERROR;
924       }
925 
926       for (YAML::const_iterator it2 = it->second.begin(); it2 != it->second.end(); ++it2) {
927         const YAML::Node first  = it2->first;
928         const YAML::Node second = it2->second;
929 
930         if (second.IsScalar() == false) {
931           const string reason = "All op nodes must be of type scalar";
932           TSError("Invalid YAML Configuration format for cookie_remap: %s, reason: %s", filename.c_str(), reason.c_str());
933           return TS_ERROR;
934         }
935 
936         const string &key   = first.as<std::string>();
937         const string &value = second.as<std::string>();
938         op_data.emplace_back(key, value);
939       }
940 
941       if (op_data.size()) {
942         op *o = new op();
943         if (!build_op(*o, op_data)) {
944           delete o;
945 
946           TSError("building operation, check configuration file: %s", filename.c_str());
947           return TS_ERROR;
948         } else {
949           ops->push_back(o);
950         }
951         o->printOp();
952         op_data.clear();
953       }
954     }
955 
956     TSDebug(MY_NAME, "# of ops: %d", static_cast<int>(ops->size()));
957     *ih = static_cast<void *>(ops.release());
958   } catch (const YAML::Exception &e) {
959     TSError("YAML::Exception %s when parsing YAML config file %s for cookie_remap", e.what(), filename.c_str());
960     return TS_ERROR;
961   }
962 
963   return TS_SUCCESS;
964 }
965 
966 namespace
967 {
968 std::string
unmatched_path(UrlComponents & req_url,bool pre_remap)969 unmatched_path(UrlComponents &req_url, bool pre_remap)
970 {
971   std::string path           = req_url.path(pre_remap);
972   std::string_view from_path = req_url.from_path();
973 
974   std::size_t pos = path.find(from_path);
975   if (pos != std::string::npos) {
976     path.erase(pos, from_path.size());
977   }
978   TSDebug(MY_NAME, "from_path: %*s", FMT_SV(from_path));
979   TSDebug(MY_NAME, "%s: %s", pre_remap ? "unmatched_ppath" : "unmatched_path", path.c_str());
980 
981   return path;
982 }
983 
// Identifiers of the recognised substitution names. They are all <= 0 so
// that sub_lookup() can tell them apart from the (positive) entry counts of
// the lookup tables below.
int const sub_req_url_id         = 0;
int const sub_req_purl_id        = -1;
int const sub_path_id            = -2;
int const sub_ppath_id           = -3;
int const sub_unmatched_path_id  = -4;
int const sub_unmatched_ppath_id = -5;
int const sub_url_encode_id      = -6;

// One alternative of a lookup table: the text to match next and what it leads
// to. next points either at one of the ids above (lookup finished) or at the
// count member of the table holding the possible continuations.
struct CompNext {
  std::string_view const comp;
  int const *const next;

  CompNext(std::string_view p, int const *n) : comp(p), next(n) {}
};

// Lookup tables. Each one is an entry count followed directly by that many
// CompNext members; sub_lookup() depends on all of them having this layout.

// After "unmatched_p": "ath" or "path".
struct {
  int count = 2;
  CompNext o1{"ath", &sub_unmatched_path_id};
  CompNext o2{"path", &sub_unmatched_ppath_id};
} const sub_unmatched;

// After "p": "ath" or "path".
struct {
  int count = 2;
  CompNext o1{"ath", &sub_path_id};
  CompNext o2{"path", &sub_ppath_id};
} const sub_p;

// After "cr_req_": "url" or "purl".
struct {
  int count = 2;
  CompNext o1{"url", &sub_req_url_id};
  CompNext o2{"purl", &sub_req_purl_id};
} const sub_cr_req;

// After "cr_": "req_" or "urlencode(".
struct {
  int count = 2;
  CompNext o1{"req_", &sub_cr_req.count};
  CompNext o2{"urlencode(", &sub_url_encode_id};
} const sub_cr;

// Root table: the first component of every substitution name.
struct {
  int count = 3;
  CompNext o1{"cr_", &sub_cr.count};
  CompNext o2{"p", &sub_p.count};
  CompNext o3{"unmatched_p", &sub_unmatched.count};
} const sub;

// Finds the substitution name that targ[0, targ_len) starts with by walking
// the tables from the root. Returns the matching id (zero or negative), or a
// positive number when targ does not start with any known name.
int
sub_lookup(char const *targ, int targ_len)
{
  int remaining        = sub.count;
  CompNext const *node = &sub.o1;

  for (;;) {
    // Step through the alternatives of the current table until one of them
    // is a prefix of what is left of targ.
    for (;;) {
      std::string_view const &token = node->comp;
      bool const fits                = targ_len >= static_cast<int>(token.size());
      if (fits && (std::string_view(targ, token.size()) == token)) {
        break;
      }
      if (--remaining == 0) {
        return 1; // Failed lookup, return some positive number.
      }
      ++node;
    }

    int const link = *node->next;
    if (link <= 0) {
      return link; // Reached an id: the name is complete.
    }

    // link is the entry count of the next table; consume the matched text
    // and continue with that table's first entry, which sits at the same
    // offset from its count member as sub.o1 does from sub.count.
    remaining = link;
    targ += node->comp.size();
    targ_len -= node->comp.size();
    node = reinterpret_cast<CompNext const *>(reinterpret_cast<char const *>(node->next) + offsetof(decltype(sub), o1));
  }
}
1052 
1053 } // end anonymous namespace
1054 
1055 //----------------------------------------------------------------------------
1056 // called whenever we need to perform substitutions on a string; used to replace
1057 // things like
1058 //  $path, $ppath, $unmatched_path, $unmatched_ppath, $cr_req_url, $cr_req_purl, and $cr_url_encode
1059 void
cr_substitutions(std::string & obj,UrlComponents & req_url)1060 cr_substitutions(std::string &obj, UrlComponents &req_url)
1061 {
1062   {
1063     auto path = req_url.path(false);
1064     TSDebug(MY_NAME, "x req_url.path: %*s %d", FMT_SV(path), static_cast<int>(path.size()));
1065     auto url = req_url.url(false);
1066     TSDebug(MY_NAME, "x req_url.url: %*s %d", FMT_SV(url), static_cast<int>(url.size()));
1067   }
1068 
1069   auto npos = std::string::npos;
1070   std::string tmp;
1071   std::size_t pos = 0;
1072   for (;;) {
1073     pos = obj.find('$', pos);
1074     if (npos == pos) {
1075       break;
1076     }
1077     std::string_view variable, value;
1078     switch (sub_lookup(obj.data() + pos + 1, static_cast<int>(obj.size()) - pos - 1)) {
1079     case sub_req_url_id: {
1080       variable = "$cr_req_url";
1081       value    = req_url.url(false);
1082     } break;
1083 
1084     case sub_req_purl_id: {
1085       variable = "$cr_req_purl";
1086       value    = req_url.url(true);
1087     } break;
1088 
1089     case sub_path_id: {
1090       variable = "$path";
1091       value    = req_url.path(false);
1092     } break;
1093 
1094     case sub_ppath_id: {
1095       variable = "$ppath";
1096       value    = req_url.path(true);
1097     } break;
1098 
1099     case sub_unmatched_path_id: {
1100       variable = "$unmatched_path";
1101       tmp      = unmatched_path(req_url, false);
1102       value    = tmp;
1103     } break;
1104 
1105     case sub_unmatched_ppath_id: {
1106       variable = "$unmatched_ppath";
1107       tmp      = unmatched_path(req_url, true);
1108       value    = tmp;
1109     } break;
1110 
1111     case sub_url_encode_id: {
1112       std::size_t bpos = pos + sizeof("cr_urlencode(") - 1;
1113       std::size_t epos = obj.find(')', bpos);
1114       if (npos == epos) {
1115         variable = "$";
1116         value    = variable;
1117       } else {
1118         variable = std::string_view(obj.data() + pos, epos + 1 - pos);
1119 
1120         tmp = obj.substr(bpos, epos - bpos);
1121         cr_substitutions(tmp, req_url);
1122         urlencode(tmp);
1123         value = tmp;
1124       }
1125     } break;
1126 
1127     default: {
1128       variable = "$";
1129       value    = variable;
1130 
1131     } break;
1132 
1133     } // end switch
1134 
1135     TSDebug(MY_NAME, "%*s => %*s", FMT_SV(variable), FMT_SV(value));
1136 
1137     obj.replace(pos, variable.size(), value);
1138 
1139     pos += value.size();
1140 
1141   } // end for (;;)
1142 }
1143 
1144 //----------------------------------------------------------------------------
1145 // called on each request
1146 // returns 0 on error or failure to match rules, 1 on a match
1147 TSRemapStatus
TSRemapDoRemap(void * ih,TSHttpTxn txnp,TSRemapRequestInfo * rri)1148 TSRemapDoRemap(void *ih, TSHttpTxn txnp, TSRemapRequestInfo *rri)
1149 {
1150   OpsQueue *ops       = static_cast<OpsQueue *>(ih);
1151   TSHttpStatus status = TS_HTTP_STATUS_NONE;
1152 
1153   UrlComponents req_url{rri, txnp};
1154 
1155   if (ops == (OpsQueue *)nullptr) {
1156     TSError("serious error with encountered while attempting to "
1157             "cookie_remap");
1158     TSDebug(MY_NAME, "serious error with encountered while attempting to remap");
1159     return TSREMAP_NO_REMAP;
1160   }
1161 
1162   // get any query params..we will append that to the answer (possibly)
1163   std::string client_req_query_params;
1164   auto query = req_url.query(false);
1165   if (!query.empty()) {
1166     client_req_query_params = "?";
1167     client_req_query_params += query;
1168   }
1169   TSDebug(MY_NAME, "Query Parameters: %s", client_req_query_params.c_str());
1170 
1171   std::string rewrite_to;
1172   char cookie_str[] = "Cookie";
1173   TSMLoc field      = TSMimeHdrFieldFind(rri->requestBufp, rri->requestHdrp, cookie_str, sizeof(cookie_str) - 1);
1174 
1175   // cookie header doesn't exist
1176   if (field == nullptr) {
1177     TSDebug(MY_NAME, "no cookie header");
1178     // return TSREMAP_NO_REMAP;
1179   }
1180 
1181   const char *cookie = nullptr;
1182   int cookie_len     = 0;
1183   if (field != nullptr) {
1184     cookie = TSMimeHdrFieldValueStringGet(rri->requestBufp, rri->requestHdrp, field, -1, &cookie_len);
1185   }
1186   std::string temp_cookie(cookie, cookie_len);
1187   CookieJar jar;
1188   jar.create(temp_cookie);
1189 
1190   for (auto &op : *ops) {
1191     TSDebug(MY_NAME, ">>> processing new operation");
1192     if (op->process(jar, rewrite_to, status, rri, req_url)) {
1193       cr_substitutions(rewrite_to, req_url);
1194 
1195       size_t pos = 7;                             // 7 because we want to ignore the // in
1196                                                   // http:// :)
1197       size_t tmp_pos = rewrite_to.find('?', pos); // we don't want to alter the query string
1198       do {
1199         pos = rewrite_to.find("//", pos);
1200         if (pos < tmp_pos) {
1201           rewrite_to.erase(pos, 1); // remove one '/'
1202         }
1203       } while (pos <= rewrite_to.length() && pos < tmp_pos);
1204 
1205       // Add Query Parameters if not already present
1206       if (!client_req_query_params.empty() && rewrite_to.find('?') == std::string::npos) {
1207         rewrite_to.append(client_req_query_params);
1208       }
1209 
1210       TSDebug(MY_NAME, "rewriting to: %s", rewrite_to.c_str());
1211 
1212       // Maybe set the return status
1213       if (status > TS_HTTP_STATUS_NONE) {
1214         TSDebug(MY_NAME, "Setting return status to %d", status);
1215         TSHttpTxnStatusSet(txnp, status);
1216         if ((status == TS_HTTP_STATUS_MOVED_PERMANENTLY) || (status == TS_HTTP_STATUS_MOVED_TEMPORARILY)) {
1217           if (rewrite_to.size() > 8192) {
1218             TSError("Redirect in target "
1219                     "URL too long");
1220             TSHttpTxnStatusSet(txnp, TS_HTTP_STATUS_REQUEST_URI_TOO_LONG);
1221           } else {
1222             const char *start = rewrite_to.c_str();
1223             int dest_len      = rewrite_to.size();
1224 
1225             if (TS_PARSE_ERROR == TSUrlParse(rri->requestBufp, rri->requestUrl, &start, start + dest_len)) {
1226               TSHttpTxnStatusSet(txnp, TS_HTTP_STATUS_INTERNAL_SERVER_ERROR);
1227               TSError("can't parse "
1228                       "substituted "
1229                       "URL string");
1230             } else {
1231               rri->redirect = 1;
1232             }
1233           }
1234         }
1235         if (field != nullptr) {
1236           TSHandleMLocRelease(rri->requestBufp, rri->requestHdrp, field);
1237         }
1238         if (rri->redirect) {
1239           return TSREMAP_DID_REMAP;
1240         } else {
1241           return TSREMAP_NO_REMAP;
1242         }
1243       }
1244 
1245       const char *start = rewrite_to.c_str();
1246 
1247       // set the new url
1248       if (TSUrlParse(rri->requestBufp, rri->requestUrl, &start, start + rewrite_to.length()) == TS_PARSE_ERROR) {
1249         TSHttpTxnStatusSet(txnp, TS_HTTP_STATUS_INTERNAL_SERVER_ERROR);
1250         TSError("can't parse substituted URL string");
1251         goto error;
1252       } else {
1253         if (field != nullptr) {
1254           TSHandleMLocRelease(rri->requestBufp, rri->requestHdrp, field);
1255         }
1256         return TSREMAP_DID_REMAP;
1257       }
1258 
1259     // Cleanup
1260     error:
1261       if (field != nullptr) {
1262         TSHandleMLocRelease(rri->requestBufp, rri->requestHdrp, field);
1263       }
1264       return TSREMAP_NO_REMAP;
1265     }
1266   }
1267 
1268   TSDebug(MY_NAME, "could not execute ANY of the cookie remap operations... "
1269                    "falling back to default in remap.config");
1270 
1271   if (field != nullptr) {
1272     TSHandleMLocRelease(rri->requestBufp, rri->requestHdrp, field);
1273   }
1274   return TSREMAP_NO_REMAP;
1275 }
1276 
1277 //----------------------------------------------------------------------------
1278 // unload
1279 void
TSRemapDeleteInstance(void * ih)1280 TSRemapDeleteInstance(void *ih)
1281 {
1282   OpsQueue *ops = static_cast<OpsQueue *>(ih);
1283 
1284   TSDebug(MY_NAME, "deleting loaded operations");
1285   for (auto &op : *ops) {
1286     delete op;
1287   }
1288 
1289   delete ops;
1290 
1291   return;
1292 }
1293