1 /*
2 Licensed to the Apache Software Foundation (ASF) under one
3 or more contributor license agreements. See the NOTICE file
4 distributed with this work for additional information
5 regarding copyright ownership. The ASF licenses this file
6 to you under the Apache License, Version 2.0 (the
7 "License"); you may not use this file except in compliance
8 with the License. You may obtain a copy of the License at
9
10 http://www.apache.org/licenses/LICENSE-2.0
11
12 Unless required by applicable law or agreed to in writing, software
13 distributed under the License is distributed on an "AS IS" BASIS,
14 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 See the License for the specific language governing permissions and
16 limitations under the License.
17 */
18 ////////////////////////////////////////////////////////////////////////////////
19 // cookie_remap: ATS plugin to do (simple) cookie based remap rules
20 // To use this plugin, configure a remap.config rule like
21 // map http://foo.com http://bar.com @plugin=.../libexec/cookie_remap.so
22 // @pparam=maps.reg
23
24 #include "cookiejar.h"
25 #include <ts/ts.h>
26
27 #include <pcre.h>
28 #include <ts/remap.h>
29 #include <yaml-cpp/yaml.h>
30
31 #include <string>
32 #include <vector>
33 #include <string_view>
34 #include <cstddef>
35 #include "hash.h"
36
// FMT_SV(sv) expands a string_view-like object into the two printf-style
// arguments (length as int, then data pointer) consumed by a '*'-style string
// conversion. Any previous definition of the macro is discarded first.
#undef FMT_SV
#define FMT_SV(SV) static_cast<int>((SV).size()), (SV).data()

// NOTE(review): file-scope using-directive; code further down spells
// `string` without the std:: qualifier and relies on it.
using namespace std;

// Debug tag passed to TSDebug(), also used as a prefix in some TSError() messages.
#define MY_NAME "cookie_remap"

// Size of the offset vector handed to pcre_exec(): ten substrings (the whole
// match plus $1 - $9) at three ints each.
const int OVECCOUNT = 30; // We support $1 - $9 only, and this needs to be 3x that
45
46 class UrlComponents
47 {
48 public:
UrlComponents(TSRemapRequestInfo * rri,TSHttpTxn txn)49 UrlComponents(TSRemapRequestInfo *rri, TSHttpTxn txn) : _rri(rri), _txn(txn) {}
50
51 std::string const &
path(bool pre_remap)52 path(bool pre_remap)
53 {
54 if (_d[pre_remap].path_str.empty()) {
55 auto urlh = _get_url(pre_remap);
56 // based on RFC2396, matrix params are part of path segments so
57 // we will just
58 // append them to the path
59 _d[pre_remap].path_str = _get_url_comp(urlh, TSUrlPathGet);
60 auto matrix = _get_url_comp(urlh, TSUrlHttpParamsGet);
61 if (!matrix.empty()) {
62 _d[pre_remap].path_str.append(";", 1).append(matrix);
63 }
64 }
65 return _d[pre_remap].path_str;
66 }
67
68 std::string_view
query(bool pre_remap)69 query(bool pre_remap)
70 {
71 if (_d[pre_remap].query.empty()) {
72 _d[pre_remap].query = _get_url_comp(_get_url(pre_remap), TSUrlHttpQueryGet);
73 }
74 return _d[pre_remap].query;
75 }
76
77 std::string_view
from_path()78 from_path()
79 {
80 if (_from_path.empty()) {
81 _UrlHandle urlh{_rri->requestBufp, _rri->mapFromUrl};
82 _from_path = _get_url_comp(urlh, TSUrlPathGet);
83 }
84 return _from_path;
85 }
86
87 std::string_view
url(bool pre_remap)88 url(bool pre_remap)
89 {
90 if (_d[pre_remap].url.empty()) {
91 auto urlh = _get_url(pre_remap);
92 int length;
93 auto data = TSUrlStringGet(urlh.bufp, urlh.urlp, &length);
94 _d[pre_remap].url = std::string_view(data, length);
95 }
96 return _d[pre_remap].url;
97 }
98
99 // No copying/moving.
100 //
101 UrlComponents(UrlComponents const &) = delete;
102 UrlComponents &operator=(UrlComponents const &) = delete;
103
~UrlComponents()104 ~UrlComponents()
105 {
106 // Not calling TSHandleMLocRelease() for the URL TSMLoc pointers because it doesn't do anything.
107
108 if (_d[0].url.data() != nullptr) {
109 TSfree(const_cast<char *>(_d[0].url.data()));
110 }
111 if (_d[1].url.data() != nullptr) {
112 TSfree(const_cast<char *>(_d[1].url.data()));
113 }
114 }
115
116 private:
117 TSRemapRequestInfo *_rri;
118 TSHttpTxn _txn;
119
120 struct _UrlHandle {
121 TSMBuffer bufp = nullptr;
122 TSMLoc urlp;
123 };
124
125 // Buffer any data that's likely to be used more than once.
126
127 struct _Data {
128 _UrlHandle urlh;
129 std::string path_str;
130 std::string_view url;
131 std::string_view query;
132 };
133
134 // index 0 - remapped
135 // index 1 - pre-remap
136 //
137 _Data _d[2];
138
139 std::string_view _from_path;
140
141 _UrlHandle
_get_url(bool pre_remap)142 _get_url(bool pre_remap)
143 {
144 _UrlHandle h = _d[pre_remap].urlh;
145
146 if (!h.bufp) {
147 if (pre_remap) {
148 if (TSHttpTxnPristineUrlGet(_txn, &h.bufp, &h.urlp) != TS_SUCCESS) {
149 TSError("%s: Plugin is unable to get pristine url", MY_NAME);
150 return _UrlHandle();
151 }
152 } else {
153 h.bufp = _rri->requestBufp;
154 h.urlp = _rri->requestUrl;
155 }
156 _d[pre_remap].urlh = h;
157 }
158 return h;
159 }
160
161 static std::string_view
_get_url_comp(_UrlHandle urlh,char const * (* comp_func)(TSMBuffer,TSMLoc,int *))162 _get_url_comp(_UrlHandle urlh, char const *(*comp_func)(TSMBuffer, TSMLoc, int *))
163 {
164 int length;
165 auto data = comp_func(urlh.bufp, urlh.urlp, &length);
166 return std::string_view(data, length);
167 }
168 };
169
// Kind of test a sub-operation performs. UNKNOWN is the value a subop starts
// with; the others are selected by the "operation" key ("exists",
// "not exists", "regex", "string", "bucket") or implicitly by the "match",
// "regex" and "bucket"/"hash" keys.
enum operation_type { UNKNOWN = -1, EXISTS = 1, NOTEXISTS, REGEXP, STRING, BUCKET };

// What a sub-operation's test is applied to. UNKNOWN_TARGET is the value a
// subop starts with; subop::setTarget() maps "uri" to URI, "puri" to
// PRE_REMAP_URI and anything else to COOKIE.
enum target_type {
  COOKIE = 1,
  URI, // URI = PATH + QUERY
  PRE_REMAP_URI,
  UNKNOWN_TARGET
};
178
/***************************************************************************************
 Byte to hex converter

 Writes the two upper-case hexadecimal digits of the low-order byte of _num
 into hdigits. Works for any integral type; a negative value is encoded by
 its two's-complement byte (so a signed char holding 0xE9 yields "E9"), not
 by its absolute value.

 Arguments:
   * _num    is the number to convert to hex
   * hdigits two-byte character array, will be populated with the hex digits
             (most significant digit first); it is NOT NUL-terminated

***************************************************************************************/

template <class type> // template usage to allow multiple types of parameters
void
dec_to_hex(type _num, char *hdigits)
{
  static constexpr char hlookup[] = "0123456789ABCDEF"; // digit value -> character

  // Reduce to a single unsigned byte first; shifting or negating a signed
  // value would produce the wrong digits for bytes >= 0x80.
  const unsigned int byte = static_cast<unsigned int>(_num) & 0xffu;

  hdigits[0] = hlookup[byte >> 4];
  hdigits[1] = hlookup[byte & 0x0fu];
}

// Percent-encodes str in place: every byte that is not an ASCII letter or
// digit is replaced by "%XX" (upper-case hex). A string that contains only
// alphanumeric characters is left untouched.
void
urlencode(std::string &str)
{
  // isalnum() requires a value representable as unsigned char, so the cast
  // is needed for bytes >= 0x80 on platforms where char is signed.
  const auto needs_escape = [](char c) { return !isalnum(static_cast<unsigned char>(c)); };

  std::string::size_type escapes = 0;
  for (const char c : str) {
    if (needs_escape(c)) {
      ++escapes;
    }
  }
  if (escapes == 0) {
    // No changes needed.
    return;
  }

  std::string encoded;
  encoded.reserve(str.size() + 2 * escapes); // each escaped byte grows from 1 to 3 chars
  for (const char c : str) {
    if (needs_escape(c)) {
      char hex[2];
      dec_to_hex(static_cast<unsigned char>(c), hex);
      encoded += '%';
      encoded.append(hex, sizeof(hex));
    } else {
      encoded += c;
    }
  }
  str.swap(encoded);
}
243
244 //----------------------------------------------------------------------------
245 class subop
246 {
247 public:
subop()248 subop()
249 : cookie(""),
250 operation(""),
251
252 str_match(""),
253
254 bucket("")
255
256 {
257 TSDebug(MY_NAME, "subop constructor called");
258 }
259
~subop()260 ~subop()
261 {
262 TSDebug(MY_NAME, "subop destructor called");
263 if (regex) {
264 pcre_free(regex);
265 }
266
267 if (regex_extra) {
268 pcre_free(regex_extra);
269 }
270 }
271
272 bool
empty() const273 empty() const
274 {
275 return (cookie == "" && operation == "" && op_type == UNKNOWN);
276 }
277
278 void
setCookieName(const std::string & s)279 setCookieName(const std::string &s)
280 {
281 cookie = s;
282 }
283
284 const std::string &
getCookieName() const285 getCookieName() const
286 {
287 return cookie;
288 }
289
290 const std::string &
getOperation() const291 getOperation() const
292 {
293 return operation;
294 }
295
296 operation_type
getOpType() const297 getOpType() const
298 {
299 return op_type;
300 }
301
302 target_type
getTargetType() const303 getTargetType() const
304 {
305 return target;
306 }
307
308 void
setOperation(const std::string & s)309 setOperation(const std::string &s)
310 {
311 operation = s;
312
313 if (operation == "string") {
314 op_type = STRING;
315 }
316 if (operation == "regex") {
317 op_type = REGEXP;
318 }
319 if (operation == "exists") {
320 op_type = EXISTS;
321 }
322 if (operation == "not exists") {
323 op_type = NOTEXISTS;
324 }
325 if (operation == "bucket") {
326 op_type = BUCKET;
327 }
328 }
329
330 void
setTarget(const std::string & s)331 setTarget(const std::string &s)
332 {
333 if (s == "uri") {
334 target = URI;
335 } else if (s == "puri") {
336 target = PRE_REMAP_URI;
337 } else {
338 target = COOKIE;
339 }
340 }
341
342 void
setStringMatch(const std::string & s)343 setStringMatch(const std::string &s)
344 {
345 op_type = STRING;
346 str_match = s;
347 }
348
349 const std::string &
getStringMatch() const350 getStringMatch() const
351 {
352 return str_match;
353 }
354
355 void
setBucket(const std::string & s)356 setBucket(const std::string &s)
357 {
358 int start_pos = s.find('/');
359
360 op_type = BUCKET;
361 bucket = s;
362 how_many = atoi(bucket.substr(0, start_pos).c_str());
363 out_of = atoi(bucket.substr(start_pos + 1).c_str());
364 }
365
366 int
bucketGetTaking() const367 bucketGetTaking() const
368 {
369 return how_many;
370 }
371
372 int
bucketOutOf() const373 bucketOutOf() const
374 {
375 return out_of;
376 }
377
378 bool
setRegexMatch(const std::string & s)379 setRegexMatch(const std::string &s)
380 {
381 const char *error_comp = nullptr;
382 const char *error_study = nullptr;
383 int erroffset;
384
385 op_type = REGEXP;
386 regex_string = s;
387 regex = pcre_compile(regex_string.c_str(), 0, &error_comp, &erroffset, nullptr);
388
389 if (regex == nullptr) {
390 return false;
391 }
392 regex_extra = pcre_study(regex, 0, &error_study);
393 if ((regex_extra == nullptr) && (error_study != nullptr)) {
394 return false;
395 }
396
397 if (pcre_fullinfo(regex, regex_extra, PCRE_INFO_CAPTURECOUNT, ®ex_ccount) != 0) {
398 return false;
399 }
400
401 return true;
402 }
403
404 const std::string &
getRegexString() const405 getRegexString() const
406 {
407 return regex_string;
408 }
409
410 int
getRegexCcount() const411 getRegexCcount() const
412 {
413 return regex_ccount;
414 }
415
416 int
regexMatch(const char * str,int len,int ovector[]) const417 regexMatch(const char *str, int len, int ovector[]) const
418 {
419 return pcre_exec(regex, // the compiled pattern
420 regex_extra, // Extra data from study (maybe)
421 str, // the subject std::string
422 len, // the length of the subject
423 0, // start at offset 0 in the subject
424 0, // default options
425 ovector, // output vector for substring information
426 OVECCOUNT); // number of elements in the output vector
427 };
428
429 void
printSubOp() const430 printSubOp() const
431 {
432 TSDebug(MY_NAME, "\t+++subop+++");
433 TSDebug(MY_NAME, "\t\tcookie: %s", cookie.c_str());
434 TSDebug(MY_NAME, "\t\toperation: %s", operation.c_str());
435 if (str_match.size() > 0) {
436 TSDebug(MY_NAME, "\t\tmatching: %s", str_match.c_str());
437 }
438 if (regex) {
439 TSDebug(MY_NAME, "\t\tregex: %s", regex_string.c_str());
440 }
441 if (bucket.size() > 0) {
442 TSDebug(MY_NAME, "\t\tbucket: %s", bucket.c_str());
443 TSDebug(MY_NAME, "\t\ttaking: %d", how_many);
444 TSDebug(MY_NAME, "\t\tout of: %d", out_of);
445 }
446 }
447
448 private:
449 std::string cookie;
450 std::string operation;
451 enum operation_type op_type = UNKNOWN;
452 enum target_type target = UNKNOWN_TARGET;
453
454 std::string str_match;
455
456 pcre *regex = nullptr;
457 pcre_extra *regex_extra = nullptr;
458 std::string regex_string;
459 int regex_ccount = 0;
460
461 std::string bucket;
462 unsigned int how_many = 0;
463 unsigned int out_of = 0;
464 };
465
466 using SubOpQueue = std::vector<const subop *>;
467
468 //----------------------------------------------------------------------------
469 class op
470 {
471 public:
op()472 op() { TSDebug(MY_NAME, "op constructor called"); }
473
~op()474 ~op()
475 {
476 TSDebug(MY_NAME, "op destructor called");
477 for (auto &subop : subops) {
478 delete subop;
479 }
480 }
481
482 void
addSubOp(const subop * s)483 addSubOp(const subop *s)
484 {
485 subops.push_back(s);
486 }
487
488 void
setSendTo(const std::string & s)489 setSendTo(const std::string &s)
490 {
491 sendto = s;
492 }
493
494 const std::string &
getSendTo() const495 getSendTo() const
496 {
497 return sendto;
498 }
499
500 void
setElseSendTo(const std::string & s)501 setElseSendTo(const std::string &s)
502 {
503 else_sendto = s;
504 }
505
506 void
setStatus(const std::string & s)507 setStatus(const std::string &s)
508 {
509 if (else_sendto.size() > 0) {
510 else_status = static_cast<TSHttpStatus>(atoi(s.c_str()));
511 } else {
512 status = static_cast<TSHttpStatus>(atoi(s.c_str()));
513 }
514 }
515
516 void
setElseStatus(const std::string & s)517 setElseStatus(const std::string &s)
518 {
519 else_status = static_cast<TSHttpStatus>(atoi(s.c_str()));
520 }
521
522 void
printOp() const523 printOp() const
524 {
525 TSDebug(MY_NAME, "++++operation++++");
526 TSDebug(MY_NAME, "sending to: %s", sendto.c_str());
527 TSDebug(MY_NAME, "if these operations match: ");
528
529 for (auto subop : subops) {
530 subop->printSubOp();
531 }
532 if (else_sendto.size() > 0) {
533 TSDebug(MY_NAME, "else: %s", else_sendto.c_str());
534 }
535 }
536
537 bool
process(CookieJar & jar,std::string & dest,TSHttpStatus & retstat,TSRemapRequestInfo * rri,UrlComponents & req_url) const538 process(CookieJar &jar, std::string &dest, TSHttpStatus &retstat, TSRemapRequestInfo *rri, UrlComponents &req_url) const
539 {
540 if (sendto == "") {
541 return false; // guessing every operation must have a
542 // sendto url???
543 }
544
545 int retval = 1;
546 bool cookie_found = false;
547 std::string c;
548 std::string cookie_data;
549 std::string object_name; // name of the thing being processed,
550 // cookie, or
551 // request url
552
553 TSDebug(MY_NAME, "starting to process a new operation");
554
555 for (auto subop : subops) {
556 // subop* s = *it;
557 int subop_type = subop->getOpType();
558 target_type target = subop->getTargetType();
559
560 c = subop->getCookieName();
561 if (c.length()) {
562 TSDebug(MY_NAME, "processing cookie: %s", c.c_str());
563
564 size_t period_pos = c.find_first_of('.');
565
566 if (period_pos == std::string::npos) { // not a sublevel
567 // cookie name
568 TSDebug(MY_NAME, "processing non-sublevel cookie");
569
570 cookie_found = jar.get_full(c, cookie_data);
571 TSDebug(MY_NAME, "full cookie: %s", cookie_data.c_str());
572 object_name = c;
573 } else { // is in the format FOO.BAR
574 std::string cookie_main = c.substr(0, period_pos);
575 std::string cookie_subkey = c.substr(period_pos + 1);
576
577 TSDebug(MY_NAME, "processing sublevel cookie");
578 TSDebug(MY_NAME, "c key: %s", cookie_main.c_str());
579 TSDebug(MY_NAME, "c subkey: %s", cookie_subkey.c_str());
580
581 cookie_found = jar.get_part(cookie_main, cookie_subkey, cookie_data);
582 object_name = cookie_main + " . " + cookie_subkey;
583 }
584 // invariant: cookie name is in object_name and
585 // cookie data (if any) is
586 // in cookie_data
587
588 if (cookie_found == false) { // cookie name or sub-key not found
589 // inside cookies
590 if (subop_type == NOTEXISTS) {
591 TSDebug(MY_NAME,
592 "cookie %s was not "
593 "found (and we wanted "
594 "that)",
595 object_name.c_str());
596 continue; // we can short
597 // circuit more
598 // testing
599 }
600 TSDebug(MY_NAME, "cookie %s was not found", object_name.c_str());
601 retval &= 0;
602 break;
603 } else {
604 // cookie exists
605 if (subop_type == NOTEXISTS) { // we found the cookie
606 // but are asking
607 // for non existence
608 TSDebug(MY_NAME,
609 "cookie %s was found, "
610 "but operation "
611 "requires "
612 "non-existence",
613 object_name.c_str());
614 retval &= 0;
615 break;
616 }
617
618 if (subop_type == EXISTS) {
619 TSDebug(MY_NAME, "cookie %s was found", object_name.c_str()); // got what
620 // we were
621 // looking
622 // for
623 continue; // we can short
624 // circuit more
625 // testing
626 }
627 } // handled EXISTS / NOTEXISTS subops
628
629 TSDebug(MY_NAME, "processing cookie data: \"%s\"", cookie_data.c_str());
630 } else if (target != PRE_REMAP_URI) {
631 target = URI;
632 }
633
634 // INVARIANT: we now have the data from the cookie (if
635 // any) inside
636 // cookie_data and we are here because we need
637 // to continue processing this suboperation in some way
638
639 if (!rri) { // too dangerous to continue without the
640 // rri; hopefully that
641 // never happens
642 TSDebug(MY_NAME, "request info structure is "
643 "empty; can't continue "
644 "processing this subop");
645 retval &= 0;
646 break;
647 }
648
649 // If the user has specified a cookie in his
650 // suboperation, use the cookie
651 // data for matching;
652 // otherwise, use the request uri (path + query)
653 std::string request_uri; // only set the value if we
654 // need it; we might
655 // match the cookie data instead
656 bool use_url = (target == URI) || (target == PRE_REMAP_URI);
657 const std::string &string_to_match(use_url ? request_uri : cookie_data);
658 if (use_url) {
659 request_uri = req_url.path(target == PRE_REMAP_URI);
660 TSDebug(MY_NAME, "process req_url.path = %s", request_uri.c_str());
661 if (request_uri.length() && request_uri[0] != '/') {
662 request_uri.insert(0, 1, '/');
663 }
664 auto query = req_url.query(target == PRE_REMAP_URI);
665 if (query.size() > 0) {
666 request_uri += '?';
667 request_uri += query;
668 }
669 object_name = "request uri";
670 }
671
672 // invariant: we've decided at this point what string
673 // we'll match, if we
674 // do matching
675
676 // OPERATION::string matching
677 if (subop_type == STRING) {
678 if (string_to_match == subop->getStringMatch()) {
679 TSDebug(MY_NAME, "string match succeeded");
680 continue;
681 } else {
682 TSDebug(MY_NAME, "string match failed");
683 retval &= 0;
684 break;
685 }
686 }
687
688 // OPERATION::regex matching
689 if (subop_type == REGEXP) {
690 int ovector[OVECCOUNT];
691 int ret = subop->regexMatch(string_to_match.c_str(), string_to_match.length(), ovector);
692
693 if (ret >= 0) {
694 std::string::size_type pos = sendto.find('$');
695 std::string::size_type ppos = 0;
696
697 dest.erase(); // we only reset dest if
698 // there is a successful
699 // regex
700 // match
701 dest.reserve(sendto.size() * 2); // Wild guess at this
702 // time ... is
703 // sucks we can't precalculate this
704 // like regex_remap.
705
706 TSDebug(MY_NAME, "found %d matches", ret);
707 TSDebug(MY_NAME,
708 "successful regex "
709 "match of: %s with %s "
710 "rewriting string: %s",
711 string_to_match.c_str(), subop->getRegexString().c_str(), sendto.c_str());
712
713 // replace the $(1-9) in the sendto url
714 // as necessary
715 const size_t LAST_IDX_TO_SEARCH(sendto.length() - 2); // otherwise the below loop can
716 // access "sendto" out of range
717 while (pos <= LAST_IDX_TO_SEARCH) {
718 if (isdigit(sendto[pos + 1])) {
719 int ix = sendto[pos + 1] - '0';
720
721 if (ix <= subop->getRegexCcount()) { // Just skip an illegal regex group
722 dest += sendto.substr(ppos, pos - ppos);
723 dest += string_to_match.substr(ovector[ix * 2], ovector[ix * 2 + 1] - ovector[ix * 2]);
724 ppos = pos + 2;
725 } else {
726 TSDebug(MY_NAME,
727 "bad "
728 "rewriting "
729 "string, "
730 "for group "
731 "%d: %s",
732 ix, sendto.c_str());
733 }
734 }
735 pos = sendto.find('$', pos + 1);
736 }
737 dest += sendto.substr(ppos);
738 continue; // next subop, please
739 } else {
740 TSDebug(MY_NAME,
741 "could not match "
742 "regular expression "
743 "%s to %s",
744 subop->getRegexString().c_str(), string_to_match.c_str());
745 retval &= 0;
746 break;
747 }
748 }
749
750 // OPERATION::bucket ranges
751 if (subop_type == BUCKET) {
752 unsigned int taking = subop->bucketGetTaking();
753 unsigned int out_of = subop->bucketOutOf();
754
755 uint32_t hash;
756
757 if (taking == 0 || out_of == 0) {
758 TSDebug(MY_NAME,
759 "taking %d out of %d "
760 "makes no sense?!",
761 taking, out_of);
762 retval &= 0;
763 break;
764 }
765
766 hash = hash_fnv32_buckets(cookie_data.c_str(), cookie_data.size(), out_of);
767 TSDebug(MY_NAME,
768 "we hashed this to bucket: %u "
769 "taking: %u out of: %u",
770 hash, taking, out_of);
771
772 if (hash < taking) {
773 TSDebug(MY_NAME, "we hashed in the range, yay!");
774 continue; // we hashed in the range
775 } else {
776 TSDebug(MY_NAME, "we didn't hash in the "
777 "range requested, so "
778 "sad");
779 retval &= 0;
780 break;
781 }
782 }
783 }
784
785 if (retval == 1) {
786 if (dest.size() == 0) { // Unless already set by one of
787 // the operators (e.g. regex)
788 dest = sendto;
789 }
790 if (status > 0) {
791 retstat = status;
792 }
793 return true;
794 } else if (else_sendto.size() > 0 && retval == 0) {
795 dest = else_sendto;
796 if (else_status > 0) {
797 retstat = else_status;
798 }
799 return true;
800 } else {
801 dest = "";
802 return false;
803 }
804 }
805
806 private:
807 SubOpQueue subops{};
808 std::string sendto{""};
809 std::string else_sendto{""};
810 TSHttpStatus status = TS_HTTP_STATUS_NONE;
811 TSHttpStatus else_status = TS_HTTP_STATUS_NONE;
812 };
813
814 using StringPair = std::pair<std::string, std::string>;
815 using OpMap = std::vector<StringPair>;
816
817 //----------------------------------------------------------------------------
818 static bool
build_op(op & o,OpMap const & q)819 build_op(op &o, OpMap const &q)
820 {
821 subop *sub = new subop();
822
823 // loop through the array of key->value pairs
824 for (auto const &pr : q) {
825 std::string const &key = pr.first;
826 std::string const &val = pr.second;
827
828 TSDebug(MY_NAME, "build_op: key=%s val=%s", key.c_str(), val.c_str());
829
830 if (key == "cookie") {
831 if (!sub->empty()) {
832 TSDebug(MY_NAME, "ERROR: you need to define a connector");
833 goto error;
834 }
835 sub->setCookieName(val);
836 }
837
838 if (key == "sendto" || key == "url") {
839 o.setSendTo(val);
840 }
841
842 if (key == "else") {
843 o.setElseSendTo(val);
844 }
845
846 if (key == "status") {
847 o.setStatus(val);
848 }
849
850 if (key == "operation") {
851 sub->setOperation(val);
852 }
853
854 if (key == "target") {
855 sub->setTarget(val);
856 }
857
858 if (key == "match") {
859 sub->setStringMatch(val);
860 }
861
862 if (key == "regex") {
863 bool ret = sub->setRegexMatch(val);
864
865 if (!ret) {
866 goto error;
867 }
868 }
869
870 if (key == "bucket" || key == "hash") {
871 sub->setBucket(val);
872 }
873
874 if (key == "connector") {
875 o.addSubOp(sub);
876 sub = new subop();
877 }
878 }
879
880 o.addSubOp(sub);
881 return true;
882
883 error:
884 TSDebug(MY_NAME, "error building operation");
885 return false;
886 }
887
888 using OpsQueue = std::vector<const op *>;
889
890 //----------------------------------------------------------------------------
891 // init
892 TSReturnCode
TSRemapInit(TSRemapInterface * api_info,char * errbuf,int errbuf_size)893 TSRemapInit(TSRemapInterface *api_info, char *errbuf, int errbuf_size)
894 {
895 return TS_SUCCESS;
896 }
897
898 //----------------------------------------------------------------------------
899 // initialization of structures from config parameters
900 TSReturnCode
TSRemapNewInstance(int argc,char * argv[],void ** ih,char * errbuf,int errbuf_size)901 TSRemapNewInstance(int argc, char *argv[], void **ih, char *errbuf, int errbuf_size)
902 {
903 if (argc != 3) {
904 TSError("arguments not equal to 3: %d", argc);
905 TSDebug(MY_NAME, "arguments not equal to 3: %d", argc);
906 return TS_ERROR;
907 }
908
909 std::string filename(argv[2]);
910 try {
911 YAML::Node config = YAML::LoadFile(filename);
912
913 std::unique_ptr<OpsQueue> ops(new OpsQueue);
914 OpMap op_data;
915
916 for (YAML::const_iterator it = config.begin(); it != config.end(); ++it) {
917 const string &name = it->first.as<std::string>();
918 YAML::NodeType::value type = it->second.Type();
919
920 if (name != "op" || type != YAML::NodeType::Map) {
921 const string reason = "Top level nodes must be named op and be of type map";
922 TSError("Invalid YAML Configuration format for cookie_remap: %s, reason: %s", filename.c_str(), reason.c_str());
923 return TS_ERROR;
924 }
925
926 for (YAML::const_iterator it2 = it->second.begin(); it2 != it->second.end(); ++it2) {
927 const YAML::Node first = it2->first;
928 const YAML::Node second = it2->second;
929
930 if (second.IsScalar() == false) {
931 const string reason = "All op nodes must be of type scalar";
932 TSError("Invalid YAML Configuration format for cookie_remap: %s, reason: %s", filename.c_str(), reason.c_str());
933 return TS_ERROR;
934 }
935
936 const string &key = first.as<std::string>();
937 const string &value = second.as<std::string>();
938 op_data.emplace_back(key, value);
939 }
940
941 if (op_data.size()) {
942 op *o = new op();
943 if (!build_op(*o, op_data)) {
944 delete o;
945
946 TSError("building operation, check configuration file: %s", filename.c_str());
947 return TS_ERROR;
948 } else {
949 ops->push_back(o);
950 }
951 o->printOp();
952 op_data.clear();
953 }
954 }
955
956 TSDebug(MY_NAME, "# of ops: %d", static_cast<int>(ops->size()));
957 *ih = static_cast<void *>(ops.release());
958 } catch (const YAML::Exception &e) {
959 TSError("YAML::Exception %s when parsing YAML config file %s for cookie_remap", e.what(), filename.c_str());
960 return TS_ERROR;
961 }
962
963 return TS_SUCCESS;
964 }
965
966 namespace
967 {
968 std::string
unmatched_path(UrlComponents & req_url,bool pre_remap)969 unmatched_path(UrlComponents &req_url, bool pre_remap)
970 {
971 std::string path = req_url.path(pre_remap);
972 std::string_view from_path = req_url.from_path();
973
974 std::size_t pos = path.find(from_path);
975 if (pos != std::string::npos) {
976 path.erase(pos, from_path.size());
977 }
978 TSDebug(MY_NAME, "from_path: %*s", FMT_SV(from_path));
979 TSDebug(MY_NAME, "%s: %s", pre_remap ? "unmatched_ppath" : "unmatched_path", path.c_str());
980
981 return path;
982 }
983
984 int const sub_req_url_id = 0;
985 int const sub_req_purl_id = -1;
986 int const sub_path_id = -2;
987 int const sub_ppath_id = -3;
988 int const sub_unmatched_path_id = -4;
989 int const sub_unmatched_ppath_id = -5;
990 int const sub_url_encode_id = -6;
991
992 struct CompNext {
993 std::string_view const comp;
994 int const *const next;
995
CompNext__anon854eb6790111::CompNext996 CompNext(std::string_view p, int const *n) : comp(p), next(n) {}
997 };
998
999 struct {
1000 int count = 2;
1001 CompNext o1{"ath", &sub_unmatched_path_id};
1002 CompNext o2{"path", &sub_unmatched_ppath_id};
1003 } const sub_unmatched;
1004
1005 struct {
1006 int count = 2;
1007 CompNext o1{"ath", &sub_path_id};
1008 CompNext o2{"path", &sub_ppath_id};
1009 } const sub_p;
1010
1011 struct {
1012 int count = 2;
1013 CompNext o1{"url", &sub_req_url_id};
1014 CompNext o2{"purl", &sub_req_purl_id};
1015 } const sub_cr_req;
1016
1017 struct {
1018 int count = 2;
1019 CompNext o1{"req_", &sub_cr_req.count};
1020 CompNext o2{"urlencode(", &sub_url_encode_id};
1021 } const sub_cr;
1022
1023 struct {
1024 int count = 3;
1025 CompNext o1{"cr_", &sub_cr.count};
1026 CompNext o2{"p", &sub_p.count};
1027 CompNext o3{"unmatched_p", &sub_unmatched.count};
1028 } const sub;
1029
1030 int
sub_lookup(char const * targ,int targ_len)1031 sub_lookup(char const *targ, int targ_len)
1032 {
1033 int count = sub.count;
1034 auto opt = &sub.o1;
1035 for (;;) {
1036 while ((targ_len < static_cast<int>(opt->comp.size())) || (std::string_view(targ, opt->comp.size()) != opt->comp)) {
1037 if (!--count) {
1038 return 1; // Failed lookup, return some positive number.
1039 }
1040 ++opt;
1041 }
1042 count = *opt->next;
1043 if (count <= 0) {
1044 break;
1045 }
1046 targ += opt->comp.size();
1047 targ_len -= opt->comp.size();
1048 opt = reinterpret_cast<CompNext const *>(reinterpret_cast<char const *>(opt->next) + offsetof(decltype(sub), o1));
1049 }
1050 return count;
1051 }
1052
1053 } // end anonymous namespace
1054
1055 //----------------------------------------------------------------------------
1056 // called whenever we need to perform substitutions on a string; used to replace
1057 // things like
1058 // $path, $ppath, $unmatched_path, $unmatched_ppath, $cr_req_url, $cr_req_purl, and $cr_url_encode
1059 void
cr_substitutions(std::string & obj,UrlComponents & req_url)1060 cr_substitutions(std::string &obj, UrlComponents &req_url)
1061 {
1062 {
1063 auto path = req_url.path(false);
1064 TSDebug(MY_NAME, "x req_url.path: %*s %d", FMT_SV(path), static_cast<int>(path.size()));
1065 auto url = req_url.url(false);
1066 TSDebug(MY_NAME, "x req_url.url: %*s %d", FMT_SV(url), static_cast<int>(url.size()));
1067 }
1068
1069 auto npos = std::string::npos;
1070 std::string tmp;
1071 std::size_t pos = 0;
1072 for (;;) {
1073 pos = obj.find('$', pos);
1074 if (npos == pos) {
1075 break;
1076 }
1077 std::string_view variable, value;
1078 switch (sub_lookup(obj.data() + pos + 1, static_cast<int>(obj.size()) - pos - 1)) {
1079 case sub_req_url_id: {
1080 variable = "$cr_req_url";
1081 value = req_url.url(false);
1082 } break;
1083
1084 case sub_req_purl_id: {
1085 variable = "$cr_req_purl";
1086 value = req_url.url(true);
1087 } break;
1088
1089 case sub_path_id: {
1090 variable = "$path";
1091 value = req_url.path(false);
1092 } break;
1093
1094 case sub_ppath_id: {
1095 variable = "$ppath";
1096 value = req_url.path(true);
1097 } break;
1098
1099 case sub_unmatched_path_id: {
1100 variable = "$unmatched_path";
1101 tmp = unmatched_path(req_url, false);
1102 value = tmp;
1103 } break;
1104
1105 case sub_unmatched_ppath_id: {
1106 variable = "$unmatched_ppath";
1107 tmp = unmatched_path(req_url, true);
1108 value = tmp;
1109 } break;
1110
1111 case sub_url_encode_id: {
1112 std::size_t bpos = pos + sizeof("cr_urlencode(") - 1;
1113 std::size_t epos = obj.find(')', bpos);
1114 if (npos == epos) {
1115 variable = "$";
1116 value = variable;
1117 } else {
1118 variable = std::string_view(obj.data() + pos, epos + 1 - pos);
1119
1120 tmp = obj.substr(bpos, epos - bpos);
1121 cr_substitutions(tmp, req_url);
1122 urlencode(tmp);
1123 value = tmp;
1124 }
1125 } break;
1126
1127 default: {
1128 variable = "$";
1129 value = variable;
1130
1131 } break;
1132
1133 } // end switch
1134
1135 TSDebug(MY_NAME, "%*s => %*s", FMT_SV(variable), FMT_SV(value));
1136
1137 obj.replace(pos, variable.size(), value);
1138
1139 pos += value.size();
1140
1141 } // end for (;;)
1142 }
1143
1144 //----------------------------------------------------------------------------
1145 // called on each request
1146 // returns 0 on error or failure to match rules, 1 on a match
1147 TSRemapStatus
TSRemapDoRemap(void * ih,TSHttpTxn txnp,TSRemapRequestInfo * rri)1148 TSRemapDoRemap(void *ih, TSHttpTxn txnp, TSRemapRequestInfo *rri)
1149 {
1150 OpsQueue *ops = static_cast<OpsQueue *>(ih);
1151 TSHttpStatus status = TS_HTTP_STATUS_NONE;
1152
1153 UrlComponents req_url{rri, txnp};
1154
1155 if (ops == (OpsQueue *)nullptr) {
1156 TSError("serious error with encountered while attempting to "
1157 "cookie_remap");
1158 TSDebug(MY_NAME, "serious error with encountered while attempting to remap");
1159 return TSREMAP_NO_REMAP;
1160 }
1161
1162 // get any query params..we will append that to the answer (possibly)
1163 std::string client_req_query_params;
1164 auto query = req_url.query(false);
1165 if (!query.empty()) {
1166 client_req_query_params = "?";
1167 client_req_query_params += query;
1168 }
1169 TSDebug(MY_NAME, "Query Parameters: %s", client_req_query_params.c_str());
1170
1171 std::string rewrite_to;
1172 char cookie_str[] = "Cookie";
1173 TSMLoc field = TSMimeHdrFieldFind(rri->requestBufp, rri->requestHdrp, cookie_str, sizeof(cookie_str) - 1);
1174
1175 // cookie header doesn't exist
1176 if (field == nullptr) {
1177 TSDebug(MY_NAME, "no cookie header");
1178 // return TSREMAP_NO_REMAP;
1179 }
1180
1181 const char *cookie = nullptr;
1182 int cookie_len = 0;
1183 if (field != nullptr) {
1184 cookie = TSMimeHdrFieldValueStringGet(rri->requestBufp, rri->requestHdrp, field, -1, &cookie_len);
1185 }
1186 std::string temp_cookie(cookie, cookie_len);
1187 CookieJar jar;
1188 jar.create(temp_cookie);
1189
1190 for (auto &op : *ops) {
1191 TSDebug(MY_NAME, ">>> processing new operation");
1192 if (op->process(jar, rewrite_to, status, rri, req_url)) {
1193 cr_substitutions(rewrite_to, req_url);
1194
1195 size_t pos = 7; // 7 because we want to ignore the // in
1196 // http:// :)
1197 size_t tmp_pos = rewrite_to.find('?', pos); // we don't want to alter the query string
1198 do {
1199 pos = rewrite_to.find("//", pos);
1200 if (pos < tmp_pos) {
1201 rewrite_to.erase(pos, 1); // remove one '/'
1202 }
1203 } while (pos <= rewrite_to.length() && pos < tmp_pos);
1204
1205 // Add Query Parameters if not already present
1206 if (!client_req_query_params.empty() && rewrite_to.find('?') == std::string::npos) {
1207 rewrite_to.append(client_req_query_params);
1208 }
1209
1210 TSDebug(MY_NAME, "rewriting to: %s", rewrite_to.c_str());
1211
1212 // Maybe set the return status
1213 if (status > TS_HTTP_STATUS_NONE) {
1214 TSDebug(MY_NAME, "Setting return status to %d", status);
1215 TSHttpTxnStatusSet(txnp, status);
1216 if ((status == TS_HTTP_STATUS_MOVED_PERMANENTLY) || (status == TS_HTTP_STATUS_MOVED_TEMPORARILY)) {
1217 if (rewrite_to.size() > 8192) {
1218 TSError("Redirect in target "
1219 "URL too long");
1220 TSHttpTxnStatusSet(txnp, TS_HTTP_STATUS_REQUEST_URI_TOO_LONG);
1221 } else {
1222 const char *start = rewrite_to.c_str();
1223 int dest_len = rewrite_to.size();
1224
1225 if (TS_PARSE_ERROR == TSUrlParse(rri->requestBufp, rri->requestUrl, &start, start + dest_len)) {
1226 TSHttpTxnStatusSet(txnp, TS_HTTP_STATUS_INTERNAL_SERVER_ERROR);
1227 TSError("can't parse "
1228 "substituted "
1229 "URL string");
1230 } else {
1231 rri->redirect = 1;
1232 }
1233 }
1234 }
1235 if (field != nullptr) {
1236 TSHandleMLocRelease(rri->requestBufp, rri->requestHdrp, field);
1237 }
1238 if (rri->redirect) {
1239 return TSREMAP_DID_REMAP;
1240 } else {
1241 return TSREMAP_NO_REMAP;
1242 }
1243 }
1244
1245 const char *start = rewrite_to.c_str();
1246
1247 // set the new url
1248 if (TSUrlParse(rri->requestBufp, rri->requestUrl, &start, start + rewrite_to.length()) == TS_PARSE_ERROR) {
1249 TSHttpTxnStatusSet(txnp, TS_HTTP_STATUS_INTERNAL_SERVER_ERROR);
1250 TSError("can't parse substituted URL string");
1251 goto error;
1252 } else {
1253 if (field != nullptr) {
1254 TSHandleMLocRelease(rri->requestBufp, rri->requestHdrp, field);
1255 }
1256 return TSREMAP_DID_REMAP;
1257 }
1258
1259 // Cleanup
1260 error:
1261 if (field != nullptr) {
1262 TSHandleMLocRelease(rri->requestBufp, rri->requestHdrp, field);
1263 }
1264 return TSREMAP_NO_REMAP;
1265 }
1266 }
1267
1268 TSDebug(MY_NAME, "could not execute ANY of the cookie remap operations... "
1269 "falling back to default in remap.config");
1270
1271 if (field != nullptr) {
1272 TSHandleMLocRelease(rri->requestBufp, rri->requestHdrp, field);
1273 }
1274 return TSREMAP_NO_REMAP;
1275 }
1276
1277 //----------------------------------------------------------------------------
1278 // unload
1279 void
TSRemapDeleteInstance(void * ih)1280 TSRemapDeleteInstance(void *ih)
1281 {
1282 OpsQueue *ops = static_cast<OpsQueue *>(ih);
1283
1284 TSDebug(MY_NAME, "deleting loaded operations");
1285 for (auto &op : *ops) {
1286 delete op;
1287 }
1288
1289 delete ops;
1290
1291 return;
1292 }
1293