1 /** @file
2 
3   Implementation of Parent Proxy routing
4 
5   @section license License
6 
7   Licensed to the Apache Software Foundation (ASF) under one
8   or more contributor license agreements.  See the NOTICE file
9   distributed with this work for additional information
10   regarding copyright ownership.  The ASF licenses this file
11   to you under the Apache License, Version 2.0 (the
12   "License"); you may not use this file except in compliance
13   with the License.  You may obtain a copy of the License at
14 
15       http://www.apache.org/licenses/LICENSE-2.0
16 
17   Unless required by applicable law or agreed to in writing, software
18   distributed under the License is distributed on an "AS IS" BASIS,
19   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
20   See the License for the specific language governing permissions and
21   limitations under the License.
22  */
23 
24 #include "ParentSelection.h"
25 
26 void
markParentDown(ParentResult * result,unsigned int fail_threshold,unsigned int retry_time)27 ParentSelectionStrategy::markParentDown(ParentResult *result, unsigned int fail_threshold, unsigned int retry_time)
28 {
29   time_t now;
30   pRecord *pRec, *parents = result->rec->selection_strategy->getParents(result);
31   int new_fail_count = 0;
32 
33   //  Make sure that we are being called back with with a
34   //   result structure with a parent
35   ink_assert(result->result == PARENT_SPECIFIED);
36   if (result->result != PARENT_SPECIFIED) {
37     return;
38   }
39   // If we were set through the API we currently have not failover
40   //   so just return fail
41   if (result->is_api_result()) {
42     return;
43   }
44 
45   ink_assert((result->last_parent) < numParents(result));
46   pRec = (parents + result->last_parent);
47 
48   // If the parent has already been marked down, just increment
49   //   the failure count.  If this is the first mark down on a
50   //   parent we need to both set the failure time and set
51   //   count to one.  It's possible for the count and time get out
52   //   sync due there being no locks.  Therefore the code should
53   //   handle this condition.  If this was the result of a retry, we
54   //   must update move the failedAt timestamp to now so that we continue
55   //   negative cache the parent
56   if (pRec->failedAt.load() == 0 || result->retry == true) {
57     // Reread the current time.  We want this to be accurate since
58     //   it relates to how long the parent has been down.
59     now = time(nullptr);
60 
61     // Mark the parent failure time.
62     pRec->failedAt = now;
63 
64     // If this is clean mark down and not a failed retry, we
65     //   must set the count to reflect this
66     if (result->retry == false) {
67       new_fail_count = pRec->failCount = 1;
68     } else {
69       // this was a retry that failed, decrement the retriers count
70       if ((pRec->retriers--) < 0) {
71         pRec->retriers = 0;
72       }
73     }
74 
75     Note("Parent %s marked as down %s:%d", (result->retry) ? "retry" : "initially", pRec->hostname, pRec->port);
76 
77   } else {
78     int old_count = 0;
79     now           = time(nullptr);
80 
81     // if the last failure was outside the retry window, set the failcount to 1
82     // and failedAt to now.
83     if ((pRec->failedAt.load() + retry_time) < now) {
84       // coverity[check_return]
85       pRec->failCount = 1;
86       pRec->failedAt  = now;
87     } else {
88       old_count = pRec->failCount.fetch_add(1, std::memory_order_relaxed);
89     }
90 
91     Debug("parent_select", "Parent fail count increased to %d for %s:%d", old_count + 1, pRec->hostname, pRec->port);
92     new_fail_count = old_count + 1;
93   }
94 
95   if (new_fail_count > 0 && new_fail_count >= static_cast<int>(fail_threshold)) {
96     Note("Failure threshold met failcount:%d >= threshold:%d, http parent proxy %s:%d marked down", new_fail_count, fail_threshold,
97          pRec->hostname, pRec->port);
98     pRec->available = false;
99     Debug("parent_select", "Parent %s:%d marked unavailable, pRec->available=%d", pRec->hostname, pRec->port,
100           pRec->available.load());
101   }
102 }
103 
104 void
markParentUp(ParentResult * result)105 ParentSelectionStrategy::markParentUp(ParentResult *result)
106 {
107   pRecord *pRec, *parents = result->rec->selection_strategy->getParents(result);
108   int num_parents = result->rec->selection_strategy->numParents(result);
109 
110   //  Make sure that we are being called back with with a
111   //   result structure with a parent that is being retried
112   ink_release_assert(result->retry == true);
113   ink_assert(result->result == PARENT_SPECIFIED);
114   if (result->result != PARENT_SPECIFIED) {
115     return;
116   }
117   // If we were set through the API we currently have not failover
118   //   so just return fail
119   if (result->is_api_result()) {
120     ink_assert(0);
121     return;
122   }
123 
124   ink_assert((int)(result->last_parent) < num_parents);
125   pRec            = parents + result->last_parent;
126   pRec->available = true;
127 
128   pRec->failedAt = static_cast<time_t>(0);
129   int old_count  = pRec->failCount.exchange(0, std::memory_order_relaxed);
130   // a retry succeeded, just reset retriers
131   pRec->retriers = 0;
132 
133   if (old_count > 0) {
134     Note("http parent proxy %s:%d restored", pRec->hostname, pRec->port);
135   }
136 }
137