1 
2 /**
3  *    Copyright (C) 2018-present MongoDB, Inc.
4  *
5  *    This program is free software: you can redistribute it and/or modify
6  *    it under the terms of the Server Side Public License, version 1,
7  *    as published by MongoDB, Inc.
8  *
9  *    This program is distributed in the hope that it will be useful,
10  *    but WITHOUT ANY WARRANTY; without even the implied warranty of
11  *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  *    Server Side Public License for more details.
13  *
14  *    You should have received a copy of the Server Side Public License
15  *    along with this program. If not, see
16  *    <http://www.mongodb.com/licensing/server-side-public-license>.
17  *
18  *    As a special exception, the copyright holders give permission to link the
19  *    code of portions of this program with the OpenSSL library under certain
20  *    conditions as described in each individual source file and distribute
21  *    linked combinations including the program with the OpenSSL library. You
22  *    must comply with the Server Side Public License in all respects for
23  *    all of the code used other than as permitted herein. If you modify file(s)
24  *    with this exception, you may extend this exception to your version of the
25  *    file(s), but you are not obligated to do so. If you do not wish to do so,
26  *    delete this exception statement from your version. If you delete this
27  *    exception statement from all source files in the program, then also delete
28  *    it in the license file.
29  */
30 
31 #pragma once
32 
33 #include <cstdlib>
34 #include <initializer_list>
35 #include <memory>
36 
37 #include "mongo/base/disallow_copying.h"
38 #include "mongo/base/error_codes.h"
39 #include "mongo/executor/task_executor.h"
40 #include "mongo/stdx/condition_variable.h"
41 #include "mongo/stdx/mutex.h"
42 #include "mongo/util/time_support.h"
43 
44 namespace mongo {
45 
46 /**
47  * Schedules a remote command request. On receiving a response from task executor (or remote
48  * server), decides if the response should be forwarded to the "_callback" provided in the
49  * constructor based on the retry policy.
50  *
51  * If the command is successful or has been canceled (either by calling shutdown() or by the
52  * task executor when it shuts down), the response is forwarded immediately to "_callback" and
53  * the scheduler becomes inactive.
54  *
55  * Otherwise, the retry policy (specified at construction) is used to decide if we should
56  * resubmit the remote command request. The retry policy is defined by:
57  *     - maximum number of times to run the remote command;
58  *     - maximum elapsed time of all failed remote command responses (requires SERVER-24067);
59  *     - error code in the most recent response (see RetryPolicy::shouldRetryOnError()).
60  */
61 class RemoteCommandRetryScheduler {
62     MONGO_DISALLOW_COPYING(RemoteCommandRetryScheduler);
63 
64 public:
65     class RetryPolicy;
66 
67     /**
68      * List of not master error codes.
69      */
70     static const std::initializer_list<ErrorCodes::Error> kNotMasterErrors;
71 
72     /**
73      * List of retriable error codes.
74      */
75     static const std::initializer_list<ErrorCodes::Error> kAllRetriableErrors;
76 
77     /**
78      * Generates a retry policy that will send the remote command request to the source at most
79      * once.
80      */
81     static std::unique_ptr<RetryPolicy> makeNoRetryPolicy();
82 
83     /**
84      * Creates a retry policy that will send the remote command request at most "maxAttempts"
85      * times. NOTE(review): the argument is named "retryableErrors", so presumably the scheduler
86      * retries only on these error codes and stops on any other error -- confirm.
87      * (Requires SERVER-24067) The scheduler will also stop retrying if the total elapsed time
88      * of all failed requests exceeds "maxResponseElapsedTotal".
89      */
90     static std::unique_ptr<RetryPolicy> makeRetryPolicy(
91         std::size_t maxAttempts,
92         Milliseconds maxResponseElapsedTotal,
93         const std::initializer_list<ErrorCodes::Error>& retryableErrors);
94 
95     /**
96      * Creates scheduler but does not schedule any remote command request; see startup().
97      */
98     RemoteCommandRetryScheduler(executor::TaskExecutor* executor,
99                                 const executor::RemoteCommandRequest& request,
100                                 const executor::TaskExecutor::RemoteCommandCallbackFn& callback,
101                                 std::unique_ptr<RetryPolicy> retryPolicy);
102 
103     virtual ~RemoteCommandRetryScheduler();
104 
105     /**
106      * Returns true if we have scheduled a remote command and are waiting for the response.
107      */
108     bool isActive() const;
109     bool _isActive_inlock() const;  // presumably isActive() for callers holding "_mutex"; confirm
110 
111     /**
112      * Schedules remote command request.
113      */
114     Status startup();
115 
116     /**
117      * Cancels scheduled remote command requests.
118      * Returns immediately if the scheduler is not active.
119      * It is fine to call this multiple times.
120      */
121     void shutdown();
122 
123     /**
124      * Waits until the scheduler is inactive.
125      * It is fine to call this multiple times.
126      */
127     void join();
128 
129     std::string toString() const;
130 
131 private:
132     /**
133      * Schedules remote command to be run by the executor.
134      * Takes no arguments; the number of requests scheduled so far is presumably tracked in
135      * "_currentAttempt" (0 before startup(), per its initializer) -- TODO confirm.
136      * The executor will invoke _remoteCommandCallback() with the remote command response.
137      * The "_inlock" suffix suggests that the caller must already hold "_mutex" -- TODO confirm.
138      */
139     Status _schedule_inlock();
140 
141     /**
142      * Callback for remote command.
143      */
144     void _remoteCommandCallback(const executor::TaskExecutor::RemoteCommandCallbackArgs& rcba);
145 
146     /**
147      * Notifies caller that the scheduler has completed processing responses.
148      */
149     void _onComplete(const executor::TaskExecutor::RemoteCommandCallbackArgs& rcba);
150 
151     // Not owned by us.
152     executor::TaskExecutor* _executor;
153 
154     const executor::RemoteCommandRequest _request;
155     executor::TaskExecutor::RemoteCommandCallbackFn _callback;
156     std::unique_ptr<RetryPolicy> _retryPolicy;
157     std::size_t _currentAttempt{0};
158     Milliseconds _currentUsedMillis{0};
159 
160     // Protects member data of this scheduler declared after mutex.
161     mutable stdx::mutex _mutex;
162 
163     mutable stdx::condition_variable _condition;
164 
165     // State transitions:
166     // PreStart --> Running --> ShuttingDown --> Complete
167     // It is possible to skip intermediate states. For example,
168     // calling shutdown() when the scheduler has not started will transition from PreStart directly
169     // to Complete.
170     enum class State { kPreStart, kRunning, kShuttingDown, kComplete };
171     State _state = State::kPreStart;  // (M)
172 
173     // Callback handle to the scheduled remote command.
174     executor::TaskExecutor::CallbackHandle _remoteCommandCallbackHandle;
175 };
176 
177 /**
178  * Abstract policy (all member functions are pure virtual) used by RemoteCommandRetryScheduler to
179  * determine if it is necessary to schedule another remote command request.
180  */
181 class RemoteCommandRetryScheduler::RetryPolicy {
182 public:
183     virtual ~RetryPolicy() = default;
184 
185     /**
186      * Retry scheduler should not send the remote command request more than this many times.
187      */
188     virtual std::size_t getMaximumAttempts() const = 0;
189 
190     /**
191      * Retry scheduler should not re-send remote command request if the total elapsed time of
192      * prior responses exceeds this limit.
193      * Assumes that re-sending the command will not exceed the limit returned by
194      * "getMaximumAttempts()".
195      * Returns executor::RemoteCommandRequest::kNoTimeout if this limit should be ignored.
196      */
197     virtual Milliseconds getMaximumResponseElapsedTotal() const = 0;
198 
199     /**
200      * Checks the error code in the most recent remote command response and returns true if
201      * scheduler should retry the remote command request.
202      * Assumes that re-sending the command will not exceed the limit returned by
203      * "getMaximumAttempts()" and total response elapsed time has not been exceeded (see
204      * "getMaximumResponseElapsedTotal()").
205      */
206     virtual bool shouldRetryOnError(ErrorCodes::Error error) const = 0;
207 
208     virtual std::string toString() const = 0;
209 };
210 
211 }  // namespace mongo
212