1
2 /**
3 * Copyright (C) 2018-present MongoDB, Inc.
4 *
5 * This program is free software: you can redistribute it and/or modify
6 * it under the terms of the Server Side Public License, version 1,
7 * as published by MongoDB, Inc.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * Server Side Public License for more details.
13 *
14 * You should have received a copy of the Server Side Public License
15 * along with this program. If not, see
16 * <http://www.mongodb.com/licensing/server-side-public-license>.
17 *
18 * As a special exception, the copyright holders give permission to link the
19 * code of portions of this program with the OpenSSL library under certain
20 * conditions as described in each individual source file and distribute
21 * linked combinations including the program with the OpenSSL library. You
22 * must comply with the Server Side Public License in all respects for
23 * all of the code used other than as permitted herein. If you modify file(s)
24 * with this exception, you may extend this exception to your version of the
25 * file(s), but you are not obligated to do so. If you do not wish to do so,
26 * delete this exception statement from your version. If you delete this
27 * exception statement from all source files in the program, then also delete
28 * it in the license file.
29 */
30
31 #define MONGO_LOG_DEFAULT_COMPONENT ::mongo::logger::LogComponent::kFTDC
32
33 #include "mongo/platform/basic.h"
34
35 #include "mongo/db/ftdc/controller.h"
36
37 #include "mongo/db/client.h"
38 #include "mongo/db/ftdc/collector.h"
39 #include "mongo/db/ftdc/util.h"
40 #include "mongo/db/jsobj.h"
41 #include "mongo/stdx/condition_variable.h"
42 #include "mongo/stdx/memory.h"
43 #include "mongo/stdx/mutex.h"
44 #include "mongo/stdx/thread.h"
45 #include "mongo/util/concurrency/idle_thread_block.h"
46 #include "mongo/util/exit.h"
47 #include "mongo/util/log.h"
48 #include "mongo/util/time_support.h"
49
50 namespace mongo {
51
setEnabled(bool enabled)52 Status FTDCController::setEnabled(bool enabled) {
53 stdx::lock_guard<stdx::mutex> lock(_mutex);
54
55 if (_path.empty()) {
56 return Status(ErrorCodes::FTDCPathNotSet,
57 str::stream() << "FTDC cannot be enabled without setting the set parameter "
58 "'diagnosticDataCollectionDirectoryPath' first.");
59 }
60
61 _configTemp.enabled = enabled;
62 _condvar.notify_one();
63
64 return Status::OK();
65 }
66
setPeriod(Milliseconds millis)67 void FTDCController::setPeriod(Milliseconds millis) {
68 stdx::lock_guard<stdx::mutex> lock(_mutex);
69 _configTemp.period = millis;
70 _condvar.notify_one();
71 }
72
setMaxDirectorySizeBytes(std::uint64_t size)73 void FTDCController::setMaxDirectorySizeBytes(std::uint64_t size) {
74 stdx::lock_guard<stdx::mutex> lock(_mutex);
75 _configTemp.maxDirectorySizeBytes = size;
76 _condvar.notify_one();
77 }
78
setMaxFileSizeBytes(std::uint64_t size)79 void FTDCController::setMaxFileSizeBytes(std::uint64_t size) {
80 stdx::lock_guard<stdx::mutex> lock(_mutex);
81 _configTemp.maxFileSizeBytes = size;
82 _condvar.notify_one();
83 }
84
setMaxSamplesPerArchiveMetricChunk(size_t size)85 void FTDCController::setMaxSamplesPerArchiveMetricChunk(size_t size) {
86 stdx::lock_guard<stdx::mutex> lock(_mutex);
87 _configTemp.maxSamplesPerArchiveMetricChunk = size;
88 _condvar.notify_one();
89 }
90
setMaxSamplesPerInterimMetricChunk(size_t size)91 void FTDCController::setMaxSamplesPerInterimMetricChunk(size_t size) {
92 stdx::lock_guard<stdx::mutex> lock(_mutex);
93 _configTemp.maxSamplesPerInterimMetricChunk = size;
94 _condvar.notify_one();
95 }
96
setDirectory(const boost::filesystem::path & path)97 Status FTDCController::setDirectory(const boost::filesystem::path& path) {
98 stdx::lock_guard<stdx::mutex> lock(_mutex);
99
100 if (!_path.empty()) {
101 return Status(ErrorCodes::FTDCPathAlreadySet,
102 str::stream() << "FTDC path has already been set to '" << _path.string()
103 << "'. It cannot be changed.");
104 }
105
106 _path = path;
107
108 // Do not notify for the change since it has to be enabled via setEnabled.
109
110 return Status::OK();
111 }
112
113
addPeriodicCollector(std::unique_ptr<FTDCCollectorInterface> collector)114 void FTDCController::addPeriodicCollector(std::unique_ptr<FTDCCollectorInterface> collector) {
115 {
116 stdx::lock_guard<stdx::mutex> lock(_mutex);
117 invariant(_state == State::kNotStarted);
118
119 _periodicCollectors.add(std::move(collector));
120 }
121 }
122
addOnRotateCollector(std::unique_ptr<FTDCCollectorInterface> collector)123 void FTDCController::addOnRotateCollector(std::unique_ptr<FTDCCollectorInterface> collector) {
124 {
125 stdx::lock_guard<stdx::mutex> lock(_mutex);
126 invariant(_state == State::kNotStarted);
127
128 _rotateCollectors.add(std::move(collector));
129 }
130 }
131
getMostRecentPeriodicDocument()132 BSONObj FTDCController::getMostRecentPeriodicDocument() {
133 {
134 stdx::lock_guard<stdx::mutex> lock(_mutex);
135 return _mostRecentPeriodicDocument.getOwned();
136 }
137 }
138
start()139 void FTDCController::start() {
140 log() << "Initializing full-time diagnostic data capture with directory '"
141 << _path.generic_string() << "'";
142
143 // Start the thread
144 _thread = stdx::thread(stdx::bind(&FTDCController::doLoop, this));
145
146 {
147 stdx::lock_guard<stdx::mutex> lock(_mutex);
148
149 invariant(_state == State::kNotStarted);
150 _state = State::kStarted;
151 }
152 }
153
stop()154 void FTDCController::stop() {
155 log() << "Shutting down full-time diagnostic data capture";
156
157 {
158 stdx::lock_guard<stdx::mutex> lock(_mutex);
159
160 bool started = (_state == State::kStarted);
161
162 invariant(_state == State::kNotStarted || _state == State::kStarted);
163
164 if (!started) {
165 _state = State::kDone;
166 return;
167 }
168
169 _configTemp.enabled = false;
170 _state = State::kStopRequested;
171
172 // Wake up the thread if sleeping so that it will check if we are done
173 _condvar.notify_one();
174 }
175
176 _thread.join();
177
178 _state = State::kDone;
179
180 if (_mgr) {
181 auto s = _mgr->close();
182 if (!s.isOK()) {
183 log() << "Failed to close full-time diagnostic data capture file manager: " << s;
184 }
185 }
186 }
187
doLoop()188 void FTDCController::doLoop() {
189 try {
190 // Update config
191 {
192 stdx::lock_guard<stdx::mutex> lock(_mutex);
193 _config = _configTemp;
194 }
195
196 Client::initThread("ftdc");
197 Client* client = &cc();
198
199 while (true) {
200 // Compute the next interval to run regardless of how we were woken up
201 // Skipping an interval due to a race condition with a config signal is harmless.
202 auto now = getGlobalServiceContext()->getPreciseClockSource()->now();
203
204 // Get next time to run at
205 auto next_time = FTDCUtil::roundTime(now, _config.period);
206
207 // Wait for the next run or signal to shutdown
208 {
209 stdx::unique_lock<stdx::mutex> lock(_mutex);
210 MONGO_IDLE_THREAD_BLOCK;
211
212 // We ignore spurious wakeups by just doing an iteration of the loop
213 auto status = _condvar.wait_until(lock, next_time.toSystemTimePoint());
214
215 // Are we done running?
216 if (_state == State::kStopRequested) {
217 break;
218 }
219
220 // Update the current configuration settings always
221 // In unit tests, we may never get a signal when the timeout is 1ms on Windows since
222 // MSVC 2013 converts wait_until(now() + 1ms) into ~ wait_for(0) which means it will
223 // not wait for the condition variable to be signaled because it uses
224 // GetFileSystemTime for now which has ~10 ms granularity.
225 _config = _configTemp;
226
227 // if we hit a timeout on the condvar, we need to do another collection
228 // if we were signalled, then we have a config update only or were asked to stop
229 if (status == stdx::cv_status::no_timeout) {
230 continue;
231 }
232 }
233
234 // TODO: consider only running this thread if we are enabled
235 // for now, we just keep an idle thread as it is simpler
236 if (_config.enabled) {
237 // Delay initialization of FTDCFileManager until we are sure the user has enabled
238 // FTDC
239 if (!_mgr) {
240 auto swMgr =
241 FTDCFileManager::create(&_config, _path, &_rotateCollectors, client);
242
243 _mgr = uassertStatusOK(std::move(swMgr));
244 }
245
246 auto collectSample = _periodicCollectors.collect(client);
247
248 Status s = _mgr->writeSampleAndRotateIfNeeded(
249 client, std::get<0>(collectSample), std::get<1>(collectSample));
250
251 uassertStatusOK(s);
252
253 // Store a reference to the most recent document from the periodic collectors
254 {
255 stdx::lock_guard<stdx::mutex> lock(_mutex);
256 _mostRecentPeriodicDocument = std::get<0>(collectSample);
257 }
258 }
259 }
260 } catch (...) {
261 warning() << "Uncaught exception in '" << exceptionToStatus()
262 << "' in full-time diagnostic data capture subsystem. Shutting down the "
263 "full-time diagnostic data capture subsystem.";
264 }
265 }
266
267 } // namespace mongo
268