1 
2 /**
3  *    Copyright (C) 2018-present MongoDB, Inc.
4  *
5  *    This program is free software: you can redistribute it and/or modify
6  *    it under the terms of the Server Side Public License, version 1,
7  *    as published by MongoDB, Inc.
8  *
9  *    This program is distributed in the hope that it will be useful,
10  *    but WITHOUT ANY WARRANTY; without even the implied warranty of
11  *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  *    Server Side Public License for more details.
13  *
14  *    You should have received a copy of the Server Side Public License
15  *    along with this program. If not, see
16  *    <http://www.mongodb.com/licensing/server-side-public-license>.
17  *
18  *    As a special exception, the copyright holders give permission to link the
19  *    code of portions of this program with the OpenSSL library under certain
20  *    conditions as described in each individual source file and distribute
21  *    linked combinations including the program with the OpenSSL library. You
22  *    must comply with the Server Side Public License in all respects for
23  *    all of the code used other than as permitted herein. If you modify file(s)
24  *    with this exception, you may extend this exception to your version of the
25  *    file(s), but you are not obligated to do so. If you do not wish to do so,
26  *    delete this exception statement from your version. If you delete this
27  *    exception statement from all source files in the program, then also delete
28  *    it in the license file.
29  */
30 
31 #define MONGO_LOG_DEFAULT_COMPONENT ::mongo::logger::LogComponent::kFTDC
32 
33 #include "mongo/platform/basic.h"
34 
35 #include "mongo/db/ftdc/controller.h"
36 
37 #include "mongo/db/client.h"
38 #include "mongo/db/ftdc/collector.h"
39 #include "mongo/db/ftdc/util.h"
40 #include "mongo/db/jsobj.h"
41 #include "mongo/stdx/condition_variable.h"
42 #include "mongo/stdx/memory.h"
43 #include "mongo/stdx/mutex.h"
44 #include "mongo/stdx/thread.h"
45 #include "mongo/util/concurrency/idle_thread_block.h"
46 #include "mongo/util/exit.h"
47 #include "mongo/util/log.h"
48 #include "mongo/util/time_support.h"
49 
50 namespace mongo {
51 
setEnabled(bool enabled)52 Status FTDCController::setEnabled(bool enabled) {
53     stdx::lock_guard<stdx::mutex> lock(_mutex);
54 
55     if (_path.empty()) {
56         return Status(ErrorCodes::FTDCPathNotSet,
57                       str::stream() << "FTDC cannot be enabled without setting the set parameter "
58                                        "'diagnosticDataCollectionDirectoryPath' first.");
59     }
60 
61     _configTemp.enabled = enabled;
62     _condvar.notify_one();
63 
64     return Status::OK();
65 }
66 
setPeriod(Milliseconds millis)67 void FTDCController::setPeriod(Milliseconds millis) {
68     stdx::lock_guard<stdx::mutex> lock(_mutex);
69     _configTemp.period = millis;
70     _condvar.notify_one();
71 }
72 
setMaxDirectorySizeBytes(std::uint64_t size)73 void FTDCController::setMaxDirectorySizeBytes(std::uint64_t size) {
74     stdx::lock_guard<stdx::mutex> lock(_mutex);
75     _configTemp.maxDirectorySizeBytes = size;
76     _condvar.notify_one();
77 }
78 
setMaxFileSizeBytes(std::uint64_t size)79 void FTDCController::setMaxFileSizeBytes(std::uint64_t size) {
80     stdx::lock_guard<stdx::mutex> lock(_mutex);
81     _configTemp.maxFileSizeBytes = size;
82     _condvar.notify_one();
83 }
84 
setMaxSamplesPerArchiveMetricChunk(size_t size)85 void FTDCController::setMaxSamplesPerArchiveMetricChunk(size_t size) {
86     stdx::lock_guard<stdx::mutex> lock(_mutex);
87     _configTemp.maxSamplesPerArchiveMetricChunk = size;
88     _condvar.notify_one();
89 }
90 
setMaxSamplesPerInterimMetricChunk(size_t size)91 void FTDCController::setMaxSamplesPerInterimMetricChunk(size_t size) {
92     stdx::lock_guard<stdx::mutex> lock(_mutex);
93     _configTemp.maxSamplesPerInterimMetricChunk = size;
94     _condvar.notify_one();
95 }
96 
setDirectory(const boost::filesystem::path & path)97 Status FTDCController::setDirectory(const boost::filesystem::path& path) {
98     stdx::lock_guard<stdx::mutex> lock(_mutex);
99 
100     if (!_path.empty()) {
101         return Status(ErrorCodes::FTDCPathAlreadySet,
102                       str::stream() << "FTDC path has already been set to '" << _path.string()
103                                     << "'. It cannot be changed.");
104     }
105 
106     _path = path;
107 
108     // Do not notify for the change since it has to be enabled via setEnabled.
109 
110     return Status::OK();
111 }
112 
113 
addPeriodicCollector(std::unique_ptr<FTDCCollectorInterface> collector)114 void FTDCController::addPeriodicCollector(std::unique_ptr<FTDCCollectorInterface> collector) {
115     {
116         stdx::lock_guard<stdx::mutex> lock(_mutex);
117         invariant(_state == State::kNotStarted);
118 
119         _periodicCollectors.add(std::move(collector));
120     }
121 }
122 
addOnRotateCollector(std::unique_ptr<FTDCCollectorInterface> collector)123 void FTDCController::addOnRotateCollector(std::unique_ptr<FTDCCollectorInterface> collector) {
124     {
125         stdx::lock_guard<stdx::mutex> lock(_mutex);
126         invariant(_state == State::kNotStarted);
127 
128         _rotateCollectors.add(std::move(collector));
129     }
130 }
131 
getMostRecentPeriodicDocument()132 BSONObj FTDCController::getMostRecentPeriodicDocument() {
133     {
134         stdx::lock_guard<stdx::mutex> lock(_mutex);
135         return _mostRecentPeriodicDocument.getOwned();
136     }
137 }
138 
start()139 void FTDCController::start() {
140     log() << "Initializing full-time diagnostic data capture with directory '"
141           << _path.generic_string() << "'";
142 
143     // Start the thread
144     _thread = stdx::thread(stdx::bind(&FTDCController::doLoop, this));
145 
146     {
147         stdx::lock_guard<stdx::mutex> lock(_mutex);
148 
149         invariant(_state == State::kNotStarted);
150         _state = State::kStarted;
151     }
152 }
153 
stop()154 void FTDCController::stop() {
155     log() << "Shutting down full-time diagnostic data capture";
156 
157     {
158         stdx::lock_guard<stdx::mutex> lock(_mutex);
159 
160         bool started = (_state == State::kStarted);
161 
162         invariant(_state == State::kNotStarted || _state == State::kStarted);
163 
164         if (!started) {
165             _state = State::kDone;
166             return;
167         }
168 
169         _configTemp.enabled = false;
170         _state = State::kStopRequested;
171 
172         // Wake up the thread if sleeping so that it will check if we are done
173         _condvar.notify_one();
174     }
175 
176     _thread.join();
177 
178     _state = State::kDone;
179 
180     if (_mgr) {
181         auto s = _mgr->close();
182         if (!s.isOK()) {
183             log() << "Failed to close full-time diagnostic data capture file manager: " << s;
184         }
185     }
186 }
187 
doLoop()188 void FTDCController::doLoop() {
189     try {
190         // Update config
191         {
192             stdx::lock_guard<stdx::mutex> lock(_mutex);
193             _config = _configTemp;
194         }
195 
196         Client::initThread("ftdc");
197         Client* client = &cc();
198 
199         while (true) {
200             // Compute the next interval to run regardless of how we were woken up
201             // Skipping an interval due to a race condition with a config signal is harmless.
202             auto now = getGlobalServiceContext()->getPreciseClockSource()->now();
203 
204             // Get next time to run at
205             auto next_time = FTDCUtil::roundTime(now, _config.period);
206 
207             // Wait for the next run or signal to shutdown
208             {
209                 stdx::unique_lock<stdx::mutex> lock(_mutex);
210                 MONGO_IDLE_THREAD_BLOCK;
211 
212                 // We ignore spurious wakeups by just doing an iteration of the loop
213                 auto status = _condvar.wait_until(lock, next_time.toSystemTimePoint());
214 
215                 // Are we done running?
216                 if (_state == State::kStopRequested) {
217                     break;
218                 }
219 
220                 // Update the current configuration settings always
221                 // In unit tests, we may never get a signal when the timeout is 1ms on Windows since
222                 // MSVC 2013 converts wait_until(now() + 1ms) into ~ wait_for(0) which means it will
223                 // not wait for the condition variable to be signaled because it uses
224                 // GetFileSystemTime for now which has ~10 ms granularity.
225                 _config = _configTemp;
226 
227                 // if we hit a timeout on the condvar, we need to do another collection
228                 // if we were signalled, then we have a config update only or were asked to stop
229                 if (status == stdx::cv_status::no_timeout) {
230                     continue;
231                 }
232             }
233 
234             // TODO: consider only running this thread if we are enabled
235             // for now, we just keep an idle thread as it is simpler
236             if (_config.enabled) {
237                 // Delay initialization of FTDCFileManager until we are sure the user has enabled
238                 // FTDC
239                 if (!_mgr) {
240                     auto swMgr =
241                         FTDCFileManager::create(&_config, _path, &_rotateCollectors, client);
242 
243                     _mgr = uassertStatusOK(std::move(swMgr));
244                 }
245 
246                 auto collectSample = _periodicCollectors.collect(client);
247 
248                 Status s = _mgr->writeSampleAndRotateIfNeeded(
249                     client, std::get<0>(collectSample), std::get<1>(collectSample));
250 
251                 uassertStatusOK(s);
252 
253                 // Store a reference to the most recent document from the periodic collectors
254                 {
255                     stdx::lock_guard<stdx::mutex> lock(_mutex);
256                     _mostRecentPeriodicDocument = std::get<0>(collectSample);
257                 }
258             }
259         }
260     } catch (...) {
261         warning() << "Uncaught exception in '" << exceptionToStatus()
262                   << "' in full-time diagnostic data capture subsystem. Shutting down the "
263                      "full-time diagnostic data capture subsystem.";
264     }
265 }
266 
267 }  // namespace mongo
268