1
2 /**
3 * Copyright (C) 2018-present MongoDB, Inc.
4 *
5 * This program is free software: you can redistribute it and/or modify
6 * it under the terms of the Server Side Public License, version 1,
7 * as published by MongoDB, Inc.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * Server Side Public License for more details.
13 *
14 * You should have received a copy of the Server Side Public License
15 * along with this program. If not, see
16 * <http://www.mongodb.com/licensing/server-side-public-license>.
17 *
18 * As a special exception, the copyright holders give permission to link the
19 * code of portions of this program with the OpenSSL library under certain
20 * conditions as described in each individual source file and distribute
21 * linked combinations including the program with the OpenSSL library. You
22 * must comply with the Server Side Public License in all respects for
23 * all of the code used other than as permitted herein. If you modify file(s)
24 * with this exception, you may extend this exception to your version of the
25 * file(s), but you are not obligated to do so. If you do not wish to do so,
26 * delete this exception statement from your version. If you delete this
27 * exception statement from all source files in the program, then also delete
28 * it in the license file.
29 */
30
31 #define MONGO_LOG_DEFAULT_COMPONENT ::mongo::logger::LogComponent::kCommand
32
33 #include "mongo/db/commands/fsync.h"
34
35 #include <string>
36 #include <vector>
37
38 #include "mongo/base/init.h"
39 #include "mongo/bson/bsonobj.h"
40 #include "mongo/bson/bsonobjbuilder.h"
41 #include "mongo/db/auth/action_set.h"
42 #include "mongo/db/auth/action_type.h"
43 #include "mongo/db/auth/authorization_manager.h"
44 #include "mongo/db/auth/authorization_session.h"
45 #include "mongo/db/auth/privilege.h"
46 #include "mongo/db/client.h"
47 #include "mongo/db/commands.h"
48 #include "mongo/db/concurrency/d_concurrency.h"
49 #include "mongo/db/concurrency/write_conflict_exception.h"
50 #include "mongo/db/db.h"
51 #include "mongo/db/service_context.h"
52 #include "mongo/db/storage/mmap_v1/dur.h"
53 #include "mongo/db/storage/storage_engine.h"
54 #include "mongo/stdx/condition_variable.h"
55 #include "mongo/util/assert_util.h"
56 #include "mongo/util/background.h"
57 #include "mongo/util/log.h"
58
59 namespace mongo {
60
61 using std::string;
62 using std::stringstream;
63
64 namespace {
65 // Ensures that only one command is operating on fsyncLock state at a time. As a 'ResourceMutex',
66 // lock time will be reported for a given user operation.
67 Lock::ResourceMutex commandMutex("fsyncCommandMutex");
68 }
69
70 /**
71 * Maintains a global read lock while mongod is fsyncLocked.
72 */
73 class FSyncLockThread : public BackgroundJob {
74 public:
FSyncLockThread()75 FSyncLockThread() : BackgroundJob(false) {}
~FSyncLockThread()76 virtual ~FSyncLockThread() {}
name() const77 virtual string name() const {
78 return "FSyncLockThread";
79 }
80 virtual void run();
81 };
82
83 class FSyncCommand : public ErrmsgCommandDeprecated {
84 public:
url()85 static const char* url() {
86 return "http://dochub.mongodb.org/core/fsynccommand";
87 }
88
FSyncCommand()89 FSyncCommand() : ErrmsgCommandDeprecated("fsync") {}
90
~FSyncCommand()91 virtual ~FSyncCommand() {
92 // The FSyncLockThread is owned by the FSyncCommand and accesses FsyncCommand state. It must
93 // be shut down prior to FSyncCommand destruction.
94 stdx::unique_lock<stdx::mutex> lk(lockStateMutex);
95 if (_lockCount > 0) {
96 _lockCount = 0;
97 releaseFsyncLockSyncCV.notify_one();
98 _lockThread->wait();
99 _lockThread.reset(nullptr);
100 }
101 }
102
supportsWriteConcern(const BSONObj & cmd) const103 virtual bool supportsWriteConcern(const BSONObj& cmd) const override {
104 return false;
105 }
slaveOk() const106 virtual bool slaveOk() const {
107 return true;
108 }
adminOnly() const109 virtual bool adminOnly() const {
110 return true;
111 }
help(stringstream & h) const112 virtual void help(stringstream& h) const {
113 h << url();
114 }
addRequiredPrivileges(const std::string & dbname,const BSONObj & cmdObj,std::vector<Privilege> * out)115 virtual void addRequiredPrivileges(const std::string& dbname,
116 const BSONObj& cmdObj,
117 std::vector<Privilege>* out) {
118 ActionSet actions;
119 actions.addAction(ActionType::fsync);
120 out->push_back(Privilege(ResourcePattern::forClusterResource(), actions));
121 }
errmsgRun(OperationContext * opCtx,const string & dbname,const BSONObj & cmdObj,string & errmsg,BSONObjBuilder & result)122 virtual bool errmsgRun(OperationContext* opCtx,
123 const string& dbname,
124 const BSONObj& cmdObj,
125 string& errmsg,
126 BSONObjBuilder& result) {
127 if (opCtx->lockState()->isLocked()) {
128 errmsg = "fsync: Cannot execute fsync command from contexts that hold a data lock";
129 return false;
130 }
131
132
133 const bool sync =
134 !cmdObj["async"].trueValue(); // async means do an fsync, but return immediately
135 const bool lock = cmdObj["lock"].trueValue();
136 log() << "CMD fsync: sync:" << sync << " lock:" << lock;
137
138 if (!lock) {
139 // the simple fsync command case
140 if (sync) {
141 // can this be GlobalRead? and if it can, it should be nongreedy.
142 Lock::GlobalWrite w(opCtx);
143 // TODO SERVER-26822: Replace MMAPv1 specific calls with ones that are storage
144 // engine agnostic.
145 getDur().commitNow(opCtx);
146
147 // No WriteUnitOfWork needed, as this does no writes of its own.
148 }
149
150 // Take a global IS lock to ensure the storage engine is not shutdown
151 Lock::GlobalLock global(opCtx, MODE_IS, UINT_MAX);
152 StorageEngine* storageEngine = getGlobalServiceContext()->getGlobalStorageEngine();
153 result.append("numFiles", storageEngine->flushAllFiles(opCtx, sync));
154 return true;
155 }
156
157 Lock::ExclusiveLock lk(opCtx->lockState(), commandMutex);
158 if (!sync) {
159 errmsg = "fsync: sync option must be true when using lock";
160 return false;
161 }
162
163 const auto lockCountAtStart = getLockCount();
164 invariant(lockCountAtStart > 0 || !_lockThread);
165
166 acquireLock();
167
168 if (lockCountAtStart == 0) {
169
170 Status status = Status::OK();
171 {
172 stdx::unique_lock<stdx::mutex> lk(lockStateMutex);
173 threadStatus = Status::OK();
174 threadStarted = false;
175 _lockThread = stdx::make_unique<FSyncLockThread>();
176 _lockThread->go();
177
178 while (!threadStarted && threadStatus.isOK()) {
179 acquireFsyncLockSyncCV.wait(lk);
180 }
181
182 // 'threadStatus' must be copied while 'lockStateMutex' is held.
183 status = threadStatus;
184 }
185
186 if (!status.isOK()) {
187 releaseLock();
188 warning() << "fsyncLock failed. Lock count reset to 0. Status: " << status;
189 return appendCommandStatus(result, status);
190 }
191 }
192
193 log() << "mongod is locked and no writes are allowed. db.fsyncUnlock() to unlock";
194 log() << "Lock count is " << getLockCount();
195 log() << " For more info see " << FSyncCommand::url();
196 result.append("info", "now locked against writes, use db.fsyncUnlock() to unlock");
197 result.append("lockCount", getLockCount());
198 result.append("seeAlso", FSyncCommand::url());
199
200 return true;
201 }
202
203 // Returns whether we are currently fsyncLocked. For use by callers not holding lockStateMutex.
fsyncLocked()204 bool fsyncLocked() {
205 stdx::unique_lock<stdx::mutex> lkFsyncLocked(_fsyncLockedMutex);
206 return _fsyncLocked;
207 }
208
209 // For callers not already holding 'lockStateMutex'.
getLockCount()210 int64_t getLockCount() {
211 stdx::unique_lock<stdx::mutex> lk(lockStateMutex);
212 return getLockCount_inLock();
213 }
214
215 // 'lockStateMutex' must be held when calling.
getLockCount_inLock()216 int64_t getLockCount_inLock() {
217 return _lockCount;
218 }
219
releaseLock()220 void releaseLock() {
221 stdx::unique_lock<stdx::mutex> lk(lockStateMutex);
222 invariant(_lockCount >= 1);
223 _lockCount--;
224
225 if (_lockCount == 0) {
226 {
227 stdx::unique_lock<stdx::mutex> lkFsyncLocked(_fsyncLockedMutex);
228 _fsyncLocked = false;
229 }
230 releaseFsyncLockSyncCV.notify_one();
231 lk.unlock();
232 _lockThread->wait();
233 _lockThread.reset(nullptr);
234 }
235 }
236
237 // Allows for control of lock state change between the fsyncLock and fsyncUnlock commands and
238 // the FSyncLockThread that maintains the global read lock.
239 stdx::mutex lockStateMutex;
240 stdx::condition_variable acquireFsyncLockSyncCV;
241 stdx::condition_variable releaseFsyncLockSyncCV;
242
243 // 'lockStateMutex' must be held to modify or read.
244 Status threadStatus = Status::OK();
245 // 'lockStateMutex' must be held to modify or read.
246 bool threadStarted = false;
247
248 private:
acquireLock()249 void acquireLock() {
250 stdx::unique_lock<stdx::mutex> lk(lockStateMutex);
251 _lockCount++;
252
253 if (_lockCount == 1) {
254 stdx::unique_lock<stdx::mutex> lkFsyncLocked(_fsyncLockedMutex);
255 _fsyncLocked = true;
256 }
257 }
258
259 std::unique_ptr<FSyncLockThread> _lockThread;
260
261 // The number of lock requests currently held. We will only release the fsyncLock when this
262 // number is decremented to 0. May only be accessed while 'lockStateMutex' is held.
263 int64_t _lockCount = 0;
264
265 stdx::mutex _fsyncLockedMutex;
266 bool _fsyncLocked = false;
267 } fsyncCmd;
268
269 class FSyncUnlockCommand : public ErrmsgCommandDeprecated {
270 public:
FSyncUnlockCommand()271 FSyncUnlockCommand() : ErrmsgCommandDeprecated("fsyncUnlock") {}
272
273
supportsWriteConcern(const BSONObj & cmd) const274 virtual bool supportsWriteConcern(const BSONObj& cmd) const override {
275 return false;
276 }
277
slaveOk() const278 bool slaveOk() const override {
279 return true;
280 }
281
adminOnly() const282 bool adminOnly() const override {
283 return true;
284 }
285
checkAuthForCommand(Client * client,const std::string & dbname,const BSONObj & cmdObj)286 Status checkAuthForCommand(Client* client,
287 const std::string& dbname,
288 const BSONObj& cmdObj) override {
289 bool isAuthorized = AuthorizationSession::get(client)->isAuthorizedForActionsOnResource(
290 ResourcePattern::forClusterResource(), ActionType::unlock);
291
292 return isAuthorized ? Status::OK() : Status(ErrorCodes::Unauthorized, "Unauthorized");
293 }
294
errmsgRun(OperationContext * opCtx,const std::string & db,const BSONObj & cmdObj,std::string & errmsg,BSONObjBuilder & result)295 bool errmsgRun(OperationContext* opCtx,
296 const std::string& db,
297 const BSONObj& cmdObj,
298 std::string& errmsg,
299 BSONObjBuilder& result) override {
300 log() << "command: unlock requested";
301
302 Lock::ExclusiveLock lk(opCtx->lockState(), commandMutex);
303
304 if (unlockFsync()) {
305 const auto lockCount = fsyncCmd.getLockCount();
306 result.append("info", str::stream() << "fsyncUnlock completed");
307 result.append("lockCount", lockCount);
308 if (lockCount == 0) {
309 log() << "fsyncUnlock completed. mongod is now unlocked and free to accept writes";
310 } else {
311 log() << "fsyncUnlock completed. Lock count is now " << lockCount;
312 }
313 return true;
314 } else {
315 errmsg = "fsyncUnlock called when not locked";
316 return false;
317 }
318 }
319
320 private:
321 // Returns true if lock count is decremented.
unlockFsync()322 bool unlockFsync() {
323 if (fsyncCmd.getLockCount() == 0) {
324 error() << "fsyncUnlock called when not locked";
325 return false;
326 }
327
328 fsyncCmd.releaseLock();
329 return true;
330 }
331
332 } unlockFsyncCmd;
333
// Exposed publicly via extern in fsync.h.
335 SimpleMutex filesLockedFsync;
336
run()337 void FSyncLockThread::run() {
338 Client::initThread("fsyncLockWorker");
339 stdx::lock_guard<SimpleMutex> lkf(filesLockedFsync);
340 stdx::unique_lock<stdx::mutex> lk(fsyncCmd.lockStateMutex);
341
342 invariant(fsyncCmd.getLockCount_inLock() == 1);
343
344 try {
345 const ServiceContext::UniqueOperationContext opCtxPtr = cc().makeOperationContext();
346 OperationContext& opCtx = *opCtxPtr;
347 Lock::GlobalWrite global(&opCtx); // No WriteUnitOfWork needed
348
349 try {
350 // TODO SERVER-26822: Replace MMAPv1 specific calls with ones that are storage engine
351 // agnostic.
352 getDur().syncDataAndTruncateJournal(&opCtx);
353 } catch (const std::exception& e) {
354 error() << "error doing syncDataAndTruncateJournal: " << e.what();
355 fsyncCmd.threadStatus = Status(ErrorCodes::CommandFailed, e.what());
356 fsyncCmd.acquireFsyncLockSyncCV.notify_one();
357 return;
358 }
359 opCtx.lockState()->downgradeGlobalXtoSForMMAPV1();
360 StorageEngine* storageEngine = getGlobalServiceContext()->getGlobalStorageEngine();
361
362 try {
363 storageEngine->flushAllFiles(&opCtx, true);
364 } catch (const std::exception& e) {
365 error() << "error doing flushAll: " << e.what();
366 fsyncCmd.threadStatus = Status(ErrorCodes::CommandFailed, e.what());
367 fsyncCmd.acquireFsyncLockSyncCV.notify_one();
368 return;
369 }
370 try {
371 writeConflictRetry(&opCtx, "beginBackup", "global", [&storageEngine, &opCtx] {
372 uassertStatusOK(storageEngine->beginBackup(&opCtx));
373 });
374 } catch (const DBException& e) {
375 error() << "storage engine unable to begin backup : " << e.toString();
376 fsyncCmd.threadStatus = e.toStatus();
377 fsyncCmd.acquireFsyncLockSyncCV.notify_one();
378 return;
379 }
380
381 fsyncCmd.threadStarted = true;
382 fsyncCmd.acquireFsyncLockSyncCV.notify_one();
383
384 while (fsyncCmd.getLockCount_inLock() > 0) {
385 fsyncCmd.releaseFsyncLockSyncCV.wait(lk);
386 }
387
388 storageEngine->endBackup(&opCtx);
389
390 } catch (const std::exception& e) {
391 severe() << "FSyncLockThread exception: " << e.what();
392 fassertFailed(40350);
393 }
394 }
395
lockedForWriting()396 bool lockedForWriting() {
397 return fsyncCmd.fsyncLocked();
398 }
399 }
400