1 
2 /**
3  *    Copyright (C) 2018-present MongoDB, Inc.
4  *
5  *    This program is free software: you can redistribute it and/or modify
6  *    it under the terms of the Server Side Public License, version 1,
7  *    as published by MongoDB, Inc.
8  *
9  *    This program is distributed in the hope that it will be useful,
10  *    but WITHOUT ANY WARRANTY; without even the implied warranty of
11  *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  *    Server Side Public License for more details.
13  *
14  *    You should have received a copy of the Server Side Public License
15  *    along with this program. If not, see
16  *    <http://www.mongodb.com/licensing/server-side-public-license>.
17  *
18  *    As a special exception, the copyright holders give permission to link the
19  *    code of portions of this program with the OpenSSL library under certain
20  *    conditions as described in each individual source file and distribute
21  *    linked combinations including the program with the OpenSSL library. You
22  *    must comply with the Server Side Public License in all respects for
23  *    all of the code used other than as permitted herein. If you modify file(s)
24  *    with this exception, you may extend this exception to your version of the
25  *    file(s), but you are not obligated to do so. If you do not wish to do so,
26  *    delete this exception statement from your version. If you delete this
27  *    exception statement from all source files in the program, then also delete
28  *    it in the license file.
29  */
30 
31 #define MONGO_LOG_DEFAULT_COMPONENT ::mongo::logger::LogComponent::kCommand
32 
33 #include "mongo/db/commands/fsync.h"
34 
35 #include <string>
36 #include <vector>
37 
38 #include "mongo/base/init.h"
39 #include "mongo/bson/bsonobj.h"
40 #include "mongo/bson/bsonobjbuilder.h"
41 #include "mongo/db/auth/action_set.h"
42 #include "mongo/db/auth/action_type.h"
43 #include "mongo/db/auth/authorization_manager.h"
44 #include "mongo/db/auth/authorization_session.h"
45 #include "mongo/db/auth/privilege.h"
46 #include "mongo/db/client.h"
47 #include "mongo/db/commands.h"
48 #include "mongo/db/concurrency/d_concurrency.h"
49 #include "mongo/db/concurrency/write_conflict_exception.h"
50 #include "mongo/db/db.h"
51 #include "mongo/db/service_context.h"
52 #include "mongo/db/storage/mmap_v1/dur.h"
53 #include "mongo/db/storage/storage_engine.h"
54 #include "mongo/stdx/condition_variable.h"
55 #include "mongo/util/assert_util.h"
56 #include "mongo/util/background.h"
57 #include "mongo/util/log.h"
58 
59 namespace mongo {
60 
61 using std::string;
62 using std::stringstream;
63 
64 namespace {
65 // Ensures that only one command is operating on fsyncLock state at a time. As a 'ResourceMutex',
66 // lock time will be reported for a given user operation.
67 Lock::ResourceMutex commandMutex("fsyncCommandMutex");
68 }
69 
70 /**
71  * Maintains a global read lock while mongod is fsyncLocked.
72  */
73 class FSyncLockThread : public BackgroundJob {
74 public:
FSyncLockThread()75     FSyncLockThread() : BackgroundJob(false) {}
~FSyncLockThread()76     virtual ~FSyncLockThread() {}
name() const77     virtual string name() const {
78         return "FSyncLockThread";
79     }
80     virtual void run();
81 };
82 
83 class FSyncCommand : public ErrmsgCommandDeprecated {
84 public:
url()85     static const char* url() {
86         return "http://dochub.mongodb.org/core/fsynccommand";
87     }
88 
FSyncCommand()89     FSyncCommand() : ErrmsgCommandDeprecated("fsync") {}
90 
~FSyncCommand()91     virtual ~FSyncCommand() {
92         // The FSyncLockThread is owned by the FSyncCommand and accesses FsyncCommand state. It must
93         // be shut down prior to FSyncCommand destruction.
94         stdx::unique_lock<stdx::mutex> lk(lockStateMutex);
95         if (_lockCount > 0) {
96             _lockCount = 0;
97             releaseFsyncLockSyncCV.notify_one();
98             _lockThread->wait();
99             _lockThread.reset(nullptr);
100         }
101     }
102 
supportsWriteConcern(const BSONObj & cmd) const103     virtual bool supportsWriteConcern(const BSONObj& cmd) const override {
104         return false;
105     }
slaveOk() const106     virtual bool slaveOk() const {
107         return true;
108     }
adminOnly() const109     virtual bool adminOnly() const {
110         return true;
111     }
help(stringstream & h) const112     virtual void help(stringstream& h) const {
113         h << url();
114     }
addRequiredPrivileges(const std::string & dbname,const BSONObj & cmdObj,std::vector<Privilege> * out)115     virtual void addRequiredPrivileges(const std::string& dbname,
116                                        const BSONObj& cmdObj,
117                                        std::vector<Privilege>* out) {
118         ActionSet actions;
119         actions.addAction(ActionType::fsync);
120         out->push_back(Privilege(ResourcePattern::forClusterResource(), actions));
121     }
errmsgRun(OperationContext * opCtx,const string & dbname,const BSONObj & cmdObj,string & errmsg,BSONObjBuilder & result)122     virtual bool errmsgRun(OperationContext* opCtx,
123                            const string& dbname,
124                            const BSONObj& cmdObj,
125                            string& errmsg,
126                            BSONObjBuilder& result) {
127         if (opCtx->lockState()->isLocked()) {
128             errmsg = "fsync: Cannot execute fsync command from contexts that hold a data lock";
129             return false;
130         }
131 
132 
133         const bool sync =
134             !cmdObj["async"].trueValue();  // async means do an fsync, but return immediately
135         const bool lock = cmdObj["lock"].trueValue();
136         log() << "CMD fsync: sync:" << sync << " lock:" << lock;
137 
138         if (!lock) {
139             // the simple fsync command case
140             if (sync) {
141                 // can this be GlobalRead? and if it can, it should be nongreedy.
142                 Lock::GlobalWrite w(opCtx);
143                 // TODO SERVER-26822: Replace MMAPv1 specific calls with ones that are storage
144                 // engine agnostic.
145                 getDur().commitNow(opCtx);
146 
147                 //  No WriteUnitOfWork needed, as this does no writes of its own.
148             }
149 
150             // Take a global IS lock to ensure the storage engine is not shutdown
151             Lock::GlobalLock global(opCtx, MODE_IS, UINT_MAX);
152             StorageEngine* storageEngine = getGlobalServiceContext()->getGlobalStorageEngine();
153             result.append("numFiles", storageEngine->flushAllFiles(opCtx, sync));
154             return true;
155         }
156 
157         Lock::ExclusiveLock lk(opCtx->lockState(), commandMutex);
158         if (!sync) {
159             errmsg = "fsync: sync option must be true when using lock";
160             return false;
161         }
162 
163         const auto lockCountAtStart = getLockCount();
164         invariant(lockCountAtStart > 0 || !_lockThread);
165 
166         acquireLock();
167 
168         if (lockCountAtStart == 0) {
169 
170             Status status = Status::OK();
171             {
172                 stdx::unique_lock<stdx::mutex> lk(lockStateMutex);
173                 threadStatus = Status::OK();
174                 threadStarted = false;
175                 _lockThread = stdx::make_unique<FSyncLockThread>();
176                 _lockThread->go();
177 
178                 while (!threadStarted && threadStatus.isOK()) {
179                     acquireFsyncLockSyncCV.wait(lk);
180                 }
181 
182                 // 'threadStatus' must be copied while 'lockStateMutex' is held.
183                 status = threadStatus;
184             }
185 
186             if (!status.isOK()) {
187                 releaseLock();
188                 warning() << "fsyncLock failed. Lock count reset to 0. Status: " << status;
189                 return appendCommandStatus(result, status);
190             }
191         }
192 
193         log() << "mongod is locked and no writes are allowed. db.fsyncUnlock() to unlock";
194         log() << "Lock count is " << getLockCount();
195         log() << "    For more info see " << FSyncCommand::url();
196         result.append("info", "now locked against writes, use db.fsyncUnlock() to unlock");
197         result.append("lockCount", getLockCount());
198         result.append("seeAlso", FSyncCommand::url());
199 
200         return true;
201     }
202 
203     // Returns whether we are currently fsyncLocked. For use by callers not holding lockStateMutex.
fsyncLocked()204     bool fsyncLocked() {
205         stdx::unique_lock<stdx::mutex> lkFsyncLocked(_fsyncLockedMutex);
206         return _fsyncLocked;
207     }
208 
209     // For callers not already holding 'lockStateMutex'.
getLockCount()210     int64_t getLockCount() {
211         stdx::unique_lock<stdx::mutex> lk(lockStateMutex);
212         return getLockCount_inLock();
213     }
214 
215     // 'lockStateMutex' must be held when calling.
getLockCount_inLock()216     int64_t getLockCount_inLock() {
217         return _lockCount;
218     }
219 
releaseLock()220     void releaseLock() {
221         stdx::unique_lock<stdx::mutex> lk(lockStateMutex);
222         invariant(_lockCount >= 1);
223         _lockCount--;
224 
225         if (_lockCount == 0) {
226             {
227                 stdx::unique_lock<stdx::mutex> lkFsyncLocked(_fsyncLockedMutex);
228                 _fsyncLocked = false;
229             }
230             releaseFsyncLockSyncCV.notify_one();
231             lk.unlock();
232             _lockThread->wait();
233             _lockThread.reset(nullptr);
234         }
235     }
236 
237     // Allows for control of lock state change between the fsyncLock and fsyncUnlock commands and
238     // the FSyncLockThread that maintains the global read lock.
239     stdx::mutex lockStateMutex;
240     stdx::condition_variable acquireFsyncLockSyncCV;
241     stdx::condition_variable releaseFsyncLockSyncCV;
242 
243     // 'lockStateMutex' must be held to modify or read.
244     Status threadStatus = Status::OK();
245     // 'lockStateMutex' must be held to modify or read.
246     bool threadStarted = false;
247 
248 private:
acquireLock()249     void acquireLock() {
250         stdx::unique_lock<stdx::mutex> lk(lockStateMutex);
251         _lockCount++;
252 
253         if (_lockCount == 1) {
254             stdx::unique_lock<stdx::mutex> lkFsyncLocked(_fsyncLockedMutex);
255             _fsyncLocked = true;
256         }
257     }
258 
259     std::unique_ptr<FSyncLockThread> _lockThread;
260 
261     // The number of lock requests currently held. We will only release the fsyncLock when this
262     // number is decremented to 0. May only be accessed while 'lockStateMutex' is held.
263     int64_t _lockCount = 0;
264 
265     stdx::mutex _fsyncLockedMutex;
266     bool _fsyncLocked = false;
267 } fsyncCmd;
268 
269 class FSyncUnlockCommand : public ErrmsgCommandDeprecated {
270 public:
FSyncUnlockCommand()271     FSyncUnlockCommand() : ErrmsgCommandDeprecated("fsyncUnlock") {}
272 
273 
supportsWriteConcern(const BSONObj & cmd) const274     virtual bool supportsWriteConcern(const BSONObj& cmd) const override {
275         return false;
276     }
277 
slaveOk() const278     bool slaveOk() const override {
279         return true;
280     }
281 
adminOnly() const282     bool adminOnly() const override {
283         return true;
284     }
285 
checkAuthForCommand(Client * client,const std::string & dbname,const BSONObj & cmdObj)286     Status checkAuthForCommand(Client* client,
287                                const std::string& dbname,
288                                const BSONObj& cmdObj) override {
289         bool isAuthorized = AuthorizationSession::get(client)->isAuthorizedForActionsOnResource(
290             ResourcePattern::forClusterResource(), ActionType::unlock);
291 
292         return isAuthorized ? Status::OK() : Status(ErrorCodes::Unauthorized, "Unauthorized");
293     }
294 
errmsgRun(OperationContext * opCtx,const std::string & db,const BSONObj & cmdObj,std::string & errmsg,BSONObjBuilder & result)295     bool errmsgRun(OperationContext* opCtx,
296                    const std::string& db,
297                    const BSONObj& cmdObj,
298                    std::string& errmsg,
299                    BSONObjBuilder& result) override {
300         log() << "command: unlock requested";
301 
302         Lock::ExclusiveLock lk(opCtx->lockState(), commandMutex);
303 
304         if (unlockFsync()) {
305             const auto lockCount = fsyncCmd.getLockCount();
306             result.append("info", str::stream() << "fsyncUnlock completed");
307             result.append("lockCount", lockCount);
308             if (lockCount == 0) {
309                 log() << "fsyncUnlock completed. mongod is now unlocked and free to accept writes";
310             } else {
311                 log() << "fsyncUnlock completed. Lock count is now " << lockCount;
312             }
313             return true;
314         } else {
315             errmsg = "fsyncUnlock called when not locked";
316             return false;
317         }
318     }
319 
320 private:
321     // Returns true if lock count is decremented.
unlockFsync()322     bool unlockFsync() {
323         if (fsyncCmd.getLockCount() == 0) {
324             error() << "fsyncUnlock called when not locked";
325             return false;
326         }
327 
328         fsyncCmd.releaseLock();
329         return true;
330     }
331 
332 } unlockFsyncCmd;
333 
334 // Exposed publically via extern in fsync.h.
335 SimpleMutex filesLockedFsync;
336 
run()337 void FSyncLockThread::run() {
338     Client::initThread("fsyncLockWorker");
339     stdx::lock_guard<SimpleMutex> lkf(filesLockedFsync);
340     stdx::unique_lock<stdx::mutex> lk(fsyncCmd.lockStateMutex);
341 
342     invariant(fsyncCmd.getLockCount_inLock() == 1);
343 
344     try {
345         const ServiceContext::UniqueOperationContext opCtxPtr = cc().makeOperationContext();
346         OperationContext& opCtx = *opCtxPtr;
347         Lock::GlobalWrite global(&opCtx);  // No WriteUnitOfWork needed
348 
349         try {
350             // TODO SERVER-26822: Replace MMAPv1 specific calls with ones that are storage engine
351             // agnostic.
352             getDur().syncDataAndTruncateJournal(&opCtx);
353         } catch (const std::exception& e) {
354             error() << "error doing syncDataAndTruncateJournal: " << e.what();
355             fsyncCmd.threadStatus = Status(ErrorCodes::CommandFailed, e.what());
356             fsyncCmd.acquireFsyncLockSyncCV.notify_one();
357             return;
358         }
359         opCtx.lockState()->downgradeGlobalXtoSForMMAPV1();
360         StorageEngine* storageEngine = getGlobalServiceContext()->getGlobalStorageEngine();
361 
362         try {
363             storageEngine->flushAllFiles(&opCtx, true);
364         } catch (const std::exception& e) {
365             error() << "error doing flushAll: " << e.what();
366             fsyncCmd.threadStatus = Status(ErrorCodes::CommandFailed, e.what());
367             fsyncCmd.acquireFsyncLockSyncCV.notify_one();
368             return;
369         }
370         try {
371             writeConflictRetry(&opCtx, "beginBackup", "global", [&storageEngine, &opCtx] {
372                 uassertStatusOK(storageEngine->beginBackup(&opCtx));
373             });
374         } catch (const DBException& e) {
375             error() << "storage engine unable to begin backup : " << e.toString();
376             fsyncCmd.threadStatus = e.toStatus();
377             fsyncCmd.acquireFsyncLockSyncCV.notify_one();
378             return;
379         }
380 
381         fsyncCmd.threadStarted = true;
382         fsyncCmd.acquireFsyncLockSyncCV.notify_one();
383 
384         while (fsyncCmd.getLockCount_inLock() > 0) {
385             fsyncCmd.releaseFsyncLockSyncCV.wait(lk);
386         }
387 
388         storageEngine->endBackup(&opCtx);
389 
390     } catch (const std::exception& e) {
391         severe() << "FSyncLockThread exception: " << e.what();
392         fassertFailed(40350);
393     }
394 }
395 
lockedForWriting()396 bool lockedForWriting() {
397     return fsyncCmd.fsyncLocked();
398 }
399 }
400