1 
2 /**
3  *    Copyright (C) 2018-present MongoDB, Inc.
4  *
5  *    This program is free software: you can redistribute it and/or modify
6  *    it under the terms of the Server Side Public License, version 1,
7  *    as published by MongoDB, Inc.
8  *
9  *    This program is distributed in the hope that it will be useful,
10  *    but WITHOUT ANY WARRANTY; without even the implied warranty of
11  *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  *    Server Side Public License for more details.
13  *
14  *    You should have received a copy of the Server Side Public License
15  *    along with this program. If not, see
16  *    <http://www.mongodb.com/licensing/server-side-public-license>.
17  *
18  *    As a special exception, the copyright holders give permission to link the
19  *    code of portions of this program with the OpenSSL library under certain
20  *    conditions as described in each individual source file and distribute
21  *    linked combinations including the program with the OpenSSL library. You
22  *    must comply with the Server Side Public License in all respects for
23  *    all of the code used other than as permitted herein. If you modify file(s)
24  *    with this exception, you may extend this exception to your version of the
25  *    file(s), but you are not obligated to do so. If you do not wish to do so,
26  *    delete this exception statement from your version. If you delete this
27  *    exception statement from all source files in the program, then also delete
28  *    it in the license file.
29  */
30 
31 #define MONGO_LOG_DEFAULT_COMPONENT ::mongo::logger::LogComponent::kSharding
32 
33 #include "mongo/platform/basic.h"
34 
35 #include "mongo/db/s/split_chunk.h"
36 
37 #include "mongo/base/status_with.h"
38 #include "mongo/bson/util/bson_extract.h"
39 #include "mongo/db/catalog/catalog_raii.h"
40 #include "mongo/db/catalog/index_catalog.h"
41 #include "mongo/db/commands.h"
42 #include "mongo/db/dbhelpers.h"
43 #include "mongo/db/index/index_descriptor.h"
44 #include "mongo/db/keypattern.h"
45 #include "mongo/db/namespace_string.h"
46 #include "mongo/db/query/internal_plans.h"
47 #include "mongo/db/s/collection_metadata.h"
48 #include "mongo/db/s/collection_sharding_state.h"
49 #include "mongo/db/s/sharding_state.h"
50 #include "mongo/s/catalog/type_chunk.h"
51 #include "mongo/s/client/shard_registry.h"
52 #include "mongo/s/grid.h"
53 #include "mongo/s/request_types/split_chunk_request_type.h"
54 #include "mongo/util/log.h"
55 
56 namespace mongo {
57 namespace {
58 
59 const ReadPreferenceSetting kPrimaryOnlyReadPreference{ReadPreference::PrimaryOnly};
60 
checkIfSingleDoc(OperationContext * opCtx,Collection * collection,const IndexDescriptor * idx,const ChunkType * chunk)61 bool checkIfSingleDoc(OperationContext* opCtx,
62                       Collection* collection,
63                       const IndexDescriptor* idx,
64                       const ChunkType* chunk) {
65     KeyPattern kp(idx->keyPattern());
66     BSONObj newmin = Helpers::toKeyFormat(kp.extendRangeBound(chunk->getMin(), false));
67     BSONObj newmax = Helpers::toKeyFormat(kp.extendRangeBound(chunk->getMax(), true));
68 
69     auto exec = InternalPlanner::indexScan(opCtx,
70                                            collection,
71                                            idx,
72                                            newmin,
73                                            newmax,
74                                            BoundInclusion::kIncludeStartKeyOnly,
75                                            PlanExecutor::NO_YIELD);
76     // check if exactly one document found
77     PlanExecutor::ExecState state;
78     BSONObj obj;
79     if (PlanExecutor::ADVANCED == (state = exec->getNext(&obj, NULL))) {
80         if (PlanExecutor::IS_EOF == (state = exec->getNext(&obj, NULL))) {
81             return true;
82         }
83     }
84 
85     // Non-yielding collection scans from InternalPlanner will never error.
86     invariant(PlanExecutor::ADVANCED == state || PlanExecutor::IS_EOF == state);
87 
88     return false;
89 }
90 
91 /**
92  * Checks the collection's metadata for a successful split on the specified chunkRange using the
93  * specified splitKeys. Returns false if the metadata's chunks don't match the new chunk
94  * boundaries exactly.
95  */
checkMetadataForSuccessfulSplitChunk(OperationContext * opCtx,const NamespaceString & nss,const ChunkRange & chunkRange,const std::vector<BSONObj> & splitKeys)96 bool checkMetadataForSuccessfulSplitChunk(OperationContext* opCtx,
97                                           const NamespaceString& nss,
98                                           const ChunkRange& chunkRange,
99                                           const std::vector<BSONObj>& splitKeys) {
100     const auto metadataAfterSplit = [&] {
101         AutoGetCollection autoColl(opCtx, nss, MODE_IS);
102         return CollectionShardingState::get(opCtx, nss.ns())->getMetadata();
103     }();
104 
105     uassert(ErrorCodes::StaleEpoch,
106             str::stream() << "Collection " << nss.ns() << " became unsharded",
107             metadataAfterSplit);
108 
109     auto newChunkBounds(splitKeys);
110     auto startKey = chunkRange.getMin();
111     newChunkBounds.push_back(chunkRange.getMax());
112 
113     ChunkType nextChunk;
114     for (const auto& endKey : newChunkBounds) {
115         // Check that all new chunks fit the new chunk boundaries
116         if (!metadataAfterSplit->getNextChunk(startKey, &nextChunk) ||
117             nextChunk.getMax().woCompare(endKey)) {
118             return false;
119         }
120 
121         startKey = endKey;
122     }
123 
124     return true;
125 }
126 
127 }  // anonymous namespace
128 
splitChunk(OperationContext * opCtx,const NamespaceString & nss,const BSONObj & keyPatternObj,const ChunkRange & chunkRange,const std::vector<BSONObj> & splitKeys,const std::string & shardName,const OID & expectedCollectionEpoch)129 StatusWith<boost::optional<ChunkRange>> splitChunk(OperationContext* opCtx,
130                                                    const NamespaceString& nss,
131                                                    const BSONObj& keyPatternObj,
132                                                    const ChunkRange& chunkRange,
133                                                    const std::vector<BSONObj>& splitKeys,
134                                                    const std::string& shardName,
135                                                    const OID& expectedCollectionEpoch) {
136 
137     ShardingState* shardingState = ShardingState::get(opCtx);
138     std::string errmsg;
139 
140     //
141     // Lock the collection's metadata and get highest version for the current shard
142     // TODO(SERVER-25086): Remove distLock acquisition from split chunk
143     //
144     const std::string whyMessage(
145         str::stream() << "splitting chunk " << chunkRange.toString() << " in " << nss.toString());
146     auto scopedDistLock = Grid::get(opCtx)->catalogClient()->getDistLockManager()->lock(
147         opCtx, nss.ns(), whyMessage, DistLockManager::kSingleLockAttemptTimeout);
148     if (!scopedDistLock.isOK()) {
149         errmsg = str::stream() << "could not acquire collection lock for " << nss.toString()
150                                << " to split chunk " << redact(chunkRange.toString()) << " "
151                                << causedBy(scopedDistLock.getStatus());
152         return {scopedDistLock.getStatus().code(), errmsg};
153     }
154 
155     // If the shard key is hashed, then we must make sure that the split points are of type
156     // NumberLong.
157     if (KeyPattern::isHashedKeyPattern(keyPatternObj)) {
158         for (BSONObj splitKey : splitKeys) {
159             BSONObjIterator it(splitKey);
160             while (it.more()) {
161                 BSONElement splitKeyElement = it.next();
162                 if (splitKeyElement.type() != NumberLong) {
163                     errmsg = str::stream() << "splitChunk cannot split chunk "
164                                            << chunkRange.toString() << ", split point "
165                                            << splitKeyElement.toString()
166                                            << " must be of type "
167                                               "NumberLong for hashed shard key patterns";
168                     return {ErrorCodes::CannotSplit, errmsg};
169                 }
170             }
171         }
172     }
173 
174     // Commit the split to the config server.
175     auto request =
176         SplitChunkRequest(nss, shardName, expectedCollectionEpoch, chunkRange, splitKeys);
177 
178     auto configCmdObj =
179         request.toConfigCommandBSON(ShardingCatalogClient::kMajorityWriteConcern.toBSON());
180 
181     auto cmdResponseStatus =
182         Grid::get(opCtx)->shardRegistry()->getConfigShard()->runCommandWithFixedRetryAttempts(
183             opCtx,
184             kPrimaryOnlyReadPreference,
185             "admin",
186             configCmdObj,
187             Shard::RetryPolicy::kIdempotent);
188 
189     // If we failed to get any response from the config server at all, despite retries, then we
190     // should just go ahead and fail the whole operation.
191     if (!cmdResponseStatus.isOK()) {
192         return cmdResponseStatus.getStatus();
193     }
194 
195     // Check commandStatus and writeConcernStatus
196     auto commandStatus = cmdResponseStatus.getValue().commandStatus;
197     auto writeConcernStatus = cmdResponseStatus.getValue().writeConcernStatus;
198 
199     // Send stale epoch if epoch of request did not match epoch of collection
200     if (commandStatus == ErrorCodes::StaleEpoch) {
201         return commandStatus;
202     }
203 
204     //
205     // If _configsvrCommitChunkSplit returned an error, refresh and look at the metadata to
206     // determine if the split actually did happen. This can happen if there's a network error
207     // getting the response from the first call to _configsvrCommitChunkSplit, but it actually
208     // succeeds, thus the automatic retry fails with a precondition violation, for example.
209     //
210     if (!commandStatus.isOK() || !writeConcernStatus.isOK()) {
211         {
212             ChunkVersion unusedShardVersion;
213             Status refreshStatus =
214                 shardingState->refreshMetadataNow(opCtx, nss, &unusedShardVersion);
215 
216             if (!refreshStatus.isOK()) {
217                 Status errorStatus = commandStatus.isOK() ? writeConcernStatus : commandStatus;
218                 errmsg = str::stream()
219                     << "splitChunk failed for chunk " << chunkRange.toString() << ", collection '"
220                     << nss.ns() << "' due to " << errorStatus.toString()
221                     << ". Attempt to verify if the commit succeeded anyway failed due to: "
222                     << refreshStatus.toString();
223 
224                 warning() << redact(errmsg);
225                 return {errorStatus.code(), errmsg};
226             }
227         }
228 
229         if (checkMetadataForSuccessfulSplitChunk(opCtx, nss, chunkRange, splitKeys)) {
230             // Split was committed.
231         } else if (!commandStatus.isOK()) {
232             return commandStatus;
233         } else if (!writeConcernStatus.isOK()) {
234             return writeConcernStatus;
235         }
236     }
237 
238     AutoGetCollection autoColl(opCtx, nss, MODE_IS);
239 
240     Collection* const collection = autoColl.getCollection();
241     if (!collection) {
242         warning() << "will not perform top-chunk checking since " << nss.toString()
243                   << " does not exist after splitting";
244         return boost::optional<ChunkRange>(boost::none);
245     }
246 
247     // Allow multiKey based on the invariant that shard keys must be single-valued. Therefore,
248     // any multi-key index prefixed by shard key cannot be multikey over the shard key fields.
249     IndexDescriptor* idx =
250         collection->getIndexCatalog()->findShardKeyPrefixedIndex(opCtx, keyPatternObj, false);
251     if (!idx) {
252         return boost::optional<ChunkRange>(boost::none);
253     }
254 
255     auto backChunk = ChunkType();
256     backChunk.setMin(splitKeys.back());
257     backChunk.setMax(chunkRange.getMax());
258 
259     auto frontChunk = ChunkType();
260     frontChunk.setMin(chunkRange.getMin());
261     frontChunk.setMax(splitKeys.front());
262 
263     KeyPattern shardKeyPattern(keyPatternObj);
264     if (shardKeyPattern.globalMax().woCompare(backChunk.getMax()) == 0 &&
265         checkIfSingleDoc(opCtx, collection, idx, &backChunk)) {
266         return boost::optional<ChunkRange>(ChunkRange(backChunk.getMin(), backChunk.getMax()));
267     } else if (shardKeyPattern.globalMin().woCompare(frontChunk.getMin()) == 0 &&
268                checkIfSingleDoc(opCtx, collection, idx, &frontChunk)) {
269         return boost::optional<ChunkRange>(ChunkRange(frontChunk.getMin(), frontChunk.getMax()));
270     }
271 
272     return boost::optional<ChunkRange>(boost::none);
273 }
274 
275 }  // namespace mongo
276