1
2 /**
3 * Copyright (C) 2018-present MongoDB, Inc.
4 *
5 * This program is free software: you can redistribute it and/or modify
6 * it under the terms of the Server Side Public License, version 1,
7 * as published by MongoDB, Inc.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * Server Side Public License for more details.
13 *
14 * You should have received a copy of the Server Side Public License
15 * along with this program. If not, see
16 * <http://www.mongodb.com/licensing/server-side-public-license>.
17 *
18 * As a special exception, the copyright holders give permission to link the
19 * code of portions of this program with the OpenSSL library under certain
20 * conditions as described in each individual source file and distribute
21 * linked combinations including the program with the OpenSSL library. You
22 * must comply with the Server Side Public License in all respects for
23 * all of the code used other than as permitted herein. If you modify file(s)
24 * with this exception, you may extend this exception to your version of the
25 * file(s), but you are not obligated to do so. If you do not wish to do so,
26 * delete this exception statement from your version. If you delete this
27 * exception statement from all source files in the program, then also delete
28 * it in the license file.
29 */
30
31 #define MONGO_LOG_DEFAULT_COMPONENT ::mongo::logger::LogComponent::kSharding
32
33 #include "mongo/platform/basic.h"
34
35 #include "mongo/db/s/split_chunk.h"
36
37 #include "mongo/base/status_with.h"
38 #include "mongo/bson/util/bson_extract.h"
39 #include "mongo/db/catalog/catalog_raii.h"
40 #include "mongo/db/catalog/index_catalog.h"
41 #include "mongo/db/commands.h"
42 #include "mongo/db/dbhelpers.h"
43 #include "mongo/db/index/index_descriptor.h"
44 #include "mongo/db/keypattern.h"
45 #include "mongo/db/namespace_string.h"
46 #include "mongo/db/query/internal_plans.h"
47 #include "mongo/db/s/collection_metadata.h"
48 #include "mongo/db/s/collection_sharding_state.h"
49 #include "mongo/db/s/sharding_state.h"
50 #include "mongo/s/catalog/type_chunk.h"
51 #include "mongo/s/client/shard_registry.h"
52 #include "mongo/s/grid.h"
53 #include "mongo/s/request_types/split_chunk_request_type.h"
54 #include "mongo/util/log.h"
55
56 namespace mongo {
57 namespace {
58
// Read preference passed along with the split commit command that splitChunk() sends to the
// config shard; it selects ReadPreference::PrimaryOnly.
59 const ReadPreferenceSetting kPrimaryOnlyReadPreference{ReadPreference::PrimaryOnly};
60
checkIfSingleDoc(OperationContext * opCtx,Collection * collection,const IndexDescriptor * idx,const ChunkType * chunk)61 bool checkIfSingleDoc(OperationContext* opCtx,
62 Collection* collection,
63 const IndexDescriptor* idx,
64 const ChunkType* chunk) {
65 KeyPattern kp(idx->keyPattern());
66 BSONObj newmin = Helpers::toKeyFormat(kp.extendRangeBound(chunk->getMin(), false));
67 BSONObj newmax = Helpers::toKeyFormat(kp.extendRangeBound(chunk->getMax(), true));
68
69 auto exec = InternalPlanner::indexScan(opCtx,
70 collection,
71 idx,
72 newmin,
73 newmax,
74 BoundInclusion::kIncludeStartKeyOnly,
75 PlanExecutor::NO_YIELD);
76 // check if exactly one document found
77 PlanExecutor::ExecState state;
78 BSONObj obj;
79 if (PlanExecutor::ADVANCED == (state = exec->getNext(&obj, NULL))) {
80 if (PlanExecutor::IS_EOF == (state = exec->getNext(&obj, NULL))) {
81 return true;
82 }
83 }
84
85 // Non-yielding collection scans from InternalPlanner will never error.
86 invariant(PlanExecutor::ADVANCED == state || PlanExecutor::IS_EOF == state);
87
88 return false;
89 }
90
91 /**
92 * Checks the collection's metadata for a successful split on the specified chunkRange using the
93 * specified splitKeys. Returns false if the metadata's chunks don't match the new chunk
94 * boundaries exactly.
95 */
checkMetadataForSuccessfulSplitChunk(OperationContext * opCtx,const NamespaceString & nss,const ChunkRange & chunkRange,const std::vector<BSONObj> & splitKeys)96 bool checkMetadataForSuccessfulSplitChunk(OperationContext* opCtx,
97 const NamespaceString& nss,
98 const ChunkRange& chunkRange,
99 const std::vector<BSONObj>& splitKeys) {
100 const auto metadataAfterSplit = [&] {
101 AutoGetCollection autoColl(opCtx, nss, MODE_IS);
102 return CollectionShardingState::get(opCtx, nss.ns())->getMetadata();
103 }();
104
105 uassert(ErrorCodes::StaleEpoch,
106 str::stream() << "Collection " << nss.ns() << " became unsharded",
107 metadataAfterSplit);
108
109 auto newChunkBounds(splitKeys);
110 auto startKey = chunkRange.getMin();
111 newChunkBounds.push_back(chunkRange.getMax());
112
113 ChunkType nextChunk;
114 for (const auto& endKey : newChunkBounds) {
115 // Check that all new chunks fit the new chunk boundaries
116 if (!metadataAfterSplit->getNextChunk(startKey, &nextChunk) ||
117 nextChunk.getMax().woCompare(endKey)) {
118 return false;
119 }
120
121 startKey = endKey;
122 }
123
124 return true;
125 }
126
127 } // anonymous namespace
128
splitChunk(OperationContext * opCtx,const NamespaceString & nss,const BSONObj & keyPatternObj,const ChunkRange & chunkRange,const std::vector<BSONObj> & splitKeys,const std::string & shardName,const OID & expectedCollectionEpoch)129 StatusWith<boost::optional<ChunkRange>> splitChunk(OperationContext* opCtx,
130 const NamespaceString& nss,
131 const BSONObj& keyPatternObj,
132 const ChunkRange& chunkRange,
133 const std::vector<BSONObj>& splitKeys,
134 const std::string& shardName,
135 const OID& expectedCollectionEpoch) {
136
137 ShardingState* shardingState = ShardingState::get(opCtx);
138 std::string errmsg;
139
140 //
141 // Lock the collection's metadata and get highest version for the current shard
142 // TODO(SERVER-25086): Remove distLock acquisition from split chunk
143 //
144 const std::string whyMessage(
145 str::stream() << "splitting chunk " << chunkRange.toString() << " in " << nss.toString());
146 auto scopedDistLock = Grid::get(opCtx)->catalogClient()->getDistLockManager()->lock(
147 opCtx, nss.ns(), whyMessage, DistLockManager::kSingleLockAttemptTimeout);
148 if (!scopedDistLock.isOK()) {
149 errmsg = str::stream() << "could not acquire collection lock for " << nss.toString()
150 << " to split chunk " << redact(chunkRange.toString()) << " "
151 << causedBy(scopedDistLock.getStatus());
152 return {scopedDistLock.getStatus().code(), errmsg};
153 }
154
155 // If the shard key is hashed, then we must make sure that the split points are of type
156 // NumberLong.
157 if (KeyPattern::isHashedKeyPattern(keyPatternObj)) {
158 for (BSONObj splitKey : splitKeys) {
159 BSONObjIterator it(splitKey);
160 while (it.more()) {
161 BSONElement splitKeyElement = it.next();
162 if (splitKeyElement.type() != NumberLong) {
163 errmsg = str::stream() << "splitChunk cannot split chunk "
164 << chunkRange.toString() << ", split point "
165 << splitKeyElement.toString()
166 << " must be of type "
167 "NumberLong for hashed shard key patterns";
168 return {ErrorCodes::CannotSplit, errmsg};
169 }
170 }
171 }
172 }
173
174 // Commit the split to the config server.
175 auto request =
176 SplitChunkRequest(nss, shardName, expectedCollectionEpoch, chunkRange, splitKeys);
177
178 auto configCmdObj =
179 request.toConfigCommandBSON(ShardingCatalogClient::kMajorityWriteConcern.toBSON());
180
181 auto cmdResponseStatus =
182 Grid::get(opCtx)->shardRegistry()->getConfigShard()->runCommandWithFixedRetryAttempts(
183 opCtx,
184 kPrimaryOnlyReadPreference,
185 "admin",
186 configCmdObj,
187 Shard::RetryPolicy::kIdempotent);
188
189 // If we failed to get any response from the config server at all, despite retries, then we
190 // should just go ahead and fail the whole operation.
191 if (!cmdResponseStatus.isOK()) {
192 return cmdResponseStatus.getStatus();
193 }
194
195 // Check commandStatus and writeConcernStatus
196 auto commandStatus = cmdResponseStatus.getValue().commandStatus;
197 auto writeConcernStatus = cmdResponseStatus.getValue().writeConcernStatus;
198
199 // Send stale epoch if epoch of request did not match epoch of collection
200 if (commandStatus == ErrorCodes::StaleEpoch) {
201 return commandStatus;
202 }
203
204 //
205 // If _configsvrCommitChunkSplit returned an error, refresh and look at the metadata to
206 // determine if the split actually did happen. This can happen if there's a network error
207 // getting the response from the first call to _configsvrCommitChunkSplit, but it actually
208 // succeeds, thus the automatic retry fails with a precondition violation, for example.
209 //
210 if (!commandStatus.isOK() || !writeConcernStatus.isOK()) {
211 {
212 ChunkVersion unusedShardVersion;
213 Status refreshStatus =
214 shardingState->refreshMetadataNow(opCtx, nss, &unusedShardVersion);
215
216 if (!refreshStatus.isOK()) {
217 Status errorStatus = commandStatus.isOK() ? writeConcernStatus : commandStatus;
218 errmsg = str::stream()
219 << "splitChunk failed for chunk " << chunkRange.toString() << ", collection '"
220 << nss.ns() << "' due to " << errorStatus.toString()
221 << ". Attempt to verify if the commit succeeded anyway failed due to: "
222 << refreshStatus.toString();
223
224 warning() << redact(errmsg);
225 return {errorStatus.code(), errmsg};
226 }
227 }
228
229 if (checkMetadataForSuccessfulSplitChunk(opCtx, nss, chunkRange, splitKeys)) {
230 // Split was committed.
231 } else if (!commandStatus.isOK()) {
232 return commandStatus;
233 } else if (!writeConcernStatus.isOK()) {
234 return writeConcernStatus;
235 }
236 }
237
238 AutoGetCollection autoColl(opCtx, nss, MODE_IS);
239
240 Collection* const collection = autoColl.getCollection();
241 if (!collection) {
242 warning() << "will not perform top-chunk checking since " << nss.toString()
243 << " does not exist after splitting";
244 return boost::optional<ChunkRange>(boost::none);
245 }
246
247 // Allow multiKey based on the invariant that shard keys must be single-valued. Therefore,
248 // any multi-key index prefixed by shard key cannot be multikey over the shard key fields.
249 IndexDescriptor* idx =
250 collection->getIndexCatalog()->findShardKeyPrefixedIndex(opCtx, keyPatternObj, false);
251 if (!idx) {
252 return boost::optional<ChunkRange>(boost::none);
253 }
254
255 auto backChunk = ChunkType();
256 backChunk.setMin(splitKeys.back());
257 backChunk.setMax(chunkRange.getMax());
258
259 auto frontChunk = ChunkType();
260 frontChunk.setMin(chunkRange.getMin());
261 frontChunk.setMax(splitKeys.front());
262
263 KeyPattern shardKeyPattern(keyPatternObj);
264 if (shardKeyPattern.globalMax().woCompare(backChunk.getMax()) == 0 &&
265 checkIfSingleDoc(opCtx, collection, idx, &backChunk)) {
266 return boost::optional<ChunkRange>(ChunkRange(backChunk.getMin(), backChunk.getMax()));
267 } else if (shardKeyPattern.globalMin().woCompare(frontChunk.getMin()) == 0 &&
268 checkIfSingleDoc(opCtx, collection, idx, &frontChunk)) {
269 return boost::optional<ChunkRange>(ChunkRange(frontChunk.getMin(), frontChunk.getMax()));
270 }
271
272 return boost::optional<ChunkRange>(boost::none);
273 }
274
275 } // namespace mongo
276