1
2 /**
3 * Copyright (C) 2018-present MongoDB, Inc.
4 *
5 * This program is free software: you can redistribute it and/or modify
6 * it under the terms of the Server Side Public License, version 1,
7 * as published by MongoDB, Inc.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * Server Side Public License for more details.
13 *
14 * You should have received a copy of the Server Side Public License
15 * along with this program. If not, see
16 * <http://www.mongodb.com/licensing/server-side-public-license>.
17 *
18 * As a special exception, the copyright holders give permission to link the
19 * code of portions of this program with the OpenSSL library under certain
20 * conditions as described in each individual source file and distribute
21 * linked combinations including the program with the OpenSSL library. You
22 * must comply with the Server Side Public License in all respects for
23 * all of the code used other than as permitted herein. If you modify file(s)
24 * with this exception, you may extend this exception to your version of the
25 * file(s), but you are not obligated to do so. If you do not wish to do so,
26 * delete this exception statement from your version. If you delete this
27 * exception statement from all source files in the program, then also delete
28 * it in the license file.
29 */
30
31 #include "mongo/platform/basic.h"
32
33 #include "mongo/db/catalog/index_key_validate.h"
34
35 #include <boost/optional.hpp>
36 #include <cmath>
37 #include <limits>
38 #include <set>
39
40 #include "mongo/base/status.h"
41 #include "mongo/base/status_with.h"
42 #include "mongo/db/field_ref.h"
43 #include "mongo/db/index/index_descriptor.h"
44 #include "mongo/db/index_names.h"
45 #include "mongo/db/jsobj.h"
46 #include "mongo/db/matcher/expression_parser.h"
47 #include "mongo/db/namespace_string.h"
48 #include "mongo/db/query/collation/collator_factory_interface.h"
49 #include "mongo/db/service_context.h"
50 #include "mongo/util/fail_point_service.h"
51 #include "mongo/util/mongoutils/str.h"
52 #include "mongo/util/represent_as.h"
53
54 namespace mongo {
55 namespace index_key_validate {
56
57 using std::string;
58
59 using IndexVersion = IndexDescriptor::IndexVersion;
60
61 namespace {
62 // When the skipIndexCreateFieldNameValidation failpoint is enabled, validation for index field
63 // names will be disabled. This will allow for creation of indexes with invalid field names in their
64 // specification.
65 MONGO_FP_DECLARE(skipIndexCreateFieldNameValidation);
66
67 static const std::set<StringData> allowedFieldNames = {
68 IndexDescriptor::k2dIndexMaxFieldName,
69 IndexDescriptor::k2dIndexBitsFieldName,
70 IndexDescriptor::k2dIndexMaxFieldName,
71 IndexDescriptor::k2dIndexMinFieldName,
72 IndexDescriptor::k2dsphereCoarsestIndexedLevel,
73 IndexDescriptor::k2dsphereFinestIndexedLevel,
74 IndexDescriptor::k2dsphereVersionFieldName,
75 IndexDescriptor::kBackgroundFieldName,
76 IndexDescriptor::kCollationFieldName,
77 IndexDescriptor::kDefaultLanguageFieldName,
78 IndexDescriptor::kDropDuplicatesFieldName,
79 IndexDescriptor::kExpireAfterSecondsFieldName,
80 IndexDescriptor::kGeoHaystackBucketSize,
81 IndexDescriptor::kIndexNameFieldName,
82 IndexDescriptor::kIndexVersionFieldName,
83 IndexDescriptor::kKeyPatternFieldName,
84 IndexDescriptor::kLanguageOverrideFieldName,
85 IndexDescriptor::kNamespaceFieldName,
86 IndexDescriptor::kPartialFilterExprFieldName,
87 IndexDescriptor::kSparseFieldName,
88 IndexDescriptor::kStorageEngineFieldName,
89 IndexDescriptor::kTextVersionFieldName,
90 IndexDescriptor::kUniqueFieldName,
91 IndexDescriptor::kWeightsFieldName,
92 // Index creation under legacy writeMode can result in an index spec with an _id field.
93 "_id"};
94
95 static const std::set<StringData> allowedIdIndexFieldNames = {
96 IndexDescriptor::kCollationFieldName,
97 IndexDescriptor::kIndexNameFieldName,
98 IndexDescriptor::kIndexVersionFieldName,
99 IndexDescriptor::kKeyPatternFieldName,
100 IndexDescriptor::kNamespaceFieldName,
101 // Index creation under legacy writeMode can result in an index spec with an _id field.
102 "_id"};
103 }
104
validateKeyPattern(const BSONObj & key,IndexDescriptor::IndexVersion indexVersion)105 Status validateKeyPattern(const BSONObj& key, IndexDescriptor::IndexVersion indexVersion) {
106 const ErrorCodes::Error code = ErrorCodes::CannotCreateIndex;
107
108 if (key.objsize() > 2048)
109 return Status(code, "Index key pattern too large.");
110
111 if (key.isEmpty())
112 return Status(code, "Index keys cannot be empty.");
113
114 string pluginName = IndexNames::findPluginName(key);
115 if (pluginName.size()) {
116 if (!IndexNames::isKnownName(pluginName))
117 return Status(
118 code, mongoutils::str::stream() << "Unknown index plugin '" << pluginName << '\'');
119 }
120
121 BSONObjIterator it(key);
122 while (it.more()) {
123 BSONElement keyElement = it.next();
124
125 switch (indexVersion) {
126 case IndexVersion::kV0:
127 case IndexVersion::kV1: {
128 if (keyElement.type() == BSONType::Object || keyElement.type() == BSONType::Array) {
129 return {code,
130 str::stream() << "Values in index key pattern cannot be of type "
131 << typeName(keyElement.type())
132 << " for index version v:"
133 << static_cast<int>(indexVersion)};
134 }
135
136 break;
137 }
138 case IndexVersion::kV2: {
139 if (keyElement.isNumber()) {
140 double value = keyElement.number();
141 if (std::isnan(value)) {
142 return {code, "Values in the index key pattern cannot be NaN."};
143 } else if (value == 0.0) {
144 return {code, "Values in the index key pattern cannot be 0."};
145 }
146 } else if (keyElement.type() != BSONType::String) {
147 return {code,
148 str::stream()
149 << "Values in v:2 index key pattern cannot be of type "
150 << typeName(keyElement.type())
151 << ". Only numbers > 0, numbers < 0, and strings are allowed."};
152 }
153
154 break;
155 }
156 default:
157 MONGO_UNREACHABLE;
158 }
159
160 if (keyElement.type() == String && pluginName != keyElement.str()) {
161 return Status(code, "Can't use more than one index plugin for a single index.");
162 }
163
164 // Ensure that the fields on which we are building the index are valid: a field must not
165 // begin with a '$' unless it is part of a DBRef or text index, and a field path cannot
166 // contain an empty field. If a field cannot be created or updated, it should not be
167 // indexable.
168
169 FieldRef keyField(keyElement.fieldName());
170
171 const size_t numParts = keyField.numParts();
172 if (numParts == 0) {
173 return Status(code, "Index keys cannot be an empty field.");
174 }
175
176 // "$**" is acceptable for a text index.
177 if (mongoutils::str::equals(keyElement.fieldName(), "$**") &&
178 keyElement.valuestrsafe() == IndexNames::TEXT)
179 continue;
180
181 if (mongoutils::str::equals(keyElement.fieldName(), "_fts") &&
182 keyElement.valuestrsafe() != IndexNames::TEXT) {
183 return Status(code, "Index key contains an illegal field name: '_fts'");
184 }
185
186 for (size_t i = 0; i != numParts; ++i) {
187 const StringData part = keyField.getPart(i);
188
189 // Check if the index key path contains an empty field.
190 if (part.empty()) {
191 return Status(code, "Index keys cannot contain an empty field.");
192 }
193
194 if (part[0] != '$')
195 continue;
196
197 // Check if the '$'-prefixed field is part of a DBRef: since we don't have the
198 // necessary context to validate whether this is a proper DBRef, we allow index
199 // creation on '$'-prefixed names that match those used in a DBRef.
200 const bool mightBePartOfDbRef =
201 (i != 0) && (part == "$db" || part == "$id" || part == "$ref");
202
203 if (!mightBePartOfDbRef) {
204 return Status(code,
205 "Index key contains an illegal field name: "
206 "field name starts with '$'.");
207 }
208 }
209 }
210
211 return Status::OK();
212 }
213
validateIndexSpec(OperationContext * opCtx,const BSONObj & indexSpec,const NamespaceString & expectedNamespace,const ServerGlobalParams::FeatureCompatibility & featureCompatibility)214 StatusWith<BSONObj> validateIndexSpec(
215 OperationContext* opCtx,
216 const BSONObj& indexSpec,
217 const NamespaceString& expectedNamespace,
218 const ServerGlobalParams::FeatureCompatibility& featureCompatibility) {
219 bool hasKeyPatternField = false;
220 bool hasIndexNameField = false;
221 bool hasNamespaceField = false;
222 bool hasVersionField = false;
223 bool hasCollationField = false;
224
225 auto fieldNamesValidStatus = validateIndexSpecFieldNames(indexSpec);
226 if (!fieldNamesValidStatus.isOK()) {
227 return fieldNamesValidStatus;
228 }
229
230 boost::optional<IndexVersion> resolvedIndexVersion;
231
232 for (auto&& indexSpecElem : indexSpec) {
233 auto indexSpecElemFieldName = indexSpecElem.fieldNameStringData();
234 if (IndexDescriptor::kKeyPatternFieldName == indexSpecElemFieldName) {
235 if (indexSpecElem.type() != BSONType::Object) {
236 return {ErrorCodes::TypeMismatch,
237 str::stream() << "The field '" << IndexDescriptor::kKeyPatternFieldName
238 << "' must be an object, but got "
239 << typeName(indexSpecElem.type())};
240 }
241
242 std::vector<StringData> keys;
243 for (auto&& keyElem : indexSpecElem.Obj()) {
244 auto keyElemFieldName = keyElem.fieldNameStringData();
245 if (std::find(keys.begin(), keys.end(), keyElemFieldName) != keys.end()) {
246 return {ErrorCodes::BadValue,
247 str::stream() << "The field '" << keyElemFieldName
248 << "' appears multiple times in the index key pattern "
249 << indexSpecElem.Obj()};
250 }
251 keys.push_back(keyElemFieldName);
252 }
253
254 // Here we always validate the key pattern according to the most recent rules, in order
255 // to enforce that all new indexes have well-formed key patterns.
256 Status keyPatternValidateStatus =
257 validateKeyPattern(indexSpecElem.Obj(), IndexDescriptor::kLatestIndexVersion);
258 if (!keyPatternValidateStatus.isOK()) {
259 return keyPatternValidateStatus;
260 }
261
262 hasKeyPatternField = true;
263 } else if (IndexDescriptor::kIndexNameFieldName == indexSpecElemFieldName) {
264 if (indexSpecElem.type() != BSONType::String) {
265 return {ErrorCodes::TypeMismatch,
266 str::stream() << "The field '" << IndexDescriptor::kIndexNameFieldName
267 << "' must be a string, but got "
268 << typeName(indexSpecElem.type())};
269 }
270
271 hasIndexNameField = true;
272 } else if (IndexDescriptor::kNamespaceFieldName == indexSpecElemFieldName) {
273 if (indexSpecElem.type() != BSONType::String) {
274 return {ErrorCodes::TypeMismatch,
275 str::stream() << "The field '" << IndexDescriptor::kNamespaceFieldName
276 << "' must be a string, but got "
277 << typeName(indexSpecElem.type())};
278 }
279
280 StringData ns = indexSpecElem.valueStringData();
281 if (ns.empty()) {
282 return {ErrorCodes::BadValue,
283 str::stream() << "The field '" << IndexDescriptor::kNamespaceFieldName
284 << "' cannot be an empty string"};
285 }
286
287 if (ns != expectedNamespace.ns()) {
288 return {ErrorCodes::BadValue,
289 str::stream() << "The value of the field '"
290 << IndexDescriptor::kNamespaceFieldName
291 << "' ("
292 << ns
293 << ") doesn't match the namespace '"
294 << expectedNamespace.ns()
295 << "'"};
296 }
297
298 hasNamespaceField = true;
299 } else if (IndexDescriptor::kIndexVersionFieldName == indexSpecElemFieldName) {
300 if (!indexSpecElem.isNumber()) {
301 return {ErrorCodes::TypeMismatch,
302 str::stream() << "The field '" << IndexDescriptor::kIndexVersionFieldName
303 << "' must be a number, but got "
304 << typeName(indexSpecElem.type())};
305 }
306
307 auto requestedIndexVersionAsInt = representAs<int>(indexSpecElem.number());
308 if (!requestedIndexVersionAsInt) {
309 return {ErrorCodes::BadValue,
310 str::stream()
311 << "Index version must be representable as a 32-bit integer, but got "
312 << indexSpecElem.toString(false, false)};
313 }
314
315 const IndexVersion requestedIndexVersion =
316 static_cast<IndexVersion>(*requestedIndexVersionAsInt);
317 auto creationAllowedStatus = IndexDescriptor::isIndexVersionAllowedForCreation(
318 requestedIndexVersion, featureCompatibility, indexSpec);
319 if (!creationAllowedStatus.isOK()) {
320 return creationAllowedStatus;
321 }
322
323 hasVersionField = true;
324 resolvedIndexVersion = requestedIndexVersion;
325 } else if (IndexDescriptor::kCollationFieldName == indexSpecElemFieldName) {
326 if (indexSpecElem.type() != BSONType::Object) {
327 return {ErrorCodes::TypeMismatch,
328 str::stream() << "The field '" << IndexDescriptor::kCollationFieldName
329 << "' must be an object, but got "
330 << typeName(indexSpecElem.type())};
331 }
332
333 if (indexSpecElem.Obj().isEmpty()) {
334 return {ErrorCodes::BadValue,
335 str::stream() << "The field '" << IndexDescriptor::kCollationFieldName
336 << "' cannot be an empty object."};
337 }
338
339 hasCollationField = true;
340 } else if (IndexDescriptor::kPartialFilterExprFieldName == indexSpecElemFieldName) {
341 if (indexSpecElem.type() != BSONType::Object) {
342 return {ErrorCodes::TypeMismatch,
343 str::stream() << "The field '"
344 << IndexDescriptor::kPartialFilterExprFieldName
345 << "' must be an object, but got "
346 << typeName(indexSpecElem.type())};
347 }
348
349 // Just use the simple collator, even though the index may have a separate collation
350 // specified or may inherit the default collation from the collection. It's legal to
351 // parse with the wrong collation, since the collation can be set on a MatchExpression
352 // after the fact. Here, we don't bother checking the collation after the fact, since
353 // this invocation of the parser is just for validity checking.
354 auto simpleCollator = nullptr;
355 boost::intrusive_ptr<ExpressionContext> expCtx(
356 new ExpressionContext(opCtx, simpleCollator));
357
358 // Special match expression features (e.g. $jsonSchema, $expr, ...) are not allowed in
359 // a partialFilterExpression on index creation.
360 auto statusWithMatcher =
361 MatchExpressionParser::parse(indexSpecElem.Obj(),
362 std::move(expCtx),
363 ExtensionsCallbackNoop(),
364 MatchExpressionParser::kBanAllSpecialFeatures);
365 if (!statusWithMatcher.isOK()) {
366 return statusWithMatcher.getStatus();
367 }
368 } else {
369 // We can assume field name is valid at this point. Validation of fieldname is handled
370 // prior to this in validateIndexSpecFieldNames().
371 continue;
372 }
373 }
374
375 if (!resolvedIndexVersion) {
376 resolvedIndexVersion =
377 IndexDescriptor::getDefaultIndexVersion(featureCompatibility.getVersion());
378 }
379
380 if (!hasKeyPatternField) {
381 return {ErrorCodes::FailedToParse,
382 str::stream() << "The '" << IndexDescriptor::kKeyPatternFieldName
383 << "' field is a required property of an index specification"};
384 }
385
386 if (!hasIndexNameField) {
387 return {ErrorCodes::FailedToParse,
388 str::stream() << "The '" << IndexDescriptor::kIndexNameFieldName
389 << "' field is a required property of an index specification"};
390 }
391
392 if (hasCollationField && *resolvedIndexVersion < IndexVersion::kV2) {
393 return {ErrorCodes::CannotCreateIndex,
394 str::stream() << "Invalid index specification " << indexSpec
395 << "; cannot create an index with the '"
396 << IndexDescriptor::kCollationFieldName
397 << "' option and "
398 << IndexDescriptor::kIndexVersionFieldName
399 << "="
400 << static_cast<int>(*resolvedIndexVersion)};
401 }
402
403 if (!hasNamespaceField || !hasVersionField) {
404 BSONObjBuilder bob;
405
406 if (!hasNamespaceField) {
407 // We create a new index specification with the 'ns' field set as 'expectedNamespace' if
408 // the field was omitted.
409 bob.append(IndexDescriptor::kNamespaceFieldName, expectedNamespace.ns());
410 }
411
412 if (!hasVersionField) {
413 // We create a new index specification with the 'v' field set as 'defaultIndexVersion'
414 // if the field was omitted.
415 bob.append(IndexDescriptor::kIndexVersionFieldName,
416 static_cast<int>(*resolvedIndexVersion));
417 }
418
419 bob.appendElements(indexSpec);
420 return bob.obj();
421 }
422
423 return indexSpec;
424 }
425
validateIdIndexSpec(const BSONObj & indexSpec)426 Status validateIdIndexSpec(const BSONObj& indexSpec) {
427 for (auto&& indexSpecElem : indexSpec) {
428 auto indexSpecElemFieldName = indexSpecElem.fieldNameStringData();
429 if (!allowedIdIndexFieldNames.count(indexSpecElemFieldName)) {
430 return {
431 ErrorCodes::InvalidIndexSpecificationOption,
432 str::stream() << "The field '" << indexSpecElemFieldName
433 << "' is not valid for an _id index specification. Specification: "
434 << indexSpec};
435 }
436 }
437
438 auto keyPatternElem = indexSpec[IndexDescriptor::kKeyPatternFieldName];
439 // validateIndexSpec() should have already verified that 'keyPatternElem' is an object.
440 invariant(keyPatternElem.type() == BSONType::Object);
441 if (SimpleBSONObjComparator::kInstance.evaluate(keyPatternElem.Obj() != BSON("_id" << 1))) {
442 return {ErrorCodes::BadValue,
443 str::stream() << "The field '" << IndexDescriptor::kKeyPatternFieldName
444 << "' for an _id index must be {_id: 1}, but got "
445 << keyPatternElem.Obj()};
446 }
447
448 return Status::OK();
449 }
450
451 /**
452 * Top-level index spec field names are validated here. When adding a new field with a document as
453 * value, is the the sub-module's responsibility to ensure that the content is valid and that only
454 * expected fields are present at creation time
455 */
validateIndexSpecFieldNames(const BSONObj & indexSpec)456 Status validateIndexSpecFieldNames(const BSONObj& indexSpec) {
457 if (MONGO_FAIL_POINT(skipIndexCreateFieldNameValidation)) {
458 return Status::OK();
459 }
460
461 for (auto&& indexSpecElem : indexSpec) {
462 auto indexSpecElemFieldName = indexSpecElem.fieldNameStringData();
463 if (!allowedFieldNames.count(indexSpecElemFieldName)) {
464 return {ErrorCodes::InvalidIndexSpecificationOption,
465 str::stream() << "The field '" << indexSpecElemFieldName
466 << "' is not valid for an index specification. Specification: "
467 << indexSpec};
468 }
469 }
470
471 return Status::OK();
472 }
473
validateIndexSpecCollation(OperationContext * opCtx,const BSONObj & indexSpec,const CollatorInterface * defaultCollator)474 StatusWith<BSONObj> validateIndexSpecCollation(OperationContext* opCtx,
475 const BSONObj& indexSpec,
476 const CollatorInterface* defaultCollator) {
477 if (auto collationElem = indexSpec[IndexDescriptor::kCollationFieldName]) {
478 // validateIndexSpec() should have already verified that 'collationElem' is an object.
479 invariant(collationElem.type() == BSONType::Object);
480
481 auto collator = CollatorFactoryInterface::get(opCtx->getServiceContext())
482 ->makeFromBSON(collationElem.Obj());
483 if (!collator.isOK()) {
484 return collator.getStatus();
485 }
486
487 if (collator.getValue()) {
488 // If the collator factory returned a non-null collator, then inject the entire
489 // collation specification into the index specification. This is necessary to fill
490 // in any options that the user omitted.
491 BSONObjBuilder bob;
492
493 for (auto&& indexSpecElem : indexSpec) {
494 if (IndexDescriptor::kCollationFieldName != indexSpecElem.fieldNameStringData()) {
495 bob.append(indexSpecElem);
496 }
497 }
498 bob.append(IndexDescriptor::kCollationFieldName,
499 collator.getValue()->getSpec().toBSON());
500
501 return bob.obj();
502 } else {
503 // If the collator factory returned a null collator (representing the "simple"
504 // collation), then we simply omit the "collation" from the index specification.
505 // This is desirable to make the representation for the "simple" collation
506 // consistent between v=1 and v=2 indexes.
507 return indexSpec.removeField(IndexDescriptor::kCollationFieldName);
508 }
509 } else if (defaultCollator) {
510 // validateIndexSpec() should have added the "v" field if it was not present and
511 // verified that 'versionElem' is a number.
512 auto versionElem = indexSpec[IndexDescriptor::kIndexVersionFieldName];
513 invariant(versionElem.isNumber());
514
515 if (IndexVersion::kV2 <= static_cast<IndexVersion>(versionElem.numberInt())) {
516 // The user did not specify an explicit collation for this index and the collection
517 // has a default collator. If we're building a v=2 index, then we should inherit the
518 // collection default. However, if we're building a v=1 index, then we're implicitly
519 // building an index that's using the "simple" collation.
520 BSONObjBuilder bob;
521
522 bob.appendElements(indexSpec);
523 bob.append(IndexDescriptor::kCollationFieldName, defaultCollator->getSpec().toBSON());
524
525 return bob.obj();
526 }
527 }
528 return indexSpec;
529 }
530
531 } // namespace index_key_validate
532 } // namespace mongo
533