1 
2 /**
3  *    Copyright (C) 2018-present MongoDB, Inc.
4  *
5  *    This program is free software: you can redistribute it and/or modify
6  *    it under the terms of the Server Side Public License, version 1,
7  *    as published by MongoDB, Inc.
8  *
9  *    This program is distributed in the hope that it will be useful,
10  *    but WITHOUT ANY WARRANTY; without even the implied warranty of
11  *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  *    Server Side Public License for more details.
13  *
14  *    You should have received a copy of the Server Side Public License
15  *    along with this program. If not, see
16  *    <http://www.mongodb.com/licensing/server-side-public-license>.
17  *
18  *    As a special exception, the copyright holders give permission to link the
19  *    code of portions of this program with the OpenSSL library under certain
20  *    conditions as described in each individual source file and distribute
21  *    linked combinations including the program with the OpenSSL library. You
22  *    must comply with the Server Side Public License in all respects for
23  *    all of the code used other than as permitted herein. If you modify file(s)
24  *    with this exception, you may extend this exception to your version of the
25  *    file(s), but you are not obligated to do so. If you do not wish to do so,
26  *    delete this exception statement from your version. If you delete this
27  *    exception statement from all source files in the program, then also delete
28  *    it in the license file.
29  */
30 
31 #include "mongo/platform/basic.h"
32 
33 #include "mongo/db/catalog/index_key_validate.h"
34 
35 #include <boost/optional.hpp>
36 #include <cmath>
37 #include <limits>
38 #include <set>
39 
40 #include "mongo/base/status.h"
41 #include "mongo/base/status_with.h"
42 #include "mongo/db/field_ref.h"
43 #include "mongo/db/index/index_descriptor.h"
44 #include "mongo/db/index_names.h"
45 #include "mongo/db/jsobj.h"
46 #include "mongo/db/matcher/expression_parser.h"
47 #include "mongo/db/namespace_string.h"
48 #include "mongo/db/query/collation/collator_factory_interface.h"
49 #include "mongo/db/service_context.h"
50 #include "mongo/util/fail_point_service.h"
51 #include "mongo/util/mongoutils/str.h"
52 #include "mongo/util/represent_as.h"
53 
54 namespace mongo {
55 namespace index_key_validate {
56 
57 using std::string;
58 
59 using IndexVersion = IndexDescriptor::IndexVersion;
60 
61 namespace {
62 // When the skipIndexCreateFieldNameValidation failpoint is enabled, validation for index field
63 // names will be disabled. This will allow for creation of indexes with invalid field names in their
64 // specification.
65 MONGO_FP_DECLARE(skipIndexCreateFieldNameValidation);
66 
67 static const std::set<StringData> allowedFieldNames = {
68     IndexDescriptor::k2dIndexMaxFieldName,
69     IndexDescriptor::k2dIndexBitsFieldName,
70     IndexDescriptor::k2dIndexMaxFieldName,
71     IndexDescriptor::k2dIndexMinFieldName,
72     IndexDescriptor::k2dsphereCoarsestIndexedLevel,
73     IndexDescriptor::k2dsphereFinestIndexedLevel,
74     IndexDescriptor::k2dsphereVersionFieldName,
75     IndexDescriptor::kBackgroundFieldName,
76     IndexDescriptor::kCollationFieldName,
77     IndexDescriptor::kDefaultLanguageFieldName,
78     IndexDescriptor::kDropDuplicatesFieldName,
79     IndexDescriptor::kExpireAfterSecondsFieldName,
80     IndexDescriptor::kGeoHaystackBucketSize,
81     IndexDescriptor::kIndexNameFieldName,
82     IndexDescriptor::kIndexVersionFieldName,
83     IndexDescriptor::kKeyPatternFieldName,
84     IndexDescriptor::kLanguageOverrideFieldName,
85     IndexDescriptor::kNamespaceFieldName,
86     IndexDescriptor::kPartialFilterExprFieldName,
87     IndexDescriptor::kSparseFieldName,
88     IndexDescriptor::kStorageEngineFieldName,
89     IndexDescriptor::kTextVersionFieldName,
90     IndexDescriptor::kUniqueFieldName,
91     IndexDescriptor::kWeightsFieldName,
92     // Index creation under legacy writeMode can result in an index spec with an _id field.
93     "_id"};
94 
95 static const std::set<StringData> allowedIdIndexFieldNames = {
96     IndexDescriptor::kCollationFieldName,
97     IndexDescriptor::kIndexNameFieldName,
98     IndexDescriptor::kIndexVersionFieldName,
99     IndexDescriptor::kKeyPatternFieldName,
100     IndexDescriptor::kNamespaceFieldName,
101     // Index creation under legacy writeMode can result in an index spec with an _id field.
102     "_id"};
103 }
104 
validateKeyPattern(const BSONObj & key,IndexDescriptor::IndexVersion indexVersion)105 Status validateKeyPattern(const BSONObj& key, IndexDescriptor::IndexVersion indexVersion) {
106     const ErrorCodes::Error code = ErrorCodes::CannotCreateIndex;
107 
108     if (key.objsize() > 2048)
109         return Status(code, "Index key pattern too large.");
110 
111     if (key.isEmpty())
112         return Status(code, "Index keys cannot be empty.");
113 
114     string pluginName = IndexNames::findPluginName(key);
115     if (pluginName.size()) {
116         if (!IndexNames::isKnownName(pluginName))
117             return Status(
118                 code, mongoutils::str::stream() << "Unknown index plugin '" << pluginName << '\'');
119     }
120 
121     BSONObjIterator it(key);
122     while (it.more()) {
123         BSONElement keyElement = it.next();
124 
125         switch (indexVersion) {
126             case IndexVersion::kV0:
127             case IndexVersion::kV1: {
128                 if (keyElement.type() == BSONType::Object || keyElement.type() == BSONType::Array) {
129                     return {code,
130                             str::stream() << "Values in index key pattern cannot be of type "
131                                           << typeName(keyElement.type())
132                                           << " for index version v:"
133                                           << static_cast<int>(indexVersion)};
134                 }
135 
136                 break;
137             }
138             case IndexVersion::kV2: {
139                 if (keyElement.isNumber()) {
140                     double value = keyElement.number();
141                     if (std::isnan(value)) {
142                         return {code, "Values in the index key pattern cannot be NaN."};
143                     } else if (value == 0.0) {
144                         return {code, "Values in the index key pattern cannot be 0."};
145                     }
146                 } else if (keyElement.type() != BSONType::String) {
147                     return {code,
148                             str::stream()
149                                 << "Values in v:2 index key pattern cannot be of type "
150                                 << typeName(keyElement.type())
151                                 << ". Only numbers > 0, numbers < 0, and strings are allowed."};
152                 }
153 
154                 break;
155             }
156             default:
157                 MONGO_UNREACHABLE;
158         }
159 
160         if (keyElement.type() == String && pluginName != keyElement.str()) {
161             return Status(code, "Can't use more than one index plugin for a single index.");
162         }
163 
164         // Ensure that the fields on which we are building the index are valid: a field must not
165         // begin with a '$' unless it is part of a DBRef or text index, and a field path cannot
166         // contain an empty field. If a field cannot be created or updated, it should not be
167         // indexable.
168 
169         FieldRef keyField(keyElement.fieldName());
170 
171         const size_t numParts = keyField.numParts();
172         if (numParts == 0) {
173             return Status(code, "Index keys cannot be an empty field.");
174         }
175 
176         // "$**" is acceptable for a text index.
177         if (mongoutils::str::equals(keyElement.fieldName(), "$**") &&
178             keyElement.valuestrsafe() == IndexNames::TEXT)
179             continue;
180 
181         if (mongoutils::str::equals(keyElement.fieldName(), "_fts") &&
182             keyElement.valuestrsafe() != IndexNames::TEXT) {
183             return Status(code, "Index key contains an illegal field name: '_fts'");
184         }
185 
186         for (size_t i = 0; i != numParts; ++i) {
187             const StringData part = keyField.getPart(i);
188 
189             // Check if the index key path contains an empty field.
190             if (part.empty()) {
191                 return Status(code, "Index keys cannot contain an empty field.");
192             }
193 
194             if (part[0] != '$')
195                 continue;
196 
197             // Check if the '$'-prefixed field is part of a DBRef: since we don't have the
198             // necessary context to validate whether this is a proper DBRef, we allow index
199             // creation on '$'-prefixed names that match those used in a DBRef.
200             const bool mightBePartOfDbRef =
201                 (i != 0) && (part == "$db" || part == "$id" || part == "$ref");
202 
203             if (!mightBePartOfDbRef) {
204                 return Status(code,
205                               "Index key contains an illegal field name: "
206                               "field name starts with '$'.");
207             }
208         }
209     }
210 
211     return Status::OK();
212 }
213 
validateIndexSpec(OperationContext * opCtx,const BSONObj & indexSpec,const NamespaceString & expectedNamespace,const ServerGlobalParams::FeatureCompatibility & featureCompatibility)214 StatusWith<BSONObj> validateIndexSpec(
215     OperationContext* opCtx,
216     const BSONObj& indexSpec,
217     const NamespaceString& expectedNamespace,
218     const ServerGlobalParams::FeatureCompatibility& featureCompatibility) {
219     bool hasKeyPatternField = false;
220     bool hasIndexNameField = false;
221     bool hasNamespaceField = false;
222     bool hasVersionField = false;
223     bool hasCollationField = false;
224 
225     auto fieldNamesValidStatus = validateIndexSpecFieldNames(indexSpec);
226     if (!fieldNamesValidStatus.isOK()) {
227         return fieldNamesValidStatus;
228     }
229 
230     boost::optional<IndexVersion> resolvedIndexVersion;
231 
232     for (auto&& indexSpecElem : indexSpec) {
233         auto indexSpecElemFieldName = indexSpecElem.fieldNameStringData();
234         if (IndexDescriptor::kKeyPatternFieldName == indexSpecElemFieldName) {
235             if (indexSpecElem.type() != BSONType::Object) {
236                 return {ErrorCodes::TypeMismatch,
237                         str::stream() << "The field '" << IndexDescriptor::kKeyPatternFieldName
238                                       << "' must be an object, but got "
239                                       << typeName(indexSpecElem.type())};
240             }
241 
242             std::vector<StringData> keys;
243             for (auto&& keyElem : indexSpecElem.Obj()) {
244                 auto keyElemFieldName = keyElem.fieldNameStringData();
245                 if (std::find(keys.begin(), keys.end(), keyElemFieldName) != keys.end()) {
246                     return {ErrorCodes::BadValue,
247                             str::stream() << "The field '" << keyElemFieldName
248                                           << "' appears multiple times in the index key pattern "
249                                           << indexSpecElem.Obj()};
250                 }
251                 keys.push_back(keyElemFieldName);
252             }
253 
254             // Here we always validate the key pattern according to the most recent rules, in order
255             // to enforce that all new indexes have well-formed key patterns.
256             Status keyPatternValidateStatus =
257                 validateKeyPattern(indexSpecElem.Obj(), IndexDescriptor::kLatestIndexVersion);
258             if (!keyPatternValidateStatus.isOK()) {
259                 return keyPatternValidateStatus;
260             }
261 
262             hasKeyPatternField = true;
263         } else if (IndexDescriptor::kIndexNameFieldName == indexSpecElemFieldName) {
264             if (indexSpecElem.type() != BSONType::String) {
265                 return {ErrorCodes::TypeMismatch,
266                         str::stream() << "The field '" << IndexDescriptor::kIndexNameFieldName
267                                       << "' must be a string, but got "
268                                       << typeName(indexSpecElem.type())};
269             }
270 
271             hasIndexNameField = true;
272         } else if (IndexDescriptor::kNamespaceFieldName == indexSpecElemFieldName) {
273             if (indexSpecElem.type() != BSONType::String) {
274                 return {ErrorCodes::TypeMismatch,
275                         str::stream() << "The field '" << IndexDescriptor::kNamespaceFieldName
276                                       << "' must be a string, but got "
277                                       << typeName(indexSpecElem.type())};
278             }
279 
280             StringData ns = indexSpecElem.valueStringData();
281             if (ns.empty()) {
282                 return {ErrorCodes::BadValue,
283                         str::stream() << "The field '" << IndexDescriptor::kNamespaceFieldName
284                                       << "' cannot be an empty string"};
285             }
286 
287             if (ns != expectedNamespace.ns()) {
288                 return {ErrorCodes::BadValue,
289                         str::stream() << "The value of the field '"
290                                       << IndexDescriptor::kNamespaceFieldName
291                                       << "' ("
292                                       << ns
293                                       << ") doesn't match the namespace '"
294                                       << expectedNamespace.ns()
295                                       << "'"};
296             }
297 
298             hasNamespaceField = true;
299         } else if (IndexDescriptor::kIndexVersionFieldName == indexSpecElemFieldName) {
300             if (!indexSpecElem.isNumber()) {
301                 return {ErrorCodes::TypeMismatch,
302                         str::stream() << "The field '" << IndexDescriptor::kIndexVersionFieldName
303                                       << "' must be a number, but got "
304                                       << typeName(indexSpecElem.type())};
305             }
306 
307             auto requestedIndexVersionAsInt = representAs<int>(indexSpecElem.number());
308             if (!requestedIndexVersionAsInt) {
309                 return {ErrorCodes::BadValue,
310                         str::stream()
311                             << "Index version must be representable as a 32-bit integer, but got "
312                             << indexSpecElem.toString(false, false)};
313             }
314 
315             const IndexVersion requestedIndexVersion =
316                 static_cast<IndexVersion>(*requestedIndexVersionAsInt);
317             auto creationAllowedStatus = IndexDescriptor::isIndexVersionAllowedForCreation(
318                 requestedIndexVersion, featureCompatibility, indexSpec);
319             if (!creationAllowedStatus.isOK()) {
320                 return creationAllowedStatus;
321             }
322 
323             hasVersionField = true;
324             resolvedIndexVersion = requestedIndexVersion;
325         } else if (IndexDescriptor::kCollationFieldName == indexSpecElemFieldName) {
326             if (indexSpecElem.type() != BSONType::Object) {
327                 return {ErrorCodes::TypeMismatch,
328                         str::stream() << "The field '" << IndexDescriptor::kCollationFieldName
329                                       << "' must be an object, but got "
330                                       << typeName(indexSpecElem.type())};
331             }
332 
333             if (indexSpecElem.Obj().isEmpty()) {
334                 return {ErrorCodes::BadValue,
335                         str::stream() << "The field '" << IndexDescriptor::kCollationFieldName
336                                       << "' cannot be an empty object."};
337             }
338 
339             hasCollationField = true;
340         } else if (IndexDescriptor::kPartialFilterExprFieldName == indexSpecElemFieldName) {
341             if (indexSpecElem.type() != BSONType::Object) {
342                 return {ErrorCodes::TypeMismatch,
343                         str::stream() << "The field '"
344                                       << IndexDescriptor::kPartialFilterExprFieldName
345                                       << "' must be an object, but got "
346                                       << typeName(indexSpecElem.type())};
347             }
348 
349             // Just use the simple collator, even though the index may have a separate collation
350             // specified or may inherit the default collation from the collection. It's legal to
351             // parse with the wrong collation, since the collation can be set on a MatchExpression
352             // after the fact. Here, we don't bother checking the collation after the fact, since
353             // this invocation of the parser is just for validity checking.
354             auto simpleCollator = nullptr;
355             boost::intrusive_ptr<ExpressionContext> expCtx(
356                 new ExpressionContext(opCtx, simpleCollator));
357 
358             // Special match expression features (e.g. $jsonSchema, $expr, ...) are not allowed in
359             // a partialFilterExpression on index creation.
360             auto statusWithMatcher =
361                 MatchExpressionParser::parse(indexSpecElem.Obj(),
362                                              std::move(expCtx),
363                                              ExtensionsCallbackNoop(),
364                                              MatchExpressionParser::kBanAllSpecialFeatures);
365             if (!statusWithMatcher.isOK()) {
366                 return statusWithMatcher.getStatus();
367             }
368         } else {
369             // We can assume field name is valid at this point. Validation of fieldname is handled
370             // prior to this in validateIndexSpecFieldNames().
371             continue;
372         }
373     }
374 
375     if (!resolvedIndexVersion) {
376         resolvedIndexVersion =
377             IndexDescriptor::getDefaultIndexVersion(featureCompatibility.getVersion());
378     }
379 
380     if (!hasKeyPatternField) {
381         return {ErrorCodes::FailedToParse,
382                 str::stream() << "The '" << IndexDescriptor::kKeyPatternFieldName
383                               << "' field is a required property of an index specification"};
384     }
385 
386     if (!hasIndexNameField) {
387         return {ErrorCodes::FailedToParse,
388                 str::stream() << "The '" << IndexDescriptor::kIndexNameFieldName
389                               << "' field is a required property of an index specification"};
390     }
391 
392     if (hasCollationField && *resolvedIndexVersion < IndexVersion::kV2) {
393         return {ErrorCodes::CannotCreateIndex,
394                 str::stream() << "Invalid index specification " << indexSpec
395                               << "; cannot create an index with the '"
396                               << IndexDescriptor::kCollationFieldName
397                               << "' option and "
398                               << IndexDescriptor::kIndexVersionFieldName
399                               << "="
400                               << static_cast<int>(*resolvedIndexVersion)};
401     }
402 
403     if (!hasNamespaceField || !hasVersionField) {
404         BSONObjBuilder bob;
405 
406         if (!hasNamespaceField) {
407             // We create a new index specification with the 'ns' field set as 'expectedNamespace' if
408             // the field was omitted.
409             bob.append(IndexDescriptor::kNamespaceFieldName, expectedNamespace.ns());
410         }
411 
412         if (!hasVersionField) {
413             // We create a new index specification with the 'v' field set as 'defaultIndexVersion'
414             // if the field was omitted.
415             bob.append(IndexDescriptor::kIndexVersionFieldName,
416                        static_cast<int>(*resolvedIndexVersion));
417         }
418 
419         bob.appendElements(indexSpec);
420         return bob.obj();
421     }
422 
423     return indexSpec;
424 }
425 
validateIdIndexSpec(const BSONObj & indexSpec)426 Status validateIdIndexSpec(const BSONObj& indexSpec) {
427     for (auto&& indexSpecElem : indexSpec) {
428         auto indexSpecElemFieldName = indexSpecElem.fieldNameStringData();
429         if (!allowedIdIndexFieldNames.count(indexSpecElemFieldName)) {
430             return {
431                 ErrorCodes::InvalidIndexSpecificationOption,
432                 str::stream() << "The field '" << indexSpecElemFieldName
433                               << "' is not valid for an _id index specification. Specification: "
434                               << indexSpec};
435         }
436     }
437 
438     auto keyPatternElem = indexSpec[IndexDescriptor::kKeyPatternFieldName];
439     // validateIndexSpec() should have already verified that 'keyPatternElem' is an object.
440     invariant(keyPatternElem.type() == BSONType::Object);
441     if (SimpleBSONObjComparator::kInstance.evaluate(keyPatternElem.Obj() != BSON("_id" << 1))) {
442         return {ErrorCodes::BadValue,
443                 str::stream() << "The field '" << IndexDescriptor::kKeyPatternFieldName
444                               << "' for an _id index must be {_id: 1}, but got "
445                               << keyPatternElem.Obj()};
446     }
447 
448     return Status::OK();
449 }
450 
451 /**
452  * Top-level index spec field names are validated here. When adding a new field with a document as
453  * value, is the the sub-module's responsibility to ensure that the content is valid and that only
454  * expected fields are present at creation time
455  */
validateIndexSpecFieldNames(const BSONObj & indexSpec)456 Status validateIndexSpecFieldNames(const BSONObj& indexSpec) {
457     if (MONGO_FAIL_POINT(skipIndexCreateFieldNameValidation)) {
458         return Status::OK();
459     }
460 
461     for (auto&& indexSpecElem : indexSpec) {
462         auto indexSpecElemFieldName = indexSpecElem.fieldNameStringData();
463         if (!allowedFieldNames.count(indexSpecElemFieldName)) {
464             return {ErrorCodes::InvalidIndexSpecificationOption,
465                     str::stream() << "The field '" << indexSpecElemFieldName
466                                   << "' is not valid for an index specification. Specification: "
467                                   << indexSpec};
468         }
469     }
470 
471     return Status::OK();
472 }
473 
validateIndexSpecCollation(OperationContext * opCtx,const BSONObj & indexSpec,const CollatorInterface * defaultCollator)474 StatusWith<BSONObj> validateIndexSpecCollation(OperationContext* opCtx,
475                                                const BSONObj& indexSpec,
476                                                const CollatorInterface* defaultCollator) {
477     if (auto collationElem = indexSpec[IndexDescriptor::kCollationFieldName]) {
478         // validateIndexSpec() should have already verified that 'collationElem' is an object.
479         invariant(collationElem.type() == BSONType::Object);
480 
481         auto collator = CollatorFactoryInterface::get(opCtx->getServiceContext())
482                             ->makeFromBSON(collationElem.Obj());
483         if (!collator.isOK()) {
484             return collator.getStatus();
485         }
486 
487         if (collator.getValue()) {
488             // If the collator factory returned a non-null collator, then inject the entire
489             // collation specification into the index specification. This is necessary to fill
490             // in any options that the user omitted.
491             BSONObjBuilder bob;
492 
493             for (auto&& indexSpecElem : indexSpec) {
494                 if (IndexDescriptor::kCollationFieldName != indexSpecElem.fieldNameStringData()) {
495                     bob.append(indexSpecElem);
496                 }
497             }
498             bob.append(IndexDescriptor::kCollationFieldName,
499                        collator.getValue()->getSpec().toBSON());
500 
501             return bob.obj();
502         } else {
503             // If the collator factory returned a null collator (representing the "simple"
504             // collation), then we simply omit the "collation" from the index specification.
505             // This is desirable to make the representation for the "simple" collation
506             // consistent between v=1 and v=2 indexes.
507             return indexSpec.removeField(IndexDescriptor::kCollationFieldName);
508         }
509     } else if (defaultCollator) {
510         // validateIndexSpec() should have added the "v" field if it was not present and
511         // verified that 'versionElem' is a number.
512         auto versionElem = indexSpec[IndexDescriptor::kIndexVersionFieldName];
513         invariant(versionElem.isNumber());
514 
515         if (IndexVersion::kV2 <= static_cast<IndexVersion>(versionElem.numberInt())) {
516             // The user did not specify an explicit collation for this index and the collection
517             // has a default collator. If we're building a v=2 index, then we should inherit the
518             // collection default. However, if we're building a v=1 index, then we're implicitly
519             // building an index that's using the "simple" collation.
520             BSONObjBuilder bob;
521 
522             bob.appendElements(indexSpec);
523             bob.append(IndexDescriptor::kCollationFieldName, defaultCollator->getSpec().toBSON());
524 
525             return bob.obj();
526         }
527     }
528     return indexSpec;
529 }
530 
531 }  // namespace index_key_validate
532 }  // namespace mongo
533