1 
2 /**
3  *    Copyright (C) 2018-present MongoDB, Inc.
4  *
5  *    This program is free software: you can redistribute it and/or modify
6  *    it under the terms of the Server Side Public License, version 1,
7  *    as published by MongoDB, Inc.
8  *
9  *    This program is distributed in the hope that it will be useful,
10  *    but WITHOUT ANY WARRANTY; without even the implied warranty of
11  *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  *    Server Side Public License for more details.
13  *
14  *    You should have received a copy of the Server Side Public License
15  *    along with this program. If not, see
16  *    <http://www.mongodb.com/licensing/server-side-public-license>.
17  *
18  *    As a special exception, the copyright holders give permission to link the
19  *    code of portions of this program with the OpenSSL library under certain
20  *    conditions as described in each individual source file and distribute
21  *    linked combinations including the program with the OpenSSL library. You
22  *    must comply with the Server Side Public License in all respects for
23  *    all of the code used other than as permitted herein. If you modify file(s)
24  *    with this exception, you may extend this exception to your version of the
25  *    file(s), but you are not obligated to do so. If you do not wish to do so,
26  *    delete this exception statement from your version. If you delete this
27  *    exception statement from all source files in the program, then also delete
28  *    it in the license file.
29  */
30 
31 #define MONGO_LOG_DEFAULT_COMPONENT ::mongo::logger::LogComponent::kFTDC
32 
33 #include "mongo/platform/basic.h"
34 
35 #include "mongo/db/ftdc/util.h"
36 
37 #include <boost/filesystem.hpp>
38 
39 #include "mongo/bson/bsonobjbuilder.h"
40 #include "mongo/bson/util/bson_extract.h"
41 #include "mongo/config.h"
42 #include "mongo/db/ftdc/config.h"
43 #include "mongo/db/ftdc/constants.h"
44 #include "mongo/db/jsobj.h"
45 #include "mongo/db/service_context.h"
46 #include "mongo/util/assert_util.h"
47 #include "mongo/util/log.h"
48 #include "mongo/util/mongoutils/str.h"
49 #include "mongo/util/time_support.h"
50 
51 namespace mongo {
52 
// File names used by FTDC. kFTDCInterimFile and kFTDCInterimTempFile are resolved against a
// caller-supplied path by FTDCUtil::getInterimFile() and FTDCUtil::getInterimTempFile() below.
const char kFTDCInterimFile[] = "metrics.interim";
const char kFTDCInterimTempFile[] = "metrics.interim.temp";
// Not referenced elsewhere in this file.
const char kFTDCArchiveFile[] = "metrics";

// Field names of the BSON documents built and parsed by FTDCBSONUtil below: "_id" holds a Date
// and "type" holds the integer value of an FTDCType.
const char kFTDCIdField[] = "_id";
const char kFTDCTypeField[] = "type";

// "data" holds the BinData payload of a metric chunk document; "doc" holds the embedded object of
// a metadata document.
const char kFTDCDataField[] = "data";
const char kFTDCDocField[] = "doc";

// Not referenced elsewhere in this file.
const char kFTDCDocsField[] = "docs";

// Not referenced elsewhere in this file.
const char kFTDCCollectStartField[] = "start";
const char kFTDCCollectEndField[] = "end";

// Definition of the FTDCConfig default period constant (milliseconds, per its name).
const std::int64_t FTDCConfig::kPeriodMillisDefault = 1000;

// Nesting limit shared by extractMetricsFromDocument() and constructDocumentFromMetrics(): a
// recursion depth greater than this value makes them fail with ErrorCodes::BadValue.
const std::size_t kMaxRecursion = 10;
71 
72 namespace FTDCUtil {
73 
74 namespace {
/**
 * Build the path of 'filename' relative to 'file'.
 *
 * When 'file' is a directory, 'filename' is placed inside it. Otherwise 'filename' is placed in
 * the parent directory of 'file', i.e. as a sibling of 'file'.
 */
boost::filesystem::path appendFileName(const boost::filesystem::path& file, const char* filename) {
    const boost::filesystem::path directory =
        boost::filesystem::is_directory(file) ? file : file.parent_path();

    return directory / filename;
}
85 }  // namespace
86 
/**
 * Return the path of the interim file (kFTDCInterimFile) that belongs to 'file': inside 'file'
 * when it is a directory, otherwise next to it.
 */
boost::filesystem::path getInterimFile(const boost::filesystem::path& file) {
    const char* const interimName = kFTDCInterimFile;
    return appendFileName(file, interimName);
}
90 
/**
 * Return the path of the temporary interim file (kFTDCInterimTempFile) that belongs to 'file':
 * inside 'file' when it is a directory, otherwise next to it.
 */
boost::filesystem::path getInterimTempFile(const boost::filesystem::path& file) {
    const char* const interimTempName = kFTDCInterimTempFile;
    return appendFileName(file, interimTempName);
}
94 
roundTime(Date_t now,Milliseconds period)95 Date_t roundTime(Date_t now, Milliseconds period) {
96     // Note: auto type deduction is explicitly avoided here to ensure rigid type correctness
97     long long clock_duration = now.toMillisSinceEpoch();
98 
99     long long now_next_period = clock_duration + period.count();
100 
101     long long excess_time(now_next_period % period.count());
102 
103     long long next_time = now_next_period - excess_time;
104 
105     return Date_t::fromMillisSinceEpoch(next_time);
106 }
107 
/**
 * Derive a path from 'logFile' by removing every file extension from it and then appending
 * "." followed by kFTDCDefaultDirectory.
 */
boost::filesystem::path getMongoSPath(const boost::filesystem::path& logFile) {
    boost::filesystem::path stripped = logFile;

    // Peel off one trailing extension per iteration until none is left.
    while (stripped.has_extension()) {
        const auto text = stripped.generic_string();
        const auto keptLength = text.size() - stripped.extension().size();
        stripped = text.substr(0, keptLength);
    }

    stripped += "." + kFTDCDefaultDirectory.toString();
    return stripped;
}
120 
121 }  // namespace FTDCUtil
122 
123 
124 namespace FTDCBSONUtil {
125 
126 namespace {
127 
128 /**
129  * Iterate a BSONObj but only return fields that have types that FTDC cares about.
130  */
131 class FTDCBSONObjIterator {
132 public:
FTDCBSONObjIterator(const BSONObj & obj)133     FTDCBSONObjIterator(const BSONObj& obj) : _iterator(obj) {
134         advance();
135     }
136 
more()137     bool more() {
138         return !_current.eoo();
139     }
140 
next()141     BSONElement next() {
142         auto ret = _current;
143         advance();
144         return ret;
145     }
146 
147 private:
148     /**
149      * Find the next element that is a valid FTDC type.
150      */
advance()151     void advance() {
152         _current = BSONElement();
153 
154         while (_iterator.more()) {
155 
156             auto elem = _iterator.next();
157             if (isFTDCType(elem.type())) {
158                 _current = elem;
159                 break;
160             }
161         }
162     }
163 
164 private:
165     BSONObjIterator _iterator;
166     BSONElement _current;
167 };
168 
extractMetricsFromDocument(const BSONObj & referenceDoc,const BSONObj & currentDoc,std::vector<std::uint64_t> * metrics,bool matches,size_t recursion)169 StatusWith<bool> extractMetricsFromDocument(const BSONObj& referenceDoc,
170                                             const BSONObj& currentDoc,
171                                             std::vector<std::uint64_t>* metrics,
172                                             bool matches,
173                                             size_t recursion) {
174     if (recursion > kMaxRecursion) {
175         return {ErrorCodes::BadValue, "Recursion limit reached."};
176     }
177 
178     FTDCBSONObjIterator itCurrent(currentDoc);
179     FTDCBSONObjIterator itReference(referenceDoc);
180 
181     while (itCurrent.more()) {
182         // Schema mismatch if current document is longer than reference document
183         if (matches && !itReference.more()) {
184             LOG(4) << "full-time diagnostic data capture schema change: currrent document is "
185                       "longer than reference document";
186             matches = false;
187         }
188 
189         BSONElement currentElement = itCurrent.next();
190         BSONElement referenceElement = matches ? itReference.next() : BSONElement();
191 
192         if (matches) {
193             // Check for matching field names
194             if (referenceElement.fieldNameStringData() != currentElement.fieldNameStringData()) {
195                 LOG(4)
196                     << "full-time diagnostic data capture schema change: field name change - from '"
197                     << referenceElement.fieldNameStringData() << "' to '"
198                     << currentElement.fieldNameStringData() << "'";
199                 matches = false;
200             }
201 
202             // Check that types match, allowing any numeric type to match any other numeric type.
203             // This looseness is necessary because some metrics use varying numeric types,
204             // and if that was considered a schema mismatch, it would increase the number of
205             // reference samples required.
206             if ((currentElement.type() != referenceElement.type()) &&
207                 !(referenceElement.isNumber() == true &&
208                   currentElement.isNumber() == referenceElement.isNumber())) {
209                 LOG(4) << "full-time diagnostic data capture  schema change: field type change for "
210                           "field '"
211                        << referenceElement.fieldNameStringData() << "' from '"
212                        << static_cast<int>(referenceElement.type()) << "' to '"
213                        << static_cast<int>(currentElement.type()) << "'";
214                 matches = false;
215             }
216         }
217 
218         switch (currentElement.type()) {
219             // all numeric types are extracted as long (int64)
220             // this supports the loose schema matching mentioned above,
221             // but does create a range issue for doubles, and requires doubles to be integer
222             case NumberDouble:
223             case NumberInt:
224             case NumberLong:
225             case NumberDecimal:
226                 metrics->emplace_back(currentElement.numberLong());
227                 break;
228 
229             case Bool:
230                 metrics->emplace_back(currentElement.Bool());
231                 break;
232 
233             case Date:
234                 metrics->emplace_back(currentElement.Date().toMillisSinceEpoch());
235                 break;
236 
237             case bsonTimestamp:
238                 // very slightly more space efficient to treat these as two separate metrics
239                 metrics->emplace_back(currentElement.timestamp().getSecs());
240                 metrics->emplace_back(currentElement.timestamp().getInc());
241                 break;
242 
243             case Object:
244             case Array: {
245                 // Maximum recursion is controlled by the documents we collect. Maximum is 5 in the
246                 // current implementation.
247                 auto sw = extractMetricsFromDocument(matches ? referenceElement.Obj() : BSONObj(),
248                                                      currentElement.Obj(),
249                                                      metrics,
250                                                      matches,
251                                                      recursion + 1);
252                 if (!sw.isOK()) {
253                     return sw;
254                 }
255                 matches = matches && sw.getValue();
256             } break;
257 
258             default:
259                 break;
260         }
261     }
262 
263     // schema mismatch if ref is longer than curr
264     if (matches && itReference.more()) {
265         LOG(4) << "full-time diagnostic data capture schema change: reference document is longer "
266                   "then current";
267         matches = false;
268     }
269 
270     return {matches};
271 }
272 
273 }  // namespace
274 
isFTDCType(BSONType type)275 bool isFTDCType(BSONType type) {
276     switch (type) {
277         case NumberDouble:
278         case NumberInt:
279         case NumberLong:
280         case NumberDecimal:
281         case Bool:
282         case Date:
283         case bsonTimestamp:
284         case Object:
285         case Array:
286             return true;
287 
288         default:
289             return false;
290     }
291 }
292 
extractMetricsFromDocument(const BSONObj & referenceDoc,const BSONObj & currentDoc,std::vector<std::uint64_t> * metrics)293 StatusWith<bool> extractMetricsFromDocument(const BSONObj& referenceDoc,
294                                             const BSONObj& currentDoc,
295                                             std::vector<std::uint64_t>* metrics) {
296     return extractMetricsFromDocument(referenceDoc, currentDoc, metrics, true, 0);
297 }
298 
299 namespace {
constructDocumentFromMetrics(const BSONObj & referenceDocument,BSONObjBuilder & builder,const std::vector<std::uint64_t> & metrics,size_t * pos,size_t recursion)300 Status constructDocumentFromMetrics(const BSONObj& referenceDocument,
301                                     BSONObjBuilder& builder,
302                                     const std::vector<std::uint64_t>& metrics,
303                                     size_t* pos,
304                                     size_t recursion) {
305     if (recursion > kMaxRecursion) {
306         return {ErrorCodes::BadValue, "Recursion limit reached."};
307     }
308 
309     BSONObjIterator iterator(referenceDocument);
310     while (iterator.more()) {
311         BSONElement currentElement = iterator.next();
312 
313         switch (currentElement.type()) {
314             case NumberDouble:
315             case NumberInt:
316             case NumberLong:
317             case NumberDecimal:
318                 if (*pos >= metrics.size()) {
319                     return Status(
320                         ErrorCodes::BadValue,
321                         "There are more metrics in the reference document then expected.");
322                 }
323 
324                 builder.append(currentElement.fieldName(),
325                                static_cast<long long int>(metrics[(*pos)++]));
326                 break;
327 
328             case Bool:
329                 if (*pos >= metrics.size()) {
330                     return Status(
331                         ErrorCodes::BadValue,
332                         "There are more metrics in the reference document then expected.");
333                 }
334 
335                 builder.append(currentElement.fieldName(), static_cast<bool>(metrics[(*pos)++]));
336                 break;
337 
338             case Date:
339                 if (*pos >= metrics.size()) {
340                     return Status(
341                         ErrorCodes::BadValue,
342                         "There are more metrics in the reference document then expected.");
343                 }
344 
345                 builder.append(
346                     currentElement.fieldName(),
347                     Date_t::fromMillisSinceEpoch(static_cast<std::uint64_t>(metrics[(*pos)++])));
348                 break;
349 
350             case bsonTimestamp: {
351                 if (*pos + 1 >= metrics.size()) {
352                     return Status(
353                         ErrorCodes::BadValue,
354                         "There are more metrics in the reference document then expected.");
355                 }
356 
357                 std::uint64_t seconds = metrics[(*pos)++];
358                 std::uint64_t increment = metrics[(*pos)++];
359                 builder.append(currentElement.fieldName(), Timestamp(seconds, increment));
360                 break;
361             }
362 
363             case Object: {
364                 BSONObjBuilder sub(builder.subobjStart(currentElement.fieldName()));
365                 auto s = constructDocumentFromMetrics(
366                     currentElement.Obj(), sub, metrics, pos, recursion + 1);
367                 if (!s.isOK()) {
368                     return s;
369                 }
370                 break;
371             }
372 
373             case Array: {
374                 BSONObjBuilder sub(builder.subarrayStart(currentElement.fieldName()));
375                 auto s = constructDocumentFromMetrics(
376                     currentElement.Obj(), sub, metrics, pos, recursion + 1);
377                 if (!s.isOK()) {
378                     return s;
379                 }
380                 break;
381             }
382 
383             default:
384                 builder.append(currentElement);
385                 break;
386         }
387     }
388 
389     return Status::OK();
390 }
391 
392 }  // namespace
393 
constructDocumentFromMetrics(const BSONObj & ref,const std::vector<std::uint64_t> & metrics)394 StatusWith<BSONObj> constructDocumentFromMetrics(const BSONObj& ref,
395                                                  const std::vector<std::uint64_t>& metrics) {
396     size_t at = 0;
397     BSONObjBuilder b;
398     Status s = constructDocumentFromMetrics(ref, b, metrics, &at, 0);
399     if (!s.isOK()) {
400         return StatusWith<BSONObj>(s);
401     }
402 
403     return b.obj();
404 }
405 
/**
 * Wrap 'metadata' in an FTDC metadata document:
 *   { _id: <date>, type: <FTDCType::kMetadata as int>, doc: <metadata> }
 */
BSONObj createBSONMetadataDocument(const BSONObj& metadata, Date_t date) {
    const int typeCode = static_cast<int>(FTDCType::kMetadata);

    BSONObjBuilder metadataDoc;
    metadataDoc.appendDate(kFTDCIdField, date);
    metadataDoc.appendNumber(kFTDCTypeField, typeCode);
    metadataDoc.appendObject(kFTDCDocField, metadata.objdata(), metadata.objsize());

    return metadataDoc.obj();
}
414 
createBSONMetricChunkDocument(ConstDataRange buf,Date_t date)415 BSONObj createBSONMetricChunkDocument(ConstDataRange buf, Date_t date) {
416     BSONObjBuilder builder;
417 
418     builder.appendDate(kFTDCIdField, date);
419     builder.appendNumber(kFTDCTypeField, static_cast<int>(FTDCType::kMetricChunk));
420     builder.appendBinData(kFTDCDataField, buf.length(), BinDataType::BinDataGeneral, buf.data());
421 
422     return builder.obj();
423 }
424 
getBSONDocumentId(const BSONObj & obj)425 StatusWith<Date_t> getBSONDocumentId(const BSONObj& obj) {
426     BSONElement element;
427 
428     Status status = bsonExtractTypedField(obj, kFTDCIdField, BSONType::Date, &element);
429     if (!status.isOK()) {
430         return {status};
431     }
432 
433     return {element.Date()};
434 }
435 
getBSONDocumentType(const BSONObj & obj)436 StatusWith<FTDCType> getBSONDocumentType(const BSONObj& obj) {
437     long long value;
438 
439     Status status = bsonExtractIntegerField(obj, kFTDCTypeField, &value);
440     if (!status.isOK()) {
441         return {status};
442     }
443 
444     if (static_cast<FTDCType>(value) != FTDCType::kMetricChunk &&
445         static_cast<FTDCType>(value) != FTDCType::kMetadata) {
446         return {ErrorCodes::BadValue,
447                 str::stream() << "Field '" << std::string(kFTDCTypeField)
448                               << "' is not an expected value, found '"
449                               << value
450                               << "'"};
451     }
452 
453     return {static_cast<FTDCType>(value)};
454 }
455 
getBSONDocumentFromMetadataDoc(const BSONObj & obj)456 StatusWith<BSONObj> getBSONDocumentFromMetadataDoc(const BSONObj& obj) {
457     if (kDebugBuild) {
458         auto swType = getBSONDocumentType(obj);
459         dassert(swType.isOK() && swType.getValue() == FTDCType::kMetadata);
460     }
461 
462     BSONElement element;
463 
464     Status status = bsonExtractTypedField(obj, kFTDCDocField, BSONType::Object, &element);
465     if (!status.isOK()) {
466         return {status};
467     }
468 
469     return {element.Obj()};
470 }
471 
getMetricsFromMetricDoc(const BSONObj & obj,FTDCDecompressor * decompressor)472 StatusWith<std::vector<BSONObj>> getMetricsFromMetricDoc(const BSONObj& obj,
473                                                          FTDCDecompressor* decompressor) {
474     if (kDebugBuild) {
475         auto swType = getBSONDocumentType(obj);
476         dassert(swType.isOK() && swType.getValue() == FTDCType::kMetricChunk);
477     }
478 
479     BSONElement element;
480 
481     Status status = bsonExtractTypedField(obj, kFTDCDataField, BSONType::BinData, &element);
482     if (!status.isOK()) {
483         return {status};
484     }
485 
486     int length;
487     const char* buffer = element.binData(length);
488     if (length < 0) {
489         return {ErrorCodes::BadValue,
490                 str::stream() << "Field " << std::string(kFTDCTypeField) << " is not a BinData."};
491     }
492 
493     return decompressor->uncompress({buffer, static_cast<std::size_t>(length)});
494 }
495 
496 }  // namespace FTDCBSONUtil
497 
498 }  // namespace mongo
499