1 
2 /**
3  *    Copyright (C) 2018-present MongoDB, Inc.
4  *
5  *    This program is free software: you can redistribute it and/or modify
6  *    it under the terms of the Server Side Public License, version 1,
7  *    as published by MongoDB, Inc.
8  *
9  *    This program is distributed in the hope that it will be useful,
10  *    but WITHOUT ANY WARRANTY; without even the implied warranty of
11  *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  *    Server Side Public License for more details.
13  *
14  *    You should have received a copy of the Server Side Public License
15  *    along with this program. If not, see
16  *    <http://www.mongodb.com/licensing/server-side-public-license>.
17  *
18  *    As a special exception, the copyright holders give permission to link the
19  *    code of portions of this program with the OpenSSL library under certain
20  *    conditions as described in each individual source file and distribute
21  *    linked combinations including the program with the OpenSSL library. You
22  *    must comply with the Server Side Public License in all respects for
23  *    all of the code used other than as permitted herein. If you modify file(s)
24  *    with this exception, you may extend this exception to your version of the
25  *    file(s), but you are not obligated to do so. If you do not wish to do so,
26  *    delete this exception statement from your version. If you delete this
27  *    exception statement from all source files in the program, then also delete
28  *    it in the license file.
29  */
30 
31 #include "mongo/platform/basic.h"
32 
33 #include <limits>
34 #include <random>
35 
36 #include "mongo/base/status_with.h"
37 #include "mongo/bson/bsonmisc.h"
38 #include "mongo/bson/bsonobjbuilder.h"
39 #include "mongo/db/ftdc/compressor.h"
40 #include "mongo/db/ftdc/config.h"
41 #include "mongo/db/ftdc/decompressor.h"
42 #include "mongo/db/ftdc/ftdc_test.h"
43 #include "mongo/db/jsobj.h"
44 #include "mongo/unittest/unittest.h"
45 #include "mongo/util/assert_util.h"
46 
47 namespace mongo {
48 
// Asserts that 'st' is OK and that the optional it carries is not initialized.
// The body is wrapped in do/while(false) so the macro expands to exactly one statement and stays
// correct under an unbraced if/else; the argument is parenthesized so any expression can be passed.
#define ASSERT_HAS_SPACE(st)                            \
    do {                                                \
        ASSERT_TRUE((st).isOK());                       \
        ASSERT_FALSE((st).getValue().is_initialized()); \
    } while (false)
52 
// Asserts that 'st' is OK, that the optional it carries is initialized, and that the second tuple
// element equals FTDCCompressor::CompressorState::kSchemaChanged.
// The body is wrapped in do/while(false) so the macro expands to exactly one statement and stays
// correct under an unbraced if/else; the argument is parenthesized so any expression can be passed.
#define ASSERT_SCHEMA_CHANGED(st)                                     \
    do {                                                              \
        ASSERT_TRUE((st).isOK());                                     \
        ASSERT_TRUE((st).getValue().is_initialized());                \
        ASSERT_TRUE(std::get<1>((st).getValue().get()) ==             \
                    FTDCCompressor::CompressorState::kSchemaChanged); \
    } while (false)
58 
// Asserts that 'st' is OK, that the optional it carries is initialized, and that the second tuple
// element equals FTDCCompressor::CompressorState::kCompressorFull.
// The body is wrapped in do/while(false) so the macro expands to exactly one statement and stays
// correct under an unbraced if/else; the argument is parenthesized so any expression can be passed.
#define ASSERT_FULL(st)                                                \
    do {                                                               \
        ASSERT_TRUE((st).isOK());                                      \
        ASSERT_TRUE((st).getValue().is_initialized());                 \
        ASSERT_TRUE(std::get<1>((st).getValue().get()) ==              \
                    FTDCCompressor::CompressorState::kCompressorFull); \
    } while (false)
64 
65 // Sanity check
TEST(FTDCCompressor,TestBasic)66 TEST(FTDCCompressor, TestBasic) {
67     FTDCConfig config;
68     FTDCCompressor c(&config);
69 
70     auto st = c.addSample(BSON("name"
71                                << "joe"
72                                << "key1"
73                                << 33
74                                << "key2"
75                                << 42),
76                           Date_t());
77     ASSERT_HAS_SPACE(st);
78     st = c.addSample(BSON("name"
79                           << "joe"
80                           << "key1"
81                           << 34
82                           << "key2"
83                           << 45),
84                      Date_t());
85     ASSERT_HAS_SPACE(st);
86 
87 
88     StatusWith<std::tuple<ConstDataRange, Date_t>> swBuf = c.getCompressedSamples();
89 
90     ASSERT_TRUE(swBuf.isOK());
91     ASSERT_TRUE(std::get<0>(swBuf.getValue()).length() > 0);
92     ASSERT_TRUE(std::get<0>(swBuf.getValue()).data() != nullptr);
93 }
94 
95 // Test strings only
TEST(FTDCCompressor,TestStrings)96 TEST(FTDCCompressor, TestStrings) {
97     FTDCConfig config;
98     FTDCCompressor c(&config);
99 
100     auto st = c.addSample(BSON("name"
101                                << "joe"
102                                << "key1"
103                                << "value1"
104                                << "key2"
105                                << "value2"),
106                           Date_t());
107     ASSERT_HAS_SPACE(st);
108     st = c.addSample(BSON("name"
109                           << "joe"
110                           << "key1"
111                           << "value3"
112                           << "key2"
113                           << "value6"),
114                      Date_t());
115     ASSERT_HAS_SPACE(st);
116 
117     StatusWith<std::tuple<ConstDataRange, Date_t>> swBuf = c.getCompressedSamples();
118 
119     ASSERT_TRUE(swBuf.isOK());
120     ASSERT_TRUE(std::get<0>(swBuf.getValue()).length() > 0);
121     ASSERT_TRUE(std::get<0>(swBuf.getValue()).data() != nullptr);
122 }
123 
124 /**
125  * Test class that records a series of samples and ensures that compress + decompress round trips
126  * them correctly.
127  */
128 class TestTie {
129 public:
TestTie(FTDCValidationMode mode=FTDCValidationMode::kStrict)130     TestTie(FTDCValidationMode mode = FTDCValidationMode::kStrict)
131         : _compressor(&_config), _mode(mode) {}
132 
~TestTie()133     ~TestTie() {
134         validate(boost::none);
135     }
136 
137     StatusWith<boost::optional<std::tuple<ConstDataRange, FTDCCompressor::CompressorState, Date_t>>>
addSample(const BSONObj & sample)138     addSample(const BSONObj& sample) {
139         auto st = _compressor.addSample(sample, Date_t());
140 
141         if (!st.getValue().is_initialized()) {
142             _docs.emplace_back(sample);
143         } else if (std::get<1>(st.getValue().get()) ==
144                    FTDCCompressor::CompressorState::kSchemaChanged) {
145             validate(std::get<0>(st.getValue().get()));
146             _docs.clear();
147             _docs.emplace_back(sample);
148         } else if (std::get<1>(st.getValue().get()) ==
149                    FTDCCompressor::CompressorState::kCompressorFull) {
150             _docs.emplace_back(sample);
151             validate(std::get<0>(st.getValue().get()));
152             _docs.clear();
153         } else {
154             MONGO_UNREACHABLE;
155         }
156 
157         return st;
158     }
159 
validate(boost::optional<ConstDataRange> cdr)160     void validate(boost::optional<ConstDataRange> cdr) {
161         std::vector<BSONObj> list;
162         if (cdr.is_initialized()) {
163             auto sw = _decompressor.uncompress(cdr.get());
164             ASSERT_TRUE(sw.isOK());
165             list = sw.getValue();
166         } else {
167             auto swBuf = _compressor.getCompressedSamples();
168             ASSERT_TRUE(swBuf.isOK());
169             auto sw = _decompressor.uncompress(std::get<0>(swBuf.getValue()));
170             ASSERT_TRUE(sw.isOK());
171 
172             list = sw.getValue();
173         }
174 
175         ValidateDocumentList(list, _docs, _mode);
176     }
177 
178 private:
179     std::vector<BSONObj> _docs;
180     FTDCConfig _config;
181     FTDCCompressor _compressor;
182     FTDCDecompressor _decompressor;
183     FTDCValidationMode _mode;
184 };
185 
186 // Test various schema changes
TEST(FTDCCompressor,TestSchemaChanges)187 TEST(FTDCCompressor, TestSchemaChanges) {
188     TestTie c;
189 
190     auto st = c.addSample(BSON("name"
191                                << "joe"
192                                << "key1"
193                                << 33
194                                << "key2"
195                                << 42));
196     ASSERT_HAS_SPACE(st);
197     st = c.addSample(BSON("name"
198                           << "joe"
199                           << "key1"
200                           << 34
201                           << "key2"
202                           << 45));
203     ASSERT_HAS_SPACE(st);
204     st = c.addSample(BSON("name"
205                           << "joe"
206                           << "key1"
207                           << 34
208                           << "key2"
209                           << 45));
210     ASSERT_HAS_SPACE(st);
211 
212     // Add Field
213     st = c.addSample(BSON("name"
214                           << "joe"
215                           << "key1"
216                           << 34
217                           << "key2"
218                           << 45
219                           << "key3"
220                           << 47));
221     ASSERT_SCHEMA_CHANGED(st);
222 
223     st = c.addSample(BSON("name"
224                           << "joe"
225                           << "key1"
226                           << 34
227                           << "key2"
228                           << 45
229                           << "key3"
230                           << 47));
231     ASSERT_HAS_SPACE(st);
232 
233     // Rename field
234     st = c.addSample(BSON("name"
235                           << "joe"
236                           << "key1"
237                           << 34
238                           << "key5"
239                           << 45
240                           << "key3"
241                           << 47));
242     ASSERT_SCHEMA_CHANGED(st);
243 
244     // Change type
245     st = c.addSample(BSON("name"
246                           << "joe"
247                           << "key1"
248                           << 34
249                           << "key5"
250                           << "45"
251                           << "key3"
252                           << 47));
253     ASSERT_SCHEMA_CHANGED(st);
254 
255     // Add Field
256     st = c.addSample(BSON("name"
257                           << "joe"
258                           << "key1"
259                           << 34
260                           << "key2"
261                           << 45
262                           << "key3"
263                           << 47
264                           << "key7"
265                           << 34
266                           << "key9"
267                           << 45
268                           << "key13"
269                           << 47));
270     ASSERT_SCHEMA_CHANGED(st);
271 
272     // Remove Field
273     st = c.addSample(BSON("name"
274                           << "joe"
275                           << "key7"
276                           << 34
277                           << "key9"
278                           << 45
279                           << "key13"
280                           << 47));
281     ASSERT_SCHEMA_CHANGED(st);
282 
283     st = c.addSample(BSON("name"
284                           << "joe"
285                           << "key7"
286                           << 34
287                           << "key9"
288                           << 45
289                           << "key13"
290                           << 47));
291     ASSERT_HAS_SPACE(st);
292 
293     // Start new batch
294     st = c.addSample(BSON("name"
295                           << "joe"
296                           << "key7"
297                           << 5));
298     ASSERT_SCHEMA_CHANGED(st);
299 
300     // Change field to object
301     st = c.addSample(BSON("name"
302                           << "joe"
303                           << "key7"
304                           << BSON(  // nested object
305                                  "a" << 1)));
306     ASSERT_SCHEMA_CHANGED(st);
307 
308     // Change field from object to number
309     st = c.addSample(BSON("name"
310                           << "joe"
311                           << "key7"
312                           << 7));
313     ASSERT_SCHEMA_CHANGED(st);
314 
315     // Change field from number to array
316     st = c.addSample(BSON("name"
317                           << "joe"
318                           << "key7"
319                           << BSON_ARRAY(13 << 17)));
320     ASSERT_SCHEMA_CHANGED(st);
321 
322     // Change field from array to number
323     st = c.addSample(BSON("name"
324                           << "joe"
325                           << "key7"
326                           << 19));
327     ASSERT_SCHEMA_CHANGED(st);
328 
329 
330     // New Schema
331     st = c.addSample(BSON("_id" << 1));
332     ASSERT_SCHEMA_CHANGED(st);
333 
334     // Change field to oid
335     st = c.addSample(BSON(GENOID));
336     ASSERT_SCHEMA_CHANGED(st);
337 
338     // Change field from oid to object
339     st = c.addSample(BSON("_id" << BSON("sub1" << 1)));
340     ASSERT_SCHEMA_CHANGED(st);
341 
342     // Change field from object to oid
343     st = c.addSample(BSON(GENOID));
344     ASSERT_SCHEMA_CHANGED(st);
345 }
346 
347 // Test various schema changes with strings
TEST(FTDCCompressorTest,TestStringSchemaChanges)348 TEST(FTDCCompressorTest, TestStringSchemaChanges) {
349     TestTie c(FTDCValidationMode::kWeak);
350 
351     auto st = c.addSample(BSON("str1"
352                                << "joe"
353                                << "int1"
354                                << 42));
355     ASSERT_HAS_SPACE(st);
356     st = c.addSample(BSON("str1"
357                           << "joe"
358                           << "int1"
359                           << 45));
360     ASSERT_HAS_SPACE(st);
361 
362     // Add string field
363     st = c.addSample(BSON("str1"
364                           << "joe"
365                           << "str2"
366                           << "smith"
367                           << "int1"
368                           << 47));
369     ASSERT_HAS_SPACE(st);
370 
371     // Reset schema by renaming a int field
372     st = c.addSample(BSON("str1"
373                           << "joe"
374                           << "str2"
375                           << "smith"
376                           << "int2"
377                           << 48));
378     ASSERT_SCHEMA_CHANGED(st);
379 
380     // Remove string field
381     st = c.addSample(BSON("str1"
382                           << "joe"
383                           << "int2"
384                           << 49));
385     ASSERT_HAS_SPACE(st);
386 
387 
388     // Add string field as last element
389     st = c.addSample(BSON("str1"
390                           << "joe"
391                           << "int2"
392                           << 50
393                           << "str3"
394                           << "bar"));
395     ASSERT_HAS_SPACE(st);
396 
397     // Reset schema by renaming a int field
398     st = c.addSample(BSON("str1"
399                           << "joe"
400                           << "int1"
401                           << 51
402                           << "str3"
403                           << "bar"));
404     ASSERT_SCHEMA_CHANGED(st);
405 
406     // Remove string field as last element
407     st = c.addSample(BSON("str1"
408                           << "joe"
409                           << "int1"
410                           << 52));
411     ASSERT_HAS_SPACE(st);
412 
413 
414     // Add 2 string fields
415     st = c.addSample(BSON("str1"
416                           << "joe"
417                           << "str2"
418                           << "smith"
419                           << "str3"
420                           << "foo"
421                           << "int1"
422                           << 53));
423     ASSERT_HAS_SPACE(st);
424 
425     // Reset schema by renaming a int field
426     st = c.addSample(BSON("str1"
427                           << "joe"
428                           << "str2"
429                           << "smith"
430                           << "str3"
431                           << "foo"
432                           << "int2"
433                           << 54));
434     ASSERT_SCHEMA_CHANGED(st);
435 
436     // Remove 2 string fields
437     st = c.addSample(BSON("str1"
438                           << "joe"
439                           << "int2"
440                           << 55));
441     ASSERT_HAS_SPACE(st);
442 
443     // Change string to number
444     st = c.addSample(BSON("str1" << 12 << "int1" << 56));
445     ASSERT_SCHEMA_CHANGED(st);
446 
447     // Change number to string
448     st = c.addSample(BSON("str1"
449                           << "joe"
450                           << "int1"
451                           << 67));
452     ASSERT_SCHEMA_CHANGED(st);
453 }
454 
455 // Ensure changing between the various number formats is considered compatible
TEST(FTDCCompressor,TestNumbersCompat)456 TEST(FTDCCompressor, TestNumbersCompat) {
457     TestTie c;
458 
459     auto st = c.addSample(BSON("name"
460                                << "joe"
461                                << "key1"
462                                << 33
463                                << "key2"
464                                << 42LL));
465     ASSERT_HAS_SPACE(st);
466     st = c.addSample(BSON("name"
467                           << "joe"
468                           << "key1"
469                           << 34LL
470                           << "key2"
471                           << 45.0f));
472     ASSERT_HAS_SPACE(st);
473     st = c.addSample(BSON("name"
474                           << "joe"
475                           << "key1"
476                           << static_cast<char>(32)
477                           << "key2"
478                           << 45.0F));
479     ASSERT_HAS_SPACE(st);
480 }
481 
482 // Test various date time types
TEST(FTDCCompressor,TestDateTimeTypes)483 TEST(FTDCCompressor, TestDateTimeTypes) {
484     TestTie c;
485     for (int i = 0; i < 10; i++) {
486         BSONObjBuilder builder1;
487         builder1.append("ts", Timestamp(0x556677LL + i * 1356, 0x11223344LL + i * 2396));
488         builder1.append("d1", Date_t::fromMillisSinceEpoch((0x556677LL + i * 1356) / 1000));
489         BSONObj obj = builder1.obj().getOwned();
490 
491         auto st = c.addSample(obj);
492         ASSERT_HAS_SPACE(st);
493     }
494 }
495 
496 // Test all types
TEST(FTDCCompressor,Types)497 TEST(FTDCCompressor, Types) {
498     TestTie c;
499 
500     auto st = c.addSample(BSON("name"
501                                << "joe"
502                                << "key1"
503                                << 33
504                                << "key2"
505                                << 42LL));
506     ASSERT_HAS_SPACE(st);
507 
508     const char bytes[] = {0x1, 0x2, 0x3};
509     BSONObj o = BSON("created" << DATENOW  // date_t
510                                << "null"
511                                << BSONNULL  // { a : null }
512                                << "undefined"
513                                << BSONUndefined  // { a : undefined }
514                                << "obj"
515                                << BSON(  // nested object
516                                       "a"
517                                       << "abc"
518                                       << "b"
519                                       << 123LL)
520                                << "foo"
521                                << BSON_ARRAY("bar"
522                                              << "baz"
523                                              << "qux")  // array of strings
524                                << "foo2"
525                                << BSON_ARRAY(5 << 6 << 7)  // array of ints
526                                << "bindata"
527                                << BSONBinData(&bytes[0], 3, bdtCustom)  // bindata
528                                << "oid"
529                                << OID("010203040506070809101112")  // oid
530                                << "bool"
531                                << true  // bool
532                                << "regex"
533                                << BSONRegEx("mongodb")  // regex
534                                << "ref"
535                                << BSONDBRef("c", OID("010203040506070809101112"))  // ref
536                                << "code"
537                                << BSONCode("func f() { return 1; }")  // code
538                                << "codewscope"
539                                << BSONCodeWScope("func f() { return 1; }",
540                                                  BSON("c" << true))  // codew
541                                << "minkey"
542                                << MINKEY  // minkey
543                                << "maxkey"
544                                << MAXKEY  // maxkey
545                      );
546 
547     st = c.addSample(o);
548     ASSERT_SCHEMA_CHANGED(st);
549 
550     st = c.addSample(o);
551     ASSERT_HAS_SPACE(st);
552 
553     st = c.addSample(BSON("name"
554                           << "joe"
555                           << "key1"
556                           << 34LL
557                           << "key2"
558                           << 45.0f));
559     ASSERT_SCHEMA_CHANGED(st);
560     st = c.addSample(BSON("name"
561                           << "joe"
562                           << "key1"
563                           << static_cast<char>(32)
564                           << "key2"
565                           << 45.0F));
566     ASSERT_HAS_SPACE(st);
567 }
568 
569 // Test a full buffer
TEST(FTDCCompressor,TestFull)570 TEST(FTDCCompressor, TestFull) {
571     // Test a large numbers of zeros, and incremental numbers in a full buffer
572     for (int j = 0; j < 2; j++) {
573         TestTie c;
574 
575         auto st = c.addSample(BSON("name"
576                                    << "joe"
577                                    << "key1"
578                                    << 33
579                                    << "key2"
580                                    << 42));
581         ASSERT_HAS_SPACE(st);
582 
583         for (size_t i = 0; i != FTDCConfig::kMaxSamplesPerArchiveMetricChunkDefault - 2; i++) {
584             st = c.addSample(BSON("name"
585                                   << "joe"
586                                   << "key1"
587                                   << static_cast<long long int>(i * j)
588                                   << "key2"
589                                   << 45));
590             ASSERT_HAS_SPACE(st);
591         }
592 
593         st = c.addSample(BSON("name"
594                               << "joe"
595                               << "key1"
596                               << 34
597                               << "key2"
598                               << 45));
599         ASSERT_FULL(st);
600 
601         // Add Value
602         st = c.addSample(BSON("name"
603                               << "joe"
604                               << "key1"
605                               << 34
606                               << "key2"
607                               << 45));
608         ASSERT_HAS_SPACE(st);
609     }
610 }
611 
612 template <typename T>
generateSample(std::random_device & rd,T generator,size_t count)613 BSONObj generateSample(std::random_device& rd, T generator, size_t count) {
614     BSONObjBuilder builder;
615 
616     for (size_t i = 0; i < count; ++i) {
617         builder.append("key", generator(rd));
618     }
619 
620     return builder.obj();
621 }
622 
623 // Test many metrics
TEST(ZFTDCCompressor,TestManyMetrics)624 TEST(ZFTDCCompressor, TestManyMetrics) {
625     std::random_device rd;
626     std::mt19937 gen(rd());
627 
628     std::uniform_int_distribution<long long> genValues(1, std::numeric_limits<long long>::max());
629     const size_t metrics = 1000;
630 
631     // Test a large numbers of zeros, and incremental numbers in a full buffer
632     for (int j = 0; j < 2; j++) {
633         TestTie c;
634 
635         auto st = c.addSample(generateSample(rd, genValues, metrics));
636         ASSERT_HAS_SPACE(st);
637 
638         for (size_t i = 0; i != FTDCConfig::kMaxSamplesPerArchiveMetricChunkDefault - 2; i++) {
639             st = c.addSample(generateSample(rd, genValues, metrics));
640             ASSERT_HAS_SPACE(st);
641         }
642 
643         st = c.addSample(generateSample(rd, genValues, metrics));
644         ASSERT_FULL(st);
645 
646         // Add Value
647         st = c.addSample(generateSample(rd, genValues, metrics));
648         ASSERT_HAS_SPACE(st);
649     }
650 }
651 
652 }  // namespace mongo
653