1 //  Copyright (c) 2011-present, Facebook, Inc.  All rights reserved.
2 //  This source code is licensed under both the GPLv2 (found in the
3 //  COPYING file in the root directory) and Apache 2.0 License
4 //  (found in the LICENSE.Apache file in the root directory).
5 //
6 // Copyright (c) 2011 The LevelDB Authors. All rights reserved.
7 // Use of this source code is governed by a BSD-style license that can be
8 // found in the LICENSE file. See the AUTHORS file for names of contributors.
9 
10 #ifndef ROCKSDB_LITE
11 
12 #include <stdlib.h>
13 #include <algorithm>
14 #include <map>
15 #include <string>
16 #include <vector>
17 #include "db/db_impl/db_impl.h"
18 #include "db/db_test_util.h"
19 #include "db/version_set.h"
20 #include "db/write_batch_internal.h"
21 #include "file/filename.h"
22 #include "port/stack_trace.h"
23 #include "rocksdb/db.h"
24 #include "rocksdb/env.h"
25 #include "rocksdb/transaction_log.h"
26 #include "test_util/sync_point.h"
27 #include "test_util/testharness.h"
28 #include "test_util/testutil.h"
29 #include "util/string_util.h"
30 
31 
32 namespace ROCKSDB_NAMESPACE {
33 
34 class ObsoleteFilesTest : public DBTestBase {
35  public:
ObsoleteFilesTest()36   ObsoleteFilesTest()
37       : DBTestBase("/obsolete_files_test", /*env_do_fsync=*/true),
38         wal_dir_(dbname_ + "/wal_files") {}
39 
AddKeys(int numkeys,int startkey)40   void AddKeys(int numkeys, int startkey) {
41     WriteOptions options;
42     options.sync = false;
43     for (int i = startkey; i < (numkeys + startkey) ; i++) {
44       std::string temp = ToString(i);
45       Slice key(temp);
46       Slice value(temp);
47       ASSERT_OK(db_->Put(options, key, value));
48     }
49   }
50 
createLevel0Files(int numFiles,int numKeysPerFile)51   void createLevel0Files(int numFiles, int numKeysPerFile) {
52     int startKey = 0;
53     for (int i = 0; i < numFiles; i++) {
54       AddKeys(numKeysPerFile, startKey);
55       startKey += numKeysPerFile;
56       ASSERT_OK(dbfull()->TEST_FlushMemTable());
57       ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable());
58     }
59   }
60 
CheckFileTypeCounts(const std::string & dir,int required_log,int required_sst,int required_manifest)61   void CheckFileTypeCounts(const std::string& dir, int required_log,
62                            int required_sst, int required_manifest) {
63     std::vector<std::string> filenames;
64     ASSERT_OK(env_->GetChildren(dir, &filenames));
65 
66     int log_cnt = 0;
67     int sst_cnt = 0;
68     int manifest_cnt = 0;
69     for (auto file : filenames) {
70       uint64_t number;
71       FileType type;
72       if (ParseFileName(file, &number, &type)) {
73         log_cnt += (type == kWalFile);
74         sst_cnt += (type == kTableFile);
75         manifest_cnt += (type == kDescriptorFile);
76       }
77     }
78     ASSERT_EQ(required_log, log_cnt);
79     ASSERT_EQ(required_sst, sst_cnt);
80     ASSERT_EQ(required_manifest, manifest_cnt);
81   }
82 
ReopenDB()83   void ReopenDB() {
84     Options options = CurrentOptions();
85     // Trigger compaction when the number of level 0 files reaches 2.
86     options.create_if_missing = true;
87     options.level0_file_num_compaction_trigger = 2;
88     options.disable_auto_compactions = false;
89     options.delete_obsolete_files_period_micros = 0;  // always do full purge
90     options.enable_thread_tracking = true;
91     options.write_buffer_size = 1024 * 1024 * 1000;
92     options.target_file_size_base = 1024 * 1024 * 1000;
93     options.max_bytes_for_level_base = 1024 * 1024 * 1000;
94     options.WAL_ttl_seconds = 300;     // Used to test log files
95     options.WAL_size_limit_MB = 1024;  // Used to test log files
96     options.wal_dir = wal_dir_;
97 
98     // Note: the following prevents an otherwise harmless data race between the
99     // test setup code (AddBlobFile) in ObsoleteFilesTest.BlobFiles and the
100     // periodic stat dumping thread.
101     options.stats_dump_period_sec = 0;
102 
103     Destroy(options);
104     Reopen(options);
105   }
106 
107   const std::string wal_dir_;
108 };
109 
// Runs a user-thread FindObsoleteFiles()/PurgeObsoleteFiles() pair that is
// interleaved, via sync point dependencies, with the background compaction's
// own find/purge steps. Every file deletion must succeed, and no file may
// still be marked as grabbed for purge when DBImpl::CloseHelper reports that
// pending purges have finished.
TEST_F(ObsoleteFilesTest, RaceForObsoleteFileDeletion) {
  ReopenDB();
  SyncPoint::GetInstance()->DisableProcessing();
  // Each pair is {predecessor, successor}: the user thread's step ":1" is
  // ordered after the background job has found its obsolete files, and step
  // ":2" after the background job has purged them.
  SyncPoint::GetInstance()->LoadDependency({
      {"DBImpl::BackgroundCallCompaction:FoundObsoleteFiles",
       "ObsoleteFilesTest::RaceForObsoleteFileDeletion:1"},
      {"DBImpl::BackgroundCallCompaction:PurgedObsoleteFiles",
       "ObsoleteFilesTest::RaceForObsoleteFileDeletion:2"},
  });
  SyncPoint::GetInstance()->SetCallBack(
      "DBImpl::DeleteObsoleteFileImpl:AfterDeletion", [&](void* arg) {
        // The sync point hands over the Status of the deletion attempt.
        Status* p_status = static_cast<Status*>(arg);
        assert(p_status);
        ASSERT_OK(*p_status);
      });
  SyncPoint::GetInstance()->SetCallBack(
      "DBImpl::CloseHelper:PendingPurgeFinished", [&](void* arg) {
        // The sync point hands over the set of file numbers grabbed for
        // purge; it has to be empty at this stage.
        std::unordered_set<uint64_t>* files_grabbed_for_purge_ptr =
            static_cast<std::unordered_set<uint64_t>*>(arg);
        assert(files_grabbed_for_purge_ptr);
        ASSERT_TRUE(files_grabbed_for_purge_ptr->empty());
      });
  SyncPoint::GetInstance()->EnableProcessing();

  // Two L0 files reach the compaction trigger configured in ReopenDB().
  createLevel0Files(2, 50000);
  CheckFileTypeCounts(wal_dir_, 1, 0, 0);

  port::Thread user_thread([this]() {
    JobContext job_context(0);
    TEST_SYNC_POINT("ObsoleteFilesTest::RaceForObsoleteFileDeletion:1");
    // FindObsoleteFiles() is called with the DB mutex held.
    dbfull()->TEST_LockMutex();
    dbfull()->FindObsoleteFiles(&job_context, true /* force=true */,
                                false /* no_full_scan=false */);
    dbfull()->TEST_UnlockMutex();
    TEST_SYNC_POINT("ObsoleteFilesTest::RaceForObsoleteFileDeletion:2");
    dbfull()->PurgeObsoleteFiles(job_context);
    job_context.Clean();
  });

  user_thread.join();
}
149 
// Calls SetOptions() several times while file deletions are disabled, then
// re-enables deletions and closes the DB. Afterwards exactly two OPTIONS
// files are expected to remain in the DB directory.
TEST_F(ObsoleteFilesTest, DeleteObsoleteOptionsFile) {
  ReopenDB();
  SyncPoint::GetInstance()->DisableProcessing();
  // Values reported by the two CheckOptionsFiles sync points: a file number
  // from ":1" and a bool from ":2" (collected into `optsfiles_keep`).
  std::vector<uint64_t> optsfiles_nums;
  std::vector<bool> optsfiles_keep;
  SyncPoint::GetInstance()->SetCallBack(
      "DBImpl::PurgeObsoleteFiles:CheckOptionsFiles:1", [&](void* arg) {
        optsfiles_nums.push_back(*static_cast<uint64_t*>(arg));
      });
  SyncPoint::GetInstance()->SetCallBack(
      "DBImpl::PurgeObsoleteFiles:CheckOptionsFiles:2", [&](void* arg) {
        optsfiles_keep.push_back(*static_cast<bool*>(arg));
      });
  SyncPoint::GetInstance()->EnableProcessing();

  createLevel0Files(2, 50000);
  CheckFileTypeCounts(wal_dir_, 1, 0, 0);

  ASSERT_OK(dbfull()->DisableFileDeletions());
  // Toggle an option back and forth: "true" on even rounds, "false" on odd
  // rounds, four SetOptions() calls in total.
  for (int i = 0; i != 4; ++i) {
    const char* const paranoid_value = (i % 2 != 0) ? "false" : "true";
    ASSERT_OK(dbfull()->SetOptions(dbfull()->DefaultColumnFamily(),
                                   {{"paranoid_file_checks", paranoid_value}}));
  }
  ASSERT_OK(dbfull()->EnableFileDeletions(true /* force */));
  // The two sync points must have fired the same number of times.
  ASSERT_EQ(optsfiles_nums.size(), optsfiles_keep.size());

  Close();

  // Count the OPTIONS files left behind in the DB directory.
  std::vector<std::string> files;
  int opts_file_count = 0;
  ASSERT_OK(env_->GetChildren(dbname_, &files));
  for (const auto& file : files) {
    uint64_t file_num;
    Slice dummy_info_log_name_prefix;
    FileType type;
    WalFileType log_type;
    if (ParseFileName(file, &file_num, dummy_info_log_name_prefix, &type,
                      &log_type) &&
        type == kOptionsFile) {
      opts_file_count++;
    }
  }
  ASSERT_EQ(2, opts_file_count);
}
199 
TEST_F(ObsoleteFilesTest,BlobFiles)200 TEST_F(ObsoleteFilesTest, BlobFiles) {
201   ReopenDB();
202 
203   VersionSet* const versions = dbfull()->TEST_GetVersionSet();
204   assert(versions);
205   assert(versions->GetColumnFamilySet());
206 
207   ColumnFamilyData* const cfd = versions->GetColumnFamilySet()->GetDefault();
208   assert(cfd);
209 
210   const ImmutableCFOptions* const ioptions = cfd->ioptions();
211   assert(ioptions);
212   assert(!ioptions->cf_paths.empty());
213 
214   const std::string& path = ioptions->cf_paths.front().path;
215 
216   // Add an obsolete blob file.
217   constexpr uint64_t first_blob_file_number = 234;
218   versions->AddObsoleteBlobFile(first_blob_file_number, path);
219 
220   // Add a live blob file.
221   Version* const version = cfd->current();
222   assert(version);
223 
224   VersionStorageInfo* const storage_info = version->storage_info();
225   assert(storage_info);
226 
227   constexpr uint64_t second_blob_file_number = 456;
228   constexpr uint64_t second_total_blob_count = 100;
229   constexpr uint64_t second_total_blob_bytes = 2000000;
230   constexpr char second_checksum_method[] = "CRC32B";
231   constexpr char second_checksum_value[] = "6dbdf23a";
232 
233   auto shared_meta = SharedBlobFileMetaData::Create(
234       second_blob_file_number, second_total_blob_count, second_total_blob_bytes,
235       second_checksum_method, second_checksum_value);
236 
237   constexpr uint64_t second_garbage_blob_count = 0;
238   constexpr uint64_t second_garbage_blob_bytes = 0;
239 
240   auto meta = BlobFileMetaData::Create(
241       std::move(shared_meta), BlobFileMetaData::LinkedSsts(),
242       second_garbage_blob_count, second_garbage_blob_bytes);
243 
244   storage_info->AddBlobFile(std::move(meta));
245 
246   // Check for obsolete files and make sure the first blob file is picked up
247   // and grabbed for purge. The second blob file should be on the live list.
248   constexpr int job_id = 0;
249   JobContext job_context{job_id};
250 
251   dbfull()->TEST_LockMutex();
252   constexpr bool force_full_scan = false;
253   dbfull()->FindObsoleteFiles(&job_context, force_full_scan);
254   dbfull()->TEST_UnlockMutex();
255 
256   ASSERT_TRUE(job_context.HaveSomethingToDelete());
257   ASSERT_EQ(job_context.blob_delete_files.size(), 1);
258   ASSERT_EQ(job_context.blob_delete_files[0].GetBlobFileNumber(),
259             first_blob_file_number);
260 
261   const auto& files_grabbed_for_purge =
262       dbfull()->TEST_GetFilesGrabbedForPurge();
263   ASSERT_NE(files_grabbed_for_purge.find(first_blob_file_number),
264             files_grabbed_for_purge.end());
265 
266   ASSERT_EQ(job_context.blob_live.size(), 1);
267   ASSERT_EQ(job_context.blob_live[0], second_blob_file_number);
268 
269   // Hack the job context a bit by adding a few files to the full scan
270   // list and adjusting the pending file number. We add the two files
271   // above as well as two additional ones, where one is old
272   // and should be cleaned up, and the other is still pending.
273   constexpr uint64_t old_blob_file_number = 123;
274   constexpr uint64_t pending_blob_file_number = 567;
275 
276   job_context.full_scan_candidate_files.emplace_back(
277       BlobFileName(old_blob_file_number), path);
278   job_context.full_scan_candidate_files.emplace_back(
279       BlobFileName(first_blob_file_number), path);
280   job_context.full_scan_candidate_files.emplace_back(
281       BlobFileName(second_blob_file_number), path);
282   job_context.full_scan_candidate_files.emplace_back(
283       BlobFileName(pending_blob_file_number), path);
284 
285   job_context.min_pending_output = pending_blob_file_number;
286 
287   // Purge obsolete files and make sure we purge the old file and the first file
288   // (and keep the second file and the pending file).
289   std::vector<std::string> deleted_files;
290   SyncPoint::GetInstance()->SetCallBack(
291       "DBImpl::DeleteObsoleteFileImpl::BeforeDeletion", [&](void* arg) {
292         const std::string* file = static_cast<std::string*>(arg);
293         assert(file);
294 
295         constexpr char blob_extension[] = ".blob";
296 
297         if (file->find(blob_extension) != std::string::npos) {
298           deleted_files.emplace_back(*file);
299         }
300       });
301   SyncPoint::GetInstance()->EnableProcessing();
302 
303   dbfull()->PurgeObsoleteFiles(job_context);
304   job_context.Clean();
305 
306   SyncPoint::GetInstance()->DisableProcessing();
307   SyncPoint::GetInstance()->ClearAllCallBacks();
308 
309   ASSERT_EQ(files_grabbed_for_purge.find(first_blob_file_number),
310             files_grabbed_for_purge.end());
311 
312   std::sort(deleted_files.begin(), deleted_files.end());
313   const std::vector<std::string> expected_deleted_files{
314       BlobFileName(path, old_blob_file_number),
315       BlobFileName(path, first_blob_file_number)};
316 
317   ASSERT_EQ(deleted_files, expected_deleted_files);
318 }
319 
320 }  // namespace ROCKSDB_NAMESPACE
321 
// Hook invoked from main() before the tests run. Unless the build defines
// ROCKSDB_UNITTESTS_WITH_CUSTOM_OBJECTS_FROM_STATIC_LIBS, it is an empty
// function; otherwise only a C-linkage declaration is emitted here and the
// definition has to come from elsewhere.
#ifndef ROCKSDB_UNITTESTS_WITH_CUSTOM_OBJECTS_FROM_STATIC_LIBS
void RegisterCustomObjects(int, char**) {}
#else
extern "C" void RegisterCustomObjects(int argc, char** argv);
#endif  // !ROCKSDB_UNITTESTS_WITH_CUSTOM_OBJECTS_FROM_STATIC_LIBS
329 
main(int argc,char ** argv)330 int main(int argc, char** argv) {
331   ROCKSDB_NAMESPACE::port::InstallStackTraceHandler();
332   ::testing::InitGoogleTest(&argc, argv);
333   RegisterCustomObjects(argc, argv);
334   return RUN_ALL_TESTS();
335 }
336 
337 #else
338 #include <stdio.h>
339 
// Stub entry point for ROCKSDB_LITE builds: prints a skip notice to stderr
// and reports success without running any test.
int main(int /*argc*/, char** /*argv*/) {
  fputs("SKIPPED as DBImpl::DeleteFile is not supported in ROCKSDB_LITE\n",
        stderr);
  return 0;
}
345 
346 #endif  // !ROCKSDB_LITE
347