1 //  Copyright (c) 2011-present, Facebook, Inc.  All rights reserved.
2 //  This source code is licensed under both the GPLv2 (found in the
3 //  COPYING file in the root directory) and Apache 2.0 License
4 //  (found in the LICENSE.Apache file in the root directory).
5 //
6 // Copyright (c) 2012 Facebook.
7 // Use of this source code is governed by a BSD-style license that can be
8 // found in the LICENSE file.
9 
10 #ifndef ROCKSDB_LITE
11 
12 #include "utilities/checkpoint/checkpoint_impl.h"
13 
14 #include <algorithm>
15 #include <cinttypes>
16 #include <string>
17 #include <tuple>
18 #include <unordered_set>
19 #include <vector>
20 
21 #include "db/wal_manager.h"
22 #include "file/file_util.h"
23 #include "file/filename.h"
24 #include "logging/logging.h"
25 #include "port/port.h"
26 #include "rocksdb/db.h"
27 #include "rocksdb/env.h"
28 #include "rocksdb/metadata.h"
29 #include "rocksdb/options.h"
30 #include "rocksdb/transaction_log.h"
31 #include "rocksdb/types.h"
32 #include "rocksdb/utilities/checkpoint.h"
33 #include "test_util/sync_point.h"
34 #include "util/cast_util.h"
35 #include "util/file_checksum_helper.h"
36 
37 namespace ROCKSDB_NAMESPACE {
38 
Create(DB * db,Checkpoint ** checkpoint_ptr)39 Status Checkpoint::Create(DB* db, Checkpoint** checkpoint_ptr) {
40   *checkpoint_ptr = new CheckpointImpl(db);
41   return Status::OK();
42 }
43 
CreateCheckpoint(const std::string &,uint64_t,uint64_t *)44 Status Checkpoint::CreateCheckpoint(const std::string& /*checkpoint_dir*/,
45                                     uint64_t /*log_size_for_flush*/,
46                                     uint64_t* /*sequence_number_ptr*/) {
47   return Status::NotSupported("");
48 }
49 
CleanStagingDirectory(const std::string & full_private_path,Logger * info_log)50 void CheckpointImpl::CleanStagingDirectory(const std::string& full_private_path,
51                                            Logger* info_log) {
52   std::vector<std::string> subchildren;
53   Status s = db_->GetEnv()->FileExists(full_private_path);
54   if (s.IsNotFound()) {
55     return;
56   }
57   ROCKS_LOG_INFO(info_log, "File exists %s -- %s", full_private_path.c_str(),
58                  s.ToString().c_str());
59   s = db_->GetEnv()->GetChildren(full_private_path, &subchildren);
60   if (s.ok()) {
61     for (auto& subchild : subchildren) {
62       std::string subchild_path = full_private_path + "/" + subchild;
63       s = db_->GetEnv()->DeleteFile(subchild_path);
64       ROCKS_LOG_INFO(info_log, "Delete file %s -- %s", subchild_path.c_str(),
65                      s.ToString().c_str());
66     }
67   }
68   // finally delete the private dir
69   s = db_->GetEnv()->DeleteDir(full_private_path);
70   ROCKS_LOG_INFO(info_log, "Delete dir %s -- %s", full_private_path.c_str(),
71                  s.ToString().c_str());
72 }
73 
ExportColumnFamily(ColumnFamilyHandle *,const std::string &,ExportImportFilesMetaData **)74 Status Checkpoint::ExportColumnFamily(
75     ColumnFamilyHandle* /*handle*/, const std::string& /*export_dir*/,
76     ExportImportFilesMetaData** /*metadata*/) {
77   return Status::NotSupported("");
78 }
79 
80 // Builds an openable snapshot of RocksDB
CreateCheckpoint(const std::string & checkpoint_dir,uint64_t log_size_for_flush,uint64_t * sequence_number_ptr)81 Status CheckpointImpl::CreateCheckpoint(const std::string& checkpoint_dir,
82                                         uint64_t log_size_for_flush,
83                                         uint64_t* sequence_number_ptr) {
84   DBOptions db_options = db_->GetDBOptions();
85 
86   Status s = db_->GetEnv()->FileExists(checkpoint_dir);
87   if (s.ok()) {
88     return Status::InvalidArgument("Directory exists");
89   } else if (!s.IsNotFound()) {
90     assert(s.IsIOError());
91     return s;
92   }
93 
94   ROCKS_LOG_INFO(
95       db_options.info_log,
96       "Started the snapshot process -- creating snapshot in directory %s",
97       checkpoint_dir.c_str());
98 
99   size_t final_nonslash_idx = checkpoint_dir.find_last_not_of('/');
100   if (final_nonslash_idx == std::string::npos) {
101     // npos means it's only slashes or empty. Non-empty means it's the root
102     // directory, but it shouldn't be because we verified above the directory
103     // doesn't exist.
104     assert(checkpoint_dir.empty());
105     return Status::InvalidArgument("invalid checkpoint directory name");
106   }
107 
108   std::string full_private_path =
109       checkpoint_dir.substr(0, final_nonslash_idx + 1) + ".tmp";
110   ROCKS_LOG_INFO(db_options.info_log,
111                  "Snapshot process -- using temporary directory %s",
112                  full_private_path.c_str());
113   CleanStagingDirectory(full_private_path, db_options.info_log.get());
114   // create snapshot directory
115   s = db_->GetEnv()->CreateDir(full_private_path);
116   uint64_t sequence_number = 0;
117   if (s.ok()) {
118     // enable file deletions
119     s = db_->DisableFileDeletions();
120     const bool disabled_file_deletions = s.ok();
121 
122     if (s.ok() || s.IsNotSupported()) {
123       s = CreateCustomCheckpoint(
124           [&](const std::string& src_dirname, const std::string& fname,
125               FileType) {
126             ROCKS_LOG_INFO(db_options.info_log, "Hard Linking %s",
127                            fname.c_str());
128             return db_->GetFileSystem()->LinkFile(
129                 src_dirname + "/" + fname, full_private_path + "/" + fname,
130                 IOOptions(), nullptr);
131           } /* link_file_cb */,
132           [&](const std::string& src_dirname, const std::string& fname,
133               uint64_t size_limit_bytes, FileType,
134               const std::string& /* checksum_func_name */,
135               const std::string& /* checksum_val */) {
136             ROCKS_LOG_INFO(db_options.info_log, "Copying %s", fname.c_str());
137             return CopyFile(db_->GetFileSystem(), src_dirname + "/" + fname,
138                             full_private_path + "/" + fname, size_limit_bytes,
139                             db_options.use_fsync);
140           } /* copy_file_cb */,
141           [&](const std::string& fname, const std::string& contents, FileType) {
142             ROCKS_LOG_INFO(db_options.info_log, "Creating %s", fname.c_str());
143             return CreateFile(db_->GetFileSystem(),
144                               full_private_path + "/" + fname, contents,
145                               db_options.use_fsync);
146           } /* create_file_cb */,
147           &sequence_number, log_size_for_flush);
148 
149       // we copied all the files, enable file deletions
150       if (disabled_file_deletions) {
151         Status ss = db_->EnableFileDeletions(false);
152         assert(ss.ok());
153         ss.PermitUncheckedError();
154       }
155     }
156   }
157 
158   if (s.ok()) {
159     // move tmp private backup to real snapshot directory
160     s = db_->GetEnv()->RenameFile(full_private_path, checkpoint_dir);
161   }
162   if (s.ok()) {
163     std::unique_ptr<Directory> checkpoint_directory;
164     s = db_->GetEnv()->NewDirectory(checkpoint_dir, &checkpoint_directory);
165     if (s.ok() && checkpoint_directory != nullptr) {
166       s = checkpoint_directory->Fsync();
167     }
168   }
169 
170   if (s.ok()) {
171     if (sequence_number_ptr != nullptr) {
172       *sequence_number_ptr = sequence_number;
173     }
174     // here we know that we succeeded and installed the new snapshot
175     ROCKS_LOG_INFO(db_options.info_log, "Snapshot DONE. All is good");
176     ROCKS_LOG_INFO(db_options.info_log, "Snapshot sequence number: %" PRIu64,
177                    sequence_number);
178   } else {
179     // clean all the files we might have created
180     ROCKS_LOG_INFO(db_options.info_log, "Snapshot failed -- %s",
181                    s.ToString().c_str());
182     CleanStagingDirectory(full_private_path, db_options.info_log.get());
183   }
184   return s;
185 }
186 
CreateCustomCheckpoint(std::function<Status (const std::string & src_dirname,const std::string & src_fname,FileType type)> link_file_cb,std::function<Status (const std::string & src_dirname,const std::string & src_fname,uint64_t size_limit_bytes,FileType type,const std::string & checksum_func_name,const std::string & checksum_val)> copy_file_cb,std::function<Status (const std::string & fname,const std::string & contents,FileType type)> create_file_cb,uint64_t * sequence_number,uint64_t log_size_for_flush,bool get_live_table_checksum)187 Status CheckpointImpl::CreateCustomCheckpoint(
188     std::function<Status(const std::string& src_dirname,
189                          const std::string& src_fname, FileType type)>
190         link_file_cb,
191     std::function<Status(
192         const std::string& src_dirname, const std::string& src_fname,
193         uint64_t size_limit_bytes, FileType type,
194         const std::string& checksum_func_name, const std::string& checksum_val)>
195         copy_file_cb,
196     std::function<Status(const std::string& fname, const std::string& contents,
197                          FileType type)>
198         create_file_cb,
199     uint64_t* sequence_number, uint64_t log_size_for_flush,
200     bool get_live_table_checksum) {
201   *sequence_number = db_->GetLatestSequenceNumber();
202 
203   LiveFilesStorageInfoOptions opts;
204   opts.include_checksum_info = get_live_table_checksum;
205   opts.wal_size_for_flush = log_size_for_flush;
206 
207   std::vector<LiveFileStorageInfo> infos;
208   {
209     Status s = db_->GetLiveFilesStorageInfo(opts, &infos);
210     if (!s.ok()) {
211       return s;
212     }
213   }
214 
215   // Verify that everything except WAL files are in same directory
216   // (db_paths / cf_paths not supported)
217   std::unordered_set<std::string> dirs;
218   for (auto& info : infos) {
219     if (info.file_type != kWalFile) {
220       dirs.insert(info.directory);
221     }
222   }
223   if (dirs.size() > 1) {
224     return Status::NotSupported(
225         "db_paths / cf_paths not supported for Checkpoint nor BackupEngine");
226   }
227 
228   bool same_fs = true;
229 
230   for (auto& info : infos) {
231     Status s;
232     if (!info.replacement_contents.empty()) {
233       // Currently should only be used for CURRENT file.
234       assert(info.file_type == kCurrentFile);
235 
236       if (info.size != info.replacement_contents.size()) {
237         s = Status::Corruption("Inconsistent size metadata for " +
238                                info.relative_filename);
239       } else {
240         s = create_file_cb(info.relative_filename, info.replacement_contents,
241                            info.file_type);
242       }
243     } else {
244       if (same_fs && !info.trim_to_size) {
245         s = link_file_cb(info.directory, info.relative_filename,
246                          info.file_type);
247         if (s.IsNotSupported()) {
248           same_fs = false;
249           s = Status::OK();
250         }
251         s.MustCheck();
252       }
253       if (!same_fs || info.trim_to_size) {
254         assert(info.file_checksum_func_name.empty() ==
255                !opts.include_checksum_info);
256         // no assertion on file_checksum because empty is used for both "not
257         // set" and "unknown"
258         if (opts.include_checksum_info) {
259           s = copy_file_cb(info.directory, info.relative_filename, info.size,
260                            info.file_type, info.file_checksum_func_name,
261                            info.file_checksum);
262         } else {
263           s = copy_file_cb(info.directory, info.relative_filename, info.size,
264                            info.file_type, kUnknownFileChecksumFuncName,
265                            kUnknownFileChecksum);
266         }
267       }
268     }
269     if (!s.ok()) {
270       return s;
271     }
272   }
273 
274   return Status::OK();
275 }
276 
277 // Exports all live SST files of a specified Column Family onto export_dir,
278 // returning SST files information in metadata.
ExportColumnFamily(ColumnFamilyHandle * handle,const std::string & export_dir,ExportImportFilesMetaData ** metadata)279 Status CheckpointImpl::ExportColumnFamily(
280     ColumnFamilyHandle* handle, const std::string& export_dir,
281     ExportImportFilesMetaData** metadata) {
282   auto cfh = static_cast_with_check<ColumnFamilyHandleImpl>(handle);
283   const auto cf_name = cfh->GetName();
284   const auto db_options = db_->GetDBOptions();
285 
286   assert(metadata != nullptr);
287   assert(*metadata == nullptr);
288   auto s = db_->GetEnv()->FileExists(export_dir);
289   if (s.ok()) {
290     return Status::InvalidArgument("Specified export_dir exists");
291   } else if (!s.IsNotFound()) {
292     assert(s.IsIOError());
293     return s;
294   }
295 
296   const auto final_nonslash_idx = export_dir.find_last_not_of('/');
297   if (final_nonslash_idx == std::string::npos) {
298     return Status::InvalidArgument("Specified export_dir invalid");
299   }
300   ROCKS_LOG_INFO(db_options.info_log,
301                  "[%s] export column family onto export directory %s",
302                  cf_name.c_str(), export_dir.c_str());
303 
304   // Create a temporary export directory.
305   const auto tmp_export_dir =
306       export_dir.substr(0, final_nonslash_idx + 1) + ".tmp";
307   s = db_->GetEnv()->CreateDir(tmp_export_dir);
308 
309   if (s.ok()) {
310     s = db_->Flush(ROCKSDB_NAMESPACE::FlushOptions(), handle);
311   }
312 
313   ColumnFamilyMetaData db_metadata;
314   if (s.ok()) {
315     // Export live sst files with file deletions disabled.
316     s = db_->DisableFileDeletions();
317     if (s.ok()) {
318       db_->GetColumnFamilyMetaData(handle, &db_metadata);
319 
320       s = ExportFilesInMetaData(
321           db_options, db_metadata,
322           [&](const std::string& src_dirname, const std::string& fname) {
323             ROCKS_LOG_INFO(db_options.info_log, "[%s] HardLinking %s",
324                            cf_name.c_str(), fname.c_str());
325             return db_->GetEnv()->LinkFile(src_dirname + fname,
326                                            tmp_export_dir + fname);
327           } /*link_file_cb*/,
328           [&](const std::string& src_dirname, const std::string& fname) {
329             ROCKS_LOG_INFO(db_options.info_log, "[%s] Copying %s",
330                            cf_name.c_str(), fname.c_str());
331             return CopyFile(db_->GetFileSystem(), src_dirname + fname,
332                             tmp_export_dir + fname, 0, db_options.use_fsync);
333           } /*copy_file_cb*/);
334 
335       const auto enable_status = db_->EnableFileDeletions(false /*force*/);
336       if (s.ok()) {
337         s = enable_status;
338       }
339     }
340   }
341 
342   auto moved_to_user_specified_dir = false;
343   if (s.ok()) {
344     // Move temporary export directory to the actual export directory.
345     s = db_->GetEnv()->RenameFile(tmp_export_dir, export_dir);
346   }
347 
348   if (s.ok()) {
349     // Fsync export directory.
350     moved_to_user_specified_dir = true;
351     std::unique_ptr<Directory> dir_ptr;
352     s = db_->GetEnv()->NewDirectory(export_dir, &dir_ptr);
353     if (s.ok()) {
354       assert(dir_ptr != nullptr);
355       s = dir_ptr->Fsync();
356     }
357   }
358 
359   if (s.ok()) {
360     // Export of files succeeded. Fill in the metadata information.
361     auto result_metadata = new ExportImportFilesMetaData();
362     result_metadata->db_comparator_name = handle->GetComparator()->Name();
363     for (const auto& level_metadata : db_metadata.levels) {
364       for (const auto& file_metadata : level_metadata.files) {
365         LiveFileMetaData live_file_metadata;
366         live_file_metadata.size = file_metadata.size;
367         live_file_metadata.name = std::move(file_metadata.name);
368         live_file_metadata.file_number = file_metadata.file_number;
369         live_file_metadata.db_path = export_dir;
370         live_file_metadata.smallest_seqno = file_metadata.smallest_seqno;
371         live_file_metadata.largest_seqno = file_metadata.largest_seqno;
372         live_file_metadata.smallestkey = std::move(file_metadata.smallestkey);
373         live_file_metadata.largestkey = std::move(file_metadata.largestkey);
374         live_file_metadata.oldest_blob_file_number =
375             file_metadata.oldest_blob_file_number;
376         live_file_metadata.level = level_metadata.level;
377         result_metadata->files.push_back(live_file_metadata);
378       }
379       *metadata = result_metadata;
380     }
381     ROCKS_LOG_INFO(db_options.info_log, "[%s] Export succeeded.",
382                    cf_name.c_str());
383   } else {
384     // Failure: Clean up all the files/directories created.
385     ROCKS_LOG_INFO(db_options.info_log, "[%s] Export failed. %s",
386                    cf_name.c_str(), s.ToString().c_str());
387     std::vector<std::string> subchildren;
388     const auto cleanup_dir =
389         moved_to_user_specified_dir ? export_dir : tmp_export_dir;
390     db_->GetEnv()->GetChildren(cleanup_dir, &subchildren);
391     for (const auto& subchild : subchildren) {
392       const auto subchild_path = cleanup_dir + "/" + subchild;
393       const auto status = db_->GetEnv()->DeleteFile(subchild_path);
394       if (!status.ok()) {
395         ROCKS_LOG_WARN(db_options.info_log, "Failed to cleanup file %s: %s",
396                        subchild_path.c_str(), status.ToString().c_str());
397       }
398     }
399     const auto status = db_->GetEnv()->DeleteDir(cleanup_dir);
400     if (!status.ok()) {
401       ROCKS_LOG_WARN(db_options.info_log, "Failed to cleanup dir %s: %s",
402                      cleanup_dir.c_str(), status.ToString().c_str());
403     }
404   }
405   return s;
406 }
407 
ExportFilesInMetaData(const DBOptions & db_options,const ColumnFamilyMetaData & metadata,std::function<Status (const std::string & src_dirname,const std::string & src_fname)> link_file_cb,std::function<Status (const std::string & src_dirname,const std::string & src_fname)> copy_file_cb)408 Status CheckpointImpl::ExportFilesInMetaData(
409     const DBOptions& db_options, const ColumnFamilyMetaData& metadata,
410     std::function<Status(const std::string& src_dirname,
411                          const std::string& src_fname)>
412         link_file_cb,
413     std::function<Status(const std::string& src_dirname,
414                          const std::string& src_fname)>
415         copy_file_cb) {
416   Status s;
417   auto hardlink_file = true;
418 
419   // Copy/hard link files in metadata.
420   size_t num_files = 0;
421   for (const auto& level_metadata : metadata.levels) {
422     for (const auto& file_metadata : level_metadata.files) {
423       uint64_t number;
424       FileType type;
425       const auto ok = ParseFileName(file_metadata.name, &number, &type);
426       if (!ok) {
427         s = Status::Corruption("Could not parse file name");
428         break;
429       }
430 
431       // We should only get sst files here.
432       assert(type == kTableFile);
433       assert(file_metadata.size > 0 && file_metadata.name[0] == '/');
434       const auto src_fname = file_metadata.name;
435       ++num_files;
436 
437       if (hardlink_file) {
438         s = link_file_cb(db_->GetName(), src_fname);
439         if (num_files == 1 && s.IsNotSupported()) {
440           // Fallback to copy if link failed due to cross-device directories.
441           hardlink_file = false;
442           s = Status::OK();
443         }
444       }
445       if (!hardlink_file) {
446         s = copy_file_cb(db_->GetName(), src_fname);
447       }
448       if (!s.ok()) {
449         break;
450       }
451     }
452   }
453   ROCKS_LOG_INFO(db_options.info_log, "Number of table files %" ROCKSDB_PRIszt,
454                  num_files);
455 
456   return s;
457 }
458 }  // namespace ROCKSDB_NAMESPACE
459 
460 #endif  // ROCKSDB_LITE
461