1 //  Copyright (c) 2011-present, Facebook, Inc.  All rights reserved.
2 //  This source code is licensed under both the GPLv2 (found in the
3 //  COPYING file in the root directory) and Apache 2.0 License
4 //  (found in the LICENSE.Apache file in the root directory).
5 
6 #ifndef ROCKSDB_LITE
7 
8 #include <algorithm>
9 #include <cassert>
10 #include <cctype>
11 #include <iostream>
12 
13 #include "rocksdb/env_encryption.h"
14 #include "util/aligned_buffer.h"
15 #include "util/coding.h"
16 #include "util/random.h"
17 
18 #endif
19 
20 namespace ROCKSDB_NAMESPACE {
21 
22 #ifndef ROCKSDB_LITE
23 
24 class EncryptedSequentialFile : public SequentialFile {
25   private:
26     std::unique_ptr<SequentialFile> file_;
27     std::unique_ptr<BlockAccessCipherStream> stream_;
28     uint64_t offset_;
29     size_t prefixLength_;
30 
31      public:
32   // Default ctor. Given underlying sequential file is supposed to be at
33   // offset == prefixLength.
EncryptedSequentialFile(SequentialFile * f,BlockAccessCipherStream * s,size_t prefixLength)34   EncryptedSequentialFile(SequentialFile* f, BlockAccessCipherStream* s, size_t prefixLength)
35       : file_(f), stream_(s), offset_(prefixLength), prefixLength_(prefixLength) {
36   }
37 
38   // Read up to "n" bytes from the file.  "scratch[0..n-1]" may be
39   // written by this routine.  Sets "*result" to the data that was
40   // read (including if fewer than "n" bytes were successfully read).
41   // May set "*result" to point at data in "scratch[0..n-1]", so
42   // "scratch[0..n-1]" must be live when "*result" is used.
43   // If an error was encountered, returns a non-OK status.
44   //
45   // REQUIRES: External synchronization
Read(size_t n,Slice * result,char * scratch)46   Status Read(size_t n, Slice* result, char* scratch) override {
47     assert(scratch);
48     Status status = file_->Read(n, result, scratch);
49     if (!status.ok()) {
50       return status;
51     }
52     status = stream_->Decrypt(offset_, (char*)result->data(), result->size());
53     offset_ += result->size(); // We've already ready data from disk, so update offset_ even if decryption fails.
54     return status;
55   }
56 
57   // Skip "n" bytes from the file. This is guaranteed to be no
58   // slower that reading the same data, but may be faster.
59   //
60   // If end of file is reached, skipping will stop at the end of the
61   // file, and Skip will return OK.
62   //
63   // REQUIRES: External synchronization
Skip(uint64_t n)64   Status Skip(uint64_t n) override {
65     auto status = file_->Skip(n);
66     if (!status.ok()) {
67       return status;
68     }
69     offset_ += n;
70     return status;
71   }
72 
73   // Indicates the upper layers if the current SequentialFile implementation
74   // uses direct IO.
use_direct_io() const75   bool use_direct_io() const override { return file_->use_direct_io(); }
76 
77   // Use the returned alignment value to allocate
78   // aligned buffer for Direct I/O
GetRequiredBufferAlignment() const79   size_t GetRequiredBufferAlignment() const override {
80     return file_->GetRequiredBufferAlignment();
81   }
82 
83   // Remove any kind of caching of data from the offset to offset+length
84   // of this file. If the length is 0, then it refers to the end of file.
85   // If the system is not caching the file contents, then this is a noop.
InvalidateCache(size_t offset,size_t length)86   Status InvalidateCache(size_t offset, size_t length) override {
87     return file_->InvalidateCache(offset + prefixLength_, length);
88   }
89 
90   // Positioned Read for direct I/O
91   // If Direct I/O enabled, offset, n, and scratch should be properly aligned
PositionedRead(uint64_t offset,size_t n,Slice * result,char * scratch)92   Status PositionedRead(uint64_t offset, size_t n, Slice* result,
93                         char* scratch) override {
94     assert(scratch);
95     offset += prefixLength_; // Skip prefix
96     auto status = file_->PositionedRead(offset, n, result, scratch);
97     if (!status.ok()) {
98       return status;
99     }
100     offset_ = offset + result->size();
101     status = stream_->Decrypt(offset, (char*)result->data(), result->size());
102     return status;
103   }
104 };
105 
106 // A file abstraction for randomly reading the contents of a file.
107 class EncryptedRandomAccessFile : public RandomAccessFile {
108   private:
109     std::unique_ptr<RandomAccessFile> file_;
110     std::unique_ptr<BlockAccessCipherStream> stream_;
111     size_t prefixLength_;
112 
113  public:
EncryptedRandomAccessFile(RandomAccessFile * f,BlockAccessCipherStream * s,size_t prefixLength)114   EncryptedRandomAccessFile(RandomAccessFile* f, BlockAccessCipherStream* s, size_t prefixLength)
115     : file_(f), stream_(s), prefixLength_(prefixLength) { }
116 
117   // Read up to "n" bytes from the file starting at "offset".
118   // "scratch[0..n-1]" may be written by this routine.  Sets "*result"
119   // to the data that was read (including if fewer than "n" bytes were
120   // successfully read).  May set "*result" to point at data in
121   // "scratch[0..n-1]", so "scratch[0..n-1]" must be live when
122   // "*result" is used.  If an error was encountered, returns a non-OK
123   // status.
124   //
125   // Safe for concurrent use by multiple threads.
126   // If Direct I/O enabled, offset, n, and scratch should be aligned properly.
Read(uint64_t offset,size_t n,Slice * result,char * scratch) const127   Status Read(uint64_t offset, size_t n, Slice* result,
128               char* scratch) const override {
129     assert(scratch);
130     offset += prefixLength_;
131     auto status = file_->Read(offset, n, result, scratch);
132     if (!status.ok()) {
133       return status;
134     }
135     status = stream_->Decrypt(offset, (char*)result->data(), result->size());
136     return status;
137   }
138 
139   // Readahead the file starting from offset by n bytes for caching.
Prefetch(uint64_t offset,size_t n)140   Status Prefetch(uint64_t offset, size_t n) override {
141     //return Status::OK();
142     return file_->Prefetch(offset + prefixLength_, n);
143   }
144 
145   // Tries to get an unique ID for this file that will be the same each time
146   // the file is opened (and will stay the same while the file is open).
147   // Furthermore, it tries to make this ID at most "max_size" bytes. If such an
148   // ID can be created this function returns the length of the ID and places it
149   // in "id"; otherwise, this function returns 0, in which case "id"
150   // may not have been modified.
151   //
152   // This function guarantees, for IDs from a given environment, two unique ids
153   // cannot be made equal to each other by adding arbitrary bytes to one of
154   // them. That is, no unique ID is the prefix of another.
155   //
156   // This function guarantees that the returned ID will not be interpretable as
157   // a single varint.
158   //
159   // Note: these IDs are only valid for the duration of the process.
GetUniqueId(char * id,size_t max_size) const160   size_t GetUniqueId(char* id, size_t max_size) const override {
161     return file_->GetUniqueId(id, max_size);
162   };
163 
Hint(AccessPattern pattern)164   void Hint(AccessPattern pattern) override { file_->Hint(pattern); }
165 
166   // Indicates the upper layers if the current RandomAccessFile implementation
167   // uses direct IO.
use_direct_io() const168   bool use_direct_io() const override { return file_->use_direct_io(); }
169 
170   // Use the returned alignment value to allocate
171   // aligned buffer for Direct I/O
GetRequiredBufferAlignment() const172   size_t GetRequiredBufferAlignment() const override {
173     return file_->GetRequiredBufferAlignment();
174   }
175 
176   // Remove any kind of caching of data from the offset to offset+length
177   // of this file. If the length is 0, then it refers to the end of file.
178   // If the system is not caching the file contents, then this is a noop.
InvalidateCache(size_t offset,size_t length)179   Status InvalidateCache(size_t offset, size_t length) override {
180     return file_->InvalidateCache(offset + prefixLength_, length);
181   }
182 };
183 
184 // A file abstraction for sequential writing.  The implementation
185 // must provide buffering since callers may append small fragments
186 // at a time to the file.
187 class EncryptedWritableFile : public WritableFileWrapper {
188   private:
189     std::unique_ptr<WritableFile> file_;
190     std::unique_ptr<BlockAccessCipherStream> stream_;
191     size_t prefixLength_;
192 
193  public:
194   // Default ctor. Prefix is assumed to be written already.
EncryptedWritableFile(WritableFile * f,BlockAccessCipherStream * s,size_t prefixLength)195   EncryptedWritableFile(WritableFile* f, BlockAccessCipherStream* s, size_t prefixLength)
196     : WritableFileWrapper(f), file_(f), stream_(s), prefixLength_(prefixLength) { }
197 
Append(const Slice & data)198   Status Append(const Slice& data) override {
199     AlignedBuffer buf;
200     Status status;
201     Slice dataToAppend(data);
202     if (data.size() > 0) {
203       auto offset = file_->GetFileSize(); // size including prefix
204       // Encrypt in cloned buffer
205       buf.Alignment(GetRequiredBufferAlignment());
206       buf.AllocateNewBuffer(data.size());
207       // TODO (sagar0): Modify AlignedBuffer.Append to allow doing a memmove
208       // so that the next two lines can be replaced with buf.Append().
209       memmove(buf.BufferStart(), data.data(), data.size());
210       buf.Size(data.size());
211       status = stream_->Encrypt(offset, buf.BufferStart(), buf.CurrentSize());
212       if (!status.ok()) {
213         return status;
214       }
215       dataToAppend = Slice(buf.BufferStart(), buf.CurrentSize());
216     }
217     status = file_->Append(dataToAppend);
218     if (!status.ok()) {
219       return status;
220     }
221     return status;
222   }
223 
PositionedAppend(const Slice & data,uint64_t offset)224   Status PositionedAppend(const Slice& data, uint64_t offset) override {
225     AlignedBuffer buf;
226     Status status;
227     Slice dataToAppend(data);
228     offset += prefixLength_;
229     if (data.size() > 0) {
230       // Encrypt in cloned buffer
231       buf.Alignment(GetRequiredBufferAlignment());
232       buf.AllocateNewBuffer(data.size());
233       memmove(buf.BufferStart(), data.data(), data.size());
234       buf.Size(data.size());
235       status = stream_->Encrypt(offset, buf.BufferStart(), buf.CurrentSize());
236       if (!status.ok()) {
237         return status;
238       }
239       dataToAppend = Slice(buf.BufferStart(), buf.CurrentSize());
240     }
241     status = file_->PositionedAppend(dataToAppend, offset);
242     if (!status.ok()) {
243       return status;
244     }
245     return status;
246   }
247 
248   // Indicates the upper layers if the current WritableFile implementation
249   // uses direct IO.
use_direct_io() const250   bool use_direct_io() const override { return file_->use_direct_io(); }
251 
252   // Use the returned alignment value to allocate
253   // aligned buffer for Direct I/O
GetRequiredBufferAlignment() const254   size_t GetRequiredBufferAlignment() const override {
255     return file_->GetRequiredBufferAlignment();
256   }
257 
258     /*
259    * Get the size of valid data in the file.
260    */
GetFileSize()261   uint64_t GetFileSize() override {
262     return file_->GetFileSize() - prefixLength_;
263   }
264 
265   // Truncate is necessary to trim the file to the correct size
266   // before closing. It is not always possible to keep track of the file
267   // size due to whole pages writes. The behavior is undefined if called
268   // with other writes to follow.
Truncate(uint64_t size)269   Status Truncate(uint64_t size) override {
270     return file_->Truncate(size + prefixLength_);
271   }
272 
273     // Remove any kind of caching of data from the offset to offset+length
274   // of this file. If the length is 0, then it refers to the end of file.
275   // If the system is not caching the file contents, then this is a noop.
276   // This call has no effect on dirty pages in the cache.
InvalidateCache(size_t offset,size_t length)277   Status InvalidateCache(size_t offset, size_t length) override {
278     return file_->InvalidateCache(offset + prefixLength_, length);
279   }
280 
281   // Sync a file range with disk.
282   // offset is the starting byte of the file range to be synchronized.
283   // nbytes specifies the length of the range to be synchronized.
284   // This asks the OS to initiate flushing the cached data to disk,
285   // without waiting for completion.
286   // Default implementation does nothing.
RangeSync(uint64_t offset,uint64_t nbytes)287   Status RangeSync(uint64_t offset, uint64_t nbytes) override {
288     return file_->RangeSync(offset + prefixLength_, nbytes);
289   }
290 
291   // PrepareWrite performs any necessary preparation for a write
292   // before the write actually occurs.  This allows for pre-allocation
293   // of space on devices where it can result in less file
294   // fragmentation and/or less waste from over-zealous filesystem
295   // pre-allocation.
PrepareWrite(size_t offset,size_t len)296   void PrepareWrite(size_t offset, size_t len) override {
297     file_->PrepareWrite(offset + prefixLength_, len);
298   }
299 
300   // Pre-allocates space for a file.
Allocate(uint64_t offset,uint64_t len)301   Status Allocate(uint64_t offset, uint64_t len) override {
302     return file_->Allocate(offset + prefixLength_, len);
303   }
304 };
305 
306 // A file abstraction for random reading and writing.
307 class EncryptedRandomRWFile : public RandomRWFile {
308   private:
309     std::unique_ptr<RandomRWFile> file_;
310     std::unique_ptr<BlockAccessCipherStream> stream_;
311     size_t prefixLength_;
312 
313  public:
EncryptedRandomRWFile(RandomRWFile * f,BlockAccessCipherStream * s,size_t prefixLength)314   EncryptedRandomRWFile(RandomRWFile* f, BlockAccessCipherStream* s, size_t prefixLength)
315     : file_(f), stream_(s), prefixLength_(prefixLength) {}
316 
317   // Indicates if the class makes use of direct I/O
318   // If false you must pass aligned buffer to Write()
use_direct_io() const319   bool use_direct_io() const override { return file_->use_direct_io(); }
320 
321   // Use the returned alignment value to allocate
322   // aligned buffer for Direct I/O
GetRequiredBufferAlignment() const323   size_t GetRequiredBufferAlignment() const override {
324     return file_->GetRequiredBufferAlignment();
325   }
326 
327   // Write bytes in `data` at  offset `offset`, Returns Status::OK() on success.
328   // Pass aligned buffer when use_direct_io() returns true.
Write(uint64_t offset,const Slice & data)329   Status Write(uint64_t offset, const Slice& data) override {
330     AlignedBuffer buf;
331     Status status;
332     Slice dataToWrite(data);
333     offset += prefixLength_;
334     if (data.size() > 0) {
335       // Encrypt in cloned buffer
336       buf.Alignment(GetRequiredBufferAlignment());
337       buf.AllocateNewBuffer(data.size());
338       memmove(buf.BufferStart(), data.data(), data.size());
339       buf.Size(data.size());
340       status = stream_->Encrypt(offset, buf.BufferStart(), buf.CurrentSize());
341       if (!status.ok()) {
342         return status;
343       }
344       dataToWrite = Slice(buf.BufferStart(), buf.CurrentSize());
345     }
346     status = file_->Write(offset, dataToWrite);
347     return status;
348   }
349 
350   // Read up to `n` bytes starting from offset `offset` and store them in
351   // result, provided `scratch` size should be at least `n`.
352   // Returns Status::OK() on success.
Read(uint64_t offset,size_t n,Slice * result,char * scratch) const353   Status Read(uint64_t offset, size_t n, Slice* result,
354               char* scratch) const override {
355     assert(scratch);
356     offset += prefixLength_;
357     auto status = file_->Read(offset, n, result, scratch);
358     if (!status.ok()) {
359       return status;
360     }
361     status = stream_->Decrypt(offset, (char*)result->data(), result->size());
362     return status;
363   }
364 
Flush()365   Status Flush() override { return file_->Flush(); }
366 
Sync()367   Status Sync() override { return file_->Sync(); }
368 
Fsync()369   Status Fsync() override { return file_->Fsync(); }
370 
Close()371   Status Close() override { return file_->Close(); }
372 };
373 
374 // EncryptedEnv implements an Env wrapper that adds encryption to files stored on disk.
375 class EncryptedEnv : public EnvWrapper {
376  public:
EncryptedEnv(Env * base_env,EncryptionProvider * provider)377   EncryptedEnv(Env* base_env, EncryptionProvider *provider)
378       : EnvWrapper(base_env) {
379     provider_ = provider;
380   }
381 
382   // NewSequentialFile opens a file for sequential reading.
NewSequentialFile(const std::string & fname,std::unique_ptr<SequentialFile> * result,const EnvOptions & options)383   Status NewSequentialFile(const std::string& fname,
384                            std::unique_ptr<SequentialFile>* result,
385                            const EnvOptions& options) override {
386     result->reset();
387     if (options.use_mmap_reads) {
388       return Status::InvalidArgument();
389     }
390     // Open file using underlying Env implementation
391     std::unique_ptr<SequentialFile> underlying;
392     auto status = EnvWrapper::NewSequentialFile(fname, &underlying, options);
393     if (!status.ok()) {
394       return status;
395     }
396     // Read prefix (if needed)
397     AlignedBuffer prefixBuf;
398     Slice prefixSlice;
399     size_t prefixLength = provider_->GetPrefixLength();
400     if (prefixLength > 0) {
401       // Read prefix
402       prefixBuf.Alignment(underlying->GetRequiredBufferAlignment());
403       prefixBuf.AllocateNewBuffer(prefixLength);
404       status = underlying->Read(prefixLength, &prefixSlice, prefixBuf.BufferStart());
405       if (!status.ok()) {
406         return status;
407       }
408       prefixBuf.Size(prefixLength);
409     }
410     // Create cipher stream
411     std::unique_ptr<BlockAccessCipherStream> stream;
412     status = provider_->CreateCipherStream(fname, options, prefixSlice, &stream);
413     if (!status.ok()) {
414       return status;
415     }
416     (*result) = std::unique_ptr<SequentialFile>(new EncryptedSequentialFile(underlying.release(), stream.release(), prefixLength));
417     return Status::OK();
418   }
419 
420   // NewRandomAccessFile opens a file for random read access.
NewRandomAccessFile(const std::string & fname,std::unique_ptr<RandomAccessFile> * result,const EnvOptions & options)421   Status NewRandomAccessFile(const std::string& fname,
422                              std::unique_ptr<RandomAccessFile>* result,
423                              const EnvOptions& options) override {
424     result->reset();
425     if (options.use_mmap_reads) {
426       return Status::InvalidArgument();
427     }
428     // Open file using underlying Env implementation
429     std::unique_ptr<RandomAccessFile> underlying;
430     auto status = EnvWrapper::NewRandomAccessFile(fname, &underlying, options);
431     if (!status.ok()) {
432       return status;
433     }
434     // Read prefix (if needed)
435     AlignedBuffer prefixBuf;
436     Slice prefixSlice;
437     size_t prefixLength = provider_->GetPrefixLength();
438     if (prefixLength > 0) {
439       // Read prefix
440       prefixBuf.Alignment(underlying->GetRequiredBufferAlignment());
441       prefixBuf.AllocateNewBuffer(prefixLength);
442       status = underlying->Read(0, prefixLength, &prefixSlice, prefixBuf.BufferStart());
443       if (!status.ok()) {
444         return status;
445       }
446       prefixBuf.Size(prefixLength);
447     }
448     // Create cipher stream
449     std::unique_ptr<BlockAccessCipherStream> stream;
450     status = provider_->CreateCipherStream(fname, options, prefixSlice, &stream);
451     if (!status.ok()) {
452       return status;
453     }
454     (*result) = std::unique_ptr<RandomAccessFile>(new EncryptedRandomAccessFile(underlying.release(), stream.release(), prefixLength));
455     return Status::OK();
456   }
457 
458   // NewWritableFile opens a file for sequential writing.
NewWritableFile(const std::string & fname,std::unique_ptr<WritableFile> * result,const EnvOptions & options)459   Status NewWritableFile(const std::string& fname,
460                          std::unique_ptr<WritableFile>* result,
461                          const EnvOptions& options) override {
462     result->reset();
463     if (options.use_mmap_writes) {
464       return Status::InvalidArgument();
465     }
466     // Open file using underlying Env implementation
467     std::unique_ptr<WritableFile> underlying;
468     Status status = EnvWrapper::NewWritableFile(fname, &underlying, options);
469     if (!status.ok()) {
470       return status;
471     }
472     // Initialize & write prefix (if needed)
473     AlignedBuffer prefixBuf;
474     Slice prefixSlice;
475     size_t prefixLength = provider_->GetPrefixLength();
476     if (prefixLength > 0) {
477       // Initialize prefix
478       prefixBuf.Alignment(underlying->GetRequiredBufferAlignment());
479       prefixBuf.AllocateNewBuffer(prefixLength);
480       provider_->CreateNewPrefix(fname, prefixBuf.BufferStart(), prefixLength);
481       prefixBuf.Size(prefixLength);
482       prefixSlice = Slice(prefixBuf.BufferStart(), prefixBuf.CurrentSize());
483       // Write prefix
484       status = underlying->Append(prefixSlice);
485       if (!status.ok()) {
486         return status;
487       }
488     }
489     // Create cipher stream
490     std::unique_ptr<BlockAccessCipherStream> stream;
491     status = provider_->CreateCipherStream(fname, options, prefixSlice, &stream);
492     if (!status.ok()) {
493       return status;
494     }
495     (*result) = std::unique_ptr<WritableFile>(new EncryptedWritableFile(underlying.release(), stream.release(), prefixLength));
496     return Status::OK();
497   }
498 
499   // Create an object that writes to a new file with the specified
500   // name.  Deletes any existing file with the same name and creates a
501   // new file.  On success, stores a pointer to the new file in
502   // *result and returns OK.  On failure stores nullptr in *result and
503   // returns non-OK.
504   //
505   // The returned file will only be accessed by one thread at a time.
ReopenWritableFile(const std::string & fname,std::unique_ptr<WritableFile> * result,const EnvOptions & options)506   Status ReopenWritableFile(const std::string& fname,
507                             std::unique_ptr<WritableFile>* result,
508                             const EnvOptions& options) override {
509     result->reset();
510     if (options.use_mmap_writes) {
511       return Status::InvalidArgument();
512     }
513     // Open file using underlying Env implementation
514     std::unique_ptr<WritableFile> underlying;
515     Status status = EnvWrapper::ReopenWritableFile(fname, &underlying, options);
516     if (!status.ok()) {
517       return status;
518     }
519     // Initialize & write prefix (if needed)
520     AlignedBuffer prefixBuf;
521     Slice prefixSlice;
522     size_t prefixLength = provider_->GetPrefixLength();
523     if (prefixLength > 0) {
524       // Initialize prefix
525       prefixBuf.Alignment(underlying->GetRequiredBufferAlignment());
526       prefixBuf.AllocateNewBuffer(prefixLength);
527       provider_->CreateNewPrefix(fname, prefixBuf.BufferStart(), prefixLength);
528       prefixBuf.Size(prefixLength);
529       prefixSlice = Slice(prefixBuf.BufferStart(), prefixBuf.CurrentSize());
530       // Write prefix
531       status = underlying->Append(prefixSlice);
532       if (!status.ok()) {
533         return status;
534       }
535     }
536     // Create cipher stream
537     std::unique_ptr<BlockAccessCipherStream> stream;
538     status = provider_->CreateCipherStream(fname, options, prefixSlice, &stream);
539     if (!status.ok()) {
540       return status;
541     }
542     (*result) = std::unique_ptr<WritableFile>(new EncryptedWritableFile(underlying.release(), stream.release(), prefixLength));
543     return Status::OK();
544   }
545 
546   // Reuse an existing file by renaming it and opening it as writable.
ReuseWritableFile(const std::string & fname,const std::string & old_fname,std::unique_ptr<WritableFile> * result,const EnvOptions & options)547   Status ReuseWritableFile(const std::string& fname,
548                            const std::string& old_fname,
549                            std::unique_ptr<WritableFile>* result,
550                            const EnvOptions& options) override {
551     result->reset();
552     if (options.use_mmap_writes) {
553       return Status::InvalidArgument();
554     }
555     // Open file using underlying Env implementation
556     std::unique_ptr<WritableFile> underlying;
557     Status status = EnvWrapper::ReuseWritableFile(fname, old_fname, &underlying, options);
558     if (!status.ok()) {
559       return status;
560     }
561     // Initialize & write prefix (if needed)
562     AlignedBuffer prefixBuf;
563     Slice prefixSlice;
564     size_t prefixLength = provider_->GetPrefixLength();
565     if (prefixLength > 0) {
566       // Initialize prefix
567       prefixBuf.Alignment(underlying->GetRequiredBufferAlignment());
568       prefixBuf.AllocateNewBuffer(prefixLength);
569       provider_->CreateNewPrefix(fname, prefixBuf.BufferStart(), prefixLength);
570       prefixBuf.Size(prefixLength);
571       prefixSlice = Slice(prefixBuf.BufferStart(), prefixBuf.CurrentSize());
572       // Write prefix
573       status = underlying->Append(prefixSlice);
574       if (!status.ok()) {
575         return status;
576       }
577     }
578     // Create cipher stream
579     std::unique_ptr<BlockAccessCipherStream> stream;
580     status = provider_->CreateCipherStream(fname, options, prefixSlice, &stream);
581     if (!status.ok()) {
582       return status;
583     }
584     (*result) = std::unique_ptr<WritableFile>(new EncryptedWritableFile(underlying.release(), stream.release(), prefixLength));
585     return Status::OK();
586   }
587 
588   // Open `fname` for random read and write, if file doesn't exist the file
589   // will be created.  On success, stores a pointer to the new file in
590   // *result and returns OK.  On failure returns non-OK.
591   //
592   // The returned file will only be accessed by one thread at a time.
NewRandomRWFile(const std::string & fname,std::unique_ptr<RandomRWFile> * result,const EnvOptions & options)593   Status NewRandomRWFile(const std::string& fname,
594                          std::unique_ptr<RandomRWFile>* result,
595                          const EnvOptions& options) override {
596     result->reset();
597     if (options.use_mmap_reads || options.use_mmap_writes) {
598       return Status::InvalidArgument();
599     }
600     // Check file exists
601     bool isNewFile = !FileExists(fname).ok();
602 
603     // Open file using underlying Env implementation
604     std::unique_ptr<RandomRWFile> underlying;
605     Status status = EnvWrapper::NewRandomRWFile(fname, &underlying, options);
606     if (!status.ok()) {
607       return status;
608     }
609     // Read or Initialize & write prefix (if needed)
610     AlignedBuffer prefixBuf;
611     Slice prefixSlice;
612     size_t prefixLength = provider_->GetPrefixLength();
613     if (prefixLength > 0) {
614       prefixBuf.Alignment(underlying->GetRequiredBufferAlignment());
615       prefixBuf.AllocateNewBuffer(prefixLength);
616       if (!isNewFile) {
617         // File already exists, read prefix
618         status = underlying->Read(0, prefixLength, &prefixSlice, prefixBuf.BufferStart());
619         if (!status.ok()) {
620           return status;
621         }
622         prefixBuf.Size(prefixLength);
623       } else {
624         // File is new, initialize & write prefix
625         provider_->CreateNewPrefix(fname, prefixBuf.BufferStart(), prefixLength);
626         prefixBuf.Size(prefixLength);
627         prefixSlice = Slice(prefixBuf.BufferStart(), prefixBuf.CurrentSize());
628         // Write prefix
629         status = underlying->Write(0, prefixSlice);
630         if (!status.ok()) {
631           return status;
632         }
633       }
634     }
635     // Create cipher stream
636     std::unique_ptr<BlockAccessCipherStream> stream;
637     status = provider_->CreateCipherStream(fname, options, prefixSlice, &stream);
638     if (!status.ok()) {
639       return status;
640     }
641     (*result) = std::unique_ptr<RandomRWFile>(new EncryptedRandomRWFile(underlying.release(), stream.release(), prefixLength));
642     return Status::OK();
643   }
644 
645   // Store in *result the attributes of the children of the specified directory.
646   // In case the implementation lists the directory prior to iterating the files
647   // and files are concurrently deleted, the deleted files will be omitted from
648   // result.
649   // The name attributes are relative to "dir".
650   // Original contents of *results are dropped.
651   // Returns OK if "dir" exists and "*result" contains its children.
652   //         NotFound if "dir" does not exist, the calling process does not have
653   //                  permission to access "dir", or if "dir" is invalid.
654   //         IOError if an IO Error was encountered
GetChildrenFileAttributes(const std::string & dir,std::vector<FileAttributes> * result)655   Status GetChildrenFileAttributes(
656       const std::string& dir, std::vector<FileAttributes>* result) override {
657     auto status = EnvWrapper::GetChildrenFileAttributes(dir, result);
658     if (!status.ok()) {
659       return status;
660     }
661     size_t prefixLength = provider_->GetPrefixLength();
662     for (auto it = std::begin(*result); it!=std::end(*result); ++it) {
663       assert(it->size_bytes >= prefixLength);
664       it->size_bytes -= prefixLength;
665     }
666     return Status::OK();
667   }
668 
669   // Store the size of fname in *file_size.
GetFileSize(const std::string & fname,uint64_t * file_size)670   Status GetFileSize(const std::string& fname, uint64_t* file_size) override {
671     auto status = EnvWrapper::GetFileSize(fname, file_size);
672     if (!status.ok()) {
673       return status;
674     }
675     size_t prefixLength = provider_->GetPrefixLength();
676     assert(*file_size >= prefixLength);
677     *file_size -= prefixLength;
678     return Status::OK();
679   }
680 
681  private:
682   EncryptionProvider *provider_;
683 };
684 
685 // Returns an Env that encrypts data when stored on disk and decrypts data when
686 // read from disk.
NewEncryptedEnv(Env * base_env,EncryptionProvider * provider)687 Env* NewEncryptedEnv(Env* base_env, EncryptionProvider* provider) {
688   return new EncryptedEnv(base_env, provider);
689 }
690 
691 // Encrypt one or more (partial) blocks of data at the file offset.
692 // Length of data is given in dataSize.
Encrypt(uint64_t fileOffset,char * data,size_t dataSize)693 Status BlockAccessCipherStream::Encrypt(uint64_t fileOffset, char *data, size_t dataSize) {
694   // Calculate block index
695   auto blockSize = BlockSize();
696   uint64_t blockIndex = fileOffset / blockSize;
697   size_t blockOffset = fileOffset % blockSize;
698   std::unique_ptr<char[]> blockBuffer;
699 
700   std::string scratch;
701   AllocateScratch(scratch);
702 
703   // Encrypt individual blocks.
704   while (1) {
705     char *block = data;
706     size_t n = std::min(dataSize, blockSize - blockOffset);
707     if (n != blockSize) {
708       // We're not encrypting a full block.
709       // Copy data to blockBuffer
710       if (!blockBuffer.get()) {
711         // Allocate buffer
712         blockBuffer = std::unique_ptr<char[]>(new char[blockSize]);
713       }
714       block = blockBuffer.get();
715       // Copy plain data to block buffer
716       memmove(block + blockOffset, data, n);
717     }
718     auto status = EncryptBlock(blockIndex, block, (char*)scratch.data());
719     if (!status.ok()) {
720       return status;
721     }
722     if (block != data) {
723       // Copy encrypted data back to `data`.
724       memmove(data, block + blockOffset, n);
725     }
726     dataSize -= n;
727     if (dataSize == 0) {
728       return Status::OK();
729     }
730     data += n;
731     blockOffset = 0;
732     blockIndex++;
733   }
734 }
735 
736 // Decrypt one or more (partial) blocks of data at the file offset.
737 // Length of data is given in dataSize.
Decrypt(uint64_t fileOffset,char * data,size_t dataSize)738 Status BlockAccessCipherStream::Decrypt(uint64_t fileOffset, char *data, size_t dataSize) {
739   // Calculate block index
740   auto blockSize = BlockSize();
741   uint64_t blockIndex = fileOffset / blockSize;
742   size_t blockOffset = fileOffset % blockSize;
743   std::unique_ptr<char[]> blockBuffer;
744 
745   std::string scratch;
746   AllocateScratch(scratch);
747 
748   // Decrypt individual blocks.
749   while (1) {
750     char *block = data;
751     size_t n = std::min(dataSize, blockSize - blockOffset);
752     if (n != blockSize) {
753       // We're not decrypting a full block.
754       // Copy data to blockBuffer
755       if (!blockBuffer.get()) {
756         // Allocate buffer
757         blockBuffer = std::unique_ptr<char[]>(new char[blockSize]);
758       }
759       block = blockBuffer.get();
760       // Copy encrypted data to block buffer
761       memmove(block + blockOffset, data, n);
762     }
763     auto status = DecryptBlock(blockIndex, block, (char*)scratch.data());
764     if (!status.ok()) {
765       return status;
766     }
767     if (block != data) {
768       // Copy decrypted data back to `data`.
769       memmove(data, block + blockOffset, n);
770     }
771 
772     // Simply decrementing dataSize by n could cause it to underflow,
773     // which will very likely make it read over the original bounds later
774     assert(dataSize >= n);
775     if (dataSize < n) {
776       return Status::Corruption("Cannot decrypt data at given offset");
777     }
778 
779     dataSize -= n;
780     if (dataSize == 0) {
781       return Status::OK();
782     }
783     data += n;
784     blockOffset = 0;
785     blockIndex++;
786   }
787 }
788 
789 // Encrypt a block of data.
790 // Length of data is equal to BlockSize().
Encrypt(char * data)791 Status ROT13BlockCipher::Encrypt(char *data) {
792   for (size_t i = 0; i < blockSize_; ++i) {
793       data[i] += 13;
794   }
795   return Status::OK();
796 }
797 
798 // Decrypt a block of data.
799 // Length of data is equal to BlockSize().
Decrypt(char * data)800 Status ROT13BlockCipher::Decrypt(char *data) {
801   return Encrypt(data);
802 }
803 
804 // Allocate scratch space which is passed to EncryptBlock/DecryptBlock.
AllocateScratch(std::string & scratch)805 void CTRCipherStream::AllocateScratch(std::string& scratch) {
806   auto blockSize = cipher_.BlockSize();
807   scratch.reserve(blockSize);
808 }
809 
810 // Encrypt a block of data at the given block index.
811 // Length of data is equal to BlockSize();
EncryptBlock(uint64_t blockIndex,char * data,char * scratch)812 Status CTRCipherStream::EncryptBlock(uint64_t blockIndex, char *data, char* scratch) {
813 
814   // Create nonce + counter
815   auto blockSize = cipher_.BlockSize();
816   memmove(scratch, iv_.data(), blockSize);
817   EncodeFixed64(scratch, blockIndex + initialCounter_);
818 
819   // Encrypt nonce+counter
820   auto status = cipher_.Encrypt(scratch);
821   if (!status.ok()) {
822     return status;
823   }
824 
825   // XOR data with ciphertext.
826   for (size_t i = 0; i < blockSize; i++) {
827     data[i] = data[i] ^ scratch[i];
828   }
829   return Status::OK();
830 }
831 
832 // Decrypt a block of data at the given block index.
833 // Length of data is equal to BlockSize();
DecryptBlock(uint64_t blockIndex,char * data,char * scratch)834 Status CTRCipherStream::DecryptBlock(uint64_t blockIndex, char *data, char* scratch) {
835   // For CTR decryption & encryption are the same
836   return EncryptBlock(blockIndex, data, scratch);
837 }
838 
839 // GetPrefixLength returns the length of the prefix that is added to every file
840 // and used for storing encryption options.
841 // For optimal performance, the prefix length should be a multiple of
842 // the page size.
GetPrefixLength()843 size_t CTREncryptionProvider::GetPrefixLength() {
844   return defaultPrefixLength;
845 }
846 
847 // decodeCTRParameters decodes the initial counter & IV from the given
848 // (plain text) prefix.
decodeCTRParameters(const char * prefix,size_t blockSize,uint64_t & initialCounter,Slice & iv)849 static void decodeCTRParameters(const char *prefix, size_t blockSize, uint64_t &initialCounter, Slice &iv) {
850   // First block contains 64-bit initial counter
851   initialCounter = DecodeFixed64(prefix);
852   // Second block contains IV
853   iv = Slice(prefix + blockSize, blockSize);
854 }
855 
856 // CreateNewPrefix initialized an allocated block of prefix memory
857 // for a new file.
CreateNewPrefix(const std::string &,char * prefix,size_t prefixLength)858 Status CTREncryptionProvider::CreateNewPrefix(const std::string& /*fname*/,
859                                               char* prefix,
860                                               size_t prefixLength) {
861   // Create & seed rnd.
862   Random rnd((uint32_t)Env::Default()->NowMicros());
863   // Fill entire prefix block with random values.
864   for (size_t i = 0; i < prefixLength; i++) {
865     prefix[i] = rnd.Uniform(256) & 0xFF;
866   }
867   // Take random data to extract initial counter & IV
868   auto blockSize = cipher_.BlockSize();
869   uint64_t initialCounter;
870   Slice prefixIV;
871   decodeCTRParameters(prefix, blockSize, initialCounter, prefixIV);
872 
873   // Now populate the rest of the prefix, starting from the third block.
874   PopulateSecretPrefixPart(prefix + (2 * blockSize), prefixLength - (2 * blockSize), blockSize);
875 
876   // Encrypt the prefix, starting from block 2 (leave block 0, 1 with initial counter & IV unencrypted)
877   CTRCipherStream cipherStream(cipher_, prefixIV.data(), initialCounter);
878   auto status = cipherStream.Encrypt(0, prefix + (2 * blockSize), prefixLength - (2 * blockSize));
879   if (!status.ok()) {
880     return status;
881   }
882   return Status::OK();
883 }
884 
885 // PopulateSecretPrefixPart initializes the data into a new prefix block
886 // in plain text.
887 // Returns the amount of space (starting from the start of the prefix)
888 // that has been initialized.
PopulateSecretPrefixPart(char *,size_t,size_t)889 size_t CTREncryptionProvider::PopulateSecretPrefixPart(char* /*prefix*/,
890                                                        size_t /*prefixLength*/,
891                                                        size_t /*blockSize*/) {
892   // Nothing to do here, put in custom data in override when needed.
893   return 0;
894 }
895 
CreateCipherStream(const std::string & fname,const EnvOptions & options,Slice & prefix,std::unique_ptr<BlockAccessCipherStream> * result)896 Status CTREncryptionProvider::CreateCipherStream(
897     const std::string& fname, const EnvOptions& options, Slice& prefix,
898     std::unique_ptr<BlockAccessCipherStream>* result) {
899   // Read plain text part of prefix.
900   auto blockSize = cipher_.BlockSize();
901   uint64_t initialCounter;
902   Slice iv;
903   decodeCTRParameters(prefix.data(), blockSize, initialCounter, iv);
904 
905   // If the prefix is smaller than twice the block size, we would below read a
906   // very large chunk of the file (and very likely read over the bounds)
907   assert(prefix.size() >= 2 * blockSize);
908   if (prefix.size() < 2 * blockSize) {
909     return Status::Corruption("Unable to read from file " + fname +
910                               ": read attempt would read beyond file bounds");
911   }
912 
913   // Decrypt the encrypted part of the prefix, starting from block 2 (block 0, 1 with initial counter & IV are unencrypted)
914   CTRCipherStream cipherStream(cipher_, iv.data(), initialCounter);
915   auto status = cipherStream.Decrypt(0, (char*)prefix.data() + (2 * blockSize), prefix.size() - (2 * blockSize));
916   if (!status.ok()) {
917     return status;
918   }
919 
920   // Create cipher stream
921   return CreateCipherStreamFromPrefix(fname, options, initialCounter, iv, prefix, result);
922 }
923 
924 // CreateCipherStreamFromPrefix creates a block access cipher stream for a file given
925 // given name and options. The given prefix is already decrypted.
CreateCipherStreamFromPrefix(const std::string &,const EnvOptions &,uint64_t initialCounter,const Slice & iv,const Slice &,std::unique_ptr<BlockAccessCipherStream> * result)926 Status CTREncryptionProvider::CreateCipherStreamFromPrefix(
927     const std::string& /*fname*/, const EnvOptions& /*options*/,
928     uint64_t initialCounter, const Slice& iv, const Slice& /*prefix*/,
929     std::unique_ptr<BlockAccessCipherStream>* result) {
930   (*result) = std::unique_ptr<BlockAccessCipherStream>(
931       new CTRCipherStream(cipher_, iv.data(), initialCounter));
932   return Status::OK();
933 }
934 
935 #endif // ROCKSDB_LITE
936 
937 }  // namespace ROCKSDB_NAMESPACE
938