1 //  Copyright (c) 2011-present, Facebook, Inc.  All rights reserved.
2 //  This source code is licensed under both the GPLv2 (found in the
3 //  COPYING file in the root directory) and Apache 2.0 License
4 //  (found in the LICENSE.Apache file in the root directory).
5 //
6 // Copyright (c) 2011 The LevelDB Authors. All rights reserved.
7 // Use of this source code is governed by a BSD-style license that can be
8 // found in the LICENSE file. See the AUTHORS file for names of contributors.
9 #pragma once
10 
11 #include <stdint.h>
12 #include <mutex>
13 #include <string>
14 
15 #include "rocksdb/status.h"
16 #include "rocksdb/env.h"
17 #include "util/aligned_buffer.h"
18 
19 #include <windows.h>
20 
21 namespace ROCKSDB_NAMESPACE {
22 namespace port {
23 
24 std::string GetWindowsErrSz(DWORD err);
25 
IOErrorFromWindowsError(const std::string & context,DWORD err)26 inline Status IOErrorFromWindowsError(const std::string& context, DWORD err) {
27   return ((err == ERROR_HANDLE_DISK_FULL) || (err == ERROR_DISK_FULL))
28              ? Status::NoSpace(context, GetWindowsErrSz(err))
29              : ((err == ERROR_FILE_NOT_FOUND) || (err == ERROR_PATH_NOT_FOUND))
30                    ? Status::PathNotFound(context, GetWindowsErrSz(err))
31                    : Status::IOError(context, GetWindowsErrSz(err));
32 }
33 
IOErrorFromLastWindowsError(const std::string & context)34 inline Status IOErrorFromLastWindowsError(const std::string& context) {
35   return IOErrorFromWindowsError(context, GetLastError());
36 }
37 
IOError(const std::string & context,int err_number)38 inline Status IOError(const std::string& context, int err_number) {
39   return (err_number == ENOSPC)
40              ? Status::NoSpace(context, strerror(err_number))
41              : (err_number == ENOENT)
42                    ? Status::PathNotFound(context, strerror(err_number))
43                    : Status::IOError(context, strerror(err_number));
44 }
45 
46 class WinFileData;
47 
48 Status pwrite(const WinFileData* file_data, const Slice& data,
49   uint64_t offset, size_t& bytes_written);
50 
51 Status pread(const WinFileData* file_data, char* src, size_t num_bytes,
52   uint64_t offset, size_t& bytes_read);
53 
54 Status fallocate(const std::string& filename, HANDLE hFile, uint64_t to_size);
55 
56 Status ftruncate(const std::string& filename, HANDLE hFile, uint64_t toSize);
57 
58 size_t GetUniqueIdFromFile(HANDLE hFile, char* id, size_t max_size);
59 
60 class WinFileData {
61  protected:
62   const std::string filename_;
63   HANDLE hFile_;
64   // If true, the I/O issued would be direct I/O which the buffer
65   // will need to be aligned (not sure there is a guarantee that the buffer
66   // passed in is aligned).
67   const bool use_direct_io_;
68 
69  public:
70   // We want this class be usable both for inheritance (prive
71   // or protected) and for containment so __ctor and __dtor public
WinFileData(const std::string & filename,HANDLE hFile,bool direct_io)72   WinFileData(const std::string& filename, HANDLE hFile, bool direct_io)
73       : filename_(filename), hFile_(hFile), use_direct_io_(direct_io) {}
74 
~WinFileData()75   virtual ~WinFileData() { this->CloseFile(); }
76 
CloseFile()77   bool CloseFile() {
78     bool result = true;
79 
80     if (hFile_ != NULL && hFile_ != INVALID_HANDLE_VALUE) {
81       result = ::CloseHandle(hFile_);
82       assert(result);
83       hFile_ = NULL;
84     }
85     return result;
86   }
87 
GetName()88   const std::string& GetName() const { return filename_; }
89 
GetFileHandle()90   HANDLE GetFileHandle() const { return hFile_; }
91 
use_direct_io()92   bool use_direct_io() const { return use_direct_io_; }
93 
94   WinFileData(const WinFileData&) = delete;
95   WinFileData& operator=(const WinFileData&) = delete;
96 };
97 
98 class WinSequentialFile : protected WinFileData, public SequentialFile {
99 
100   // Override for behavior change when creating a custom env
101   virtual Status PositionedReadInternal(char* src, size_t numBytes,
102     uint64_t offset, size_t& bytes_read) const;
103 
104 public:
105   WinSequentialFile(const std::string& fname, HANDLE f,
106     const EnvOptions& options);
107 
108   ~WinSequentialFile();
109 
110   WinSequentialFile(const WinSequentialFile&) = delete;
111   WinSequentialFile& operator=(const WinSequentialFile&) = delete;
112 
113   virtual Status Read(size_t n, Slice* result, char* scratch) override;
114   virtual Status PositionedRead(uint64_t offset, size_t n, Slice* result,
115     char* scratch) override;
116 
117   virtual Status Skip(uint64_t n) override;
118 
119   virtual Status InvalidateCache(size_t offset, size_t length) override;
120 
use_direct_io()121   virtual bool use_direct_io() const override { return WinFileData::use_direct_io(); }
122 };
123 
124 // mmap() based random-access
125 class WinMmapReadableFile : private WinFileData, public RandomAccessFile {
126   HANDLE hMap_;
127 
128   const void* mapped_region_;
129   const size_t length_;
130 
131  public:
132   // mapped_region_[0,length-1] contains the mmapped contents of the file.
133   WinMmapReadableFile(const std::string& fileName, HANDLE hFile, HANDLE hMap,
134                       const void* mapped_region, size_t length);
135 
136   ~WinMmapReadableFile();
137 
138   WinMmapReadableFile(const WinMmapReadableFile&) = delete;
139   WinMmapReadableFile& operator=(const WinMmapReadableFile&) = delete;
140 
141   virtual Status Read(uint64_t offset, size_t n, Slice* result,
142                       char* scratch) const override;
143 
144   virtual Status InvalidateCache(size_t offset, size_t length) override;
145 
146   virtual size_t GetUniqueId(char* id, size_t max_size) const override;
147 };
148 
149 // We preallocate and use memcpy to append new
150 // data to the file.  This is safe since we either properly close the
151 // file before reading from it, or for log files, the reading code
152 // knows enough to skip zero suffixes.
153 class WinMmapFile : private WinFileData, public WritableFile {
154  private:
155   HANDLE hMap_;
156 
157   const size_t page_size_;  // We flush the mapping view in page_size
158   // increments. We may decide if this is a memory
159   // page size or SSD page size
160   const size_t
161       allocation_granularity_;  // View must start at such a granularity
162 
163   size_t reserved_size_;  // Preallocated size
164 
165   size_t mapping_size_;  // The max size of the mapping object
166   // we want to guess the final file size to minimize the remapping
167   size_t view_size_;  // How much memory to map into a view at a time
168 
169   char* mapped_begin_;  // Must begin at the file offset that is aligned with
170   // allocation_granularity_
171   char* mapped_end_;
172   char* dst_;  // Where to write next  (in range [mapped_begin_,mapped_end_])
173   char* last_sync_;  // Where have we synced up to
174 
175   uint64_t file_offset_;  // Offset of mapped_begin_ in file
176 
177   // Do we have unsynced writes?
178   bool pending_sync_;
179 
180   // Can only truncate or reserve to a sector size aligned if
181   // used on files that are opened with Unbuffered I/O
182   Status TruncateFile(uint64_t toSize);
183 
184   Status UnmapCurrentRegion();
185 
186   Status MapNewRegion();
187 
188   virtual Status PreallocateInternal(uint64_t spaceToReserve);
189 
190  public:
191   WinMmapFile(const std::string& fname, HANDLE hFile, size_t page_size,
192               size_t allocation_granularity, const EnvOptions& options);
193 
194   ~WinMmapFile();
195 
196   WinMmapFile(const WinMmapFile&) = delete;
197   WinMmapFile& operator=(const WinMmapFile&) = delete;
198 
199   virtual Status Append(const Slice& data) override;
200 
201   // Means Close() will properly take care of truncate
202   // and it does not need any additional information
203   virtual Status Truncate(uint64_t size) override;
204 
205   virtual Status Close() override;
206 
207   virtual Status Flush() override;
208 
209   // Flush only data
210   virtual Status Sync() override;
211 
212   /**
213   * Flush data as well as metadata to stable storage.
214   */
215   virtual Status Fsync() override;
216 
217   /**
218   * Get the size of valid data in the file. This will not match the
219   * size that is returned from the filesystem because we use mmap
220   * to extend file by map_size every time.
221   */
222   virtual uint64_t GetFileSize() override;
223 
224   virtual Status InvalidateCache(size_t offset, size_t length) override;
225 
226   virtual Status Allocate(uint64_t offset, uint64_t len) override;
227 
228   virtual size_t GetUniqueId(char* id, size_t max_size) const override;
229 };
230 
231 class WinRandomAccessImpl {
232  protected:
233   WinFileData* file_base_;
234   size_t       alignment_;
235 
236   // Override for behavior change when creating a custom env
237   virtual Status PositionedReadInternal(char* src, size_t numBytes,
238                                         uint64_t offset, size_t& bytes_read) const;
239 
240   WinRandomAccessImpl(WinFileData* file_base, size_t alignment,
241                       const EnvOptions& options);
242 
~WinRandomAccessImpl()243   virtual ~WinRandomAccessImpl() {}
244 
245   Status ReadImpl(uint64_t offset, size_t n, Slice* result,
246                   char* scratch) const;
247 
GetAlignment()248   size_t GetAlignment() const { return alignment_; }
249 
250  public:
251 
252   WinRandomAccessImpl(const WinRandomAccessImpl&) = delete;
253   WinRandomAccessImpl& operator=(const WinRandomAccessImpl&) = delete;
254 };
255 
256 // pread() based random-access
257 class WinRandomAccessFile
258     : private WinFileData,
259       protected WinRandomAccessImpl,  // Want to be able to override
260                                       // PositionedReadInternal
261       public RandomAccessFile {
262  public:
263   WinRandomAccessFile(const std::string& fname, HANDLE hFile, size_t alignment,
264                       const EnvOptions& options);
265 
266   ~WinRandomAccessFile();
267 
268   virtual Status Read(uint64_t offset, size_t n, Slice* result,
269                       char* scratch) const override;
270 
271   virtual size_t GetUniqueId(char* id, size_t max_size) const override;
272 
use_direct_io()273   virtual bool use_direct_io() const override { return WinFileData::use_direct_io(); }
274 
275   virtual Status InvalidateCache(size_t offset, size_t length) override;
276 
277   virtual size_t GetRequiredBufferAlignment() const override;
278 };
279 
280 // This is a sequential write class. It has been mimicked (as others) after
281 // the original Posix class. We add support for unbuffered I/O on windows as
282 // well
283 // we utilize the original buffer as an alignment buffer to write directly to
284 // file with no buffering.
285 // No buffering requires that the provided buffer is aligned to the physical
286 // sector size (SSD page size) and
287 // that all SetFilePointer() operations to occur with such an alignment.
288 // We thus always write in sector/page size increments to the drive and leave
289 // the tail for the next write OR for Close() at which point we pad with zeros.
290 // No padding is required for
291 // buffered access.
292 class WinWritableImpl {
293  protected:
294   WinFileData* file_data_;
295   const uint64_t alignment_;
296   uint64_t next_write_offset_; // Needed because Windows does not support O_APPEND
297   uint64_t reservedsize_;  // how far we have reserved space
298 
299   virtual Status PreallocateInternal(uint64_t spaceToReserve);
300 
301   WinWritableImpl(WinFileData* file_data, size_t alignment);
302 
~WinWritableImpl()303   ~WinWritableImpl() {}
304 
GetAlignement()305   uint64_t GetAlignement() const { return alignment_; }
306 
307   Status AppendImpl(const Slice& data);
308 
309   // Requires that the data is aligned as specified by
310   // GetRequiredBufferAlignment()
311   Status PositionedAppendImpl(const Slice& data, uint64_t offset);
312 
313   Status TruncateImpl(uint64_t size);
314 
315   Status CloseImpl();
316 
317   Status SyncImpl();
318 
GetFileNextWriteOffset()319   uint64_t GetFileNextWriteOffset() {
320     // Double accounting now here with WritableFileWriter
321     // and this size will be wrong when unbuffered access is used
322     // but tests implement their own writable files and do not use
323     // WritableFileWrapper
324     // so we need to squeeze a square peg through
325     // a round hole here.
326     return next_write_offset_;
327   }
328 
329   Status AllocateImpl(uint64_t offset, uint64_t len);
330 
331  public:
332   WinWritableImpl(const WinWritableImpl&) = delete;
333   WinWritableImpl& operator=(const WinWritableImpl&) = delete;
334 };
335 
336 class WinWritableFile : private WinFileData,
337                         protected WinWritableImpl,
338                         public WritableFile {
339  public:
340   WinWritableFile(const std::string& fname, HANDLE hFile, size_t alignment,
341                   size_t capacity, const EnvOptions& options);
342 
343   ~WinWritableFile();
344 
345   virtual Status Append(const Slice& data) override;
346 
347   // Requires that the data is aligned as specified by
348   // GetRequiredBufferAlignment()
349   virtual Status PositionedAppend(const Slice& data, uint64_t offset) override;
350 
351   // Need to implement this so the file is truncated correctly
352   // when buffered and unbuffered mode
353   virtual Status Truncate(uint64_t size) override;
354 
355   virtual Status Close() override;
356 
357   // write out the cached data to the OS cache
358   // This is now taken care of the WritableFileWriter
359   virtual Status Flush() override;
360 
361   virtual Status Sync() override;
362 
363   virtual Status Fsync() override;
364 
365   virtual bool IsSyncThreadSafe() const override;
366 
367   // Indicates if the class makes use of direct I/O
368   // Use PositionedAppend
369   virtual bool use_direct_io() const override;
370 
371   virtual size_t GetRequiredBufferAlignment() const override;
372 
373   virtual uint64_t GetFileSize() override;
374 
375   virtual Status Allocate(uint64_t offset, uint64_t len) override;
376 
377   virtual size_t GetUniqueId(char* id, size_t max_size) const override;
378 };
379 
380 class WinRandomRWFile : private WinFileData,
381                         protected WinRandomAccessImpl,
382                         protected WinWritableImpl,
383                         public RandomRWFile {
384  public:
385   WinRandomRWFile(const std::string& fname, HANDLE hFile, size_t alignment,
386                   const EnvOptions& options);
387 
~WinRandomRWFile()388   ~WinRandomRWFile() {}
389 
390   // Indicates if the class makes use of direct I/O
391   // If false you must pass aligned buffer to Write()
392   virtual bool use_direct_io() const override;
393 
394   // Use the returned alignment value to allocate aligned
395   // buffer for Write() when use_direct_io() returns true
396   virtual size_t GetRequiredBufferAlignment() const override;
397 
398   // Write bytes in `data` at  offset `offset`, Returns Status::OK() on success.
399   // Pass aligned buffer when use_direct_io() returns true.
400   virtual Status Write(uint64_t offset, const Slice& data) override;
401 
402   // Read up to `n` bytes starting from offset `offset` and store them in
403   // result, provided `scratch` size should be at least `n`.
404   // Returns Status::OK() on success.
405   virtual Status Read(uint64_t offset, size_t n, Slice* result,
406                       char* scratch) const override;
407 
408   virtual Status Flush() override;
409 
410   virtual Status Sync() override;
411 
Fsync()412   virtual Status Fsync() { return Sync(); }
413 
414   virtual Status Close() override;
415 };
416 
417 class WinMemoryMappedBuffer : public MemoryMappedFileBuffer {
418 private:
419   HANDLE  file_handle_;
420   HANDLE  map_handle_;
421 public:
WinMemoryMappedBuffer(HANDLE file_handle,HANDLE map_handle,void * base,size_t size)422   WinMemoryMappedBuffer(HANDLE file_handle, HANDLE map_handle, void* base, size_t size) :
423     MemoryMappedFileBuffer(base, size),
424     file_handle_(file_handle),
425     map_handle_(map_handle) {}
426   ~WinMemoryMappedBuffer() override;
427 };
428 
429 class WinDirectory : public Directory {
430   HANDLE handle_;
431  public:
WinDirectory(HANDLE h)432   explicit WinDirectory(HANDLE h) noexcept : handle_(h) {
433     assert(handle_ != INVALID_HANDLE_VALUE);
434   }
~WinDirectory()435   ~WinDirectory() {
436     ::CloseHandle(handle_);
437   }
438   virtual Status Fsync() override;
439 
440   size_t GetUniqueId(char* id, size_t max_size) const override;
441 };
442 
443 class WinFileLock : public FileLock {
444  public:
WinFileLock(HANDLE hFile)445   explicit WinFileLock(HANDLE hFile) : hFile_(hFile) {
446     assert(hFile != NULL);
447     assert(hFile != INVALID_HANDLE_VALUE);
448   }
449 
450   ~WinFileLock();
451 
452  private:
453   HANDLE hFile_;
454 };
455 }
456 }  // namespace ROCKSDB_NAMESPACE
457