1 //  Copyright (c) 2011-present, Facebook, Inc.  All rights reserved.
2 //  This source code is licensed under both the GPLv2 (found in the
3 //  COPYING file in the root directory) and Apache 2.0 License
4 //  (found in the LICENSE.Apache file in the root directory).
5 //
6 // Copyright (c) 2011 The LevelDB Authors. All rights reserved.
7 // Use of this source code is governed by a BSD-style license that can be
8 // found in the LICENSE file. See the AUTHORS file for names of contributors.
9 #pragma once
10 
11 #include <stdint.h>
12 #include <mutex>
13 #include <string>
14 
15 #include "rocksdb/status.h"
16 #include "rocksdb/env.h"
17 #include "util/aligned_buffer.h"
18 
19 #include <windows.h>
20 
21 
22 namespace rocksdb {
23 namespace port {
24 
25 std::string GetWindowsErrSz(DWORD err);
26 
IOErrorFromWindowsError(const std::string & context,DWORD err)27 inline Status IOErrorFromWindowsError(const std::string& context, DWORD err) {
28   return ((err == ERROR_HANDLE_DISK_FULL) || (err == ERROR_DISK_FULL))
29              ? Status::NoSpace(context, GetWindowsErrSz(err))
30              : ((err == ERROR_FILE_NOT_FOUND) || (err == ERROR_PATH_NOT_FOUND))
31                    ? Status::PathNotFound(context, GetWindowsErrSz(err))
32                    : Status::IOError(context, GetWindowsErrSz(err));
33 }
34 
IOErrorFromLastWindowsError(const std::string & context)35 inline Status IOErrorFromLastWindowsError(const std::string& context) {
36   return IOErrorFromWindowsError(context, GetLastError());
37 }
38 
IOError(const std::string & context,int err_number)39 inline Status IOError(const std::string& context, int err_number) {
40   return (err_number == ENOSPC)
41              ? Status::NoSpace(context, strerror(err_number))
42              : (err_number == ENOENT)
43                    ? Status::PathNotFound(context, strerror(err_number))
44                    : Status::IOError(context, strerror(err_number));
45 }
46 
47 class WinFileData;
48 
49 Status pwrite(const WinFileData* file_data, const Slice& data,
50   uint64_t offset, size_t& bytes_written);
51 
52 Status pread(const WinFileData* file_data, char* src, size_t num_bytes,
53   uint64_t offset, size_t& bytes_read);
54 
55 Status fallocate(const std::string& filename, HANDLE hFile, uint64_t to_size);
56 
57 Status ftruncate(const std::string& filename, HANDLE hFile, uint64_t toSize);
58 
59 size_t GetUniqueIdFromFile(HANDLE hFile, char* id, size_t max_size);
60 
61 class WinFileData {
62  protected:
63   const std::string filename_;
64   HANDLE hFile_;
65   // If true, the I/O issued would be direct I/O which the buffer
66   // will need to be aligned (not sure there is a guarantee that the buffer
67   // passed in is aligned).
68   const bool use_direct_io_;
69 
70  public:
71   // We want this class be usable both for inheritance (prive
72   // or protected) and for containment so __ctor and __dtor public
WinFileData(const std::string & filename,HANDLE hFile,bool direct_io)73   WinFileData(const std::string& filename, HANDLE hFile, bool direct_io)
74       : filename_(filename), hFile_(hFile), use_direct_io_(direct_io) {}
75 
~WinFileData()76   virtual ~WinFileData() { this->CloseFile(); }
77 
CloseFile()78   bool CloseFile() {
79     bool result = true;
80 
81     if (hFile_ != NULL && hFile_ != INVALID_HANDLE_VALUE) {
82       result = ::CloseHandle(hFile_);
83       assert(result);
84       hFile_ = NULL;
85     }
86     return result;
87   }
88 
GetName()89   const std::string& GetName() const { return filename_; }
90 
GetFileHandle()91   HANDLE GetFileHandle() const { return hFile_; }
92 
use_direct_io()93   bool use_direct_io() const { return use_direct_io_; }
94 
95   WinFileData(const WinFileData&) = delete;
96   WinFileData& operator=(const WinFileData&) = delete;
97 };
98 
99 class WinSequentialFile : protected WinFileData, public SequentialFile {
100 
101   // Override for behavior change when creating a custom env
102   virtual Status PositionedReadInternal(char* src, size_t numBytes,
103     uint64_t offset, size_t& bytes_read) const;
104 
105 public:
106   WinSequentialFile(const std::string& fname, HANDLE f,
107     const EnvOptions& options);
108 
109   ~WinSequentialFile();
110 
111   WinSequentialFile(const WinSequentialFile&) = delete;
112   WinSequentialFile& operator=(const WinSequentialFile&) = delete;
113 
114   virtual Status Read(size_t n, Slice* result, char* scratch) override;
115   virtual Status PositionedRead(uint64_t offset, size_t n, Slice* result,
116     char* scratch) override;
117 
118   virtual Status Skip(uint64_t n) override;
119 
120   virtual Status InvalidateCache(size_t offset, size_t length) override;
121 
use_direct_io()122   virtual bool use_direct_io() const override { return WinFileData::use_direct_io(); }
123 };
124 
125 // mmap() based random-access
126 class WinMmapReadableFile : private WinFileData, public RandomAccessFile {
127   HANDLE hMap_;
128 
129   const void* mapped_region_;
130   const size_t length_;
131 
132  public:
133   // mapped_region_[0,length-1] contains the mmapped contents of the file.
134   WinMmapReadableFile(const std::string& fileName, HANDLE hFile, HANDLE hMap,
135                       const void* mapped_region, size_t length);
136 
137   ~WinMmapReadableFile();
138 
139   WinMmapReadableFile(const WinMmapReadableFile&) = delete;
140   WinMmapReadableFile& operator=(const WinMmapReadableFile&) = delete;
141 
142   virtual Status Read(uint64_t offset, size_t n, Slice* result,
143                       char* scratch) const override;
144 
145   virtual Status InvalidateCache(size_t offset, size_t length) override;
146 
147   virtual size_t GetUniqueId(char* id, size_t max_size) const override;
148 };
149 
150 // We preallocate and use memcpy to append new
151 // data to the file.  This is safe since we either properly close the
152 // file before reading from it, or for log files, the reading code
153 // knows enough to skip zero suffixes.
154 class WinMmapFile : private WinFileData, public WritableFile {
155  private:
156   HANDLE hMap_;
157 
158   const size_t page_size_;  // We flush the mapping view in page_size
159   // increments. We may decide if this is a memory
160   // page size or SSD page size
161   const size_t
162       allocation_granularity_;  // View must start at such a granularity
163 
164   size_t reserved_size_;  // Preallocated size
165 
166   size_t mapping_size_;  // The max size of the mapping object
167   // we want to guess the final file size to minimize the remapping
168   size_t view_size_;  // How much memory to map into a view at a time
169 
170   char* mapped_begin_;  // Must begin at the file offset that is aligned with
171   // allocation_granularity_
172   char* mapped_end_;
173   char* dst_;  // Where to write next  (in range [mapped_begin_,mapped_end_])
174   char* last_sync_;  // Where have we synced up to
175 
176   uint64_t file_offset_;  // Offset of mapped_begin_ in file
177 
178   // Do we have unsynced writes?
179   bool pending_sync_;
180 
181   // Can only truncate or reserve to a sector size aligned if
182   // used on files that are opened with Unbuffered I/O
183   Status TruncateFile(uint64_t toSize);
184 
185   Status UnmapCurrentRegion();
186 
187   Status MapNewRegion();
188 
189   virtual Status PreallocateInternal(uint64_t spaceToReserve);
190 
191  public:
192   WinMmapFile(const std::string& fname, HANDLE hFile, size_t page_size,
193               size_t allocation_granularity, const EnvOptions& options);
194 
195   ~WinMmapFile();
196 
197   WinMmapFile(const WinMmapFile&) = delete;
198   WinMmapFile& operator=(const WinMmapFile&) = delete;
199 
200   virtual Status Append(const Slice& data) override;
201 
202   // Means Close() will properly take care of truncate
203   // and it does not need any additional information
204   virtual Status Truncate(uint64_t size) override;
205 
206   virtual Status Close() override;
207 
208   virtual Status Flush() override;
209 
210   // Flush only data
211   virtual Status Sync() override;
212 
213   /**
214   * Flush data as well as metadata to stable storage.
215   */
216   virtual Status Fsync() override;
217 
218   /**
219   * Get the size of valid data in the file. This will not match the
220   * size that is returned from the filesystem because we use mmap
221   * to extend file by map_size every time.
222   */
223   virtual uint64_t GetFileSize() override;
224 
225   virtual Status InvalidateCache(size_t offset, size_t length) override;
226 
227   virtual Status Allocate(uint64_t offset, uint64_t len) override;
228 
229   virtual size_t GetUniqueId(char* id, size_t max_size) const override;
230 };
231 
232 class WinRandomAccessImpl {
233  protected:
234   WinFileData* file_base_;
235   size_t       alignment_;
236 
237   // Override for behavior change when creating a custom env
238   virtual Status PositionedReadInternal(char* src, size_t numBytes,
239                                         uint64_t offset, size_t& bytes_read) const;
240 
241   WinRandomAccessImpl(WinFileData* file_base, size_t alignment,
242                       const EnvOptions& options);
243 
~WinRandomAccessImpl()244   virtual ~WinRandomAccessImpl() {}
245 
246   Status ReadImpl(uint64_t offset, size_t n, Slice* result,
247                   char* scratch) const;
248 
GetAlignment()249   size_t GetAlignment() const { return alignment_; }
250 
251  public:
252 
253   WinRandomAccessImpl(const WinRandomAccessImpl&) = delete;
254   WinRandomAccessImpl& operator=(const WinRandomAccessImpl&) = delete;
255 };
256 
257 // pread() based random-access
258 class WinRandomAccessFile
259     : private WinFileData,
260       protected WinRandomAccessImpl,  // Want to be able to override
261                                       // PositionedReadInternal
262       public RandomAccessFile {
263  public:
264   WinRandomAccessFile(const std::string& fname, HANDLE hFile, size_t alignment,
265                       const EnvOptions& options);
266 
267   ~WinRandomAccessFile();
268 
269   virtual Status Read(uint64_t offset, size_t n, Slice* result,
270                       char* scratch) const override;
271 
272   virtual size_t GetUniqueId(char* id, size_t max_size) const override;
273 
use_direct_io()274   virtual bool use_direct_io() const override { return WinFileData::use_direct_io(); }
275 
276   virtual Status InvalidateCache(size_t offset, size_t length) override;
277 
278   virtual size_t GetRequiredBufferAlignment() const override;
279 };
280 
281 // This is a sequential write class. It has been mimicked (as others) after
282 // the original Posix class. We add support for unbuffered I/O on windows as
283 // well
284 // we utilize the original buffer as an alignment buffer to write directly to
285 // file with no buffering.
286 // No buffering requires that the provided buffer is aligned to the physical
287 // sector size (SSD page size) and
288 // that all SetFilePointer() operations to occur with such an alignment.
289 // We thus always write in sector/page size increments to the drive and leave
290 // the tail for the next write OR for Close() at which point we pad with zeros.
291 // No padding is required for
292 // buffered access.
293 class WinWritableImpl {
294  protected:
295   WinFileData* file_data_;
296   const uint64_t alignment_;
297   uint64_t next_write_offset_; // Needed because Windows does not support O_APPEND
298   uint64_t reservedsize_;  // how far we have reserved space
299 
300   virtual Status PreallocateInternal(uint64_t spaceToReserve);
301 
302   WinWritableImpl(WinFileData* file_data, size_t alignment);
303 
~WinWritableImpl()304   ~WinWritableImpl() {}
305 
GetAlignement()306   uint64_t GetAlignement() const { return alignment_; }
307 
308   Status AppendImpl(const Slice& data);
309 
310   // Requires that the data is aligned as specified by
311   // GetRequiredBufferAlignment()
312   Status PositionedAppendImpl(const Slice& data, uint64_t offset);
313 
314   Status TruncateImpl(uint64_t size);
315 
316   Status CloseImpl();
317 
318   Status SyncImpl();
319 
GetFileNextWriteOffset()320   uint64_t GetFileNextWriteOffset() {
321     // Double accounting now here with WritableFileWriter
322     // and this size will be wrong when unbuffered access is used
323     // but tests implement their own writable files and do not use
324     // WritableFileWrapper
325     // so we need to squeeze a square peg through
326     // a round hole here.
327     return next_write_offset_;
328   }
329 
330   Status AllocateImpl(uint64_t offset, uint64_t len);
331 
332  public:
333   WinWritableImpl(const WinWritableImpl&) = delete;
334   WinWritableImpl& operator=(const WinWritableImpl&) = delete;
335 };
336 
337 class WinWritableFile : private WinFileData,
338                         protected WinWritableImpl,
339                         public WritableFile {
340  public:
341   WinWritableFile(const std::string& fname, HANDLE hFile, size_t alignment,
342                   size_t capacity, const EnvOptions& options);
343 
344   ~WinWritableFile();
345 
346   virtual Status Append(const Slice& data) override;
347 
348   // Requires that the data is aligned as specified by
349   // GetRequiredBufferAlignment()
350   virtual Status PositionedAppend(const Slice& data, uint64_t offset) override;
351 
352   // Need to implement this so the file is truncated correctly
353   // when buffered and unbuffered mode
354   virtual Status Truncate(uint64_t size) override;
355 
356   virtual Status Close() override;
357 
358   // write out the cached data to the OS cache
359   // This is now taken care of the WritableFileWriter
360   virtual Status Flush() override;
361 
362   virtual Status Sync() override;
363 
364   virtual Status Fsync() override;
365 
366   virtual bool IsSyncThreadSafe() const override;
367 
368   // Indicates if the class makes use of direct I/O
369   // Use PositionedAppend
370   virtual bool use_direct_io() const override;
371 
372   virtual size_t GetRequiredBufferAlignment() const override;
373 
374   virtual uint64_t GetFileSize() override;
375 
376   virtual Status Allocate(uint64_t offset, uint64_t len) override;
377 
378   virtual size_t GetUniqueId(char* id, size_t max_size) const override;
379 };
380 
381 class WinRandomRWFile : private WinFileData,
382                         protected WinRandomAccessImpl,
383                         protected WinWritableImpl,
384                         public RandomRWFile {
385  public:
386   WinRandomRWFile(const std::string& fname, HANDLE hFile, size_t alignment,
387                   const EnvOptions& options);
388 
~WinRandomRWFile()389   ~WinRandomRWFile() {}
390 
391   // Indicates if the class makes use of direct I/O
392   // If false you must pass aligned buffer to Write()
393   virtual bool use_direct_io() const override;
394 
395   // Use the returned alignment value to allocate aligned
396   // buffer for Write() when use_direct_io() returns true
397   virtual size_t GetRequiredBufferAlignment() const override;
398 
399   // Write bytes in `data` at  offset `offset`, Returns Status::OK() on success.
400   // Pass aligned buffer when use_direct_io() returns true.
401   virtual Status Write(uint64_t offset, const Slice& data) override;
402 
403   // Read up to `n` bytes starting from offset `offset` and store them in
404   // result, provided `scratch` size should be at least `n`.
405   // Returns Status::OK() on success.
406   virtual Status Read(uint64_t offset, size_t n, Slice* result,
407                       char* scratch) const override;
408 
409   virtual Status Flush() override;
410 
411   virtual Status Sync() override;
412 
Fsync()413   virtual Status Fsync() { return Sync(); }
414 
415   virtual Status Close() override;
416 };
417 
418 class WinMemoryMappedBuffer : public MemoryMappedFileBuffer {
419 private:
420   HANDLE  file_handle_;
421   HANDLE  map_handle_;
422 public:
WinMemoryMappedBuffer(HANDLE file_handle,HANDLE map_handle,void * base,size_t size)423   WinMemoryMappedBuffer(HANDLE file_handle, HANDLE map_handle, void* base, size_t size) :
424     MemoryMappedFileBuffer(base, size),
425     file_handle_(file_handle),
426     map_handle_(map_handle) {}
427   ~WinMemoryMappedBuffer() override;
428 };
429 
430 class WinDirectory : public Directory {
431   HANDLE handle_;
432  public:
WinDirectory(HANDLE h)433   explicit WinDirectory(HANDLE h) noexcept : handle_(h) {
434     assert(handle_ != INVALID_HANDLE_VALUE);
435   }
~WinDirectory()436   ~WinDirectory() {
437     ::CloseHandle(handle_);
438   }
439   virtual Status Fsync() override;
440 
441   size_t GetUniqueId(char* id, size_t max_size) const override;
442 };
443 
444 class WinFileLock : public FileLock {
445  public:
WinFileLock(HANDLE hFile)446   explicit WinFileLock(HANDLE hFile) : hFile_(hFile) {
447     assert(hFile != NULL);
448     assert(hFile != INVALID_HANDLE_VALUE);
449   }
450 
451   ~WinFileLock();
452 
453  private:
454   HANDLE hFile_;
455 };
456 }
457 }
458