1 // Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
2 // This source code is licensed under both the GPLv2 (found in the
3 // COPYING file in the root directory) and Apache 2.0 License
4 // (found in the LICENSE.Apache file in the root directory).
5 //
6 // Copyright (c) 2011 The LevelDB Authors. All rights reserved.
7 // Use of this source code is governed by a BSD-style license that can be
8 // found in the LICENSE file. See the AUTHORS file for names of contributors.
9 #pragma once
10
11 #include <stdint.h>
12 #include <mutex>
13 #include <string>
14
15 #include "rocksdb/status.h"
16 #include "rocksdb/env.h"
17 #include "util/aligned_buffer.h"
18
19 #include <windows.h>
20
21 namespace ROCKSDB_NAMESPACE {
22 namespace port {
23
24 std::string GetWindowsErrSz(DWORD err);
25
IOErrorFromWindowsError(const std::string & context,DWORD err)26 inline Status IOErrorFromWindowsError(const std::string& context, DWORD err) {
27 return ((err == ERROR_HANDLE_DISK_FULL) || (err == ERROR_DISK_FULL))
28 ? Status::NoSpace(context, GetWindowsErrSz(err))
29 : ((err == ERROR_FILE_NOT_FOUND) || (err == ERROR_PATH_NOT_FOUND))
30 ? Status::PathNotFound(context, GetWindowsErrSz(err))
31 : Status::IOError(context, GetWindowsErrSz(err));
32 }
33
IOErrorFromLastWindowsError(const std::string & context)34 inline Status IOErrorFromLastWindowsError(const std::string& context) {
35 return IOErrorFromWindowsError(context, GetLastError());
36 }
37
IOError(const std::string & context,int err_number)38 inline Status IOError(const std::string& context, int err_number) {
39 return (err_number == ENOSPC)
40 ? Status::NoSpace(context, strerror(err_number))
41 : (err_number == ENOENT)
42 ? Status::PathNotFound(context, strerror(err_number))
43 : Status::IOError(context, strerror(err_number));
44 }
45
46 class WinFileData;
47
48 Status pwrite(const WinFileData* file_data, const Slice& data,
49 uint64_t offset, size_t& bytes_written);
50
51 Status pread(const WinFileData* file_data, char* src, size_t num_bytes,
52 uint64_t offset, size_t& bytes_read);
53
54 Status fallocate(const std::string& filename, HANDLE hFile, uint64_t to_size);
55
56 Status ftruncate(const std::string& filename, HANDLE hFile, uint64_t toSize);
57
58 size_t GetUniqueIdFromFile(HANDLE hFile, char* id, size_t max_size);
59
60 class WinFileData {
61 protected:
62 const std::string filename_;
63 HANDLE hFile_;
64 // If true, the I/O issued would be direct I/O which the buffer
65 // will need to be aligned (not sure there is a guarantee that the buffer
66 // passed in is aligned).
67 const bool use_direct_io_;
68
69 public:
70 // We want this class be usable both for inheritance (prive
71 // or protected) and for containment so __ctor and __dtor public
WinFileData(const std::string & filename,HANDLE hFile,bool direct_io)72 WinFileData(const std::string& filename, HANDLE hFile, bool direct_io)
73 : filename_(filename), hFile_(hFile), use_direct_io_(direct_io) {}
74
~WinFileData()75 virtual ~WinFileData() { this->CloseFile(); }
76
CloseFile()77 bool CloseFile() {
78 bool result = true;
79
80 if (hFile_ != NULL && hFile_ != INVALID_HANDLE_VALUE) {
81 result = ::CloseHandle(hFile_);
82 assert(result);
83 hFile_ = NULL;
84 }
85 return result;
86 }
87
GetName()88 const std::string& GetName() const { return filename_; }
89
GetFileHandle()90 HANDLE GetFileHandle() const { return hFile_; }
91
use_direct_io()92 bool use_direct_io() const { return use_direct_io_; }
93
94 WinFileData(const WinFileData&) = delete;
95 WinFileData& operator=(const WinFileData&) = delete;
96 };
97
98 class WinSequentialFile : protected WinFileData, public SequentialFile {
99
100 // Override for behavior change when creating a custom env
101 virtual Status PositionedReadInternal(char* src, size_t numBytes,
102 uint64_t offset, size_t& bytes_read) const;
103
104 public:
105 WinSequentialFile(const std::string& fname, HANDLE f,
106 const EnvOptions& options);
107
108 ~WinSequentialFile();
109
110 WinSequentialFile(const WinSequentialFile&) = delete;
111 WinSequentialFile& operator=(const WinSequentialFile&) = delete;
112
113 virtual Status Read(size_t n, Slice* result, char* scratch) override;
114 virtual Status PositionedRead(uint64_t offset, size_t n, Slice* result,
115 char* scratch) override;
116
117 virtual Status Skip(uint64_t n) override;
118
119 virtual Status InvalidateCache(size_t offset, size_t length) override;
120
use_direct_io()121 virtual bool use_direct_io() const override { return WinFileData::use_direct_io(); }
122 };
123
124 // mmap() based random-access
125 class WinMmapReadableFile : private WinFileData, public RandomAccessFile {
126 HANDLE hMap_;
127
128 const void* mapped_region_;
129 const size_t length_;
130
131 public:
132 // mapped_region_[0,length-1] contains the mmapped contents of the file.
133 WinMmapReadableFile(const std::string& fileName, HANDLE hFile, HANDLE hMap,
134 const void* mapped_region, size_t length);
135
136 ~WinMmapReadableFile();
137
138 WinMmapReadableFile(const WinMmapReadableFile&) = delete;
139 WinMmapReadableFile& operator=(const WinMmapReadableFile&) = delete;
140
141 virtual Status Read(uint64_t offset, size_t n, Slice* result,
142 char* scratch) const override;
143
144 virtual Status InvalidateCache(size_t offset, size_t length) override;
145
146 virtual size_t GetUniqueId(char* id, size_t max_size) const override;
147 };
148
149 // We preallocate and use memcpy to append new
150 // data to the file. This is safe since we either properly close the
151 // file before reading from it, or for log files, the reading code
152 // knows enough to skip zero suffixes.
153 class WinMmapFile : private WinFileData, public WritableFile {
154 private:
155 HANDLE hMap_;
156
157 const size_t page_size_; // We flush the mapping view in page_size
158 // increments. We may decide if this is a memory
159 // page size or SSD page size
160 const size_t
161 allocation_granularity_; // View must start at such a granularity
162
163 size_t reserved_size_; // Preallocated size
164
165 size_t mapping_size_; // The max size of the mapping object
166 // we want to guess the final file size to minimize the remapping
167 size_t view_size_; // How much memory to map into a view at a time
168
169 char* mapped_begin_; // Must begin at the file offset that is aligned with
170 // allocation_granularity_
171 char* mapped_end_;
172 char* dst_; // Where to write next (in range [mapped_begin_,mapped_end_])
173 char* last_sync_; // Where have we synced up to
174
175 uint64_t file_offset_; // Offset of mapped_begin_ in file
176
177 // Do we have unsynced writes?
178 bool pending_sync_;
179
180 // Can only truncate or reserve to a sector size aligned if
181 // used on files that are opened with Unbuffered I/O
182 Status TruncateFile(uint64_t toSize);
183
184 Status UnmapCurrentRegion();
185
186 Status MapNewRegion();
187
188 virtual Status PreallocateInternal(uint64_t spaceToReserve);
189
190 public:
191 WinMmapFile(const std::string& fname, HANDLE hFile, size_t page_size,
192 size_t allocation_granularity, const EnvOptions& options);
193
194 ~WinMmapFile();
195
196 WinMmapFile(const WinMmapFile&) = delete;
197 WinMmapFile& operator=(const WinMmapFile&) = delete;
198
199 virtual Status Append(const Slice& data) override;
200
201 // Means Close() will properly take care of truncate
202 // and it does not need any additional information
203 virtual Status Truncate(uint64_t size) override;
204
205 virtual Status Close() override;
206
207 virtual Status Flush() override;
208
209 // Flush only data
210 virtual Status Sync() override;
211
212 /**
213 * Flush data as well as metadata to stable storage.
214 */
215 virtual Status Fsync() override;
216
217 /**
218 * Get the size of valid data in the file. This will not match the
219 * size that is returned from the filesystem because we use mmap
220 * to extend file by map_size every time.
221 */
222 virtual uint64_t GetFileSize() override;
223
224 virtual Status InvalidateCache(size_t offset, size_t length) override;
225
226 virtual Status Allocate(uint64_t offset, uint64_t len) override;
227
228 virtual size_t GetUniqueId(char* id, size_t max_size) const override;
229 };
230
231 class WinRandomAccessImpl {
232 protected:
233 WinFileData* file_base_;
234 size_t alignment_;
235
236 // Override for behavior change when creating a custom env
237 virtual Status PositionedReadInternal(char* src, size_t numBytes,
238 uint64_t offset, size_t& bytes_read) const;
239
240 WinRandomAccessImpl(WinFileData* file_base, size_t alignment,
241 const EnvOptions& options);
242
~WinRandomAccessImpl()243 virtual ~WinRandomAccessImpl() {}
244
245 Status ReadImpl(uint64_t offset, size_t n, Slice* result,
246 char* scratch) const;
247
GetAlignment()248 size_t GetAlignment() const { return alignment_; }
249
250 public:
251
252 WinRandomAccessImpl(const WinRandomAccessImpl&) = delete;
253 WinRandomAccessImpl& operator=(const WinRandomAccessImpl&) = delete;
254 };
255
256 // pread() based random-access
257 class WinRandomAccessFile
258 : private WinFileData,
259 protected WinRandomAccessImpl, // Want to be able to override
260 // PositionedReadInternal
261 public RandomAccessFile {
262 public:
263 WinRandomAccessFile(const std::string& fname, HANDLE hFile, size_t alignment,
264 const EnvOptions& options);
265
266 ~WinRandomAccessFile();
267
268 virtual Status Read(uint64_t offset, size_t n, Slice* result,
269 char* scratch) const override;
270
271 virtual size_t GetUniqueId(char* id, size_t max_size) const override;
272
use_direct_io()273 virtual bool use_direct_io() const override { return WinFileData::use_direct_io(); }
274
275 virtual Status InvalidateCache(size_t offset, size_t length) override;
276
277 virtual size_t GetRequiredBufferAlignment() const override;
278 };
279
280 // This is a sequential write class. It has been mimicked (as others) after
281 // the original Posix class. We add support for unbuffered I/O on windows as
282 // well
283 // we utilize the original buffer as an alignment buffer to write directly to
284 // file with no buffering.
285 // No buffering requires that the provided buffer is aligned to the physical
286 // sector size (SSD page size) and
287 // that all SetFilePointer() operations to occur with such an alignment.
288 // We thus always write in sector/page size increments to the drive and leave
289 // the tail for the next write OR for Close() at which point we pad with zeros.
290 // No padding is required for
291 // buffered access.
292 class WinWritableImpl {
293 protected:
294 WinFileData* file_data_;
295 const uint64_t alignment_;
296 uint64_t next_write_offset_; // Needed because Windows does not support O_APPEND
297 uint64_t reservedsize_; // how far we have reserved space
298
299 virtual Status PreallocateInternal(uint64_t spaceToReserve);
300
301 WinWritableImpl(WinFileData* file_data, size_t alignment);
302
~WinWritableImpl()303 ~WinWritableImpl() {}
304
GetAlignement()305 uint64_t GetAlignement() const { return alignment_; }
306
307 Status AppendImpl(const Slice& data);
308
309 // Requires that the data is aligned as specified by
310 // GetRequiredBufferAlignment()
311 Status PositionedAppendImpl(const Slice& data, uint64_t offset);
312
313 Status TruncateImpl(uint64_t size);
314
315 Status CloseImpl();
316
317 Status SyncImpl();
318
GetFileNextWriteOffset()319 uint64_t GetFileNextWriteOffset() {
320 // Double accounting now here with WritableFileWriter
321 // and this size will be wrong when unbuffered access is used
322 // but tests implement their own writable files and do not use
323 // WritableFileWrapper
324 // so we need to squeeze a square peg through
325 // a round hole here.
326 return next_write_offset_;
327 }
328
329 Status AllocateImpl(uint64_t offset, uint64_t len);
330
331 public:
332 WinWritableImpl(const WinWritableImpl&) = delete;
333 WinWritableImpl& operator=(const WinWritableImpl&) = delete;
334 };
335
336 class WinWritableFile : private WinFileData,
337 protected WinWritableImpl,
338 public WritableFile {
339 public:
340 WinWritableFile(const std::string& fname, HANDLE hFile, size_t alignment,
341 size_t capacity, const EnvOptions& options);
342
343 ~WinWritableFile();
344
345 virtual Status Append(const Slice& data) override;
346
347 // Requires that the data is aligned as specified by
348 // GetRequiredBufferAlignment()
349 virtual Status PositionedAppend(const Slice& data, uint64_t offset) override;
350
351 // Need to implement this so the file is truncated correctly
352 // when buffered and unbuffered mode
353 virtual Status Truncate(uint64_t size) override;
354
355 virtual Status Close() override;
356
357 // write out the cached data to the OS cache
358 // This is now taken care of the WritableFileWriter
359 virtual Status Flush() override;
360
361 virtual Status Sync() override;
362
363 virtual Status Fsync() override;
364
365 virtual bool IsSyncThreadSafe() const override;
366
367 // Indicates if the class makes use of direct I/O
368 // Use PositionedAppend
369 virtual bool use_direct_io() const override;
370
371 virtual size_t GetRequiredBufferAlignment() const override;
372
373 virtual uint64_t GetFileSize() override;
374
375 virtual Status Allocate(uint64_t offset, uint64_t len) override;
376
377 virtual size_t GetUniqueId(char* id, size_t max_size) const override;
378 };
379
380 class WinRandomRWFile : private WinFileData,
381 protected WinRandomAccessImpl,
382 protected WinWritableImpl,
383 public RandomRWFile {
384 public:
385 WinRandomRWFile(const std::string& fname, HANDLE hFile, size_t alignment,
386 const EnvOptions& options);
387
~WinRandomRWFile()388 ~WinRandomRWFile() {}
389
390 // Indicates if the class makes use of direct I/O
391 // If false you must pass aligned buffer to Write()
392 virtual bool use_direct_io() const override;
393
394 // Use the returned alignment value to allocate aligned
395 // buffer for Write() when use_direct_io() returns true
396 virtual size_t GetRequiredBufferAlignment() const override;
397
398 // Write bytes in `data` at offset `offset`, Returns Status::OK() on success.
399 // Pass aligned buffer when use_direct_io() returns true.
400 virtual Status Write(uint64_t offset, const Slice& data) override;
401
402 // Read up to `n` bytes starting from offset `offset` and store them in
403 // result, provided `scratch` size should be at least `n`.
404 // Returns Status::OK() on success.
405 virtual Status Read(uint64_t offset, size_t n, Slice* result,
406 char* scratch) const override;
407
408 virtual Status Flush() override;
409
410 virtual Status Sync() override;
411
Fsync()412 virtual Status Fsync() { return Sync(); }
413
414 virtual Status Close() override;
415 };
416
417 class WinMemoryMappedBuffer : public MemoryMappedFileBuffer {
418 private:
419 HANDLE file_handle_;
420 HANDLE map_handle_;
421 public:
WinMemoryMappedBuffer(HANDLE file_handle,HANDLE map_handle,void * base,size_t size)422 WinMemoryMappedBuffer(HANDLE file_handle, HANDLE map_handle, void* base, size_t size) :
423 MemoryMappedFileBuffer(base, size),
424 file_handle_(file_handle),
425 map_handle_(map_handle) {}
426 ~WinMemoryMappedBuffer() override;
427 };
428
429 class WinDirectory : public Directory {
430 HANDLE handle_;
431 public:
WinDirectory(HANDLE h)432 explicit WinDirectory(HANDLE h) noexcept : handle_(h) {
433 assert(handle_ != INVALID_HANDLE_VALUE);
434 }
~WinDirectory()435 ~WinDirectory() {
436 ::CloseHandle(handle_);
437 }
438 virtual Status Fsync() override;
439
440 size_t GetUniqueId(char* id, size_t max_size) const override;
441 };
442
443 class WinFileLock : public FileLock {
444 public:
WinFileLock(HANDLE hFile)445 explicit WinFileLock(HANDLE hFile) : hFile_(hFile) {
446 assert(hFile != NULL);
447 assert(hFile != INVALID_HANDLE_VALUE);
448 }
449
450 ~WinFileLock();
451
452 private:
453 HANDLE hFile_;
454 };
455 }
456 } // namespace ROCKSDB_NAMESPACE
457