1 //===--- MemoryBuffer.cpp - Memory Buffer implementation ------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 //  This file implements the MemoryBuffer interface.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "llvm/Support/MemoryBuffer.h"
14 #include "llvm/ADT/SmallString.h"
15 #include "llvm/Config/config.h"
16 #include "llvm/Support/Errc.h"
17 #include "llvm/Support/Error.h"
18 #include "llvm/Support/ErrorHandling.h"
19 #include "llvm/Support/FileSystem.h"
20 #include "llvm/Support/MathExtras.h"
21 #include "llvm/Support/Process.h"
22 #include "llvm/Support/Program.h"
23 #include "llvm/Support/SmallVectorMemoryBuffer.h"
24 #include <cassert>
25 #include <cstring>
26 #include <new>
27 #include <sys/types.h>
28 #include <system_error>
29 #if !defined(_MSC_VER) && !defined(__MINGW32__)
30 #include <unistd.h>
31 #else
32 #include <io.h>
33 #endif
34 
35 #ifdef __MVS__
36 #include "llvm/Support/AutoConvert.h"
37 #endif
38 using namespace llvm;
39 
40 //===----------------------------------------------------------------------===//
41 // MemoryBuffer implementation itself.
42 //===----------------------------------------------------------------------===//
43 
// The destructor is defaulted here, in this translation unit, rather than
// inside the class definition.
MemoryBuffer::~MemoryBuffer() = default;
45 
46 /// init - Initialize this MemoryBuffer as a reference to externally allocated
47 /// memory, memory that we know is already null terminated.
48 void MemoryBuffer::init(const char *BufStart, const char *BufEnd,
49                         bool RequiresNullTerminator) {
50   assert((!RequiresNullTerminator || BufEnd[0] == 0) &&
51          "Buffer is not null terminated!");
52   BufferStart = BufStart;
53   BufferEnd = BufEnd;
54 }
55 
56 //===----------------------------------------------------------------------===//
57 // MemoryBufferMem implementation.
58 //===----------------------------------------------------------------------===//
59 
60 /// CopyStringRef - Copies contents of a StringRef into a block of memory and
61 /// null-terminates it.
62 static void CopyStringRef(char *Memory, StringRef Data) {
63   if (!Data.empty())
64     memcpy(Memory, Data.data(), Data.size());
65   Memory[Data.size()] = 0; // Null terminate string.
66 }
67 
68 namespace {
69 struct NamedBufferAlloc {
70   const Twine &Name;
71   NamedBufferAlloc(const Twine &Name) : Name(Name) {}
72 };
73 } // namespace
74 
75 void *operator new(size_t N, const NamedBufferAlloc &Alloc) {
76   SmallString<256> NameBuf;
77   StringRef NameRef = Alloc.Name.toStringRef(NameBuf);
78 
79   char *Mem = static_cast<char *>(operator new(N + NameRef.size() + 1));
80   CopyStringRef(Mem + N, NameRef);
81   return Mem;
82 }
83 
84 namespace {
85 /// MemoryBufferMem - Named MemoryBuffer pointing to a block of memory.
86 template<typename MB>
87 class MemoryBufferMem : public MB {
88 public:
89   MemoryBufferMem(StringRef InputData, bool RequiresNullTerminator) {
90     MemoryBuffer::init(InputData.begin(), InputData.end(),
91                        RequiresNullTerminator);
92   }
93 
94   /// Disable sized deallocation for MemoryBufferMem, because it has
95   /// tail-allocated data.
96   void operator delete(void *p) { ::operator delete(p); }
97 
98   StringRef getBufferIdentifier() const override {
99     // The name is stored after the class itself.
100     return StringRef(reinterpret_cast<const char *>(this + 1));
101   }
102 
103   MemoryBuffer::BufferKind getBufferKind() const override {
104     return MemoryBuffer::MemoryBuffer_Malloc;
105   }
106 };
107 } // namespace
108 
// Forward declaration: getFileAux is used by the getFile/getFileSlice entry
// points before its definition further down in this file.
template <typename MB>
static ErrorOr<std::unique_ptr<MB>>
getFileAux(const Twine &Filename, uint64_t MapSize, uint64_t Offset,
           bool IsText, bool RequiresNullTerminator, bool IsVolatile);
113 
114 std::unique_ptr<MemoryBuffer>
115 MemoryBuffer::getMemBuffer(StringRef InputData, StringRef BufferName,
116                            bool RequiresNullTerminator) {
117   auto *Ret = new (NamedBufferAlloc(BufferName))
118       MemoryBufferMem<MemoryBuffer>(InputData, RequiresNullTerminator);
119   return std::unique_ptr<MemoryBuffer>(Ret);
120 }
121 
122 std::unique_ptr<MemoryBuffer>
123 MemoryBuffer::getMemBuffer(MemoryBufferRef Ref, bool RequiresNullTerminator) {
124   return std::unique_ptr<MemoryBuffer>(getMemBuffer(
125       Ref.getBuffer(), Ref.getBufferIdentifier(), RequiresNullTerminator));
126 }
127 
128 static ErrorOr<std::unique_ptr<WritableMemoryBuffer>>
129 getMemBufferCopyImpl(StringRef InputData, const Twine &BufferName) {
130   auto Buf = WritableMemoryBuffer::getNewUninitMemBuffer(InputData.size(), BufferName);
131   if (!Buf)
132     return make_error_code(errc::not_enough_memory);
133   memcpy(Buf->getBufferStart(), InputData.data(), InputData.size());
134   return std::move(Buf);
135 }
136 
137 std::unique_ptr<MemoryBuffer>
138 MemoryBuffer::getMemBufferCopy(StringRef InputData, const Twine &BufferName) {
139   auto Buf = getMemBufferCopyImpl(InputData, BufferName);
140   if (Buf)
141     return std::move(*Buf);
142   return nullptr;
143 }
144 
145 ErrorOr<std::unique_ptr<MemoryBuffer>>
146 MemoryBuffer::getFileOrSTDIN(const Twine &Filename, bool IsText,
147                              bool RequiresNullTerminator) {
148   SmallString<256> NameBuf;
149   StringRef NameRef = Filename.toStringRef(NameBuf);
150 
151   if (NameRef == "-")
152     return getSTDIN();
153   return getFile(Filename, IsText, RequiresNullTerminator,
154                  /*IsVolatile=*/false);
155 }
156 
157 ErrorOr<std::unique_ptr<MemoryBuffer>>
158 MemoryBuffer::getFileSlice(const Twine &FilePath, uint64_t MapSize,
159                            uint64_t Offset, bool IsVolatile) {
160   return getFileAux<MemoryBuffer>(FilePath, MapSize, Offset, /*IsText=*/false,
161                                   /*RequiresNullTerminator=*/false, IsVolatile);
162 }
163 
164 //===----------------------------------------------------------------------===//
165 // MemoryBuffer::getFile implementation.
166 //===----------------------------------------------------------------------===//
167 
168 namespace {
169 
/// Selects the sys::fs::mapped_file_region mapping mode used for a given
/// buffer type \p MB. Buffer types without an explicit specialization below
/// are mapped read-only.
template <typename MB>
constexpr sys::fs::mapped_file_region::mapmode Mapmode =
    sys::fs::mapped_file_region::readonly;
/// MemoryBuffer is mapped read-only (same value as the primary template).
template <>
constexpr sys::fs::mapped_file_region::mapmode Mapmode<MemoryBuffer> =
    sys::fs::mapped_file_region::readonly;
/// WritableMemoryBuffer uses the `priv` mapping mode.
template <>
constexpr sys::fs::mapped_file_region::mapmode Mapmode<WritableMemoryBuffer> =
    sys::fs::mapped_file_region::priv;
/// WriteThroughMemoryBuffer uses the `readwrite` mapping mode.
template <>
constexpr sys::fs::mapped_file_region::mapmode
    Mapmode<WriteThroughMemoryBuffer> = sys::fs::mapped_file_region::readwrite;
182 
183 /// Memory maps a file descriptor using sys::fs::mapped_file_region.
184 ///
185 /// This handles converting the offset into a legal offset on the platform.
186 template<typename MB>
187 class MemoryBufferMMapFile : public MB {
188   sys::fs::mapped_file_region MFR;
189 
190   static uint64_t getLegalMapOffset(uint64_t Offset) {
191     return Offset & ~(sys::fs::mapped_file_region::alignment() - 1);
192   }
193 
194   static uint64_t getLegalMapSize(uint64_t Len, uint64_t Offset) {
195     return Len + (Offset - getLegalMapOffset(Offset));
196   }
197 
198   const char *getStart(uint64_t Len, uint64_t Offset) {
199     return MFR.const_data() + (Offset - getLegalMapOffset(Offset));
200   }
201 
202 public:
203   MemoryBufferMMapFile(bool RequiresNullTerminator, sys::fs::file_t FD, uint64_t Len,
204                        uint64_t Offset, std::error_code &EC)
205       : MFR(FD, Mapmode<MB>, getLegalMapSize(Len, Offset),
206             getLegalMapOffset(Offset), EC) {
207     if (!EC) {
208       const char *Start = getStart(Len, Offset);
209       MemoryBuffer::init(Start, Start + Len, RequiresNullTerminator);
210     }
211   }
212 
213   /// Disable sized deallocation for MemoryBufferMMapFile, because it has
214   /// tail-allocated data.
215   void operator delete(void *p) { ::operator delete(p); }
216 
217   StringRef getBufferIdentifier() const override {
218     // The name is stored after the class itself.
219     return StringRef(reinterpret_cast<const char *>(this + 1));
220   }
221 
222   MemoryBuffer::BufferKind getBufferKind() const override {
223     return MemoryBuffer::MemoryBuffer_MMap;
224   }
225 
226   void dontNeedIfMmap() override { MFR.dontNeed(); }
227 };
228 } // namespace
229 
230 static ErrorOr<std::unique_ptr<WritableMemoryBuffer>>
231 getMemoryBufferForStream(sys::fs::file_t FD, const Twine &BufferName) {
232   SmallString<sys::fs::DefaultReadChunkSize> Buffer;
233   if (Error E = sys::fs::readNativeFileToEOF(FD, Buffer))
234     return errorToErrorCode(std::move(E));
235   return getMemBufferCopyImpl(Buffer, BufferName);
236 }
237 
238 ErrorOr<std::unique_ptr<MemoryBuffer>>
239 MemoryBuffer::getFile(const Twine &Filename, bool IsText,
240                       bool RequiresNullTerminator, bool IsVolatile) {
241   return getFileAux<MemoryBuffer>(Filename, /*MapSize=*/-1, /*Offset=*/0,
242                                   IsText, RequiresNullTerminator, IsVolatile);
243 }
244 
// Forward declaration: getOpenFileImpl is called by getFileAux below, ahead of
// its definition later in this file.
template <typename MB>
static ErrorOr<std::unique_ptr<MB>>
getOpenFileImpl(sys::fs::file_t FD, const Twine &Filename, uint64_t FileSize,
                uint64_t MapSize, int64_t Offset, bool RequiresNullTerminator,
                bool IsVolatile);
250 
251 template <typename MB>
252 static ErrorOr<std::unique_ptr<MB>>
253 getFileAux(const Twine &Filename, uint64_t MapSize, uint64_t Offset,
254            bool IsText, bool RequiresNullTerminator, bool IsVolatile) {
255   Expected<sys::fs::file_t> FDOrErr = sys::fs::openNativeFileForRead(
256       Filename, IsText ? sys::fs::OF_TextWithCRLF : sys::fs::OF_None);
257   if (!FDOrErr)
258     return errorToErrorCode(FDOrErr.takeError());
259   sys::fs::file_t FD = *FDOrErr;
260   auto Ret = getOpenFileImpl<MB>(FD, Filename, /*FileSize=*/-1, MapSize, Offset,
261                                  RequiresNullTerminator, IsVolatile);
262   sys::fs::closeFile(FD);
263   return Ret;
264 }
265 
266 ErrorOr<std::unique_ptr<WritableMemoryBuffer>>
267 WritableMemoryBuffer::getFile(const Twine &Filename, bool IsVolatile) {
268   return getFileAux<WritableMemoryBuffer>(
269       Filename, /*MapSize=*/-1, /*Offset=*/0, /*IsText=*/false,
270       /*RequiresNullTerminator=*/false, IsVolatile);
271 }
272 
273 ErrorOr<std::unique_ptr<WritableMemoryBuffer>>
274 WritableMemoryBuffer::getFileSlice(const Twine &Filename, uint64_t MapSize,
275                                    uint64_t Offset, bool IsVolatile) {
276   return getFileAux<WritableMemoryBuffer>(
277       Filename, MapSize, Offset, /*IsText=*/false,
278       /*RequiresNullTerminator=*/false, IsVolatile);
279 }
280 
281 std::unique_ptr<WritableMemoryBuffer>
282 WritableMemoryBuffer::getNewUninitMemBuffer(size_t Size, const Twine &BufferName) {
283   using MemBuffer = MemoryBufferMem<WritableMemoryBuffer>;
284   // Allocate space for the MemoryBuffer, the data and the name. It is important
285   // that MemoryBuffer and data are aligned so PointerIntPair works with them.
286   // TODO: Is 16-byte alignment enough?  We copy small object files with large
287   // alignment expectations into this buffer.
288   SmallString<256> NameBuf;
289   StringRef NameRef = BufferName.toStringRef(NameBuf);
290   size_t AlignedStringLen = alignTo(sizeof(MemBuffer) + NameRef.size() + 1, 16);
291   size_t RealLen = AlignedStringLen + Size + 1;
292   if (RealLen <= Size) // Check for rollover.
293     return nullptr;
294   char *Mem = static_cast<char*>(operator new(RealLen, std::nothrow));
295   if (!Mem)
296     return nullptr;
297 
298   // The name is stored after the class itself.
299   CopyStringRef(Mem + sizeof(MemBuffer), NameRef);
300 
301   // The buffer begins after the name and must be aligned.
302   char *Buf = Mem + AlignedStringLen;
303   Buf[Size] = 0; // Null terminate buffer.
304 
305   auto *Ret = new (Mem) MemBuffer(StringRef(Buf, Size), true);
306   return std::unique_ptr<WritableMemoryBuffer>(Ret);
307 }
308 
309 std::unique_ptr<WritableMemoryBuffer>
310 WritableMemoryBuffer::getNewMemBuffer(size_t Size, const Twine &BufferName) {
311   auto SB = WritableMemoryBuffer::getNewUninitMemBuffer(Size, BufferName);
312   if (!SB)
313     return nullptr;
314   memset(SB->getBufferStart(), 0, Size);
315   return SB;
316 }
317 
318 static bool shouldUseMmap(sys::fs::file_t FD,
319                           size_t FileSize,
320                           size_t MapSize,
321                           off_t Offset,
322                           bool RequiresNullTerminator,
323                           int PageSize,
324                           bool IsVolatile) {
325   // mmap may leave the buffer without null terminator if the file size changed
326   // by the time the last page is mapped in, so avoid it if the file size is
327   // likely to change.
328   if (IsVolatile && RequiresNullTerminator)
329     return false;
330 
331   // We don't use mmap for small files because this can severely fragment our
332   // address space.
333   if (MapSize < 4 * 4096 || MapSize < (unsigned)PageSize)
334     return false;
335 
336   if (!RequiresNullTerminator)
337     return true;
338 
339   // If we don't know the file size, use fstat to find out.  fstat on an open
340   // file descriptor is cheaper than stat on a random path.
341   // FIXME: this chunk of code is duplicated, but it avoids a fstat when
342   // RequiresNullTerminator = false and MapSize != -1.
343   if (FileSize == size_t(-1)) {
344     sys::fs::file_status Status;
345     if (sys::fs::status(FD, Status))
346       return false;
347     FileSize = Status.getSize();
348   }
349 
350   // If we need a null terminator and the end of the map is inside the file,
351   // we cannot use mmap.
352   size_t End = Offset + MapSize;
353   assert(End <= FileSize);
354   if (End != FileSize)
355     return false;
356 
357   // Don't try to map files that are exactly a multiple of the system page size
358   // if we need a null terminator.
359   if ((FileSize & (PageSize -1)) == 0)
360     return false;
361 
362 #if defined(__CYGWIN__)
363   // Don't try to map files that are exactly a multiple of the physical page size
364   // if we need a null terminator.
365   // FIXME: We should reorganize again getPageSize() on Win32.
366   if ((FileSize & (4096 - 1)) == 0)
367     return false;
368 #endif
369 
370   return true;
371 }
372 
373 static ErrorOr<std::unique_ptr<WriteThroughMemoryBuffer>>
374 getReadWriteFile(const Twine &Filename, uint64_t FileSize, uint64_t MapSize,
375                  uint64_t Offset) {
376   Expected<sys::fs::file_t> FDOrErr = sys::fs::openNativeFileForReadWrite(
377       Filename, sys::fs::CD_OpenExisting, sys::fs::OF_None);
378   if (!FDOrErr)
379     return errorToErrorCode(FDOrErr.takeError());
380   sys::fs::file_t FD = *FDOrErr;
381 
382   // Default is to map the full file.
383   if (MapSize == uint64_t(-1)) {
384     // If we don't know the file size, use fstat to find out.  fstat on an open
385     // file descriptor is cheaper than stat on a random path.
386     if (FileSize == uint64_t(-1)) {
387       sys::fs::file_status Status;
388       std::error_code EC = sys::fs::status(FD, Status);
389       if (EC)
390         return EC;
391 
392       // If this not a file or a block device (e.g. it's a named pipe
393       // or character device), we can't mmap it, so error out.
394       sys::fs::file_type Type = Status.type();
395       if (Type != sys::fs::file_type::regular_file &&
396           Type != sys::fs::file_type::block_file)
397         return make_error_code(errc::invalid_argument);
398 
399       FileSize = Status.getSize();
400     }
401     MapSize = FileSize;
402   }
403 
404   std::error_code EC;
405   std::unique_ptr<WriteThroughMemoryBuffer> Result(
406       new (NamedBufferAlloc(Filename))
407           MemoryBufferMMapFile<WriteThroughMemoryBuffer>(false, FD, MapSize,
408                                                          Offset, EC));
409   if (EC)
410     return EC;
411   return std::move(Result);
412 }
413 
414 ErrorOr<std::unique_ptr<WriteThroughMemoryBuffer>>
415 WriteThroughMemoryBuffer::getFile(const Twine &Filename, int64_t FileSize) {
416   return getReadWriteFile(Filename, FileSize, FileSize, 0);
417 }
418 
419 /// Map a subrange of the specified file as a WritableMemoryBuffer.
420 ErrorOr<std::unique_ptr<WriteThroughMemoryBuffer>>
421 WriteThroughMemoryBuffer::getFileSlice(const Twine &Filename, uint64_t MapSize,
422                                        uint64_t Offset) {
423   return getReadWriteFile(Filename, -1, MapSize, Offset);
424 }
425 
426 template <typename MB>
427 static ErrorOr<std::unique_ptr<MB>>
428 getOpenFileImpl(sys::fs::file_t FD, const Twine &Filename, uint64_t FileSize,
429                 uint64_t MapSize, int64_t Offset, bool RequiresNullTerminator,
430                 bool IsVolatile) {
431   static int PageSize = sys::Process::getPageSizeEstimate();
432 
433   // Default is to map the full file.
434   if (MapSize == uint64_t(-1)) {
435     // If we don't know the file size, use fstat to find out.  fstat on an open
436     // file descriptor is cheaper than stat on a random path.
437     if (FileSize == uint64_t(-1)) {
438       sys::fs::file_status Status;
439       std::error_code EC = sys::fs::status(FD, Status);
440       if (EC)
441         return EC;
442 
443       // If this not a file or a block device (e.g. it's a named pipe
444       // or character device), we can't trust the size. Create the memory
445       // buffer by copying off the stream.
446       sys::fs::file_type Type = Status.type();
447       if (Type != sys::fs::file_type::regular_file &&
448           Type != sys::fs::file_type::block_file)
449         return getMemoryBufferForStream(FD, Filename);
450 
451       FileSize = Status.getSize();
452     }
453     MapSize = FileSize;
454   }
455 
456   if (shouldUseMmap(FD, FileSize, MapSize, Offset, RequiresNullTerminator,
457                     PageSize, IsVolatile)) {
458     std::error_code EC;
459     std::unique_ptr<MB> Result(
460         new (NamedBufferAlloc(Filename)) MemoryBufferMMapFile<MB>(
461             RequiresNullTerminator, FD, MapSize, Offset, EC));
462     if (!EC)
463       return std::move(Result);
464   }
465 
466 #ifdef __MVS__
467   // Set codepage auto-conversion for z/OS.
468   if (auto EC = llvm::enableAutoConversion(FD))
469     return EC;
470 #endif
471 
472   auto Buf = WritableMemoryBuffer::getNewUninitMemBuffer(MapSize, Filename);
473   if (!Buf) {
474     // Failed to create a buffer. The only way it can fail is if
475     // new(std::nothrow) returns 0.
476     return make_error_code(errc::not_enough_memory);
477   }
478 
479   // Read until EOF, zero-initialize the rest.
480   MutableArrayRef<char> ToRead = Buf->getBuffer();
481   while (!ToRead.empty()) {
482     Expected<size_t> ReadBytes =
483         sys::fs::readNativeFileSlice(FD, ToRead, Offset);
484     if (!ReadBytes)
485       return errorToErrorCode(ReadBytes.takeError());
486     if (*ReadBytes == 0) {
487       std::memset(ToRead.data(), 0, ToRead.size());
488       break;
489     }
490     ToRead = ToRead.drop_front(*ReadBytes);
491     Offset += *ReadBytes;
492   }
493 
494   return std::move(Buf);
495 }
496 
497 ErrorOr<std::unique_ptr<MemoryBuffer>>
498 MemoryBuffer::getOpenFile(sys::fs::file_t FD, const Twine &Filename, uint64_t FileSize,
499                           bool RequiresNullTerminator, bool IsVolatile) {
500   return getOpenFileImpl<MemoryBuffer>(FD, Filename, FileSize, FileSize, 0,
501                          RequiresNullTerminator, IsVolatile);
502 }
503 
504 ErrorOr<std::unique_ptr<MemoryBuffer>>
505 MemoryBuffer::getOpenFileSlice(sys::fs::file_t FD, const Twine &Filename, uint64_t MapSize,
506                                int64_t Offset, bool IsVolatile) {
507   assert(MapSize != uint64_t(-1));
508   return getOpenFileImpl<MemoryBuffer>(FD, Filename, -1, MapSize, Offset, false,
509                                        IsVolatile);
510 }
511 
512 ErrorOr<std::unique_ptr<MemoryBuffer>> MemoryBuffer::getSTDIN() {
513   // Read in all of the data from stdin, we cannot mmap stdin.
514   //
515   // FIXME: That isn't necessarily true, we should try to mmap stdin and
516   // fallback if it fails.
517   sys::ChangeStdinMode(sys::fs::OF_Text);
518 
519   return getMemoryBufferForStream(sys::fs::getStdinHandle(), "<stdin>");
520 }
521 
522 ErrorOr<std::unique_ptr<MemoryBuffer>>
523 MemoryBuffer::getFileAsStream(const Twine &Filename) {
524   Expected<sys::fs::file_t> FDOrErr =
525       sys::fs::openNativeFileForRead(Filename, sys::fs::OF_None);
526   if (!FDOrErr)
527     return errorToErrorCode(FDOrErr.takeError());
528   sys::fs::file_t FD = *FDOrErr;
529   ErrorOr<std::unique_ptr<MemoryBuffer>> Ret =
530       getMemoryBufferForStream(FD, Filename);
531   sys::fs::closeFile(FD);
532   return Ret;
533 }
534 
535 MemoryBufferRef MemoryBuffer::getMemBufferRef() const {
536   StringRef Data = getBuffer();
537   StringRef Identifier = getBufferIdentifier();
538   return MemoryBufferRef(Data, Identifier);
539 }
540 
// The SmallVectorMemoryBuffer destructor is defaulted here, in this
// translation unit, rather than inside the class definition.
SmallVectorMemoryBuffer::~SmallVectorMemoryBuffer() = default;
542