MemoryBuffer.cpp 19 KB


  1. //===--- MemoryBuffer.cpp - Memory Buffer implementation ------------------===//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. //
  9. // This file implements the MemoryBuffer interface.
  10. //
  11. //===----------------------------------------------------------------------===//
  12. #include "llvm/Support/MemoryBuffer.h"
  13. #include "llvm/ADT/SmallString.h"
  14. #include "llvm/Config/config.h"
  15. #include "llvm/Support/AutoConvert.h"
  16. #include "llvm/Support/Error.h"
  17. #include "llvm/Support/ErrorHandling.h"
  18. #include "llvm/Support/Errc.h"
  19. #include "llvm/Support/FileSystem.h"
  20. #include "llvm/Support/MathExtras.h"
  21. #include "llvm/Support/Process.h"
  22. #include "llvm/Support/Program.h"
  23. #include "llvm/Support/SmallVectorMemoryBuffer.h"
  24. #include <cassert>
  25. #include <cstring>
  26. #include <new>
  27. #include <sys/types.h>
  28. #include <system_error>
  29. #if !defined(_MSC_VER) && !defined(__MINGW32__)
  30. #include <unistd.h>
  31. #else
  32. #include <io.h>
  33. #endif
  34. using namespace llvm;
  35. //===----------------------------------------------------------------------===//
  36. // MemoryBuffer implementation itself.
  37. //===----------------------------------------------------------------------===//
  38. MemoryBuffer::~MemoryBuffer() { }
  39. /// init - Initialize this MemoryBuffer as a reference to externally allocated
  40. /// memory, memory that we know is already null terminated.
  41. void MemoryBuffer::init(const char *BufStart, const char *BufEnd,
  42. bool RequiresNullTerminator) {
  43. assert((!RequiresNullTerminator || BufEnd[0] == 0) &&
  44. "Buffer is not null terminated!");
  45. BufferStart = BufStart;
  46. BufferEnd = BufEnd;
  47. }
  48. //===----------------------------------------------------------------------===//
  49. // MemoryBufferMem implementation.
  50. //===----------------------------------------------------------------------===//
  51. /// CopyStringRef - Copies contents of a StringRef into a block of memory and
  52. /// null-terminates it.
  53. static void CopyStringRef(char *Memory, StringRef Data) {
  54. if (!Data.empty())
  55. memcpy(Memory, Data.data(), Data.size());
  56. Memory[Data.size()] = 0; // Null terminate string.
  57. }
  58. namespace {
  59. struct NamedBufferAlloc {
  60. const Twine &Name;
  61. NamedBufferAlloc(const Twine &Name) : Name(Name) {}
  62. };
  63. } // namespace
  64. void *operator new(size_t N, const NamedBufferAlloc &Alloc) {
  65. SmallString<256> NameBuf;
  66. StringRef NameRef = Alloc.Name.toStringRef(NameBuf);
  67. char *Mem = static_cast<char *>(operator new(N + NameRef.size() + 1));
  68. CopyStringRef(Mem + N, NameRef);
  69. return Mem;
  70. }
  71. namespace {
  72. /// MemoryBufferMem - Named MemoryBuffer pointing to a block of memory.
  73. template<typename MB>
  74. class MemoryBufferMem : public MB {
  75. public:
  76. MemoryBufferMem(StringRef InputData, bool RequiresNullTerminator) {
  77. MemoryBuffer::init(InputData.begin(), InputData.end(),
  78. RequiresNullTerminator);
  79. }
  80. /// Disable sized deallocation for MemoryBufferMem, because it has
  81. /// tail-allocated data.
  82. void operator delete(void *p) { ::operator delete(p); }
  83. StringRef getBufferIdentifier() const override {
  84. // The name is stored after the class itself.
  85. return StringRef(reinterpret_cast<const char *>(this + 1));
  86. }
  87. MemoryBuffer::BufferKind getBufferKind() const override {
  88. return MemoryBuffer::MemoryBuffer_Malloc;
  89. }
  90. };
  91. } // namespace
// Forward declaration; the definition appears later in this file. It opens
// Filename for reading, builds the buffer through getOpenFileImpl, and closes
// the file again before returning.
template <typename MB>
static ErrorOr<std::unique_ptr<MB>>
getFileAux(const Twine &Filename, uint64_t MapSize, uint64_t Offset,
           bool IsText, bool RequiresNullTerminator, bool IsVolatile);
  96. std::unique_ptr<MemoryBuffer>
  97. MemoryBuffer::getMemBuffer(StringRef InputData, StringRef BufferName,
  98. bool RequiresNullTerminator) {
  99. auto *Ret = new (NamedBufferAlloc(BufferName))
  100. MemoryBufferMem<MemoryBuffer>(InputData, RequiresNullTerminator);
  101. return std::unique_ptr<MemoryBuffer>(Ret);
  102. }
  103. std::unique_ptr<MemoryBuffer>
  104. MemoryBuffer::getMemBuffer(MemoryBufferRef Ref, bool RequiresNullTerminator) {
  105. return std::unique_ptr<MemoryBuffer>(getMemBuffer(
  106. Ref.getBuffer(), Ref.getBufferIdentifier(), RequiresNullTerminator));
  107. }
  108. static ErrorOr<std::unique_ptr<WritableMemoryBuffer>>
  109. getMemBufferCopyImpl(StringRef InputData, const Twine &BufferName) {
  110. auto Buf = WritableMemoryBuffer::getNewUninitMemBuffer(InputData.size(), BufferName);
  111. if (!Buf)
  112. return make_error_code(errc::not_enough_memory);
  113. memcpy(Buf->getBufferStart(), InputData.data(), InputData.size());
  114. return std::move(Buf);
  115. }
  116. std::unique_ptr<MemoryBuffer>
  117. MemoryBuffer::getMemBufferCopy(StringRef InputData, const Twine &BufferName) {
  118. auto Buf = getMemBufferCopyImpl(InputData, BufferName);
  119. if (Buf)
  120. return std::move(*Buf);
  121. return nullptr;
  122. }
  123. ErrorOr<std::unique_ptr<MemoryBuffer>>
  124. MemoryBuffer::getFileOrSTDIN(const Twine &Filename, bool IsText,
  125. bool RequiresNullTerminator) {
  126. SmallString<256> NameBuf;
  127. StringRef NameRef = Filename.toStringRef(NameBuf);
  128. if (NameRef == "-")
  129. return getSTDIN();
  130. return getFile(Filename, IsText, RequiresNullTerminator,
  131. /*IsVolatile=*/false);
  132. }
  133. ErrorOr<std::unique_ptr<MemoryBuffer>>
  134. MemoryBuffer::getFileSlice(const Twine &FilePath, uint64_t MapSize,
  135. uint64_t Offset, bool IsVolatile) {
  136. return getFileAux<MemoryBuffer>(FilePath, MapSize, Offset, /*IsText=*/false,
  137. /*RequiresNullTerminator=*/false, IsVolatile);
  138. }
  139. //===----------------------------------------------------------------------===//
  140. // MemoryBuffer::getFile implementation.
  141. //===----------------------------------------------------------------------===//
  142. namespace {
  143. template <typename MB>
  144. constexpr sys::fs::mapped_file_region::mapmode Mapmode =
  145. sys::fs::mapped_file_region::readonly;
  146. template <>
  147. constexpr sys::fs::mapped_file_region::mapmode Mapmode<MemoryBuffer> =
  148. sys::fs::mapped_file_region::readonly;
  149. template <>
  150. constexpr sys::fs::mapped_file_region::mapmode Mapmode<WritableMemoryBuffer> =
  151. sys::fs::mapped_file_region::priv;
  152. template <>
  153. constexpr sys::fs::mapped_file_region::mapmode
  154. Mapmode<WriteThroughMemoryBuffer> = sys::fs::mapped_file_region::readwrite;
  155. /// Memory maps a file descriptor using sys::fs::mapped_file_region.
  156. ///
  157. /// This handles converting the offset into a legal offset on the platform.
  158. template<typename MB>
  159. class MemoryBufferMMapFile : public MB {
  160. sys::fs::mapped_file_region MFR;
  161. static uint64_t getLegalMapOffset(uint64_t Offset) {
  162. return Offset & ~(sys::fs::mapped_file_region::alignment() - 1);
  163. }
  164. static uint64_t getLegalMapSize(uint64_t Len, uint64_t Offset) {
  165. return Len + (Offset - getLegalMapOffset(Offset));
  166. }
  167. const char *getStart(uint64_t Len, uint64_t Offset) {
  168. return MFR.const_data() + (Offset - getLegalMapOffset(Offset));
  169. }
  170. public:
  171. MemoryBufferMMapFile(bool RequiresNullTerminator, sys::fs::file_t FD, uint64_t Len,
  172. uint64_t Offset, std::error_code &EC)
  173. : MFR(FD, Mapmode<MB>, getLegalMapSize(Len, Offset),
  174. getLegalMapOffset(Offset), EC) {
  175. if (!EC) {
  176. const char *Start = getStart(Len, Offset);
  177. MemoryBuffer::init(Start, Start + Len, RequiresNullTerminator);
  178. }
  179. }
  180. /// Disable sized deallocation for MemoryBufferMMapFile, because it has
  181. /// tail-allocated data.
  182. void operator delete(void *p) { ::operator delete(p); }
  183. StringRef getBufferIdentifier() const override {
  184. // The name is stored after the class itself.
  185. return StringRef(reinterpret_cast<const char *>(this + 1));
  186. }
  187. MemoryBuffer::BufferKind getBufferKind() const override {
  188. return MemoryBuffer::MemoryBuffer_MMap;
  189. }
  190. void dontNeedIfMmap() override { MFR.dontNeed(); }
  191. };
  192. } // namespace
  193. static ErrorOr<std::unique_ptr<WritableMemoryBuffer>>
  194. getMemoryBufferForStream(sys::fs::file_t FD, const Twine &BufferName) {
  195. SmallString<sys::fs::DefaultReadChunkSize> Buffer;
  196. if (Error E = sys::fs::readNativeFileToEOF(FD, Buffer))
  197. return errorToErrorCode(std::move(E));
  198. return getMemBufferCopyImpl(Buffer, BufferName);
  199. }
  200. ErrorOr<std::unique_ptr<MemoryBuffer>>
  201. MemoryBuffer::getFile(const Twine &Filename, bool IsText,
  202. bool RequiresNullTerminator, bool IsVolatile) {
  203. return getFileAux<MemoryBuffer>(Filename, /*MapSize=*/-1, /*Offset=*/0,
  204. IsText, RequiresNullTerminator, IsVolatile);
  205. }
// Forward declaration; the definition appears later in this file. It builds a
// buffer of type MB from the already-open file FD, either by memory-mapping
// the file or by reading it into newly allocated memory.
template <typename MB>
static ErrorOr<std::unique_ptr<MB>>
getOpenFileImpl(sys::fs::file_t FD, const Twine &Filename, uint64_t FileSize,
                uint64_t MapSize, int64_t Offset, bool RequiresNullTerminator,
                bool IsVolatile);
  211. template <typename MB>
  212. static ErrorOr<std::unique_ptr<MB>>
  213. getFileAux(const Twine &Filename, uint64_t MapSize, uint64_t Offset,
  214. bool IsText, bool RequiresNullTerminator, bool IsVolatile) {
  215. Expected<sys::fs::file_t> FDOrErr = sys::fs::openNativeFileForRead(
  216. Filename, IsText ? sys::fs::OF_TextWithCRLF : sys::fs::OF_None);
  217. if (!FDOrErr)
  218. return errorToErrorCode(FDOrErr.takeError());
  219. sys::fs::file_t FD = *FDOrErr;
  220. auto Ret = getOpenFileImpl<MB>(FD, Filename, /*FileSize=*/-1, MapSize, Offset,
  221. RequiresNullTerminator, IsVolatile);
  222. sys::fs::closeFile(FD);
  223. return Ret;
  224. }
  225. ErrorOr<std::unique_ptr<WritableMemoryBuffer>>
  226. WritableMemoryBuffer::getFile(const Twine &Filename, bool IsVolatile) {
  227. return getFileAux<WritableMemoryBuffer>(
  228. Filename, /*MapSize=*/-1, /*Offset=*/0, /*IsText=*/false,
  229. /*RequiresNullTerminator=*/false, IsVolatile);
  230. }
  231. ErrorOr<std::unique_ptr<WritableMemoryBuffer>>
  232. WritableMemoryBuffer::getFileSlice(const Twine &Filename, uint64_t MapSize,
  233. uint64_t Offset, bool IsVolatile) {
  234. return getFileAux<WritableMemoryBuffer>(
  235. Filename, MapSize, Offset, /*IsText=*/false,
  236. /*RequiresNullTerminator=*/false, IsVolatile);
  237. }
  238. std::unique_ptr<WritableMemoryBuffer>
  239. WritableMemoryBuffer::getNewUninitMemBuffer(size_t Size, const Twine &BufferName) {
  240. using MemBuffer = MemoryBufferMem<WritableMemoryBuffer>;
  241. // Allocate space for the MemoryBuffer, the data and the name. It is important
  242. // that MemoryBuffer and data are aligned so PointerIntPair works with them.
  243. // TODO: Is 16-byte alignment enough? We copy small object files with large
  244. // alignment expectations into this buffer.
  245. SmallString<256> NameBuf;
  246. StringRef NameRef = BufferName.toStringRef(NameBuf);
  247. size_t AlignedStringLen = alignTo(sizeof(MemBuffer) + NameRef.size() + 1, 16);
  248. size_t RealLen = AlignedStringLen + Size + 1;
  249. char *Mem = static_cast<char*>(operator new(RealLen, std::nothrow));
  250. if (!Mem)
  251. return nullptr;
  252. // The name is stored after the class itself.
  253. CopyStringRef(Mem + sizeof(MemBuffer), NameRef);
  254. // The buffer begins after the name and must be aligned.
  255. char *Buf = Mem + AlignedStringLen;
  256. Buf[Size] = 0; // Null terminate buffer.
  257. auto *Ret = new (Mem) MemBuffer(StringRef(Buf, Size), true);
  258. return std::unique_ptr<WritableMemoryBuffer>(Ret);
  259. }
  260. std::unique_ptr<WritableMemoryBuffer>
  261. WritableMemoryBuffer::getNewMemBuffer(size_t Size, const Twine &BufferName) {
  262. auto SB = WritableMemoryBuffer::getNewUninitMemBuffer(Size, BufferName);
  263. if (!SB)
  264. return nullptr;
  265. memset(SB->getBufferStart(), 0, Size);
  266. return SB;
  267. }
  268. static bool shouldUseMmap(sys::fs::file_t FD,
  269. size_t FileSize,
  270. size_t MapSize,
  271. off_t Offset,
  272. bool RequiresNullTerminator,
  273. int PageSize,
  274. bool IsVolatile) {
  275. // mmap may leave the buffer without null terminator if the file size changed
  276. // by the time the last page is mapped in, so avoid it if the file size is
  277. // likely to change.
  278. if (IsVolatile && RequiresNullTerminator)
  279. return false;
  280. // We don't use mmap for small files because this can severely fragment our
  281. // address space.
  282. if (MapSize < 4 * 4096 || MapSize < (unsigned)PageSize)
  283. return false;
  284. if (!RequiresNullTerminator)
  285. return true;
  286. // If we don't know the file size, use fstat to find out. fstat on an open
  287. // file descriptor is cheaper than stat on a random path.
  288. // FIXME: this chunk of code is duplicated, but it avoids a fstat when
  289. // RequiresNullTerminator = false and MapSize != -1.
  290. if (FileSize == size_t(-1)) {
  291. sys::fs::file_status Status;
  292. if (sys::fs::status(FD, Status))
  293. return false;
  294. FileSize = Status.getSize();
  295. }
  296. // If we need a null terminator and the end of the map is inside the file,
  297. // we cannot use mmap.
  298. size_t End = Offset + MapSize;
  299. assert(End <= FileSize);
  300. if (End != FileSize)
  301. return false;
  302. // Don't try to map files that are exactly a multiple of the system page size
  303. // if we need a null terminator.
  304. if ((FileSize & (PageSize -1)) == 0)
  305. return false;
  306. #if defined(__CYGWIN__)
  307. // Don't try to map files that are exactly a multiple of the physical page size
  308. // if we need a null terminator.
  309. // FIXME: We should reorganize again getPageSize() on Win32.
  310. if ((FileSize & (4096 - 1)) == 0)
  311. return false;
  312. #endif
  313. return true;
  314. }
  315. static ErrorOr<std::unique_ptr<WriteThroughMemoryBuffer>>
  316. getReadWriteFile(const Twine &Filename, uint64_t FileSize, uint64_t MapSize,
  317. uint64_t Offset) {
  318. Expected<sys::fs::file_t> FDOrErr = sys::fs::openNativeFileForReadWrite(
  319. Filename, sys::fs::CD_OpenExisting, sys::fs::OF_None);
  320. if (!FDOrErr)
  321. return errorToErrorCode(FDOrErr.takeError());
  322. sys::fs::file_t FD = *FDOrErr;
  323. // Default is to map the full file.
  324. if (MapSize == uint64_t(-1)) {
  325. // If we don't know the file size, use fstat to find out. fstat on an open
  326. // file descriptor is cheaper than stat on a random path.
  327. if (FileSize == uint64_t(-1)) {
  328. sys::fs::file_status Status;
  329. std::error_code EC = sys::fs::status(FD, Status);
  330. if (EC)
  331. return EC;
  332. // If this not a file or a block device (e.g. it's a named pipe
  333. // or character device), we can't mmap it, so error out.
  334. sys::fs::file_type Type = Status.type();
  335. if (Type != sys::fs::file_type::regular_file &&
  336. Type != sys::fs::file_type::block_file)
  337. return make_error_code(errc::invalid_argument);
  338. FileSize = Status.getSize();
  339. }
  340. MapSize = FileSize;
  341. }
  342. std::error_code EC;
  343. std::unique_ptr<WriteThroughMemoryBuffer> Result(
  344. new (NamedBufferAlloc(Filename))
  345. MemoryBufferMMapFile<WriteThroughMemoryBuffer>(false, FD, MapSize,
  346. Offset, EC));
  347. if (EC)
  348. return EC;
  349. return std::move(Result);
  350. }
  351. ErrorOr<std::unique_ptr<WriteThroughMemoryBuffer>>
  352. WriteThroughMemoryBuffer::getFile(const Twine &Filename, int64_t FileSize) {
  353. return getReadWriteFile(Filename, FileSize, FileSize, 0);
  354. }
  355. /// Map a subrange of the specified file as a WritableMemoryBuffer.
  356. ErrorOr<std::unique_ptr<WriteThroughMemoryBuffer>>
  357. WriteThroughMemoryBuffer::getFileSlice(const Twine &Filename, uint64_t MapSize,
  358. uint64_t Offset) {
  359. return getReadWriteFile(Filename, -1, MapSize, Offset);
  360. }
  361. template <typename MB>
  362. static ErrorOr<std::unique_ptr<MB>>
  363. getOpenFileImpl(sys::fs::file_t FD, const Twine &Filename, uint64_t FileSize,
  364. uint64_t MapSize, int64_t Offset, bool RequiresNullTerminator,
  365. bool IsVolatile) {
  366. static int PageSize = sys::Process::getPageSizeEstimate();
  367. // Default is to map the full file.
  368. if (MapSize == uint64_t(-1)) {
  369. // If we don't know the file size, use fstat to find out. fstat on an open
  370. // file descriptor is cheaper than stat on a random path.
  371. if (FileSize == uint64_t(-1)) {
  372. sys::fs::file_status Status;
  373. std::error_code EC = sys::fs::status(FD, Status);
  374. if (EC)
  375. return EC;
  376. // If this not a file or a block device (e.g. it's a named pipe
  377. // or character device), we can't trust the size. Create the memory
  378. // buffer by copying off the stream.
  379. sys::fs::file_type Type = Status.type();
  380. if (Type != sys::fs::file_type::regular_file &&
  381. Type != sys::fs::file_type::block_file)
  382. return getMemoryBufferForStream(FD, Filename);
  383. FileSize = Status.getSize();
  384. }
  385. MapSize = FileSize;
  386. }
  387. if (shouldUseMmap(FD, FileSize, MapSize, Offset, RequiresNullTerminator,
  388. PageSize, IsVolatile)) {
  389. std::error_code EC;
  390. std::unique_ptr<MB> Result(
  391. new (NamedBufferAlloc(Filename)) MemoryBufferMMapFile<MB>(
  392. RequiresNullTerminator, FD, MapSize, Offset, EC));
  393. if (!EC)
  394. return std::move(Result);
  395. }
  396. #ifdef __MVS__
  397. // Set codepage auto-conversion for z/OS.
  398. if (auto EC = llvm::enableAutoConversion(FD))
  399. return EC;
  400. #endif
  401. auto Buf = WritableMemoryBuffer::getNewUninitMemBuffer(MapSize, Filename);
  402. if (!Buf) {
  403. // Failed to create a buffer. The only way it can fail is if
  404. // new(std::nothrow) returns 0.
  405. return make_error_code(errc::not_enough_memory);
  406. }
  407. // Read until EOF, zero-initialize the rest.
  408. MutableArrayRef<char> ToRead = Buf->getBuffer();
  409. while (!ToRead.empty()) {
  410. Expected<size_t> ReadBytes =
  411. sys::fs::readNativeFileSlice(FD, ToRead, Offset);
  412. if (!ReadBytes)
  413. return errorToErrorCode(ReadBytes.takeError());
  414. if (*ReadBytes == 0) {
  415. std::memset(ToRead.data(), 0, ToRead.size());
  416. break;
  417. }
  418. ToRead = ToRead.drop_front(*ReadBytes);
  419. Offset += *ReadBytes;
  420. }
  421. return std::move(Buf);
  422. }
  423. ErrorOr<std::unique_ptr<MemoryBuffer>>
  424. MemoryBuffer::getOpenFile(sys::fs::file_t FD, const Twine &Filename, uint64_t FileSize,
  425. bool RequiresNullTerminator, bool IsVolatile) {
  426. return getOpenFileImpl<MemoryBuffer>(FD, Filename, FileSize, FileSize, 0,
  427. RequiresNullTerminator, IsVolatile);
  428. }
  429. ErrorOr<std::unique_ptr<MemoryBuffer>>
  430. MemoryBuffer::getOpenFileSlice(sys::fs::file_t FD, const Twine &Filename, uint64_t MapSize,
  431. int64_t Offset, bool IsVolatile) {
  432. assert(MapSize != uint64_t(-1));
  433. return getOpenFileImpl<MemoryBuffer>(FD, Filename, -1, MapSize, Offset, false,
  434. IsVolatile);
  435. }
  436. ErrorOr<std::unique_ptr<MemoryBuffer>> MemoryBuffer::getSTDIN() {
  437. // Read in all of the data from stdin, we cannot mmap stdin.
  438. //
  439. // FIXME: That isn't necessarily true, we should try to mmap stdin and
  440. // fallback if it fails.
  441. sys::ChangeStdinMode(sys::fs::OF_Text);
  442. return getMemoryBufferForStream(sys::fs::getStdinHandle(), "<stdin>");
  443. }
  444. ErrorOr<std::unique_ptr<MemoryBuffer>>
  445. MemoryBuffer::getFileAsStream(const Twine &Filename) {
  446. Expected<sys::fs::file_t> FDOrErr =
  447. sys::fs::openNativeFileForRead(Filename, sys::fs::OF_None);
  448. if (!FDOrErr)
  449. return errorToErrorCode(FDOrErr.takeError());
  450. sys::fs::file_t FD = *FDOrErr;
  451. ErrorOr<std::unique_ptr<MemoryBuffer>> Ret =
  452. getMemoryBufferForStream(FD, Filename);
  453. sys::fs::closeFile(FD);
  454. return Ret;
  455. }
  456. MemoryBufferRef MemoryBuffer::getMemBufferRef() const {
  457. StringRef Data = getBuffer();
  458. StringRef Identifier = getBufferIdentifier();
  459. return MemoryBufferRef(Data, Identifier);
  460. }
  461. SmallVectorMemoryBuffer::~SmallVectorMemoryBuffer() {}