file.cpp 31 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215121612171218121912201221122212231224122512261227122812291230123112321233123412351236123712381239124012411242124312441245124612471248124912501251125212531254125512561257125812591260126112621263126412651266126712681269127012711272127312741275127612771278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302
  1. #include "file.h"
  2. #include "flock.h"
  3. #include "fstat.h"
  4. #include "sysstat.h"
  5. #include "align.h"
  6. #include "info.h"
  7. #include <array>
  8. #include <util/string/util.h>
  9. #include <util/string/cast.h>
  10. #include <util/string/builder.h>
  11. #include <util/stream/hex.h>
  12. #include <util/stream/format.h>
  13. #include <util/random/random.h>
  14. #include <util/generic/size_literals.h>
  15. #include <util/generic/string.h>
  16. #include <util/generic/ylimits.h>
  17. #include <util/generic/yexception.h>
  18. #include <util/datetime/base.h>
  19. #include <errno.h>
  20. #if defined(_unix_)
  21. #include <fcntl.h>
  22. #if defined(_linux_) && (!defined(_android_) || __ANDROID_API__ >= 21) && !defined(FALLOC_FL_KEEP_SIZE)
  23. #include <linux/falloc.h>
  24. #endif
  25. #include <stdlib.h>
  26. #include <unistd.h>
  27. #include <sys/mman.h>
  28. #elif defined(_win_)
  29. #include "winint.h"
  30. #include "fs_win.h"
  31. #include <io.h>
  32. #endif
  33. #if defined(_bionic_)
  34. #include <sys/sendfile.h>
  35. #define HAVE_POSIX_FADVISE 0
  36. #define HAVE_SYNC_FILE_RANGE 0
  37. #elif defined(_linux_)
  38. #include <sys/sendfile.h>
  39. #define HAVE_POSIX_FADVISE 1
  40. #define HAVE_SYNC_FILE_RANGE 1
  41. #elif defined(__FreeBSD__) && !defined(WITH_VALGRIND)
  42. #include <sys/param.h>
  43. #define HAVE_POSIX_FADVISE (__FreeBSD_version >= 900501)
  44. #define HAVE_SYNC_FILE_RANGE 0
  45. #else
  46. #define HAVE_POSIX_FADVISE 0
  47. #define HAVE_SYNC_FILE_RANGE 0
  48. #endif
  49. static bool IsStupidFlagCombination(EOpenMode oMode) {
  50. // ForAppend will actually not be applied in the following combinations:
  51. return (oMode & (CreateAlways | ForAppend)) == (CreateAlways | ForAppend) || (oMode & (TruncExisting | ForAppend)) == (TruncExisting | ForAppend) || (oMode & (CreateNew | ForAppend)) == (CreateNew | ForAppend);
  52. }
  53. TFileHandle::TFileHandle(const TString& fName, EOpenMode oMode) noexcept {
  54. ui32 fcMode = 0;
  55. EOpenMode createMode = oMode & MaskCreation;
  56. Y_VERIFY(!IsStupidFlagCombination(oMode), "oMode %d makes no sense", static_cast<int>(oMode));
  57. if (!(oMode & MaskRW)) {
  58. oMode |= RdWr;
  59. }
  60. if (!(oMode & AMask)) {
  61. oMode |= ARW;
  62. }
  63. #ifdef _win_
  64. switch (createMode) {
  65. case OpenExisting:
  66. fcMode = OPEN_EXISTING;
  67. break;
  68. case TruncExisting:
  69. fcMode = TRUNCATE_EXISTING;
  70. break;
  71. case OpenAlways:
  72. fcMode = OPEN_ALWAYS;
  73. break;
  74. case CreateNew:
  75. fcMode = CREATE_NEW;
  76. break;
  77. case CreateAlways:
  78. fcMode = CREATE_ALWAYS;
  79. break;
  80. default:
  81. abort();
  82. break;
  83. }
  84. ui32 faMode = 0;
  85. if (oMode & RdOnly) {
  86. faMode |= GENERIC_READ;
  87. }
  88. if (oMode & WrOnly) {
  89. // WrOnly or RdWr
  90. faMode |= GENERIC_WRITE;
  91. }
  92. if (oMode & ::ForAppend) {
  93. faMode |= GENERIC_WRITE;
  94. faMode |= FILE_APPEND_DATA;
  95. faMode &= ~FILE_WRITE_DATA;
  96. }
  97. bool inheritHandle = !(oMode & CloseOnExec);
  98. ui32 shMode = FILE_SHARE_READ | FILE_SHARE_WRITE | FILE_SHARE_DELETE;
  99. ui32 attrMode = FILE_ATTRIBUTE_NORMAL;
  100. if ((createMode == OpenExisting || createMode == OpenAlways) && ((oMode & AMask) == (oMode & AR))) {
  101. attrMode |= FILE_ATTRIBUTE_READONLY;
  102. }
  103. if (oMode & Seq) {
  104. attrMode |= FILE_FLAG_SEQUENTIAL_SCAN;
  105. }
  106. if (oMode & Temp) {
  107. // we use TTempFile instead of FILE_FLAG_DELETE_ON_CLOSE
  108. attrMode |= FILE_ATTRIBUTE_TEMPORARY;
  109. }
  110. if (oMode & Transient) {
  111. attrMode |= FILE_FLAG_DELETE_ON_CLOSE;
  112. }
  113. if ((oMode & (Direct | DirectAligned)) && (oMode & WrOnly)) {
  114. // WrOnly or RdWr
  115. attrMode |= /*FILE_FLAG_NO_BUFFERING |*/ FILE_FLAG_WRITE_THROUGH;
  116. }
  117. Fd_ = NFsPrivate::CreateFileWithUtf8Name(fName, faMode, shMode, fcMode, attrMode, inheritHandle);
  118. if ((oMode & ::ForAppend) && (Fd_ != INVALID_FHANDLE)) {
  119. ::SetFilePointer(Fd_, 0, 0, FILE_END);
  120. }
  121. #elif defined(_unix_)
  122. switch (createMode) {
  123. case OpenExisting:
  124. fcMode = 0;
  125. break;
  126. case TruncExisting:
  127. fcMode = O_TRUNC;
  128. break;
  129. case OpenAlways:
  130. fcMode = O_CREAT;
  131. break;
  132. case CreateNew:
  133. fcMode = O_CREAT | O_EXCL;
  134. break;
  135. case CreateAlways:
  136. fcMode = O_CREAT | O_TRUNC;
  137. break;
  138. default:
  139. abort();
  140. break;
  141. }
  142. if ((oMode & RdOnly) && (oMode & WrOnly)) {
  143. fcMode |= O_RDWR;
  144. } else if (oMode & RdOnly) {
  145. fcMode |= O_RDONLY;
  146. } else if (oMode & WrOnly) {
  147. fcMode |= O_WRONLY;
  148. }
  149. if (oMode & ::ForAppend) {
  150. fcMode |= O_APPEND;
  151. }
  152. if (oMode & CloseOnExec) {
  153. fcMode |= O_CLOEXEC;
  154. }
  155. /* I don't now about this for unix...
  156. if (oMode & Temp) {
  157. }
  158. */
  159. #if defined(_freebsd_)
  160. if (oMode & (Direct | DirectAligned)) {
  161. fcMode |= O_DIRECT;
  162. }
  163. if (oMode & Sync) {
  164. fcMode |= O_SYNC;
  165. }
  166. #elif defined(_linux_)
  167. if (oMode & DirectAligned) {
  168. /*
  169. * O_DIRECT in Linux requires aligning request size and buffer address
  170. * to size of hardware sector (see hw_sector_size or ioctl BLKSSZGET).
  171. * Usually 512 bytes, but modern hardware works better with 4096 bytes.
  172. */
  173. fcMode |= O_DIRECT;
  174. }
  175. if (oMode & Sync) {
  176. fcMode |= O_SYNC;
  177. }
  178. #endif
  179. #if defined(_linux_)
  180. fcMode |= O_LARGEFILE;
  181. #endif
  182. ui32 permMode = 0;
  183. if (oMode & AXOther) {
  184. permMode |= S_IXOTH;
  185. }
  186. if (oMode & AWOther) {
  187. permMode |= S_IWOTH;
  188. }
  189. if (oMode & AROther) {
  190. permMode |= S_IROTH;
  191. }
  192. if (oMode & AXGroup) {
  193. permMode |= S_IXGRP;
  194. }
  195. if (oMode & AWGroup) {
  196. permMode |= S_IWGRP;
  197. }
  198. if (oMode & ARGroup) {
  199. permMode |= S_IRGRP;
  200. }
  201. if (oMode & AXUser) {
  202. permMode |= S_IXUSR;
  203. }
  204. if (oMode & AWUser) {
  205. permMode |= S_IWUSR;
  206. }
  207. if (oMode & ARUser) {
  208. permMode |= S_IRUSR;
  209. }
  210. do {
  211. Fd_ = ::open(fName.data(), fcMode, permMode);
  212. } while (Fd_ == -1 && errno == EINTR);
  213. #if HAVE_POSIX_FADVISE
  214. if (Fd_ >= 0) {
  215. if (oMode & NoReuse) {
  216. ::posix_fadvise(Fd_, 0, 0, POSIX_FADV_NOREUSE);
  217. }
  218. if (oMode & Seq) {
  219. ::posix_fadvise(Fd_, 0, 0, POSIX_FADV_SEQUENTIAL);
  220. }
  221. if (oMode & NoReadAhead) {
  222. ::posix_fadvise(Fd_, 0, 0, POSIX_FADV_RANDOM);
  223. }
  224. }
  225. #endif
  226. //temp file
  227. if (Fd_ >= 0 && (oMode & Transient)) {
  228. unlink(fName.data());
  229. }
  230. #else
  231. #error unsupported platform
  232. #endif
  233. }
  234. bool TFileHandle::Close() noexcept {
  235. bool isOk = true;
  236. #ifdef _win_
  237. if (Fd_ != INVALID_FHANDLE) {
  238. isOk = (::CloseHandle(Fd_) != 0);
  239. }
  240. if (!isOk) {
  241. Y_VERIFY(GetLastError() != ERROR_INVALID_HANDLE,
  242. "must not quietly close invalid handle");
  243. }
  244. #elif defined(_unix_)
  245. if (Fd_ != INVALID_FHANDLE) {
  246. isOk = (::close(Fd_) == 0 || errno == EINTR);
  247. }
  248. if (!isOk) {
  249. // Do not quietly close bad descriptor,
  250. // because often it means double close
  251. // that is disasterous
  252. Y_VERIFY(errno != EBADF, "must not quietly close bad descriptor: fd=%d", int(Fd_));
  253. }
  254. #else
  255. #error unsupported platform
  256. #endif
  257. Fd_ = INVALID_FHANDLE;
  258. return isOk;
  259. }
  260. static inline i64 DoSeek(FHANDLE h, i64 offset, SeekDir origin) noexcept {
  261. if (h == INVALID_FHANDLE) {
  262. return -1L;
  263. }
  264. #if defined(_win_)
  265. static ui32 dir[] = {FILE_BEGIN, FILE_CURRENT, FILE_END};
  266. LARGE_INTEGER pos;
  267. pos.QuadPart = offset;
  268. pos.LowPart = ::SetFilePointer(h, pos.LowPart, &pos.HighPart, dir[origin]);
  269. if (pos.LowPart == INVALID_SET_FILE_POINTER && GetLastError() != NO_ERROR) {
  270. pos.QuadPart = -1;
  271. }
  272. return pos.QuadPart;
  273. #elif defined(_unix_)
  274. static int dir[] = {SEEK_SET, SEEK_CUR, SEEK_END};
  275. #if defined(_sun_)
  276. return ::llseek(h, (offset_t)offset, dir[origin]);
  277. #else
  278. return ::lseek(h, (off_t)offset, dir[origin]);
  279. #endif
  280. #else
  281. #error unsupported platform
  282. #endif
  283. }
  284. i64 TFileHandle::GetPosition() const noexcept {
  285. return DoSeek(Fd_, 0, sCur);
  286. }
  287. i64 TFileHandle::Seek(i64 offset, SeekDir origin) noexcept {
  288. return DoSeek(Fd_, offset, origin);
  289. }
  290. i64 TFileHandle::GetLength() const noexcept {
  291. // XXX: returns error code, but does not set errno
  292. if (!IsOpen()) {
  293. return -1L;
  294. }
  295. return GetFileLength(Fd_);
  296. }
  297. bool TFileHandle::Resize(i64 length) noexcept {
  298. if (!IsOpen()) {
  299. return false;
  300. }
  301. i64 currentLength = GetLength();
  302. if (length == currentLength) {
  303. return true;
  304. }
  305. #if defined(_win_)
  306. i64 currentPosition = GetPosition();
  307. if (currentPosition == -1L) {
  308. return false;
  309. }
  310. Seek(length, sSet);
  311. if (!::SetEndOfFile(Fd_)) {
  312. return false;
  313. }
  314. if (currentPosition < length) {
  315. Seek(currentPosition, sSet);
  316. }
  317. return true;
  318. #elif defined(_unix_)
  319. return (0 == ftruncate(Fd_, (off_t)length));
  320. #else
  321. #error unsupported platform
  322. #endif
  323. }
  324. bool TFileHandle::Reserve(i64 length) noexcept {
  325. // FIXME this should reserve disk space with fallocate
  326. if (!IsOpen()) {
  327. return false;
  328. }
  329. i64 currentLength = GetLength();
  330. if (length <= currentLength) {
  331. return true;
  332. }
  333. if (!Resize(length)) {
  334. return false;
  335. }
  336. #if defined(_win_)
  337. if (!::SetFileValidData(Fd_, length)) {
  338. Resize(currentLength);
  339. return false;
  340. }
  341. #elif defined(_unix_)
  342. // No way to implement this under FreeBSD. Just do nothing
  343. #else
  344. #error unsupported platform
  345. #endif
  346. return true;
  347. }
  348. bool TFileHandle::FallocateNoResize(i64 length) noexcept {
  349. if (!IsOpen()) {
  350. return false;
  351. }
  352. #if defined(_linux_) && (!defined(_android_) || __ANDROID_API__ >= 21)
  353. return !fallocate(Fd_, FALLOC_FL_KEEP_SIZE, 0, length);
  354. #else
  355. Y_UNUSED(length);
  356. return true;
  357. #endif
  358. }
  359. // Pair for FallocateNoResize
  360. bool TFileHandle::ShrinkToFit() noexcept {
  361. if (!IsOpen()) {
  362. return false;
  363. }
  364. #if defined(_linux_) && (!defined(_android_) || __ANDROID_API__ >= 21)
  365. return !ftruncate(Fd_, (off_t)GetLength());
  366. #else
  367. return true;
  368. #endif
  369. }
  370. bool TFileHandle::Flush() noexcept {
  371. if (!IsOpen()) {
  372. return false;
  373. }
  374. #if defined(_win_)
  375. bool ok = ::FlushFileBuffers(Fd_) != 0;
  376. /*
  377. * FlushFileBuffers fails if hFile is a handle to the console output.
  378. * That is because the console output is not buffered.
  379. * The function returns FALSE, and GetLastError returns ERROR_INVALID_HANDLE.
  380. */
  381. return ok || GetLastError() == ERROR_INVALID_HANDLE;
  382. #elif defined(_unix_)
  383. int ret = ::fsync(Fd_);
  384. /*
  385. * Ignore EROFS, EINVAL - fd is bound to a special file
  386. * (PIPE, FIFO, or socket) which does not support synchronization.
  387. * Fail in case of EIO, ENOSPC, EDQUOT - data might be lost.
  388. */
  389. return ret == 0 || errno == EROFS || errno == EINVAL
  390. #if defined(_darwin_)
  391. // ENOTSUP fd does not refer to a vnode
  392. || errno == ENOTSUP
  393. #endif
  394. ;
  395. #else
  396. #error unsupported platform
  397. #endif
  398. }
  399. bool TFileHandle::FlushData() noexcept {
  400. #if defined(_linux_)
  401. if (!IsOpen()) {
  402. return false;
  403. }
  404. int ret = ::fdatasync(Fd_);
  405. // Same loginc in error handling as for fsync above.
  406. return ret == 0 || errno == EROFS || errno == EINVAL;
  407. #else
  408. return Flush();
  409. #endif
  410. }
  411. i32 TFileHandle::Read(void* buffer, ui32 byteCount) noexcept {
  412. // FIXME size and return must be 64-bit
  413. if (!IsOpen()) {
  414. return -1;
  415. }
  416. #if defined(_win_)
  417. DWORD bytesRead = 0;
  418. if (::ReadFile(Fd_, buffer, byteCount, &bytesRead, nullptr)) {
  419. return bytesRead;
  420. }
  421. return -1;
  422. #elif defined(_unix_)
  423. i32 ret;
  424. do {
  425. ret = ::read(Fd_, buffer, byteCount);
  426. } while (ret == -1 && errno == EINTR);
  427. return ret;
  428. #else
  429. #error unsupported platform
  430. #endif
  431. }
  432. i32 TFileHandle::Write(const void* buffer, ui32 byteCount) noexcept {
  433. if (!IsOpen()) {
  434. return -1;
  435. }
  436. #if defined(_win_)
  437. DWORD bytesWritten = 0;
  438. if (::WriteFile(Fd_, buffer, byteCount, &bytesWritten, nullptr)) {
  439. return bytesWritten;
  440. }
  441. return -1;
  442. #elif defined(_unix_)
  443. i32 ret;
  444. do {
  445. ret = ::write(Fd_, buffer, byteCount);
  446. } while (ret == -1 && errno == EINTR);
  447. return ret;
  448. #else
  449. #error unsupported platform
  450. #endif
  451. }
  452. i32 TFileHandle::Pread(void* buffer, ui32 byteCount, i64 offset) const noexcept {
  453. #if defined(_win_)
  454. OVERLAPPED io;
  455. Zero(io);
  456. DWORD bytesRead = 0;
  457. io.Offset = (ui32)offset;
  458. io.OffsetHigh = (ui32)(offset >> 32);
  459. if (::ReadFile(Fd_, buffer, byteCount, &bytesRead, &io)) {
  460. return bytesRead;
  461. }
  462. if (::GetLastError() == ERROR_HANDLE_EOF) {
  463. return 0;
  464. }
  465. return -1;
  466. #elif defined(_unix_)
  467. i32 ret;
  468. do {
  469. ret = ::pread(Fd_, buffer, byteCount, offset);
  470. } while (ret == -1 && errno == EINTR);
  471. return ret;
  472. #else
  473. #error unsupported platform
  474. #endif
  475. }
  476. i32 TFileHandle::Pwrite(const void* buffer, ui32 byteCount, i64 offset) const noexcept {
  477. #if defined(_win_)
  478. OVERLAPPED io;
  479. Zero(io);
  480. DWORD bytesWritten = 0;
  481. io.Offset = (ui32)offset;
  482. io.OffsetHigh = (ui32)(offset >> 32);
  483. if (::WriteFile(Fd_, buffer, byteCount, &bytesWritten, &io)) {
  484. return bytesWritten;
  485. }
  486. return -1;
  487. #elif defined(_unix_)
  488. i32 ret;
  489. do {
  490. ret = ::pwrite(Fd_, buffer, byteCount, offset);
  491. } while (ret == -1 && errno == EINTR);
  492. return ret;
  493. #else
  494. #error unsupported platform
  495. #endif
  496. }
  497. FHANDLE TFileHandle::Duplicate() const noexcept {
  498. if (!IsOpen()) {
  499. return INVALID_FHANDLE;
  500. }
  501. #if defined(_win_)
  502. FHANDLE dupHandle;
  503. if (!::DuplicateHandle(GetCurrentProcess(), Fd_, GetCurrentProcess(), &dupHandle, 0, TRUE, DUPLICATE_SAME_ACCESS)) {
  504. return INVALID_FHANDLE;
  505. }
  506. return dupHandle;
  507. #elif defined(_unix_)
  508. return ::dup(Fd_);
  509. #else
  510. #error unsupported platform
  511. #endif
  512. }
  513. int TFileHandle::Duplicate2Posix(int dstHandle) const noexcept {
  514. if (!IsOpen()) {
  515. return -1;
  516. }
  517. #if defined(_win_)
  518. FHANDLE dupHandle = Duplicate();
  519. if (dupHandle == INVALID_FHANDLE) {
  520. _set_errno(EMFILE);
  521. return -1;
  522. }
  523. int posixHandle = _open_osfhandle((intptr_t)dupHandle, 0);
  524. if (posixHandle == -1) {
  525. CloseHandle(dupHandle);
  526. return -1;
  527. }
  528. if (dup2(posixHandle, dstHandle) == -1) {
  529. dstHandle = -1;
  530. }
  531. _close(posixHandle);
  532. return dstHandle;
  533. #elif defined(_unix_)
  534. while (dup2(Fd_, dstHandle) == -1) {
  535. if (errno != EINTR) {
  536. return -1;
  537. }
  538. }
  539. return dstHandle;
  540. #else
  541. #error unsupported platform
  542. #endif
  543. }
  544. bool TFileHandle::LinkTo(const TFileHandle& fh) const noexcept {
  545. #if defined(_unix_)
  546. while (dup2(fh.Fd_, Fd_) == -1) {
  547. if (errno != EINTR) {
  548. return false;
  549. }
  550. }
  551. return true;
  552. #elif defined(_win_)
  553. TFileHandle nh(fh.Duplicate());
  554. if (!nh.IsOpen()) {
  555. return false;
  556. }
  557. //not thread-safe
  558. nh.Swap(*const_cast<TFileHandle*>(this));
  559. return true;
  560. #else
  561. #error unsupported
  562. #endif
  563. }
  564. int TFileHandle::Flock(int op) noexcept {
  565. return ::Flock(Fd_, op);
  566. }
  567. bool TFileHandle::SetDirect() {
  568. #ifdef _linux_
  569. const long flags = fcntl(Fd_, F_GETFL);
  570. const int r = fcntl(Fd_, F_SETFL, flags | O_DIRECT);
  571. return !r;
  572. #endif
  573. return false;
  574. }
  575. void TFileHandle::ResetDirect() {
  576. #ifdef _linux_
  577. long flags = fcntl(Fd_, F_GETFL);
  578. fcntl(Fd_, F_SETFL, flags & ~O_DIRECT);
  579. #endif
  580. }
  581. i64 TFileHandle::CountCache(i64 offset, i64 length) const noexcept {
  582. #ifdef _linux_
  583. const i64 pageSize = NSystemInfo::GetPageSize();
  584. constexpr size_t vecSize = 512; // Fetch up to 2MiB at once
  585. const i64 batchSize = vecSize * pageSize;
  586. std::array<ui8, vecSize> vec;
  587. void* ptr = nullptr;
  588. i64 res = 0;
  589. if (!IsOpen()) {
  590. return -1;
  591. }
  592. if (!length) {
  593. length = GetLength();
  594. length -= Min(length, offset);
  595. }
  596. if (!length) {
  597. return 0;
  598. }
  599. const i64 begin = AlignDown(offset, pageSize);
  600. const i64 end = AlignUp(offset + length, pageSize);
  601. const i64 size = end - begin;
  602. /*
  603. * Since fincode is not implemented yet use mmap and mincore.
  604. * This is not so effective and scalable for frequent usage.
  605. */
  606. ptr = ::mmap(
  607. (caddr_t) nullptr,
  608. size,
  609. PROT_READ,
  610. MAP_SHARED | MAP_NORESERVE,
  611. Fd_,
  612. begin);
  613. if (MAP_FAILED == ptr) {
  614. return -1;
  615. }
  616. for (i64 base = begin; base < end; base += batchSize) {
  617. const size_t batch = Min(vecSize, size_t((end - base) / pageSize));
  618. void* batchPtr = static_cast<caddr_t>(ptr) + (base - begin);
  619. if (::mincore(batchPtr, batch * pageSize, vec.data())) {
  620. res = -1;
  621. break;
  622. }
  623. for (size_t i = 0; i < batch; i++) {
  624. // count uptodate complete pages in cache
  625. if (vec[i] & 1) {
  626. res += pageSize;
  627. }
  628. }
  629. if (base == begin && (vec[0] & 1)) {
  630. // cut head of first page
  631. res -= offset - begin;
  632. }
  633. if ((end - base) <= batchSize && (vec[batch - 1] & 1)) {
  634. // cut tail of last page
  635. res -= size - (offset - begin) - length;
  636. }
  637. }
  638. ::munmap(ptr, size);
  639. return res;
  640. #else
  641. Y_UNUSED(offset);
  642. Y_UNUSED(length);
  643. return -1;
  644. #endif
  645. }
  646. void TFileHandle::PrefetchCache(i64 offset, i64 length, bool wait) const noexcept {
  647. #ifdef _linux_
  648. #if HAVE_POSIX_FADVISE
  649. // POSIX_FADV_WILLNEED starts reading upto read_ahead_kb in background
  650. ::posix_fadvise(Fd_, offset, length, POSIX_FADV_WILLNEED);
  651. #endif
  652. if (wait) {
  653. TFileHandle devnull("/dev/null", OpenExisting | WrOnly | CloseOnExec);
  654. off_t end = length ? (offset + length) : GetLength();
  655. off_t pos = offset;
  656. ssize_t ret;
  657. do {
  658. ret = ::sendfile((FHANDLE)devnull, Fd_, &pos, end - pos);
  659. } while (pos < end && (ret > 0 || errno == EINTR));
  660. }
  661. #else
  662. Y_UNUSED(offset);
  663. Y_UNUSED(length);
  664. Y_UNUSED(wait);
  665. #endif
  666. }
  667. void TFileHandle::EvictCache(i64 offset, i64 length) const noexcept {
  668. #if HAVE_POSIX_FADVISE
  669. /*
  670. * This tries to evicts only unmaped, clean, complete pages.
  671. */
  672. ::posix_fadvise(Fd_, offset, length, POSIX_FADV_DONTNEED);
  673. #else
  674. Y_UNUSED(offset);
  675. Y_UNUSED(length);
  676. #endif
  677. }
  678. bool TFileHandle::FlushCache(i64 offset, i64 length, bool wait) noexcept {
  679. #if HAVE_SYNC_FILE_RANGE
  680. int flags = SYNC_FILE_RANGE_WRITE;
  681. if (wait) {
  682. flags |= SYNC_FILE_RANGE_WAIT_AFTER;
  683. }
  684. int ret = ::sync_file_range(Fd_, offset, length, flags);
  685. return ret == 0 || errno == EROFS;
  686. #else
  687. Y_UNUSED(offset);
  688. Y_UNUSED(length);
  689. if (wait) {
  690. return FlushData();
  691. }
  692. return true;
  693. #endif
  694. }
  695. TString DecodeOpenMode(ui32 mode0) {
  696. ui32 mode = mode0;
  697. TStringBuilder r;
  698. #define F(flag) \
  699. if ((mode & flag) == flag) { \
  700. mode &= ~flag; \
  701. if (r) { \
  702. r << TStringBuf("|"); \
  703. } \
  704. r << TStringBuf(#flag); \
  705. }
  706. F(RdWr)
  707. F(RdOnly)
  708. F(WrOnly)
  709. F(CreateAlways)
  710. F(CreateNew)
  711. F(OpenAlways)
  712. F(TruncExisting)
  713. F(ForAppend)
  714. F(Transient)
  715. F(CloseOnExec)
  716. F(Temp)
  717. F(Sync)
  718. F(Direct)
  719. F(DirectAligned)
  720. F(Seq)
  721. F(NoReuse)
  722. F(NoReadAhead)
  723. F(AX)
  724. F(AR)
  725. F(AW)
  726. F(ARW)
  727. F(AXOther)
  728. F(AWOther)
  729. F(AROther)
  730. F(AXGroup)
  731. F(AWGroup)
  732. F(ARGroup)
  733. F(AXUser)
  734. F(AWUser)
  735. F(ARUser)
  736. #undef F
  737. if (mode != 0) {
  738. if (r) {
  739. r << TStringBuf("|");
  740. }
  741. r << Hex(mode);
  742. }
  743. if (!r) {
  744. return "0";
  745. }
  746. return r;
  747. }
  748. class TFile::TImpl: public TAtomicRefCount<TImpl> {
  749. public:
  750. inline TImpl(FHANDLE fd, const TString& fname = TString())
  751. : Handle_(fd)
  752. , FileName_(fname)
  753. {
  754. }
  755. inline TImpl(const TString& fName, EOpenMode oMode)
  756. : Handle_(fName, oMode)
  757. , FileName_(fName)
  758. {
  759. if (!Handle_.IsOpen()) {
  760. ythrow TFileError() << "can't open " << fName.Quote() << " with mode " << DecodeOpenMode(oMode) << " (" << Hex(oMode.ToBaseType()) << ")";
  761. }
  762. }
  763. inline ~TImpl() = default;
  764. inline void Close() {
  765. if (!Handle_.Close()) {
  766. ythrow TFileError() << "can't close " << FileName_.Quote();
  767. }
  768. }
  769. const TString& GetName() const noexcept {
  770. return FileName_;
  771. }
  772. void SetName(const TString& newName) {
  773. FileName_ = newName;
  774. }
  775. const TFileHandle& GetHandle() const noexcept {
  776. return Handle_;
  777. }
  778. i64 Seek(i64 offset, SeekDir origin) {
  779. i64 pos = Handle_.Seek(offset, origin);
  780. if (pos == -1L) {
  781. ythrow TFileError() << "can't seek " << offset << " bytes in " << FileName_.Quote();
  782. }
  783. return pos;
  784. }
  785. void Resize(i64 length) {
  786. if (!Handle_.Resize(length)) {
  787. ythrow TFileError() << "can't resize " << FileName_.Quote() << " to size " << length;
  788. }
  789. }
  790. void Reserve(i64 length) {
  791. if (!Handle_.Reserve(length)) {
  792. ythrow TFileError() << "can't reserve " << length << " for file " << FileName_.Quote();
  793. }
  794. }
  795. void FallocateNoResize(i64 length) {
  796. if (!Handle_.FallocateNoResize(length)) {
  797. ythrow TFileError() << "can't allocate " << length << "bytes of space for file " << FileName_.Quote();
  798. }
  799. }
  800. void ShrinkToFit() {
  801. if (!Handle_.ShrinkToFit()) {
  802. ythrow TFileError() << "can't shrink " << FileName_.Quote() << " to logical size";
  803. }
  804. }
  805. void Flush() {
  806. if (!Handle_.Flush()) {
  807. ythrow TFileError() << "can't flush " << FileName_.Quote();
  808. }
  809. }
  810. void FlushData() {
  811. if (!Handle_.FlushData()) {
  812. ythrow TFileError() << "can't flush data " << FileName_.Quote();
  813. }
  814. }
  815. TFile Duplicate() const {
  816. TFileHandle dupH(Handle_.Duplicate());
  817. if (!dupH.IsOpen()) {
  818. ythrow TFileError() << "can't duplicate the handle of " << FileName_.Quote();
  819. }
  820. TFile res(dupH);
  821. dupH.Release();
  822. return res;
  823. }
  824. // Maximum amount of bytes to be read via single system call.
  825. // Some libraries fail when it is greater than max int.
  826. // Syscalls can cause contention if they operate on very large data blocks.
  827. static constexpr size_t MaxReadPortion = 1_GB;
  828. i32 RawRead(void* bufferIn, size_t numBytes) {
  829. const size_t toRead = Min(MaxReadPortion, numBytes);
  830. return Handle_.Read(bufferIn, toRead);
  831. }
  832. size_t ReadOrFail(void* buf, size_t numBytes) {
  833. const i32 reallyRead = RawRead(buf, numBytes);
  834. if (reallyRead < 0) {
  835. ythrow TFileError() << "can not read data from " << FileName_.Quote();
  836. }
  837. return reallyRead;
  838. }
  839. size_t Read(void* bufferIn, size_t numBytes) {
  840. ui8* buf = (ui8*)bufferIn;
  841. while (numBytes) {
  842. const size_t reallyRead = ReadOrFail(buf, numBytes);
  843. if (reallyRead == 0) {
  844. // file exhausted
  845. break;
  846. }
  847. buf += reallyRead;
  848. numBytes -= reallyRead;
  849. }
  850. return buf - (ui8*)bufferIn;
  851. }
  852. void Load(void* buf, size_t len) {
  853. if (Read(buf, len) != len) {
  854. ythrow TFileError() << "can't read " << len << " bytes from " << FileName_.Quote();
  855. }
  856. }
  857. // Maximum amount of bytes to be written via single system call.
  858. // Some libraries fail when it is greater than max int.
  859. // Syscalls can cause contention if they operate on very large data blocks.
  860. static constexpr size_t MaxWritePortion = 1_GB;
  861. void Write(const void* buffer, size_t numBytes) {
  862. const ui8* buf = (const ui8*)buffer;
  863. while (numBytes) {
  864. const i32 toWrite = (i32)Min(MaxWritePortion, numBytes);
  865. const i32 reallyWritten = Handle_.Write(buf, toWrite);
  866. if (reallyWritten < 0) {
  867. ythrow TFileError() << "can't write " << toWrite << " bytes to " << FileName_.Quote();
  868. }
  869. buf += reallyWritten;
  870. numBytes -= reallyWritten;
  871. }
  872. }
  873. size_t Pread(void* bufferIn, size_t numBytes, i64 offset) const {
  874. ui8* buf = (ui8*)bufferIn;
  875. while (numBytes) {
  876. const i32 toRead = (i32)Min(MaxReadPortion, numBytes);
  877. const i32 reallyRead = RawPread(buf, toRead, offset);
  878. if (reallyRead < 0) {
  879. ythrow TFileError() << "can not read data from " << FileName_.Quote();
  880. }
  881. if (reallyRead == 0) {
  882. // file exausted
  883. break;
  884. }
  885. buf += reallyRead;
  886. offset += reallyRead;
  887. numBytes -= reallyRead;
  888. }
  889. return buf - (ui8*)bufferIn;
  890. }
  891. i32 RawPread(void* buf, ui32 len, i64 offset) const {
  892. return Handle_.Pread(buf, len, offset);
  893. }
  894. void Pload(void* buf, size_t len, i64 offset) const {
  895. if (Pread(buf, len, offset) != len) {
  896. ythrow TFileError() << "can't read " << len << " bytes at offset " << offset << " from " << FileName_.Quote();
  897. }
  898. }
  899. void Pwrite(const void* buffer, size_t numBytes, i64 offset) const {
  900. const ui8* buf = (const ui8*)buffer;
  901. while (numBytes) {
  902. const i32 toWrite = (i32)Min(MaxWritePortion, numBytes);
  903. const i32 reallyWritten = Handle_.Pwrite(buf, toWrite, offset);
  904. if (reallyWritten < 0) {
  905. ythrow TFileError() << "can't write " << toWrite << " bytes to " << FileName_.Quote();
  906. }
  907. buf += reallyWritten;
  908. offset += reallyWritten;
  909. numBytes -= reallyWritten;
  910. }
  911. }
  912. void Flock(int op) {
  913. if (0 != Handle_.Flock(op)) {
  914. ythrow TFileError() << "can't flock " << FileName_.Quote();
  915. }
  916. }
  917. void SetDirect() {
  918. if (!Handle_.SetDirect()) {
  919. ythrow TFileError() << "can't set direct mode for " << FileName_.Quote();
  920. }
  921. }
  922. void ResetDirect() {
  923. Handle_.ResetDirect();
  924. }
  925. i64 CountCache(i64 offset, i64 length) const noexcept {
  926. return Handle_.CountCache(offset, length);
  927. }
  928. void PrefetchCache(i64 offset, i64 length, bool wait) const noexcept {
  929. Handle_.PrefetchCache(offset, length, wait);
  930. }
  931. void EvictCache(i64 offset, i64 length) const noexcept {
  932. Handle_.EvictCache(offset, length);
  933. }
  934. void FlushCache(i64 offset, i64 length, bool wait) {
  935. if (!Handle_.FlushCache(offset, length, wait)) {
  936. ythrow TFileError() << "can't flush data " << FileName_.Quote();
  937. }
  938. }
  939. private:
  940. TFileHandle Handle_;
  941. TString FileName_;
  942. };
  943. TFile::TFile()
  944. : Impl_(new TImpl(INVALID_FHANDLE))
  945. {
  946. }
  947. TFile::TFile(FHANDLE fd)
  948. : Impl_(new TImpl(fd))
  949. {
  950. }
  951. TFile::TFile(FHANDLE fd, const TString& name)
  952. : Impl_(new TImpl(fd, name))
  953. {
  954. }
  955. TFile::TFile(const TString& fName, EOpenMode oMode)
  956. : Impl_(new TImpl(fName, oMode))
  957. {
  958. }
  959. TFile::~TFile() = default;
  960. void TFile::Close() {
  961. Impl_->Close();
  962. }
  963. const TString& TFile::GetName() const noexcept {
  964. return Impl_->GetName();
  965. }
  966. i64 TFile::GetPosition() const noexcept {
  967. return Impl_->GetHandle().GetPosition();
  968. }
  969. i64 TFile::GetLength() const noexcept {
  970. return Impl_->GetHandle().GetLength();
  971. }
  972. bool TFile::IsOpen() const noexcept {
  973. return Impl_->GetHandle().IsOpen();
  974. }
  975. FHANDLE TFile::GetHandle() const noexcept {
  976. return Impl_->GetHandle();
  977. }
  978. i64 TFile::Seek(i64 offset, SeekDir origin) {
  979. return Impl_->Seek(offset, origin);
  980. }
  981. void TFile::Resize(i64 length) {
  982. Impl_->Resize(length);
  983. }
  984. void TFile::Reserve(i64 length) {
  985. Impl_->Reserve(length);
  986. }
  987. void TFile::FallocateNoResize(i64 length) {
  988. Impl_->FallocateNoResize(length);
  989. }
  990. void TFile::ShrinkToFit() {
  991. Impl_->ShrinkToFit();
  992. }
  993. void TFile::Flush() {
  994. Impl_->Flush();
  995. }
  996. void TFile::FlushData() {
  997. Impl_->FlushData();
  998. }
  999. TFile TFile::Duplicate() const {
  1000. TFile res = Impl_->Duplicate();
  1001. res.Impl_->SetName(Impl_->GetName());
  1002. return res;
  1003. }
  1004. size_t TFile::Read(void* buf, size_t len) {
  1005. return Impl_->Read(buf, len);
  1006. }
  1007. i32 TFile::RawRead(void* buf, size_t len) {
  1008. return Impl_->RawRead(buf, len);
  1009. }
  1010. size_t TFile::ReadOrFail(void* buf, size_t len) {
  1011. return Impl_->ReadOrFail(buf, len);
  1012. }
  1013. void TFile::Load(void* buf, size_t len) {
  1014. Impl_->Load(buf, len);
  1015. }
  1016. void TFile::Write(const void* buf, size_t len) {
  1017. Impl_->Write(buf, len);
  1018. }
  1019. size_t TFile::Pread(void* buf, size_t len, i64 offset) const {
  1020. return Impl_->Pread(buf, len, offset);
  1021. }
  1022. i32 TFile::RawPread(void* buf, ui32 len, i64 offset) const {
  1023. return Impl_->RawPread(buf, len, offset);
  1024. }
  1025. void TFile::Pload(void* buf, size_t len, i64 offset) const {
  1026. Impl_->Pload(buf, len, offset);
  1027. }
  1028. void TFile::Pwrite(const void* buf, size_t len, i64 offset) const {
  1029. Impl_->Pwrite(buf, len, offset);
  1030. }
  1031. void TFile::Flock(int op) {
  1032. Impl_->Flock(op);
  1033. }
  1034. void TFile::SetDirect() {
  1035. Impl_->SetDirect();
  1036. }
  1037. void TFile::ResetDirect() {
  1038. Impl_->ResetDirect();
  1039. }
  1040. i64 TFile::CountCache(i64 offset, i64 length) const noexcept {
  1041. return Impl_->CountCache(offset, length);
  1042. }
  1043. void TFile::PrefetchCache(i64 offset, i64 length, bool wait) const noexcept {
  1044. Impl_->PrefetchCache(offset, length, wait);
  1045. }
  1046. void TFile::EvictCache(i64 offset, i64 length) const noexcept {
  1047. Impl_->EvictCache(offset, length);
  1048. }
  1049. void TFile::FlushCache(i64 offset, i64 length, bool wait) {
  1050. Impl_->FlushCache(offset, length, wait);
  1051. }
  1052. void TFile::LinkTo(const TFile& f) const {
  1053. if (!Impl_->GetHandle().LinkTo(f.Impl_->GetHandle())) {
  1054. ythrow TFileError() << "can not link fd(" << GetName() << " -> " << f.GetName() << ")";
  1055. }
  1056. }
  1057. TFile TFile::Temporary(const TString& prefix) {
  1058. //TODO - handle impossible case of name collision
  1059. return TFile(prefix + ToString(MicroSeconds()) + "-" + ToString(RandomNumber<ui64>()), CreateNew | RdWr | Seq | Temp | Transient);
  1060. }
  1061. TFile TFile::ForAppend(const TString& path) {
  1062. return TFile(path, OpenAlways | WrOnly | Seq | ::ForAppend);
  1063. }
  1064. TFile Duplicate(FILE* f) {
  1065. return Duplicate(fileno(f));
  1066. }
  1067. TFile Duplicate(int fd) {
  1068. #if defined(_win_)
  1069. /* There are two options of how to duplicate a file descriptor on Windows:
  1070. *
  1071. * 1:
  1072. * - Call dup.
  1073. * - Call _get_osfhandle on the result.
  1074. * - Use returned handle.
  1075. * - Call _close on file descriptor returned by dup. This will also close
  1076. * the handle.
  1077. *
  1078. * 2:
  1079. * - Call _get_osfhandle.
  1080. * - Call DuplicateHandle on the result.
  1081. * - Use returned handle.
  1082. * - Call CloseHandle.
  1083. *
  1084. * TFileHandle calls CloseHandle when destroyed, leaving us with option #2. */
  1085. FHANDLE handle = reinterpret_cast<FHANDLE>(::_get_osfhandle(fd));
  1086. FHANDLE dupHandle;
  1087. if (!::DuplicateHandle(GetCurrentProcess(), handle, GetCurrentProcess(), &dupHandle, 0, TRUE, DUPLICATE_SAME_ACCESS)) {
  1088. ythrow TFileError() << "can not duplicate file descriptor " << LastSystemError() << Endl;
  1089. }
  1090. return TFile(dupHandle);
  1091. #elif defined(_unix_)
  1092. return TFile(::dup(fd));
  1093. #else
  1094. #error unsupported platform
  1095. #endif
  1096. }
  1097. bool PosixDisableReadAhead(FHANDLE fileHandle, void* addr) noexcept {
  1098. int ret = -1;
  1099. #if HAVE_POSIX_FADVISE
  1100. #if defined(_linux_)
  1101. Y_UNUSED(fileHandle);
  1102. ret = madvise(addr, 0, MADV_RANDOM); // according to klamm@ posix_fadvise does not work under linux, madvise does work
  1103. #else
  1104. Y_UNUSED(addr);
  1105. ret = ::posix_fadvise(fileHandle, 0, 0, POSIX_FADV_RANDOM);
  1106. #endif
  1107. #else
  1108. Y_UNUSED(fileHandle);
  1109. Y_UNUSED(addr);
  1110. #endif
  1111. return ret == 0;
  1112. }