fileio.c 132 KB


  1. /*
  2. * Copyright (c) Meta Platforms, Inc. and affiliates.
  3. * All rights reserved.
  4. *
  5. * This source code is licensed under both the BSD-style license (found in the
  6. * LICENSE file in the root directory of this source tree) and the GPLv2 (found
  7. * in the COPYING file in the root directory of this source tree).
  8. * You may select, at your option, one of the above-listed licenses.
  9. */
  10. /* *************************************
  11. * Compiler Options
  12. ***************************************/
  13. #ifdef _MSC_VER /* Visual */
  14. # pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */
  15. # pragma warning(disable : 4204) /* non-constant aggregate initializer */
  16. #endif
  17. #if defined(__MINGW32__) && !defined(_POSIX_SOURCE)
  18. # define _POSIX_SOURCE 1 /* disable %llu warnings with MinGW on Windows */
  19. #endif
  20. /*-*************************************
  21. * Includes
  22. ***************************************/
  23. #include "platform.h" /* Large Files support, SET_BINARY_MODE */
  24. #include "util.h" /* UTIL_getFileSize, UTIL_isRegularFile, UTIL_isSameFile */
  25. #include <stdio.h> /* fprintf, open, fdopen, fread, _fileno, stdin, stdout */
  26. #include <stdlib.h> /* malloc, free */
  27. #include <string.h> /* strcmp, strlen */
  28. #include <time.h> /* clock_t, to measure process time */
  29. #include <fcntl.h> /* O_WRONLY */
  30. #include <assert.h>
  31. #include <errno.h> /* errno */
  32. #include <limits.h> /* INT_MAX */
  33. #include <signal.h>
  34. #include "timefn.h" /* UTIL_getTime, UTIL_clockSpanMicro */
  35. #if defined (_MSC_VER)
  36. # include <sys/stat.h>
  37. # include <io.h>
  38. #endif
  39. #include "fileio.h"
  40. #include "fileio_asyncio.h"
  41. #include "fileio_common.h"
  42. FIO_display_prefs_t g_display_prefs = {2, FIO_ps_auto};
  43. UTIL_time_t g_displayClock = UTIL_TIME_INITIALIZER;
  44. #define ZSTD_STATIC_LINKING_ONLY /* ZSTD_magicNumber, ZSTD_frameHeaderSize_max */
  45. #include "../lib/zstd.h"
  46. #include "../lib/zstd_errors.h" /* ZSTD_error_frameParameter_windowTooLarge */
  47. #if defined(ZSTD_GZCOMPRESS) || defined(ZSTD_GZDECOMPRESS)
  48. # error #include <zlib.h>
  49. # if !defined(z_const)
  50. # define z_const
  51. # endif
  52. #endif
  53. #if defined(ZSTD_LZMACOMPRESS) || defined(ZSTD_LZMADECOMPRESS)
  54. # error #include <lzma.h>
  55. #endif
  56. #define LZ4_MAGICNUMBER 0x184D2204
  57. #if defined(ZSTD_LZ4COMPRESS) || defined(ZSTD_LZ4DECOMPRESS)
  58. # define LZ4F_ENABLE_OBSOLETE_ENUMS
  59. # error #include <lz4frame.h>
  60. # error #include <lz4.h>
  61. #endif
  /* FIO_zlibVersion() :
   * @return : the string reported by zlibVersion() when gzip support is
   *           compiled in (ZSTD_GZCOMPRESS or ZSTD_GZDECOMPRESS defined),
   *           "Unsupported" otherwise. */
  char const* FIO_zlibVersion(void)
  {
  #if !defined(ZSTD_GZCOMPRESS) && !defined(ZSTD_GZDECOMPRESS)
      return "Unsupported";
  #else
      return zlibVersion();
  #endif
  }
  /* FIO_lz4Version() :
   * @return : the lz4 version string when lz4 support is compiled in
   *           (ZSTD_LZ4COMPRESS or ZSTD_LZ4DECOMPRESS defined),
   *           "Unsupported" otherwise.
   * LZ4_versionString() only exists since lz4 v1.7.3; for older versions
   * the string is assembled from the LZ4_VERSION_* macros. */
  char const* FIO_lz4Version(void)
  {
  #if !defined(ZSTD_LZ4COMPRESS) && !defined(ZSTD_LZ4DECOMPRESS)
      return "Unsupported";
  #elif LZ4_VERSION_NUMBER >= 10703
      return LZ4_versionString();
  #else
  #   define ZSTD_LZ4_VERSION LZ4_VERSION_MAJOR.LZ4_VERSION_MINOR.LZ4_VERSION_RELEASE
  #   define ZSTD_LZ4_VERSION_STRING ZSTD_EXPAND_AND_QUOTE(ZSTD_LZ4_VERSION)
      return ZSTD_LZ4_VERSION_STRING;
  #endif
  }
  /* FIO_lzmaVersion() :
   * @return : the string reported by lzma_version_string() when lzma/xz
   *           support is compiled in (ZSTD_LZMACOMPRESS or
   *           ZSTD_LZMADECOMPRESS defined), "Unsupported" otherwise. */
  char const* FIO_lzmaVersion(void)
  {
  #if !defined(ZSTD_LZMACOMPRESS) && !defined(ZSTD_LZMADECOMPRESS)
      return "Unsupported";
  #else
      return lzma_version_string();
  #endif
  }
  93. /*-*************************************
  94. * Constants
  95. ***************************************/
  #define ADAPT_WINDOWLOG_DEFAULT 23   /* 8 MB */
  #define DICTSIZE_MAX (32 MB)         /* upper bound on dictionary size : protects against huge inputs (attack scenario) */
  #define FNSPACE 30

  /* File creation modes.
   * DEFAULT_FILE_PERMISSIONS   : 0666, further restricted by the umask.
   * TEMPORARY_FILE_PERMISSIONS : owner-only access, used while the file is
   *                              being written when a chmod/chown is going to
   *                              be applied at the end of the operation.
   * The S_I* macros aren't defined on Windows, hence the octal literals there. */
  #if defined(_WIN32)
  #define DEFAULT_FILE_PERMISSIONS (0666)
  #define TEMPORARY_FILE_PERMISSIONS (0600)
  #else
  #define DEFAULT_FILE_PERMISSIONS (S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP|S_IROTH|S_IWOTH)
  #define TEMPORARY_FILE_PERMISSIONS (S_IRUSR|S_IWUSR)
  #endif
  110. /*-************************************
  111. * Signal (Ctrl-C trapping)
  112. **************************************/
  /* Path of the output file currently being produced, or NULL when there is
   * nothing to clean up. Read by INThandler(). */
  static const char* g_artefact = NULL;

  /* INThandler() :
   * SIGINT handler : deletes the registered artefact (if any), prints a
   * newline, then terminates the process with exit code 2. */
  static void INThandler(int sig)
  {
      (void)sig;
      assert(sig==SIGINT);
  #if !defined(_MSC_VER)
      /* ignore further signals of this kind while cleaning up
       * (this invocation generates a buggy warning in Visual Studio) */
      signal(sig, SIG_IGN);
  #endif
      if (g_artefact != NULL) {
          assert(UTIL_isRegularFile(g_artefact));
          remove(g_artefact);
      }
      DISPLAY("\n");
      exit(2);
  }
  127. static void addHandler(char const* dstFileName)
  128. {
  129. if (UTIL_isRegularFile(dstFileName)) {
  130. g_artefact = dstFileName;
  131. signal(SIGINT, INThandler);
  132. } else {
  133. g_artefact = NULL;
  134. }
  135. }
  136. /* Idempotent */
  137. static void clearHandler(void)
  138. {
  139. if (g_artefact) signal(SIGINT, SIG_DFL);
  140. g_artefact = NULL;
  141. }
  142. /*-*********************************************************
  143. * Termination signal trapping (Print debug stack trace)
  144. ***********************************************************/
  /* Step 1 : unless the build already set BACKTRACE_ENABLE, force it off when
   * AddressSanitizer is active. */
  #if defined(__has_feature) && !defined(BACKTRACE_ENABLE) /* Clang compiler */
  #  if (__has_feature(address_sanitizer))
  #    define BACKTRACE_ENABLE 0
  #  endif /* __has_feature(address_sanitizer) */
  #elif defined(__SANITIZE_ADDRESS__) && !defined(BACKTRACE_ENABLE) /* GCC compiler */
  #  define BACKTRACE_ENABLE 0
  #endif

  /* Step 2 : automatic detection, only when still undecided :
   * enabled by default on linux+glibc (not uClibc) and on macOS, disabled elsewhere. */
  #if !defined(BACKTRACE_ENABLE)
  #  if (defined(__linux__) && defined(__GLIBC__) && !defined(__UCLIBC__)) \
       || (defined(__APPLE__) && defined(__MACH__))
  #    define BACKTRACE_ENABLE 1
  #  else
  #    define BACKTRACE_ENABLE 0
  #  endif
  #endif
  161. /* note : after this point, BACKTRACE_ENABLE is necessarily defined */
  162. #if BACKTRACE_ENABLE
  163. #include <execinfo.h> /* backtrace, backtrace_symbols */
  164. #define MAX_STACK_FRAMES 50
  165. static void ABRThandler(int sig) {
  166. const char* name;
  167. void* addrlist[MAX_STACK_FRAMES];
  168. char** symbollist;
  169. int addrlen, i;
  170. switch (sig) {
  171. case SIGABRT: name = "SIGABRT"; break;
  172. case SIGFPE: name = "SIGFPE"; break;
  173. case SIGILL: name = "SIGILL"; break;
  174. case SIGINT: name = "SIGINT"; break;
  175. case SIGSEGV: name = "SIGSEGV"; break;
  176. default: name = "UNKNOWN";
  177. }
  178. DISPLAY("Caught %s signal, printing stack:\n", name);
  179. /* Retrieve current stack addresses. */
  180. addrlen = backtrace(addrlist, MAX_STACK_FRAMES);
  181. if (addrlen == 0) {
  182. DISPLAY("\n");
  183. return;
  184. }
  185. /* Create readable strings to each frame. */
  186. symbollist = backtrace_symbols(addrlist, addrlen);
  187. /* Print the stack trace, excluding calls handling the signal. */
  188. for (i = ZSTD_START_SYMBOLLIST_FRAME; i < addrlen; i++) {
  189. DISPLAY("%s\n", symbollist[i]);
  190. }
  191. free(symbollist);
  192. /* Reset and raise the signal so default handler runs. */
  193. signal(sig, SIG_DFL);
  194. raise(sig);
  195. }
  196. #endif
  /* FIO_addAbortHandler() :
   * When BACKTRACE_ENABLE is set, installs ABRThandler() for SIGABRT, SIGFPE,
   * SIGILL, SIGSEGV and SIGBUS. Does nothing otherwise. */
  void FIO_addAbortHandler(void)
  {
  #if BACKTRACE_ENABLE
      static const int trappedSignals[] = { SIGABRT, SIGFPE, SIGILL, SIGSEGV, SIGBUS };
      size_t n;
      for (n = 0; n < sizeof(trappedSignals) / sizeof(trappedSignals[0]); n++) {
          signal(trappedSignals[n], ABRThandler);
      }
  #endif
  }
  207. /*-*************************************
  208. * Parameters: FIO_ctx_t
  209. ***************************************/
  /* struct FIO_ctx_s :
   * Per-run bookkeeping shared across the files of one invocation.
   * typedef'd to FIO_ctx_t within fileio.h */
  struct FIO_ctx_s {

      /* --- file i/o info --- */
      int nbFilesTotal;          /* set by FIO_setNbFilesTotal() */
      int hasStdinInput;         /* set to 1 by FIO_determineHasStdinInput() */
      int hasStdoutOutput;       /* set by FIO_setHasStdoutOutput() */

      /* --- file i/o state --- */
      int currFileIdx;
      int nbFilesProcessed;
      size_t totalBytesInput;
      size_t totalBytesOutput;
  };
  222. static int FIO_shouldDisplayFileSummary(FIO_ctx_t const* fCtx)
  223. {
  224. return fCtx->nbFilesTotal <= 1 || g_display_prefs.displayLevel >= 3;
  225. }
  226. static int FIO_shouldDisplayMultipleFileSummary(FIO_ctx_t const* fCtx)
  227. {
  228. int const shouldDisplay = (fCtx->nbFilesProcessed >= 1 && fCtx->nbFilesTotal > 1);
  229. assert(shouldDisplay || FIO_shouldDisplayFileSummary(fCtx) || fCtx->nbFilesProcessed == 0);
  230. return shouldDisplay;
  231. }
  232. /*-*************************************
  233. * Parameters: Initialization
  234. ***************************************/
  235. #define FIO_OVERLAP_LOG_NOTSET 9999
  236. #define FIO_LDM_PARAM_NOTSET 9999
  237. FIO_prefs_t* FIO_createPreferences(void)
  238. {
  239. FIO_prefs_t* const ret = (FIO_prefs_t*)malloc(sizeof(FIO_prefs_t));
  240. if (!ret) EXM_THROW(21, "Allocation error : not enough memory");
  241. ret->compressionType = FIO_zstdCompression;
  242. ret->overwrite = 0;
  243. ret->sparseFileSupport = ZSTD_SPARSE_DEFAULT;
  244. ret->dictIDFlag = 1;
  245. ret->checksumFlag = 1;
  246. ret->removeSrcFile = 0;
  247. ret->memLimit = 0;
  248. ret->nbWorkers = 1;
  249. ret->blockSize = 0;
  250. ret->overlapLog = FIO_OVERLAP_LOG_NOTSET;
  251. ret->adaptiveMode = 0;
  252. ret->rsyncable = 0;
  253. ret->minAdaptLevel = -50; /* initializing this value requires a constant, so ZSTD_minCLevel() doesn't work */
  254. ret->maxAdaptLevel = 22; /* initializing this value requires a constant, so ZSTD_maxCLevel() doesn't work */
  255. ret->ldmFlag = 0;
  256. ret->ldmHashLog = 0;
  257. ret->ldmMinMatch = 0;
  258. ret->ldmBucketSizeLog = FIO_LDM_PARAM_NOTSET;
  259. ret->ldmHashRateLog = FIO_LDM_PARAM_NOTSET;
  260. ret->streamSrcSize = 0;
  261. ret->targetCBlockSize = 0;
  262. ret->srcSizeHint = 0;
  263. ret->testMode = 0;
  264. ret->literalCompressionMode = ZSTD_ps_auto;
  265. ret->excludeCompressedFiles = 0;
  266. ret->allowBlockDevices = 0;
  267. ret->asyncIO = AIO_supported();
  268. ret->passThrough = -1;
  269. return ret;
  270. }
  271. FIO_ctx_t* FIO_createContext(void)
  272. {
  273. FIO_ctx_t* const ret = (FIO_ctx_t*)malloc(sizeof(FIO_ctx_t));
  274. if (!ret) EXM_THROW(21, "Allocation error : not enough memory");
  275. ret->currFileIdx = 0;
  276. ret->hasStdinInput = 0;
  277. ret->hasStdoutOutput = 0;
  278. ret->nbFilesTotal = 1;
  279. ret->nbFilesProcessed = 0;
  280. ret->totalBytesInput = 0;
  281. ret->totalBytesOutput = 0;
  282. return ret;
  283. }
  284. void FIO_freePreferences(FIO_prefs_t* const prefs)
  285. {
  286. free(prefs);
  287. }
  288. void FIO_freeContext(FIO_ctx_t* const fCtx)
  289. {
  290. free(fCtx);
  291. }
  292. /*-*************************************
  293. * Parameters: Display Options
  294. ***************************************/
  295. void FIO_setNotificationLevel(int level) { g_display_prefs.displayLevel=level; }
  296. void FIO_setProgressSetting(FIO_progressSetting_e setting) { g_display_prefs.progressSetting = setting; }
  297. /*-*************************************
  298. * Parameters: Setters
  299. ***************************************/
  300. /* FIO_prefs_t functions */
  301. void FIO_setCompressionType(FIO_prefs_t* const prefs, FIO_compressionType_t compressionType) { prefs->compressionType = compressionType; }
  302. void FIO_overwriteMode(FIO_prefs_t* const prefs) { prefs->overwrite = 1; }
  303. void FIO_setSparseWrite(FIO_prefs_t* const prefs, int sparse) { prefs->sparseFileSupport = sparse; }
  304. void FIO_setDictIDFlag(FIO_prefs_t* const prefs, int dictIDFlag) { prefs->dictIDFlag = dictIDFlag; }
  305. void FIO_setChecksumFlag(FIO_prefs_t* const prefs, int checksumFlag) { prefs->checksumFlag = checksumFlag; }
  306. void FIO_setRemoveSrcFile(FIO_prefs_t* const prefs, int flag) { prefs->removeSrcFile = (flag!=0); }
  307. void FIO_setMemLimit(FIO_prefs_t* const prefs, unsigned memLimit) { prefs->memLimit = memLimit; }
  308. void FIO_setNbWorkers(FIO_prefs_t* const prefs, int nbWorkers) {
  309. #ifndef ZSTD_MULTITHREAD
  310. if (nbWorkers > 0) DISPLAYLEVEL(2, "Note : multi-threading is disabled \n");
  311. #endif
  312. prefs->nbWorkers = nbWorkers;
  313. }
  314. void FIO_setExcludeCompressedFile(FIO_prefs_t* const prefs, int excludeCompressedFiles) { prefs->excludeCompressedFiles = excludeCompressedFiles; }
  315. void FIO_setAllowBlockDevices(FIO_prefs_t* const prefs, int allowBlockDevices) { prefs->allowBlockDevices = allowBlockDevices; }
  316. void FIO_setBlockSize(FIO_prefs_t* const prefs, int blockSize) {
  317. if (blockSize && prefs->nbWorkers==0)
  318. DISPLAYLEVEL(2, "Setting block size is useless in single-thread mode \n");
  319. prefs->blockSize = blockSize;
  320. }
  321. void FIO_setOverlapLog(FIO_prefs_t* const prefs, int overlapLog){
  322. if (overlapLog && prefs->nbWorkers==0)
  323. DISPLAYLEVEL(2, "Setting overlapLog is useless in single-thread mode \n");
  324. prefs->overlapLog = overlapLog;
  325. }
  326. void FIO_setAdaptiveMode(FIO_prefs_t* const prefs, int adapt) {
  327. if ((adapt>0) && (prefs->nbWorkers==0))
  328. EXM_THROW(1, "Adaptive mode is not compatible with single thread mode \n");
  329. prefs->adaptiveMode = adapt;
  330. }
  331. void FIO_setUseRowMatchFinder(FIO_prefs_t* const prefs, int useRowMatchFinder) {
  332. prefs->useRowMatchFinder = useRowMatchFinder;
  333. }
  334. void FIO_setRsyncable(FIO_prefs_t* const prefs, int rsyncable) {
  335. if ((rsyncable>0) && (prefs->nbWorkers==0))
  336. EXM_THROW(1, "Rsyncable mode is not compatible with single thread mode \n");
  337. prefs->rsyncable = rsyncable;
  338. }
  339. void FIO_setStreamSrcSize(FIO_prefs_t* const prefs, size_t streamSrcSize) {
  340. prefs->streamSrcSize = streamSrcSize;
  341. }
  342. void FIO_setTargetCBlockSize(FIO_prefs_t* const prefs, size_t targetCBlockSize) {
  343. prefs->targetCBlockSize = targetCBlockSize;
  344. }
  345. void FIO_setSrcSizeHint(FIO_prefs_t* const prefs, size_t srcSizeHint) {
  346. prefs->srcSizeHint = (int)MIN((size_t)INT_MAX, srcSizeHint);
  347. }
  348. void FIO_setTestMode(FIO_prefs_t* const prefs, int testMode) {
  349. prefs->testMode = (testMode!=0);
  350. }
  351. void FIO_setLiteralCompressionMode(
  352. FIO_prefs_t* const prefs,
  353. ZSTD_paramSwitch_e mode) {
  354. prefs->literalCompressionMode = mode;
  355. }
  356. void FIO_setAdaptMin(FIO_prefs_t* const prefs, int minCLevel)
  357. {
  358. #ifndef ZSTD_NOCOMPRESS
  359. assert(minCLevel >= ZSTD_minCLevel());
  360. #endif
  361. prefs->minAdaptLevel = minCLevel;
  362. }
  363. void FIO_setAdaptMax(FIO_prefs_t* const prefs, int maxCLevel)
  364. {
  365. prefs->maxAdaptLevel = maxCLevel;
  366. }
  367. void FIO_setLdmFlag(FIO_prefs_t* const prefs, unsigned ldmFlag) {
  368. prefs->ldmFlag = (ldmFlag>0);
  369. }
  370. void FIO_setLdmHashLog(FIO_prefs_t* const prefs, int ldmHashLog) {
  371. prefs->ldmHashLog = ldmHashLog;
  372. }
  373. void FIO_setLdmMinMatch(FIO_prefs_t* const prefs, int ldmMinMatch) {
  374. prefs->ldmMinMatch = ldmMinMatch;
  375. }
  376. void FIO_setLdmBucketSizeLog(FIO_prefs_t* const prefs, int ldmBucketSizeLog) {
  377. prefs->ldmBucketSizeLog = ldmBucketSizeLog;
  378. }
  379. void FIO_setLdmHashRateLog(FIO_prefs_t* const prefs, int ldmHashRateLog) {
  380. prefs->ldmHashRateLog = ldmHashRateLog;
  381. }
  382. void FIO_setPatchFromMode(FIO_prefs_t* const prefs, int value)
  383. {
  384. prefs->patchFromMode = value != 0;
  385. }
  386. void FIO_setContentSize(FIO_prefs_t* const prefs, int value)
  387. {
  388. prefs->contentSize = value != 0;
  389. }
  390. void FIO_setAsyncIOFlag(FIO_prefs_t* const prefs, int value) {
  391. #ifdef ZSTD_MULTITHREAD
  392. prefs->asyncIO = value;
  393. #else
  394. (void) prefs;
  395. (void) value;
  396. DISPLAYLEVEL(2, "Note : asyncio is disabled (lack of multithreading support) \n");
  397. #endif
  398. }
  399. void FIO_setPassThroughFlag(FIO_prefs_t* const prefs, int value) {
  400. prefs->passThrough = (value != 0);
  401. }
  402. void FIO_setMMapDict(FIO_prefs_t* const prefs, ZSTD_paramSwitch_e value)
  403. {
  404. prefs->mmapDict = value;
  405. }
  406. /* FIO_ctx_t functions */
  407. void FIO_setHasStdoutOutput(FIO_ctx_t* const fCtx, int value) {
  408. fCtx->hasStdoutOutput = value;
  409. }
  410. void FIO_setNbFilesTotal(FIO_ctx_t* const fCtx, int value)
  411. {
  412. fCtx->nbFilesTotal = value;
  413. }
  414. void FIO_determineHasStdinInput(FIO_ctx_t* const fCtx, const FileNamesTable* const filenames) {
  415. size_t i = 0;
  416. for ( ; i < filenames->tableSize; ++i) {
  417. if (!strcmp(stdinmark, filenames->fileNames[i])) {
  418. fCtx->hasStdinInput = 1;
  419. return;
  420. }
  421. }
  422. }
  423. /*-*************************************
  424. * Functions
  425. ***************************************/
  426. /** FIO_removeFile() :
  427. * @result : Unlink `fileName`, even if it's read-only */
  428. static int FIO_removeFile(const char* path)
  429. {
  430. stat_t statbuf;
  431. if (!UTIL_stat(path, &statbuf)) {
  432. DISPLAYLEVEL(2, "zstd: Failed to stat %s while trying to remove it\n", path);
  433. return 0;
  434. }
  435. if (!UTIL_isRegularFileStat(&statbuf)) {
  436. DISPLAYLEVEL(2, "zstd: Refusing to remove non-regular file %s\n", path);
  437. return 0;
  438. }
  439. #if defined(_WIN32)
  440. /* windows doesn't allow remove read-only files,
  441. * so try to make it writable first */
  442. if (!(statbuf.st_mode & _S_IWRITE)) {
  443. UTIL_chmod(path, &statbuf, _S_IWRITE);
  444. }
  445. #endif
  446. return remove(path);
  447. }
  448. /** FIO_openSrcFile() :
  449. * condition : `srcFileName` must be non-NULL. `prefs` may be NULL.
  450. * @result : FILE* to `srcFileName`, or NULL if it fails */
  451. static FILE* FIO_openSrcFile(const FIO_prefs_t* const prefs, const char* srcFileName, stat_t* statbuf)
  452. {
  453. int allowBlockDevices = prefs != NULL ? prefs->allowBlockDevices : 0;
  454. assert(srcFileName != NULL);
  455. assert(statbuf != NULL);
  456. if (!strcmp (srcFileName, stdinmark)) {
  457. DISPLAYLEVEL(4,"Using stdin for input \n");
  458. SET_BINARY_MODE(stdin);
  459. return stdin;
  460. }
  461. if (!UTIL_stat(srcFileName, statbuf)) {
  462. DISPLAYLEVEL(1, "zstd: can't stat %s : %s -- ignored \n",
  463. srcFileName, strerror(errno));
  464. return NULL;
  465. }
  466. if (!UTIL_isRegularFileStat(statbuf)
  467. && !UTIL_isFIFOStat(statbuf)
  468. && !(allowBlockDevices && UTIL_isBlockDevStat(statbuf))
  469. ) {
  470. DISPLAYLEVEL(1, "zstd: %s is not a regular file -- ignored \n",
  471. srcFileName);
  472. return NULL;
  473. }
  474. { FILE* const f = fopen(srcFileName, "rb");
  475. if (f == NULL)
  476. DISPLAYLEVEL(1, "zstd: %s: %s \n", srcFileName, strerror(errno));
  477. return f;
  478. }
  479. }
  480. /** FIO_openDstFile() :
  481. * condition : `dstFileName` must be non-NULL.
  482. * @result : FILE* to `dstFileName`, or NULL if it fails */
  483. static FILE*
  484. FIO_openDstFile(FIO_ctx_t* fCtx, FIO_prefs_t* const prefs,
  485. const char* srcFileName, const char* dstFileName,
  486. const int mode)
  487. {
  488. int isDstRegFile;
  489. if (prefs->testMode) return NULL; /* do not open file in test mode */
  490. assert(dstFileName != NULL);
  491. if (!strcmp (dstFileName, stdoutmark)) {
  492. DISPLAYLEVEL(4,"Using stdout for output \n");
  493. SET_BINARY_MODE(stdout);
  494. if (prefs->sparseFileSupport == 1) {
  495. prefs->sparseFileSupport = 0;
  496. DISPLAYLEVEL(4, "Sparse File Support is automatically disabled on stdout ; try --sparse \n");
  497. }
  498. return stdout;
  499. }
  500. /* ensure dst is not the same as src */
  501. if (srcFileName != NULL && UTIL_isSameFile(srcFileName, dstFileName)) {
  502. DISPLAYLEVEL(1, "zstd: Refusing to open an output file which will overwrite the input file \n");
  503. return NULL;
  504. }
  505. isDstRegFile = UTIL_isRegularFile(dstFileName); /* invoke once */
  506. if (prefs->sparseFileSupport == 1) {
  507. prefs->sparseFileSupport = ZSTD_SPARSE_DEFAULT;
  508. if (!isDstRegFile) {
  509. prefs->sparseFileSupport = 0;
  510. DISPLAYLEVEL(4, "Sparse File Support is disabled when output is not a file \n");
  511. }
  512. }
  513. if (isDstRegFile) {
  514. /* Check if destination file already exists */
  515. #if !defined(_WIN32)
  516. /* this test does not work on Windows :
  517. * `NUL` and `nul` are detected as regular files */
  518. if (!strcmp(dstFileName, nulmark)) {
  519. EXM_THROW(40, "%s is unexpectedly categorized as a regular file",
  520. dstFileName);
  521. }
  522. #endif
  523. if (!prefs->overwrite) {
  524. if (g_display_prefs.displayLevel <= 1) {
  525. /* No interaction possible */
  526. DISPLAYLEVEL(1, "zstd: %s already exists; not overwritten \n",
  527. dstFileName);
  528. return NULL;
  529. }
  530. DISPLAY("zstd: %s already exists; ", dstFileName);
  531. if (UTIL_requireUserConfirmation("overwrite (y/n) ? ", "Not overwritten \n", "yY", fCtx->hasStdinInput))
  532. return NULL;
  533. }
  534. /* need to unlink */
  535. FIO_removeFile(dstFileName);
  536. }
  537. {
  538. #if defined(_WIN32)
  539. /* Windows requires opening the file as a "binary" file to avoid
  540. * mangling. This macro doesn't exist on unix. */
  541. const int openflags = O_WRONLY|O_CREAT|O_TRUNC|O_BINARY;
  542. const int fd = _open(dstFileName, openflags, mode);
  543. FILE* f = NULL;
  544. if (fd != -1) {
  545. f = _fdopen(fd, "wb");
  546. }
  547. #else
  548. const int openflags = O_WRONLY|O_CREAT|O_TRUNC;
  549. const int fd = open(dstFileName, openflags, mode);
  550. FILE* f = NULL;
  551. if (fd != -1) {
  552. f = fdopen(fd, "wb");
  553. }
  554. #endif
  555. if (f == NULL) {
  556. DISPLAYLEVEL(1, "zstd: %s: %s\n", dstFileName, strerror(errno));
  557. } else {
  558. /* An increased buffer size can provide a significant performance
  559. * boost on some platforms. Note that providing a NULL buf with a
  560. * size that's not 0 is not defined in ANSI C, but is defined in an
  561. * extension. There are three possibilities here:
  562. * 1. Libc supports the extended version and everything is good.
  563. * 2. Libc ignores the size when buf is NULL, in which case
  564. * everything will continue as if we didn't call `setvbuf()`.
  565. * 3. We fail the call and execution continues but a warning
  566. * message might be shown.
  567. * In all cases due execution continues. For now, I believe that
  568. * this is a more cost-effective solution than managing the buffers
  569. * allocations ourselves (will require an API change).
  570. */
  571. if (setvbuf(f, NULL, _IOFBF, 1 MB)) {
  572. DISPLAYLEVEL(2, "Warning: setvbuf failed for %s\n", dstFileName);
  573. }
  574. }
  575. return f;
  576. }
  577. }
  578. /* FIO_getDictFileStat() :
  579. */
  580. static void FIO_getDictFileStat(const char* fileName, stat_t* dictFileStat) {
  581. assert(dictFileStat != NULL);
  582. if (fileName == NULL) return;
  583. if (!UTIL_stat(fileName, dictFileStat)) {
  584. EXM_THROW(31, "Stat failed on dictionary file %s: %s", fileName, strerror(errno));
  585. }
  586. if (!UTIL_isRegularFileStat(dictFileStat)) {
  587. EXM_THROW(32, "Dictionary %s must be a regular file.", fileName);
  588. }
  589. }
  590. /* FIO_setDictBufferMalloc() :
  591. * allocates a buffer, pointed by `dict->dictBuffer`,
  592. * loads `filename` content into it, up to DICTSIZE_MAX bytes.
  593. * @return : loaded size
  594. * if fileName==NULL, returns 0 and a NULL pointer
  595. */
  596. static size_t FIO_setDictBufferMalloc(FIO_Dict_t* dict, const char* fileName, FIO_prefs_t* const prefs, stat_t* dictFileStat)
  597. {
  598. FILE* fileHandle;
  599. U64 fileSize;
  600. void** bufferPtr = &dict->dictBuffer;
  601. assert(bufferPtr != NULL);
  602. assert(dictFileStat != NULL);
  603. *bufferPtr = NULL;
  604. if (fileName == NULL) return 0;
  605. DISPLAYLEVEL(4,"Loading %s as dictionary \n", fileName);
  606. fileHandle = fopen(fileName, "rb");
  607. if (fileHandle == NULL) {
  608. EXM_THROW(33, "Couldn't open dictionary %s: %s", fileName, strerror(errno));
  609. }
  610. fileSize = UTIL_getFileSizeStat(dictFileStat);
  611. {
  612. size_t const dictSizeMax = prefs->patchFromMode ? prefs->memLimit : DICTSIZE_MAX;
  613. if (fileSize > dictSizeMax) {
  614. EXM_THROW(34, "Dictionary file %s is too large (> %u bytes)",
  615. fileName, (unsigned)dictSizeMax); /* avoid extreme cases */
  616. }
  617. }
  618. *bufferPtr = malloc((size_t)fileSize);
  619. if (*bufferPtr==NULL) EXM_THROW(34, "%s", strerror(errno));
  620. { size_t const readSize = fread(*bufferPtr, 1, (size_t)fileSize, fileHandle);
  621. if (readSize != fileSize) {
  622. EXM_THROW(35, "Error reading dictionary file %s : %s",
  623. fileName, strerror(errno));
  624. }
  625. }
  626. fclose(fileHandle);
  627. return (size_t)fileSize;
  628. }
  629. #if (PLATFORM_POSIX_VERSION > 0)
  630. #include <sys/mman.h>
  631. static void FIO_munmap(FIO_Dict_t* dict)
  632. {
  633. munmap(dict->dictBuffer, dict->dictBufferSize);
  634. dict->dictBuffer = NULL;
  635. dict->dictBufferSize = 0;
  636. }
  637. static size_t FIO_setDictBufferMMap(FIO_Dict_t* dict, const char* fileName, FIO_prefs_t* const prefs, stat_t* dictFileStat)
  638. {
  639. int fileHandle;
  640. U64 fileSize;
  641. void** bufferPtr = &dict->dictBuffer;
  642. assert(bufferPtr != NULL);
  643. assert(dictFileStat != NULL);
  644. *bufferPtr = NULL;
  645. if (fileName == NULL) return 0;
  646. DISPLAYLEVEL(4,"Loading %s as dictionary \n", fileName);
  647. fileHandle = open(fileName, O_RDONLY);
  648. if (fileHandle == -1) {
  649. EXM_THROW(33, "Couldn't open dictionary %s: %s", fileName, strerror(errno));
  650. }
  651. fileSize = UTIL_getFileSizeStat(dictFileStat);
  652. {
  653. size_t const dictSizeMax = prefs->patchFromMode ? prefs->memLimit : DICTSIZE_MAX;
  654. if (fileSize > dictSizeMax) {
  655. EXM_THROW(34, "Dictionary file %s is too large (> %u bytes)",
  656. fileName, (unsigned)dictSizeMax); /* avoid extreme cases */
  657. }
  658. }
  659. *bufferPtr = mmap(NULL, (size_t)fileSize, PROT_READ, MAP_PRIVATE, fileHandle, 0);
  660. if (*bufferPtr==NULL) EXM_THROW(34, "%s", strerror(errno));
  661. close(fileHandle);
  662. return (size_t)fileSize;
  663. }
  664. #elif defined(_MSC_VER) || defined(_WIN32)
  665. #include <windows.h>
  666. static void FIO_munmap(FIO_Dict_t* dict)
  667. {
  668. UnmapViewOfFile(dict->dictBuffer);
  669. CloseHandle(dict->dictHandle);
  670. dict->dictBuffer = NULL;
  671. dict->dictBufferSize = 0;
  672. }
  673. static size_t FIO_setDictBufferMMap(FIO_Dict_t* dict, const char* fileName, FIO_prefs_t* const prefs, stat_t* dictFileStat)
  674. {
  675. HANDLE fileHandle, mapping;
  676. U64 fileSize;
  677. void** bufferPtr = &dict->dictBuffer;
  678. assert(bufferPtr != NULL);
  679. assert(dictFileStat != NULL);
  680. *bufferPtr = NULL;
  681. if (fileName == NULL) return 0;
  682. DISPLAYLEVEL(4,"Loading %s as dictionary \n", fileName);
  683. fileHandle = CreateFileA(fileName, GENERIC_READ, FILE_SHARE_READ, NULL, OPEN_EXISTING, FILE_ATTRIBUTE_READONLY, NULL);
  684. if (fileHandle == INVALID_HANDLE_VALUE) {
  685. EXM_THROW(33, "Couldn't open dictionary %s: %s", fileName, strerror(errno));
  686. }
  687. fileSize = UTIL_getFileSizeStat(dictFileStat);
  688. {
  689. size_t const dictSizeMax = prefs->patchFromMode ? prefs->memLimit : DICTSIZE_MAX;
  690. if (fileSize > dictSizeMax) {
  691. EXM_THROW(34, "Dictionary file %s is too large (> %u bytes)",
  692. fileName, (unsigned)dictSizeMax); /* avoid extreme cases */
  693. }
  694. }
  695. mapping = CreateFileMapping(fileHandle, NULL, PAGE_READONLY, 0, 0, NULL);
  696. if (mapping == NULL) {
  697. EXM_THROW(35, "Couldn't map dictionary %s: %s", fileName, strerror(errno));
  698. }
  699. *bufferPtr = MapViewOfFile(mapping, FILE_MAP_READ, 0, 0, (DWORD)fileSize); /* we can only cast to DWORD here because dictSize <= 2GB */
  700. if (*bufferPtr==NULL) EXM_THROW(36, "%s", strerror(errno));
  701. dict->dictHandle = fileHandle;
  702. return (size_t)fileSize;
  703. }
  704. #else
  705. static size_t FIO_setDictBufferMMap(FIO_Dict_t* dict, const char* fileName, FIO_prefs_t* const prefs, stat_t* dictFileStat)
  706. {
  707. return FIO_setDictBufferMalloc(dict, fileName, prefs, dictFileStat);
  708. }
  709. static void FIO_munmap(FIO_Dict_t* dict) {
  710. free(dict->dictBuffer);
  711. dict->dictBuffer = NULL;
  712. dict->dictBufferSize = 0;
  713. }
  714. #endif
  715. static void FIO_freeDict(FIO_Dict_t* dict) {
  716. if (dict->dictBufferType == FIO_mallocDict) {
  717. free(dict->dictBuffer);
  718. dict->dictBuffer = NULL;
  719. dict->dictBufferSize = 0;
  720. } else if (dict->dictBufferType == FIO_mmapDict) {
  721. FIO_munmap(dict);
  722. } else {
  723. assert(0); /* Should not reach this case */
  724. }
  725. }
  726. static void FIO_initDict(FIO_Dict_t* dict, const char* fileName, FIO_prefs_t* const prefs, stat_t* dictFileStat, FIO_dictBufferType_t dictBufferType) {
  727. dict->dictBufferType = dictBufferType;
  728. if (dict->dictBufferType == FIO_mallocDict) {
  729. dict->dictBufferSize = FIO_setDictBufferMalloc(dict, fileName, prefs, dictFileStat);
  730. } else if (dict->dictBufferType == FIO_mmapDict) {
  731. dict->dictBufferSize = FIO_setDictBufferMMap(dict, fileName, prefs, dictFileStat);
  732. } else {
  733. assert(0); /* Should not reach this case */
  734. }
  735. }
  736. /* FIO_checkFilenameCollisions() :
  737. * Checks for and warns if there are any files that would have the same output path
  738. */
  739. int FIO_checkFilenameCollisions(const char** filenameTable, unsigned nbFiles) {
  740. const char **filenameTableSorted, *prevElem, *filename;
  741. unsigned u;
  742. filenameTableSorted = (const char**) malloc(sizeof(char*) * nbFiles);
  743. if (!filenameTableSorted) {
  744. DISPLAYLEVEL(1, "Allocation error during filename collision checking \n");
  745. return 1;
  746. }
  747. for (u = 0; u < nbFiles; ++u) {
  748. filename = strrchr(filenameTable[u], PATH_SEP);
  749. if (filename == NULL) {
  750. filenameTableSorted[u] = filenameTable[u];
  751. } else {
  752. filenameTableSorted[u] = filename+1;
  753. }
  754. }
  755. qsort((void*)filenameTableSorted, nbFiles, sizeof(char*), UTIL_compareStr);
  756. prevElem = filenameTableSorted[0];
  757. for (u = 1; u < nbFiles; ++u) {
  758. if (strcmp(prevElem, filenameTableSorted[u]) == 0) {
  759. DISPLAYLEVEL(2, "WARNING: Two files have same filename: %s\n", prevElem);
  760. }
  761. prevElem = filenameTableSorted[u];
  762. }
  763. free((void*)filenameTableSorted);
  764. return 0;
  765. }
  /* extractFilename() :
   * @return : pointer, inside `path`, to the character following the last
   *           occurrence of `separator`; `path` itself if there is none. */
  static const char*
  extractFilename(const char* path, char separator)
  {
      const char* const lastSep = strrchr(path, separator);
      return (lastSep != NULL) ? lastSep + 1 : path;
  }
  /* FIO_createFilename_fromOutDir() :
   * Takes a source file name and specified output directory, and
   * allocates memory for and returns a pointer to final path :
   * `outDirName`, one separator if `outDirName` does not already end with
   * it, then the last component of `path`.
   * The buffer is zero-filled and has room for `suffixLen` additional
   * characters after the file name.
   * An empty `outDirName` yields the bare file name; this case previously
   * read one byte before the start of `outDirName`.
   * This function never returns an error (it may abort() in case of pb)
   */
  static char*
  FIO_createFilename_fromOutDir(const char* path, const char* outDirName, const size_t suffixLen)
  {
      const char* filenameStart;
      char separator;
      char* result;
      size_t dirLen, nameLen, writePos;

  #if defined(_MSC_VER) || defined(__MINGW32__) || defined (__MSVCRT__) /* windows support */
      separator = '\\';
  #else
      separator = '/';
  #endif

      filenameStart = extractFilename(path, separator);
  #if defined(_MSC_VER) || defined(__MINGW32__) || defined (__MSVCRT__) /* windows support */
      filenameStart = extractFilename(filenameStart, '/');  /* sometimes, '/' separator is also used on Windows (mingw+msys2) */
  #endif

      dirLen = strlen(outDirName);
      nameLen = strlen(filenameStart);

      /* room for : directory + optional separator + name + suffix + final '\0' */
      result = (char*) calloc(1, dirLen + 1 + nameLen + suffixLen + 1);
      if (!result) {
          EXM_THROW(30, "zstd: FIO_createFilename_fromOutDir: %s", strerror(errno));
      }

      memcpy(result, outDirName, dirLen);
      writePos = dirLen;
      if ((dirLen > 0) && (outDirName[dirLen-1] != separator)) {
          result[writePos++] = separator;
      }
      memcpy(result + writePos, filenameStart, nameLen);

      return result;
  }
  /* FIO_highbit64() :
   * @return : zero-based index of the most significant set bit of `v`.
   * note : `v` must be non-zero (checked by assert() only).
   */
  static unsigned FIO_highbit64(unsigned long long v)
  {
      unsigned highestBit = 0;
      assert(v != 0);
      /* every remaining shift that leaves a non-zero value moves the top bit up by one */
      for (v >>= 1; v != 0; v >>= 1) {
          highestBit++;
      }
      return highestBit;
  }
  818. static void FIO_adjustMemLimitForPatchFromMode(FIO_prefs_t* const prefs,
  819. unsigned long long const dictSize,
  820. unsigned long long const maxSrcFileSize)
  821. {
  822. unsigned long long maxSize = MAX(prefs->memLimit, MAX(dictSize, maxSrcFileSize));
  823. unsigned const maxWindowSize = (1U << ZSTD_WINDOWLOG_MAX);
  824. if (maxSize == UTIL_FILESIZE_UNKNOWN)
  825. EXM_THROW(42, "Using --patch-from with stdin requires --stream-size");
  826. assert(maxSize != UTIL_FILESIZE_UNKNOWN);
  827. if (maxSize > maxWindowSize)
  828. EXM_THROW(42, "Can't handle files larger than %u GB\n", maxWindowSize/(1 GB));
  829. FIO_setMemLimit(prefs, (unsigned)maxSize);
  830. }
  831. /* FIO_multiFilesConcatWarning() :
  832. * This function handles logic when processing multiple files with -o or -c, displaying the appropriate warnings/prompts.
  833. * Returns 1 if the console should abort, 0 if console should proceed.
  834. *
  835. * If output is stdout or test mode is active, check that `--rm` disabled.
  836. *
  837. * If there is just 1 file to process, zstd will proceed as usual.
  838. * If each file get processed into its own separate destination file, proceed as usual.
  839. *
  840. * When multiple files are processed into a single output,
  841. * display a warning message, then disable --rm if it's set.
  842. *
  843. * If -f is specified or if output is stdout, just proceed.
  844. * If output is set with -o, prompt for confirmation.
  845. */
  846. static int FIO_multiFilesConcatWarning(const FIO_ctx_t* fCtx, FIO_prefs_t* prefs, const char* outFileName, int displayLevelCutoff)
  847. {
  848. if (fCtx->hasStdoutOutput) {
  849. if (prefs->removeSrcFile)
  850. /* this should not happen ; hard fail, to protect user's data
  851. * note: this should rather be an assert(), but we want to be certain that user's data will not be wiped out in case it nonetheless happen */
  852. EXM_THROW(43, "It's not allowed to remove input files when processed output is piped to stdout. "
  853. "This scenario is not supposed to be possible. "
  854. "This is a programming error. File an issue for it to be fixed.");
  855. }
  856. if (prefs->testMode) {
  857. if (prefs->removeSrcFile)
  858. /* this should not happen ; hard fail, to protect user's data
  859. * note: this should rather be an assert(), but we want to be certain that user's data will not be wiped out in case it nonetheless happen */
  860. EXM_THROW(43, "Test mode shall not remove input files! "
  861. "This scenario is not supposed to be possible. "
  862. "This is a programming error. File an issue for it to be fixed.");
  863. return 0;
  864. }
  865. if (fCtx->nbFilesTotal == 1) return 0;
  866. assert(fCtx->nbFilesTotal > 1);
  867. if (!outFileName) return 0;
  868. if (fCtx->hasStdoutOutput) {
  869. DISPLAYLEVEL(2, "zstd: WARNING: all input files will be processed and concatenated into stdout. \n");
  870. } else {
  871. DISPLAYLEVEL(2, "zstd: WARNING: all input files will be processed and concatenated into a single output file: %s \n", outFileName);
  872. }
  873. DISPLAYLEVEL(2, "The concatenated output CANNOT regenerate original file names nor directory structure. \n")
  874. /* multi-input into single output : --rm is not allowed */
  875. if (prefs->removeSrcFile) {
  876. DISPLAYLEVEL(2, "Since it's a destructive operation, input files will not be removed. \n");
  877. prefs->removeSrcFile = 0;
  878. }
  879. if (fCtx->hasStdoutOutput) return 0;
  880. if (prefs->overwrite) return 0;
  881. /* multiple files concatenated into single destination file using -o without -f */
  882. if (g_display_prefs.displayLevel <= displayLevelCutoff) {
  883. /* quiet mode => no prompt => fail automatically */
  884. DISPLAYLEVEL(1, "Concatenating multiple processed inputs into a single output loses file metadata. \n");
  885. DISPLAYLEVEL(1, "Aborting. \n");
  886. return 1;
  887. }
  888. /* normal mode => prompt */
  889. return UTIL_requireUserConfirmation("Proceed? (y/n): ", "Aborting...", "yY", fCtx->hasStdinInput);
  890. }
  891. static ZSTD_inBuffer setInBuffer(const void* buf, size_t s, size_t pos)
  892. {
  893. ZSTD_inBuffer i;
  894. i.src = buf;
  895. i.size = s;
  896. i.pos = pos;
  897. return i;
  898. }
  899. static ZSTD_outBuffer setOutBuffer(void* buf, size_t s, size_t pos)
  900. {
  901. ZSTD_outBuffer o;
  902. o.dst = buf;
  903. o.size = s;
  904. o.pos = pos;
  905. return o;
  906. }
  907. #ifndef ZSTD_NOCOMPRESS
  908. /* **********************************************************************
  909. * Compression
  910. ************************************************************************/
  911. typedef struct {
  912. FIO_Dict_t dict;
  913. const char* dictFileName;
  914. stat_t dictFileStat;
  915. ZSTD_CStream* cctx;
  916. WritePoolCtx_t *writeCtx;
  917. ReadPoolCtx_t *readCtx;
  918. } cRess_t;
  919. /** ZSTD_cycleLog() :
  920. * condition for correct operation : hashLog > 1 */
  921. static U32 ZSTD_cycleLog(U32 hashLog, ZSTD_strategy strat)
  922. {
  923. U32 const btScale = ((U32)strat >= (U32)ZSTD_btlazy2);
  924. assert(hashLog > 1);
  925. return hashLog - btScale;
  926. }
  927. static void FIO_adjustParamsForPatchFromMode(FIO_prefs_t* const prefs,
  928. ZSTD_compressionParameters* comprParams,
  929. unsigned long long const dictSize,
  930. unsigned long long const maxSrcFileSize,
  931. int cLevel)
  932. {
  933. unsigned const fileWindowLog = FIO_highbit64(maxSrcFileSize) + 1;
  934. ZSTD_compressionParameters const cParams = ZSTD_getCParams(cLevel, (size_t)maxSrcFileSize, (size_t)dictSize);
  935. FIO_adjustMemLimitForPatchFromMode(prefs, dictSize, maxSrcFileSize);
  936. if (fileWindowLog > ZSTD_WINDOWLOG_MAX)
  937. DISPLAYLEVEL(1, "Max window log exceeded by file (compression ratio will suffer)\n");
  938. comprParams->windowLog = MAX(ZSTD_WINDOWLOG_MIN, MIN(ZSTD_WINDOWLOG_MAX, fileWindowLog));
  939. if (fileWindowLog > ZSTD_cycleLog(cParams.chainLog, cParams.strategy)) {
  940. if (!prefs->ldmFlag)
  941. DISPLAYLEVEL(2, "long mode automatically triggered\n");
  942. FIO_setLdmFlag(prefs, 1);
  943. }
  944. if (cParams.strategy >= ZSTD_btopt) {
  945. DISPLAYLEVEL(3, "[Optimal parser notes] Consider the following to improve patch size at the cost of speed:\n");
  946. DISPLAYLEVEL(3, "- Use --single-thread mode in the zstd cli\n");
  947. DISPLAYLEVEL(3, "- Set a larger targetLength (e.g. --zstd=targetLength=4096)\n");
  948. DISPLAYLEVEL(3, "- Set a larger chainLog (e.g. --zstd=chainLog=%u)\n", ZSTD_CHAINLOG_MAX);
  949. DISPLAYLEVEL(3, "Also consider playing around with searchLog and hashLog\n");
  950. }
  951. }
  952. static cRess_t FIO_createCResources(FIO_prefs_t* const prefs,
  953. const char* dictFileName, unsigned long long const maxSrcFileSize,
  954. int cLevel, ZSTD_compressionParameters comprParams) {
  955. int useMMap = prefs->mmapDict == ZSTD_ps_enable;
  956. int forceNoUseMMap = prefs->mmapDict == ZSTD_ps_disable;
  957. FIO_dictBufferType_t dictBufferType;
  958. cRess_t ress;
  959. memset(&ress, 0, sizeof(ress));
  960. DISPLAYLEVEL(6, "FIO_createCResources \n");
  961. ress.cctx = ZSTD_createCCtx();
  962. if (ress.cctx == NULL)
  963. EXM_THROW(30, "allocation error (%s): can't create ZSTD_CCtx",
  964. strerror(errno));
  965. FIO_getDictFileStat(dictFileName, &ress.dictFileStat);
  966. /* need to update memLimit before calling createDictBuffer
  967. * because of memLimit check inside it */
  968. if (prefs->patchFromMode) {
  969. U64 const dictSize = UTIL_getFileSizeStat(&ress.dictFileStat);
  970. unsigned long long const ssSize = (unsigned long long)prefs->streamSrcSize;
  971. useMMap |= dictSize > prefs->memLimit;
  972. FIO_adjustParamsForPatchFromMode(prefs, &comprParams, dictSize, ssSize > 0 ? ssSize : maxSrcFileSize, cLevel);
  973. }
  974. dictBufferType = (useMMap && !forceNoUseMMap) ? FIO_mmapDict : FIO_mallocDict;
  975. FIO_initDict(&ress.dict, dictFileName, prefs, &ress.dictFileStat, dictBufferType); /* works with dictFileName==NULL */
  976. ress.writeCtx = AIO_WritePool_create(prefs, ZSTD_CStreamOutSize());
  977. ress.readCtx = AIO_ReadPool_create(prefs, ZSTD_CStreamInSize());
  978. /* Advanced parameters, including dictionary */
  979. if (dictFileName && (ress.dict.dictBuffer==NULL))
  980. EXM_THROW(32, "allocation error : can't create dictBuffer");
  981. ress.dictFileName = dictFileName;
  982. if (prefs->adaptiveMode && !prefs->ldmFlag && !comprParams.windowLog)
  983. comprParams.windowLog = ADAPT_WINDOWLOG_DEFAULT;
  984. CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_contentSizeFlag, prefs->contentSize) ); /* always enable content size when available (note: supposed to be default) */
  985. CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_dictIDFlag, prefs->dictIDFlag) );
  986. CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_checksumFlag, prefs->checksumFlag) );
  987. /* compression level */
  988. CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_compressionLevel, cLevel) );
  989. /* max compressed block size */
  990. CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_targetCBlockSize, (int)prefs->targetCBlockSize) );
  991. /* source size hint */
  992. CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_srcSizeHint, (int)prefs->srcSizeHint) );
  993. /* long distance matching */
  994. CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_enableLongDistanceMatching, prefs->ldmFlag) );
  995. CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_ldmHashLog, prefs->ldmHashLog) );
  996. CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_ldmMinMatch, prefs->ldmMinMatch) );
  997. if (prefs->ldmBucketSizeLog != FIO_LDM_PARAM_NOTSET) {
  998. CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_ldmBucketSizeLog, prefs->ldmBucketSizeLog) );
  999. }
  1000. if (prefs->ldmHashRateLog != FIO_LDM_PARAM_NOTSET) {
  1001. CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_ldmHashRateLog, prefs->ldmHashRateLog) );
  1002. }
  1003. CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_useRowMatchFinder, prefs->useRowMatchFinder));
  1004. /* compression parameters */
  1005. CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_windowLog, (int)comprParams.windowLog) );
  1006. CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_chainLog, (int)comprParams.chainLog) );
  1007. CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_hashLog, (int)comprParams.hashLog) );
  1008. CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_searchLog, (int)comprParams.searchLog) );
  1009. CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_minMatch, (int)comprParams.minMatch) );
  1010. CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_targetLength, (int)comprParams.targetLength) );
  1011. CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_strategy, (int)comprParams.strategy) );
  1012. CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_literalCompressionMode, (int)prefs->literalCompressionMode) );
  1013. CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_enableDedicatedDictSearch, 1) );
  1014. /* multi-threading */
  1015. #ifdef ZSTD_MULTITHREAD
  1016. DISPLAYLEVEL(5,"set nb workers = %u \n", prefs->nbWorkers);
  1017. CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_nbWorkers, prefs->nbWorkers) );
  1018. CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_jobSize, prefs->blockSize) );
  1019. if (prefs->overlapLog != FIO_OVERLAP_LOG_NOTSET) {
  1020. DISPLAYLEVEL(3,"set overlapLog = %u \n", prefs->overlapLog);
  1021. CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_overlapLog, prefs->overlapLog) );
  1022. }
  1023. CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_rsyncable, prefs->rsyncable) );
  1024. #endif
  1025. /* dictionary */
  1026. if (prefs->patchFromMode) {
  1027. CHECK( ZSTD_CCtx_refPrefix(ress.cctx, ress.dict.dictBuffer, ress.dict.dictBufferSize) );
  1028. } else {
  1029. CHECK( ZSTD_CCtx_loadDictionary_byReference(ress.cctx, ress.dict.dictBuffer, ress.dict.dictBufferSize) );
  1030. }
  1031. return ress;
  1032. }
  1033. static void FIO_freeCResources(cRess_t* const ress)
  1034. {
  1035. FIO_freeDict(&(ress->dict));
  1036. AIO_WritePool_free(ress->writeCtx);
  1037. AIO_ReadPool_free(ress->readCtx);
  1038. ZSTD_freeCStream(ress->cctx); /* never fails */
  1039. }
  1040. #ifdef ZSTD_GZCOMPRESS
  1041. static unsigned long long
  1042. FIO_compressGzFrame(const cRess_t* ress, /* buffers & handlers are used, but not changed */
  1043. const char* srcFileName, U64 const srcFileSize,
  1044. int compressionLevel, U64* readsize)
  1045. {
  1046. unsigned long long inFileSize = 0, outFileSize = 0;
  1047. z_stream strm;
  1048. IOJob_t *writeJob = NULL;
  1049. if (compressionLevel > Z_BEST_COMPRESSION)
  1050. compressionLevel = Z_BEST_COMPRESSION;
  1051. strm.zalloc = Z_NULL;
  1052. strm.zfree = Z_NULL;
  1053. strm.opaque = Z_NULL;
  1054. { int const ret = deflateInit2(&strm, compressionLevel, Z_DEFLATED,
  1055. 15 /* maxWindowLogSize */ + 16 /* gzip only */,
  1056. 8, Z_DEFAULT_STRATEGY); /* see https://www.zlib.net/manual.html */
  1057. if (ret != Z_OK) {
  1058. EXM_THROW(71, "zstd: %s: deflateInit2 error %d \n", srcFileName, ret);
  1059. } }
  1060. writeJob = AIO_WritePool_acquireJob(ress->writeCtx);
  1061. strm.next_in = 0;
  1062. strm.avail_in = 0;
  1063. strm.next_out = (Bytef*)writeJob->buffer;
  1064. strm.avail_out = (uInt)writeJob->bufferSize;
  1065. while (1) {
  1066. int ret;
  1067. if (strm.avail_in == 0) {
  1068. AIO_ReadPool_fillBuffer(ress->readCtx, ZSTD_CStreamInSize());
  1069. if (ress->readCtx->srcBufferLoaded == 0) break;
  1070. inFileSize += ress->readCtx->srcBufferLoaded;
  1071. strm.next_in = (z_const unsigned char*)ress->readCtx->srcBuffer;
  1072. strm.avail_in = (uInt)ress->readCtx->srcBufferLoaded;
  1073. }
  1074. {
  1075. size_t const availBefore = strm.avail_in;
  1076. ret = deflate(&strm, Z_NO_FLUSH);
  1077. AIO_ReadPool_consumeBytes(ress->readCtx, availBefore - strm.avail_in);
  1078. }
  1079. if (ret != Z_OK)
  1080. EXM_THROW(72, "zstd: %s: deflate error %d \n", srcFileName, ret);
  1081. { size_t const cSize = writeJob->bufferSize - strm.avail_out;
  1082. if (cSize) {
  1083. writeJob->usedBufferSize = cSize;
  1084. AIO_WritePool_enqueueAndReacquireWriteJob(&writeJob);
  1085. outFileSize += cSize;
  1086. strm.next_out = (Bytef*)writeJob->buffer;
  1087. strm.avail_out = (uInt)writeJob->bufferSize;
  1088. } }
  1089. if (srcFileSize == UTIL_FILESIZE_UNKNOWN) {
  1090. DISPLAYUPDATE_PROGRESS(
  1091. "\rRead : %u MB ==> %.2f%% ",
  1092. (unsigned)(inFileSize>>20),
  1093. (double)outFileSize/(double)inFileSize*100)
  1094. } else {
  1095. DISPLAYUPDATE_PROGRESS(
  1096. "\rRead : %u / %u MB ==> %.2f%% ",
  1097. (unsigned)(inFileSize>>20), (unsigned)(srcFileSize>>20),
  1098. (double)outFileSize/(double)inFileSize*100);
  1099. } }
  1100. while (1) {
  1101. int const ret = deflate(&strm, Z_FINISH);
  1102. { size_t const cSize = writeJob->bufferSize - strm.avail_out;
  1103. if (cSize) {
  1104. writeJob->usedBufferSize = cSize;
  1105. AIO_WritePool_enqueueAndReacquireWriteJob(&writeJob);
  1106. outFileSize += cSize;
  1107. strm.next_out = (Bytef*)writeJob->buffer;
  1108. strm.avail_out = (uInt)writeJob->bufferSize;
  1109. } }
  1110. if (ret == Z_STREAM_END) break;
  1111. if (ret != Z_BUF_ERROR)
  1112. EXM_THROW(77, "zstd: %s: deflate error %d \n", srcFileName, ret);
  1113. }
  1114. { int const ret = deflateEnd(&strm);
  1115. if (ret != Z_OK) {
  1116. EXM_THROW(79, "zstd: %s: deflateEnd error %d \n", srcFileName, ret);
  1117. } }
  1118. *readsize = inFileSize;
  1119. AIO_WritePool_releaseIoJob(writeJob);
  1120. AIO_WritePool_sparseWriteEnd(ress->writeCtx);
  1121. return outFileSize;
  1122. }
  1123. #endif
  1124. #ifdef ZSTD_LZMACOMPRESS
  1125. static unsigned long long
  1126. FIO_compressLzmaFrame(cRess_t* ress,
  1127. const char* srcFileName, U64 const srcFileSize,
  1128. int compressionLevel, U64* readsize, int plain_lzma)
  1129. {
  1130. unsigned long long inFileSize = 0, outFileSize = 0;
  1131. lzma_stream strm = LZMA_STREAM_INIT;
  1132. lzma_action action = LZMA_RUN;
  1133. lzma_ret ret;
  1134. IOJob_t *writeJob = NULL;
  1135. if (compressionLevel < 0) compressionLevel = 0;
  1136. if (compressionLevel > 9) compressionLevel = 9;
  1137. if (plain_lzma) {
  1138. lzma_options_lzma opt_lzma;
  1139. if (lzma_lzma_preset(&opt_lzma, compressionLevel))
  1140. EXM_THROW(81, "zstd: %s: lzma_lzma_preset error", srcFileName);
  1141. ret = lzma_alone_encoder(&strm, &opt_lzma); /* LZMA */
  1142. if (ret != LZMA_OK)
  1143. EXM_THROW(82, "zstd: %s: lzma_alone_encoder error %d", srcFileName, ret);
  1144. } else {
  1145. ret = lzma_easy_encoder(&strm, compressionLevel, LZMA_CHECK_CRC64); /* XZ */
  1146. if (ret != LZMA_OK)
  1147. EXM_THROW(83, "zstd: %s: lzma_easy_encoder error %d", srcFileName, ret);
  1148. }
  1149. writeJob =AIO_WritePool_acquireJob(ress->writeCtx);
  1150. strm.next_out = (BYTE*)writeJob->buffer;
  1151. strm.avail_out = writeJob->bufferSize;
  1152. strm.next_in = 0;
  1153. strm.avail_in = 0;
  1154. while (1) {
  1155. if (strm.avail_in == 0) {
  1156. size_t const inSize = AIO_ReadPool_fillBuffer(ress->readCtx, ZSTD_CStreamInSize());
  1157. if (ress->readCtx->srcBufferLoaded == 0) action = LZMA_FINISH;
  1158. inFileSize += inSize;
  1159. strm.next_in = (BYTE const*)ress->readCtx->srcBuffer;
  1160. strm.avail_in = ress->readCtx->srcBufferLoaded;
  1161. }
  1162. {
  1163. size_t const availBefore = strm.avail_in;
  1164. ret = lzma_code(&strm, action);
  1165. AIO_ReadPool_consumeBytes(ress->readCtx, availBefore - strm.avail_in);
  1166. }
  1167. if (ret != LZMA_OK && ret != LZMA_STREAM_END)
  1168. EXM_THROW(84, "zstd: %s: lzma_code encoding error %d", srcFileName, ret);
  1169. { size_t const compBytes = writeJob->bufferSize - strm.avail_out;
  1170. if (compBytes) {
  1171. writeJob->usedBufferSize = compBytes;
  1172. AIO_WritePool_enqueueAndReacquireWriteJob(&writeJob);
  1173. outFileSize += compBytes;
  1174. strm.next_out = (BYTE*)writeJob->buffer;
  1175. strm.avail_out = writeJob->bufferSize;
  1176. } }
  1177. if (srcFileSize == UTIL_FILESIZE_UNKNOWN)
  1178. DISPLAYUPDATE_PROGRESS("\rRead : %u MB ==> %.2f%%",
  1179. (unsigned)(inFileSize>>20),
  1180. (double)outFileSize/(double)inFileSize*100)
  1181. else
  1182. DISPLAYUPDATE_PROGRESS("\rRead : %u / %u MB ==> %.2f%%",
  1183. (unsigned)(inFileSize>>20), (unsigned)(srcFileSize>>20),
  1184. (double)outFileSize/(double)inFileSize*100);
  1185. if (ret == LZMA_STREAM_END) break;
  1186. }
  1187. lzma_end(&strm);
  1188. *readsize = inFileSize;
  1189. AIO_WritePool_releaseIoJob(writeJob);
  1190. AIO_WritePool_sparseWriteEnd(ress->writeCtx);
  1191. return outFileSize;
  1192. }
  1193. #endif
  1194. #ifdef ZSTD_LZ4COMPRESS
  1195. #if LZ4_VERSION_NUMBER <= 10600
  1196. #define LZ4F_blockLinked blockLinked
  1197. #define LZ4F_max64KB max64KB
  1198. #endif
  /* FIO_LZ4_GetBlockSize_FromBlockId() :
   * @return : 2^(8 + 2*id), i.e. 256 for id 0, then 4x larger for each id increment. */
  static int FIO_LZ4_GetBlockSize_FromBlockId (int id)
  {
      int const sizeLog = 8 + (2 * id);
      return 1 << sizeLog;
  }
  1200. static unsigned long long
  1201. FIO_compressLz4Frame(cRess_t* ress,
  1202. const char* srcFileName, U64 const srcFileSize,
  1203. int compressionLevel, int checksumFlag,
  1204. U64* readsize)
  1205. {
  1206. const size_t blockSize = FIO_LZ4_GetBlockSize_FromBlockId(LZ4F_max64KB);
  1207. unsigned long long inFileSize = 0, outFileSize = 0;
  1208. LZ4F_preferences_t prefs;
  1209. LZ4F_compressionContext_t ctx;
  1210. IOJob_t* writeJob = AIO_WritePool_acquireJob(ress->writeCtx);
  1211. LZ4F_errorCode_t const errorCode = LZ4F_createCompressionContext(&ctx, LZ4F_VERSION);
  1212. if (LZ4F_isError(errorCode))
  1213. EXM_THROW(31, "zstd: failed to create lz4 compression context");
  1214. memset(&prefs, 0, sizeof(prefs));
  1215. assert(blockSize <= ress->readCtx->base.jobBufferSize);
  1216. /* autoflush off to mitigate a bug in lz4<=1.9.3 for compression level 12 */
  1217. prefs.autoFlush = 0;
  1218. prefs.compressionLevel = compressionLevel;
  1219. prefs.frameInfo.blockMode = LZ4F_blockLinked;
  1220. prefs.frameInfo.blockSizeID = LZ4F_max64KB;
  1221. prefs.frameInfo.contentChecksumFlag = (contentChecksum_t)checksumFlag;
  1222. #if LZ4_VERSION_NUMBER >= 10600
  1223. prefs.frameInfo.contentSize = (srcFileSize==UTIL_FILESIZE_UNKNOWN) ? 0 : srcFileSize;
  1224. #endif
  1225. assert(LZ4F_compressBound(blockSize, &prefs) <= writeJob->bufferSize);
  1226. {
  1227. size_t headerSize = LZ4F_compressBegin(ctx, writeJob->buffer, writeJob->bufferSize, &prefs);
  1228. if (LZ4F_isError(headerSize))
  1229. EXM_THROW(33, "File header generation failed : %s",
  1230. LZ4F_getErrorName(headerSize));
  1231. writeJob->usedBufferSize = headerSize;
  1232. AIO_WritePool_enqueueAndReacquireWriteJob(&writeJob);
  1233. outFileSize += headerSize;
  1234. /* Read first block */
  1235. inFileSize += AIO_ReadPool_fillBuffer(ress->readCtx, blockSize);
  1236. /* Main Loop */
  1237. while (ress->readCtx->srcBufferLoaded) {
  1238. size_t inSize = MIN(blockSize, ress->readCtx->srcBufferLoaded);
  1239. size_t const outSize = LZ4F_compressUpdate(ctx, writeJob->buffer, writeJob->bufferSize,
  1240. ress->readCtx->srcBuffer, inSize, NULL);
  1241. if (LZ4F_isError(outSize))
  1242. EXM_THROW(35, "zstd: %s: lz4 compression failed : %s",
  1243. srcFileName, LZ4F_getErrorName(outSize));
  1244. outFileSize += outSize;
  1245. if (srcFileSize == UTIL_FILESIZE_UNKNOWN) {
  1246. DISPLAYUPDATE_PROGRESS("\rRead : %u MB ==> %.2f%%",
  1247. (unsigned)(inFileSize>>20),
  1248. (double)outFileSize/(double)inFileSize*100)
  1249. } else {
  1250. DISPLAYUPDATE_PROGRESS("\rRead : %u / %u MB ==> %.2f%%",
  1251. (unsigned)(inFileSize>>20), (unsigned)(srcFileSize>>20),
  1252. (double)outFileSize/(double)inFileSize*100);
  1253. }
  1254. /* Write Block */
  1255. writeJob->usedBufferSize = outSize;
  1256. AIO_WritePool_enqueueAndReacquireWriteJob(&writeJob);
  1257. /* Read next block */
  1258. AIO_ReadPool_consumeBytes(ress->readCtx, inSize);
  1259. inFileSize += AIO_ReadPool_fillBuffer(ress->readCtx, blockSize);
  1260. }
  1261. /* End of Stream mark */
  1262. headerSize = LZ4F_compressEnd(ctx, writeJob->buffer, writeJob->bufferSize, NULL);
  1263. if (LZ4F_isError(headerSize))
  1264. EXM_THROW(38, "zstd: %s: lz4 end of file generation failed : %s",
  1265. srcFileName, LZ4F_getErrorName(headerSize));
  1266. writeJob->usedBufferSize = headerSize;
  1267. AIO_WritePool_enqueueAndReacquireWriteJob(&writeJob);
  1268. outFileSize += headerSize;
  1269. }
  1270. *readsize = inFileSize;
  1271. LZ4F_freeCompressionContext(ctx);
  1272. AIO_WritePool_releaseIoJob(writeJob);
  1273. AIO_WritePool_sparseWriteEnd(ress->writeCtx);
  1274. return outFileSize;
  1275. }
  1276. #endif
  1277. static unsigned long long
  1278. FIO_compressZstdFrame(FIO_ctx_t* const fCtx,
  1279. FIO_prefs_t* const prefs,
  1280. const cRess_t* ressPtr,
  1281. const char* srcFileName, U64 fileSize,
  1282. int compressionLevel, U64* readsize)
  1283. {
  1284. cRess_t const ress = *ressPtr;
  1285. IOJob_t *writeJob = AIO_WritePool_acquireJob(ressPtr->writeCtx);
  1286. U64 compressedfilesize = 0;
  1287. ZSTD_EndDirective directive = ZSTD_e_continue;
  1288. U64 pledgedSrcSize = ZSTD_CONTENTSIZE_UNKNOWN;
  1289. /* stats */
  1290. ZSTD_frameProgression previous_zfp_update = { 0, 0, 0, 0, 0, 0 };
  1291. ZSTD_frameProgression previous_zfp_correction = { 0, 0, 0, 0, 0, 0 };
  1292. typedef enum { noChange, slower, faster } speedChange_e;
  1293. speedChange_e speedChange = noChange;
  1294. unsigned flushWaiting = 0;
  1295. unsigned inputPresented = 0;
  1296. unsigned inputBlocked = 0;
  1297. unsigned lastJobID = 0;
  1298. UTIL_time_t lastAdaptTime = UTIL_getTime();
  1299. U64 const adaptEveryMicro = REFRESH_RATE;
  1300. UTIL_HumanReadableSize_t const file_hrs = UTIL_makeHumanReadableSize(fileSize);
  1301. DISPLAYLEVEL(6, "compression using zstd format \n");
  1302. /* init */
  1303. if (fileSize != UTIL_FILESIZE_UNKNOWN) {
  1304. pledgedSrcSize = fileSize;
  1305. CHECK(ZSTD_CCtx_setPledgedSrcSize(ress.cctx, fileSize));
  1306. } else if (prefs->streamSrcSize > 0) {
  1307. /* unknown source size; use the declared stream size */
  1308. pledgedSrcSize = prefs->streamSrcSize;
  1309. CHECK( ZSTD_CCtx_setPledgedSrcSize(ress.cctx, prefs->streamSrcSize) );
  1310. }
  1311. {
  1312. int windowLog;
  1313. UTIL_HumanReadableSize_t windowSize;
  1314. CHECK(ZSTD_CCtx_getParameter(ress.cctx, ZSTD_c_windowLog, &windowLog));
  1315. if (windowLog == 0) {
  1316. if (prefs->ldmFlag) {
  1317. /* If long mode is set without a window size libzstd will set this size internally */
  1318. windowLog = ZSTD_WINDOWLOG_LIMIT_DEFAULT;
  1319. } else {
  1320. const ZSTD_compressionParameters cParams = ZSTD_getCParams(compressionLevel, fileSize, 0);
  1321. windowLog = (int)cParams.windowLog;
  1322. }
  1323. }
  1324. windowSize = UTIL_makeHumanReadableSize(MAX(1ULL, MIN(1ULL << windowLog, pledgedSrcSize)));
  1325. DISPLAYLEVEL(4, "Decompression will require %.*f%s of memory\n", windowSize.precision, windowSize.value, windowSize.suffix);
  1326. }
  1327. (void)srcFileName;
  1328. /* Main compression loop */
  1329. do {
  1330. size_t stillToFlush;
  1331. /* Fill input Buffer */
  1332. size_t const inSize = AIO_ReadPool_fillBuffer(ress.readCtx, ZSTD_CStreamInSize());
  1333. ZSTD_inBuffer inBuff = setInBuffer( ress.readCtx->srcBuffer, ress.readCtx->srcBufferLoaded, 0 );
  1334. DISPLAYLEVEL(6, "fread %u bytes from source \n", (unsigned)inSize);
  1335. *readsize += inSize;
  1336. if ((ress.readCtx->srcBufferLoaded == 0) || (*readsize == fileSize))
  1337. directive = ZSTD_e_end;
  1338. stillToFlush = 1;
  1339. while ((inBuff.pos != inBuff.size) /* input buffer must be entirely ingested */
  1340. || (directive == ZSTD_e_end && stillToFlush != 0) ) {
  1341. size_t const oldIPos = inBuff.pos;
  1342. ZSTD_outBuffer outBuff = setOutBuffer( writeJob->buffer, writeJob->bufferSize, 0 );
  1343. size_t const toFlushNow = ZSTD_toFlushNow(ress.cctx);
  1344. CHECK_V(stillToFlush, ZSTD_compressStream2(ress.cctx, &outBuff, &inBuff, directive));
  1345. AIO_ReadPool_consumeBytes(ress.readCtx, inBuff.pos - oldIPos);
  1346. /* count stats */
  1347. inputPresented++;
  1348. if (oldIPos == inBuff.pos) inputBlocked++; /* input buffer is full and can't take any more : input speed is faster than consumption rate */
  1349. if (!toFlushNow) flushWaiting = 1;
  1350. /* Write compressed stream */
  1351. DISPLAYLEVEL(6, "ZSTD_compress_generic(end:%u) => input pos(%u)<=(%u)size ; output generated %u bytes \n",
  1352. (unsigned)directive, (unsigned)inBuff.pos, (unsigned)inBuff.size, (unsigned)outBuff.pos);
  1353. if (outBuff.pos) {
  1354. writeJob->usedBufferSize = outBuff.pos;
  1355. AIO_WritePool_enqueueAndReacquireWriteJob(&writeJob);
  1356. compressedfilesize += outBuff.pos;
  1357. }
  1358. /* adaptive mode : statistics measurement and speed correction */
  1359. if (prefs->adaptiveMode && UTIL_clockSpanMicro(lastAdaptTime) > adaptEveryMicro) {
  1360. ZSTD_frameProgression const zfp = ZSTD_getFrameProgression(ress.cctx);
  1361. lastAdaptTime = UTIL_getTime();
  1362. /* check output speed */
  1363. if (zfp.currentJobID > 1) { /* only possible if nbWorkers >= 1 */
  1364. unsigned long long newlyProduced = zfp.produced - previous_zfp_update.produced;
  1365. unsigned long long newlyFlushed = zfp.flushed - previous_zfp_update.flushed;
  1366. assert(zfp.produced >= previous_zfp_update.produced);
  1367. assert(prefs->nbWorkers >= 1);
  1368. /* test if compression is blocked
  1369. * either because output is slow and all buffers are full
  1370. * or because input is slow and no job can start while waiting for at least one buffer to be filled.
  1371. * note : exclude starting part, since currentJobID > 1 */
  1372. if ( (zfp.consumed == previous_zfp_update.consumed) /* no data compressed : no data available, or no more buffer to compress to, OR compression is really slow (compression of a single block is slower than update rate)*/
  1373. && (zfp.nbActiveWorkers == 0) /* confirmed : no compression ongoing */
  1374. ) {
  1375. DISPLAYLEVEL(6, "all buffers full : compression stopped => slow down \n")
  1376. speedChange = slower;
  1377. }
  1378. previous_zfp_update = zfp;
  1379. if ( (newlyProduced > (newlyFlushed * 9 / 8)) /* compression produces more data than output can flush (though production can be spiky, due to work unit : (N==4)*block sizes) */
  1380. && (flushWaiting == 0) /* flush speed was never slowed by lack of production, so it's operating at max capacity */
  1381. ) {
  1382. DISPLAYLEVEL(6, "compression faster than flush (%llu > %llu), and flushed was never slowed down by lack of production => slow down \n", newlyProduced, newlyFlushed);
  1383. speedChange = slower;
  1384. }
  1385. flushWaiting = 0;
  1386. }
  1387. /* course correct only if there is at least one new job completed */
  1388. if (zfp.currentJobID > lastJobID) {
  1389. DISPLAYLEVEL(6, "compression level adaptation check \n")
  1390. /* check input speed */
  1391. if (zfp.currentJobID > (unsigned)(prefs->nbWorkers+1)) { /* warm up period, to fill all workers */
  1392. if (inputBlocked <= 0) {
  1393. DISPLAYLEVEL(6, "input is never blocked => input is slower than ingestion \n");
  1394. speedChange = slower;
  1395. } else if (speedChange == noChange) {
  1396. unsigned long long newlyIngested = zfp.ingested - previous_zfp_correction.ingested;
  1397. unsigned long long newlyConsumed = zfp.consumed - previous_zfp_correction.consumed;
  1398. unsigned long long newlyProduced = zfp.produced - previous_zfp_correction.produced;
  1399. unsigned long long newlyFlushed = zfp.flushed - previous_zfp_correction.flushed;
  1400. previous_zfp_correction = zfp;
  1401. assert(inputPresented > 0);
  1402. DISPLAYLEVEL(6, "input blocked %u/%u(%.2f) - ingested:%u vs %u:consumed - flushed:%u vs %u:produced \n",
  1403. inputBlocked, inputPresented, (double)inputBlocked/inputPresented*100,
  1404. (unsigned)newlyIngested, (unsigned)newlyConsumed,
  1405. (unsigned)newlyFlushed, (unsigned)newlyProduced);
  1406. if ( (inputBlocked > inputPresented / 8) /* input is waiting often, because input buffers is full : compression or output too slow */
  1407. && (newlyFlushed * 33 / 32 > newlyProduced) /* flush everything that is produced */
  1408. && (newlyIngested * 33 / 32 > newlyConsumed) /* input speed as fast or faster than compression speed */
  1409. ) {
  1410. DISPLAYLEVEL(6, "recommend faster as in(%llu) >= (%llu)comp(%llu) <= out(%llu) \n",
  1411. newlyIngested, newlyConsumed, newlyProduced, newlyFlushed);
  1412. speedChange = faster;
  1413. }
  1414. }
  1415. inputBlocked = 0;
  1416. inputPresented = 0;
  1417. }
  1418. if (speedChange == slower) {
  1419. DISPLAYLEVEL(6, "slower speed , higher compression \n")
  1420. compressionLevel ++;
  1421. if (compressionLevel > ZSTD_maxCLevel()) compressionLevel = ZSTD_maxCLevel();
  1422. if (compressionLevel > prefs->maxAdaptLevel) compressionLevel = prefs->maxAdaptLevel;
  1423. compressionLevel += (compressionLevel == 0); /* skip 0 */
  1424. ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_compressionLevel, compressionLevel);
  1425. }
  1426. if (speedChange == faster) {
  1427. DISPLAYLEVEL(6, "faster speed , lighter compression \n")
  1428. compressionLevel --;
  1429. if (compressionLevel < prefs->minAdaptLevel) compressionLevel = prefs->minAdaptLevel;
  1430. compressionLevel -= (compressionLevel == 0); /* skip 0 */
  1431. ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_compressionLevel, compressionLevel);
  1432. }
  1433. speedChange = noChange;
  1434. lastJobID = zfp.currentJobID;
  1435. } /* if (zfp.currentJobID > lastJobID) */
  1436. } /* if (prefs->adaptiveMode && UTIL_clockSpanMicro(lastAdaptTime) > adaptEveryMicro) */
  1437. /* display notification */
  1438. if (SHOULD_DISPLAY_PROGRESS() && READY_FOR_UPDATE()) {
  1439. ZSTD_frameProgression const zfp = ZSTD_getFrameProgression(ress.cctx);
  1440. double const cShare = (double)zfp.produced / (double)(zfp.consumed + !zfp.consumed/*avoid div0*/) * 100;
  1441. UTIL_HumanReadableSize_t const buffered_hrs = UTIL_makeHumanReadableSize(zfp.ingested - zfp.consumed);
  1442. UTIL_HumanReadableSize_t const consumed_hrs = UTIL_makeHumanReadableSize(zfp.consumed);
  1443. UTIL_HumanReadableSize_t const produced_hrs = UTIL_makeHumanReadableSize(zfp.produced);
  1444. DELAY_NEXT_UPDATE();
  1445. /* display progress notifications */
  1446. DISPLAY_PROGRESS("\r%79s\r", ""); /* Clear out the current displayed line */
  1447. if (g_display_prefs.displayLevel >= 3) {
  1448. /* Verbose progress update */
  1449. DISPLAY_PROGRESS(
  1450. "(L%i) Buffered:%5.*f%s - Consumed:%5.*f%s - Compressed:%5.*f%s => %.2f%% ",
  1451. compressionLevel,
  1452. buffered_hrs.precision, buffered_hrs.value, buffered_hrs.suffix,
  1453. consumed_hrs.precision, consumed_hrs.value, consumed_hrs.suffix,
  1454. produced_hrs.precision, produced_hrs.value, produced_hrs.suffix,
  1455. cShare );
  1456. } else {
  1457. /* Require level 2 or forcibly displayed progress counter for summarized updates */
  1458. if (fCtx->nbFilesTotal > 1) {
  1459. size_t srcFileNameSize = strlen(srcFileName);
  1460. /* Ensure that the string we print is roughly the same size each time */
  1461. if (srcFileNameSize > 18) {
  1462. const char* truncatedSrcFileName = srcFileName + srcFileNameSize - 15;
  1463. DISPLAY_PROGRESS("Compress: %u/%u files. Current: ...%s ",
  1464. fCtx->currFileIdx+1, fCtx->nbFilesTotal, truncatedSrcFileName);
  1465. } else {
  1466. DISPLAY_PROGRESS("Compress: %u/%u files. Current: %*s ",
  1467. fCtx->currFileIdx+1, fCtx->nbFilesTotal, (int)(18-srcFileNameSize), srcFileName);
  1468. }
  1469. }
  1470. DISPLAY_PROGRESS("Read:%6.*f%4s ", consumed_hrs.precision, consumed_hrs.value, consumed_hrs.suffix);
  1471. if (fileSize != UTIL_FILESIZE_UNKNOWN)
  1472. DISPLAY_PROGRESS("/%6.*f%4s", file_hrs.precision, file_hrs.value, file_hrs.suffix);
  1473. DISPLAY_PROGRESS(" ==> %2.f%%", cShare);
  1474. }
  1475. } /* if (SHOULD_DISPLAY_PROGRESS() && READY_FOR_UPDATE()) */
  1476. } /* while ((inBuff.pos != inBuff.size) */
  1477. } while (directive != ZSTD_e_end);
  1478. if (fileSize != UTIL_FILESIZE_UNKNOWN && *readsize != fileSize) {
  1479. EXM_THROW(27, "Read error : Incomplete read : %llu / %llu B",
  1480. (unsigned long long)*readsize, (unsigned long long)fileSize);
  1481. }
  1482. AIO_WritePool_releaseIoJob(writeJob);
  1483. AIO_WritePool_sparseWriteEnd(ressPtr->writeCtx);
  1484. return compressedfilesize;
  1485. }
  1486. /*! FIO_compressFilename_internal() :
  1487. * same as FIO_compressFilename_extRess(), with `ress.desFile` already opened.
  1488. * @return : 0 : compression completed correctly,
  1489. * 1 : missing or pb opening srcFileName
  1490. */
  1491. static int
  1492. FIO_compressFilename_internal(FIO_ctx_t* const fCtx,
  1493. FIO_prefs_t* const prefs,
  1494. cRess_t ress,
  1495. const char* dstFileName, const char* srcFileName,
  1496. int compressionLevel)
  1497. {
  1498. UTIL_time_t const timeStart = UTIL_getTime();
  1499. clock_t const cpuStart = clock();
  1500. U64 readsize = 0;
  1501. U64 compressedfilesize = 0;
  1502. U64 const fileSize = UTIL_getFileSize(srcFileName);
  1503. DISPLAYLEVEL(5, "%s: %llu bytes \n", srcFileName, (unsigned long long)fileSize);
  1504. /* compression format selection */
  1505. switch (prefs->compressionType) {
  1506. default:
  1507. case FIO_zstdCompression:
  1508. compressedfilesize = FIO_compressZstdFrame(fCtx, prefs, &ress, srcFileName, fileSize, compressionLevel, &readsize);
  1509. break;
  1510. case FIO_gzipCompression:
  1511. #ifdef ZSTD_GZCOMPRESS
  1512. compressedfilesize = FIO_compressGzFrame(&ress, srcFileName, fileSize, compressionLevel, &readsize);
  1513. #else
  1514. (void)compressionLevel;
  1515. EXM_THROW(20, "zstd: %s: file cannot be compressed as gzip (zstd compiled without ZSTD_GZCOMPRESS) -- ignored \n",
  1516. srcFileName);
  1517. #endif
  1518. break;
  1519. case FIO_xzCompression:
  1520. case FIO_lzmaCompression:
  1521. #ifdef ZSTD_LZMACOMPRESS
  1522. compressedfilesize = FIO_compressLzmaFrame(&ress, srcFileName, fileSize, compressionLevel, &readsize, prefs->compressionType==FIO_lzmaCompression);
  1523. #else
  1524. (void)compressionLevel;
  1525. EXM_THROW(20, "zstd: %s: file cannot be compressed as xz/lzma (zstd compiled without ZSTD_LZMACOMPRESS) -- ignored \n",
  1526. srcFileName);
  1527. #endif
  1528. break;
  1529. case FIO_lz4Compression:
  1530. #ifdef ZSTD_LZ4COMPRESS
  1531. compressedfilesize = FIO_compressLz4Frame(&ress, srcFileName, fileSize, compressionLevel, prefs->checksumFlag, &readsize);
  1532. #else
  1533. (void)compressionLevel;
  1534. EXM_THROW(20, "zstd: %s: file cannot be compressed as lz4 (zstd compiled without ZSTD_LZ4COMPRESS) -- ignored \n",
  1535. srcFileName);
  1536. #endif
  1537. break;
  1538. }
  1539. /* Status */
  1540. fCtx->totalBytesInput += (size_t)readsize;
  1541. fCtx->totalBytesOutput += (size_t)compressedfilesize;
  1542. DISPLAY_PROGRESS("\r%79s\r", "");
  1543. if (FIO_shouldDisplayFileSummary(fCtx)) {
  1544. UTIL_HumanReadableSize_t hr_isize = UTIL_makeHumanReadableSize((U64) readsize);
  1545. UTIL_HumanReadableSize_t hr_osize = UTIL_makeHumanReadableSize((U64) compressedfilesize);
  1546. if (readsize == 0) {
  1547. DISPLAY_SUMMARY("%-20s : (%6.*f%s => %6.*f%s, %s) \n",
  1548. srcFileName,
  1549. hr_isize.precision, hr_isize.value, hr_isize.suffix,
  1550. hr_osize.precision, hr_osize.value, hr_osize.suffix,
  1551. dstFileName);
  1552. } else {
  1553. DISPLAY_SUMMARY("%-20s :%6.2f%% (%6.*f%s => %6.*f%s, %s) \n",
  1554. srcFileName,
  1555. (double)compressedfilesize / (double)readsize * 100,
  1556. hr_isize.precision, hr_isize.value, hr_isize.suffix,
  1557. hr_osize.precision, hr_osize.value, hr_osize.suffix,
  1558. dstFileName);
  1559. }
  1560. }
  1561. /* Elapsed Time and CPU Load */
  1562. { clock_t const cpuEnd = clock();
  1563. double const cpuLoad_s = (double)(cpuEnd - cpuStart) / CLOCKS_PER_SEC;
  1564. U64 const timeLength_ns = UTIL_clockSpanNano(timeStart);
  1565. double const timeLength_s = (double)timeLength_ns / 1000000000;
  1566. double const cpuLoad_pct = (cpuLoad_s / timeLength_s) * 100;
  1567. DISPLAYLEVEL(4, "%-20s : Completed in %.2f sec (cpu load : %.0f%%)\n",
  1568. srcFileName, timeLength_s, cpuLoad_pct);
  1569. }
  1570. return 0;
  1571. }
  1572. /*! FIO_compressFilename_dstFile() :
  1573. * open dstFileName, or pass-through if ress.file != NULL,
  1574. * then start compression with FIO_compressFilename_internal().
  1575. * Manages source removal (--rm) and file permissions transfer.
  1576. * note : ress.srcFile must be != NULL,
  1577. * so reach this function through FIO_compressFilename_srcFile().
  1578. * @return : 0 : compression completed correctly,
  1579. * 1 : pb
  1580. */
  1581. static int FIO_compressFilename_dstFile(FIO_ctx_t* const fCtx,
  1582. FIO_prefs_t* const prefs,
  1583. cRess_t ress,
  1584. const char* dstFileName,
  1585. const char* srcFileName,
  1586. const stat_t* srcFileStat,
  1587. int compressionLevel)
  1588. {
  1589. int closeDstFile = 0;
  1590. int result;
  1591. int transferStat = 0;
  1592. int dstFd = -1;
  1593. assert(AIO_ReadPool_getFile(ress.readCtx) != NULL);
  1594. if (AIO_WritePool_getFile(ress.writeCtx) == NULL) {
  1595. int dstFileInitialPermissions = DEFAULT_FILE_PERMISSIONS;
  1596. if ( strcmp (srcFileName, stdinmark)
  1597. && strcmp (dstFileName, stdoutmark)
  1598. && UTIL_isRegularFileStat(srcFileStat) ) {
  1599. transferStat = 1;
  1600. dstFileInitialPermissions = TEMPORARY_FILE_PERMISSIONS;
  1601. }
  1602. closeDstFile = 1;
  1603. DISPLAYLEVEL(6, "FIO_compressFilename_dstFile: opening dst: %s \n", dstFileName);
  1604. { FILE *dstFile = FIO_openDstFile(fCtx, prefs, srcFileName, dstFileName, dstFileInitialPermissions);
  1605. if (dstFile==NULL) return 1; /* could not open dstFileName */
  1606. dstFd = fileno(dstFile);
  1607. AIO_WritePool_setFile(ress.writeCtx, dstFile);
  1608. }
  1609. /* Must only be added after FIO_openDstFile() succeeds.
  1610. * Otherwise we may delete the destination file if it already exists,
  1611. * and the user presses Ctrl-C when asked if they wish to overwrite.
  1612. */
  1613. addHandler(dstFileName);
  1614. }
  1615. result = FIO_compressFilename_internal(fCtx, prefs, ress, dstFileName, srcFileName, compressionLevel);
  1616. if (closeDstFile) {
  1617. clearHandler();
  1618. if (transferStat) {
  1619. UTIL_setFDStat(dstFd, dstFileName, srcFileStat);
  1620. }
  1621. DISPLAYLEVEL(6, "FIO_compressFilename_dstFile: closing dst: %s \n", dstFileName);
  1622. if (AIO_WritePool_closeFile(ress.writeCtx)) { /* error closing file */
  1623. DISPLAYLEVEL(1, "zstd: %s: %s \n", dstFileName, strerror(errno));
  1624. result=1;
  1625. }
  1626. if (transferStat) {
  1627. UTIL_utime(dstFileName, srcFileStat);
  1628. }
  1629. if ( (result != 0) /* operation failure */
  1630. && strcmp(dstFileName, stdoutmark) /* special case : don't remove() stdout */
  1631. ) {
  1632. FIO_removeFile(dstFileName); /* remove compression artefact; note don't do anything special if remove() fails */
  1633. }
  1634. }
  1635. return result;
  1636. }
/* NULL-terminated list of file extensions considered "already compressed".
 * Passed to UTIL_isCompressedFile() by FIO_compressFilename_srcFile()
 * when the --exclude-compressed flag is set, in order to skip such files.
 * Different from the suffixList, and only meant for compression operations.
 */
static const char *compressedFileExtensions[] = {
    ZSTD_EXTENSION,
    TZSTD_EXTENSION,
    GZ_EXTENSION,
    TGZ_EXTENSION,
    LZMA_EXTENSION,
    XZ_EXTENSION,
    TXZ_EXTENSION,
    LZ4_EXTENSION,
    TLZ4_EXTENSION,
    ".7z",
    ".aa3",
    ".aac",
    ".aar",
    ".ace",
    ".alac",
    ".ape",
    ".apk",
    ".apng",
    ".arc",
    ".archive",
    ".arj",
    ".ark",
    ".asf",
    ".avi",
    ".avif",
    ".ba",
    ".br",
    ".bz2",
    ".cab",
    ".cdx",
    ".chm",
    ".cr2",
    ".divx",
    ".dmg",
    ".dng",
    ".docm",
    ".docx",
    ".dotm",
    ".dotx",
    ".dsft",
    ".ear",
    ".eftx",
    ".emz",
    ".eot",
    ".epub",
    ".f4v",
    ".flac",
    ".flv",
    ".gho",
    ".gif",
    ".gifv",
    ".gnp",
    ".iso",
    ".jar",
    ".jpeg",
    ".jpg",
    ".jxl",
    ".lz",
    ".lzh",
    ".m4a",
    ".m4v",
    ".mkv",
    ".mov",
    ".mp2",
    ".mp3",
    ".mp4",
    ".mpa",
    ".mpc",
    ".mpe",
    ".mpeg",
    ".mpg",
    ".mpl",
    ".mpv",
    ".msi",
    ".odp",
    ".ods",
    ".odt",
    ".ogg",
    ".ogv",
    ".otp",
    ".ots",
    ".ott",
    ".pea",
    ".png",
    ".pptx",
    ".qt",
    ".rar",
    ".s7z",
    ".sfx",
    ".sit",
    ".sitx",
    ".sqx",
    ".svgz",
    ".swf",
    ".tbz2",
    ".tib",
    ".tlz",
    ".vob",
    ".war",
    ".webm",
    ".webp",
    ".wma",
    ".wmv",
    ".woff",
    ".woff2",
    ".wvl",
    ".xlsx",
    ".xpi",
    ".xps",
    ".zip",
    ".zipx",
    ".zoo",
    ".zpaq",
    NULL   /* end-of-list marker */
};
  1756. /*! FIO_compressFilename_srcFile() :
  1757. * @return : 0 : compression completed correctly,
  1758. * 1 : missing or pb opening srcFileName
  1759. */
  1760. static int
  1761. FIO_compressFilename_srcFile(FIO_ctx_t* const fCtx,
  1762. FIO_prefs_t* const prefs,
  1763. cRess_t ress,
  1764. const char* dstFileName,
  1765. const char* srcFileName,
  1766. int compressionLevel)
  1767. {
  1768. int result;
  1769. FILE* srcFile;
  1770. stat_t srcFileStat;
  1771. U64 fileSize = UTIL_FILESIZE_UNKNOWN;
  1772. DISPLAYLEVEL(6, "FIO_compressFilename_srcFile: %s \n", srcFileName);
  1773. if (strcmp(srcFileName, stdinmark)) {
  1774. if (UTIL_stat(srcFileName, &srcFileStat)) {
  1775. /* failure to stat at all is handled during opening */
  1776. /* ensure src is not a directory */
  1777. if (UTIL_isDirectoryStat(&srcFileStat)) {
  1778. DISPLAYLEVEL(1, "zstd: %s is a directory -- ignored \n", srcFileName);
  1779. return 1;
  1780. }
  1781. /* ensure src is not the same as dict (if present) */
  1782. if (ress.dictFileName != NULL && UTIL_isSameFileStat(srcFileName, ress.dictFileName, &srcFileStat, &ress.dictFileStat)) {
  1783. DISPLAYLEVEL(1, "zstd: cannot use %s as an input file and dictionary \n", srcFileName);
  1784. return 1;
  1785. }
  1786. }
  1787. }
  1788. /* Check if "srcFile" is compressed. Only done if --exclude-compressed flag is used
  1789. * YES => ZSTD will skip compression of the file and will return 0.
  1790. * NO => ZSTD will resume with compress operation.
  1791. */
  1792. if (prefs->excludeCompressedFiles == 1 && UTIL_isCompressedFile(srcFileName, compressedFileExtensions)) {
  1793. DISPLAYLEVEL(4, "File is already compressed : %s \n", srcFileName);
  1794. return 0;
  1795. }
  1796. srcFile = FIO_openSrcFile(prefs, srcFileName, &srcFileStat);
  1797. if (srcFile == NULL) return 1; /* srcFile could not be opened */
  1798. /* Don't use AsyncIO for small files */
  1799. if (strcmp(srcFileName, stdinmark)) /* Stdin doesn't have stats */
  1800. fileSize = UTIL_getFileSizeStat(&srcFileStat);
  1801. if(fileSize != UTIL_FILESIZE_UNKNOWN && fileSize < ZSTD_BLOCKSIZE_MAX * 3) {
  1802. AIO_ReadPool_setAsync(ress.readCtx, 0);
  1803. AIO_WritePool_setAsync(ress.writeCtx, 0);
  1804. } else {
  1805. AIO_ReadPool_setAsync(ress.readCtx, 1);
  1806. AIO_WritePool_setAsync(ress.writeCtx, 1);
  1807. }
  1808. AIO_ReadPool_setFile(ress.readCtx, srcFile);
  1809. result = FIO_compressFilename_dstFile(
  1810. fCtx, prefs, ress,
  1811. dstFileName, srcFileName,
  1812. &srcFileStat, compressionLevel);
  1813. AIO_ReadPool_closeFile(ress.readCtx);
  1814. if ( prefs->removeSrcFile /* --rm */
  1815. && result == 0 /* success */
  1816. && strcmp(srcFileName, stdinmark) /* exception : don't erase stdin */
  1817. ) {
  1818. /* We must clear the handler, since after this point calling it would
  1819. * delete both the source and destination files.
  1820. */
  1821. clearHandler();
  1822. if (FIO_removeFile(srcFileName))
  1823. EXM_THROW(1, "zstd: %s: %s", srcFileName, strerror(errno));
  1824. }
  1825. return result;
  1826. }
  1827. static const char*
  1828. checked_index(const char* options[], size_t length, size_t index) {
  1829. assert(index < length);
  1830. /* Necessary to avoid warnings since -O3 will omit the above `assert` */
  1831. (void) length;
  1832. return options[index];
  1833. }
  1834. #define INDEX(options, index) checked_index((options), sizeof(options) / sizeof(char*), (size_t)(index))
  1835. void FIO_displayCompressionParameters(const FIO_prefs_t* prefs)
  1836. {
  1837. static const char* formatOptions[5] = {ZSTD_EXTENSION, GZ_EXTENSION, XZ_EXTENSION,
  1838. LZMA_EXTENSION, LZ4_EXTENSION};
  1839. static const char* sparseOptions[3] = {" --no-sparse", "", " --sparse"};
  1840. static const char* checkSumOptions[3] = {" --no-check", "", " --check"};
  1841. static const char* rowMatchFinderOptions[3] = {"", " --no-row-match-finder", " --row-match-finder"};
  1842. static const char* compressLiteralsOptions[3] = {"", " --compress-literals", " --no-compress-literals"};
  1843. assert(g_display_prefs.displayLevel >= 4);
  1844. DISPLAY("--format=%s", formatOptions[prefs->compressionType]);
  1845. DISPLAY("%s", INDEX(sparseOptions, prefs->sparseFileSupport));
  1846. DISPLAY("%s", prefs->dictIDFlag ? "" : " --no-dictID");
  1847. DISPLAY("%s", INDEX(checkSumOptions, prefs->checksumFlag));
  1848. DISPLAY(" --block-size=%d", prefs->blockSize);
  1849. if (prefs->adaptiveMode)
  1850. DISPLAY(" --adapt=min=%d,max=%d", prefs->minAdaptLevel, prefs->maxAdaptLevel);
  1851. DISPLAY("%s", INDEX(rowMatchFinderOptions, prefs->useRowMatchFinder));
  1852. DISPLAY("%s", prefs->rsyncable ? " --rsyncable" : "");
  1853. if (prefs->streamSrcSize)
  1854. DISPLAY(" --stream-size=%u", (unsigned) prefs->streamSrcSize);
  1855. if (prefs->srcSizeHint)
  1856. DISPLAY(" --size-hint=%d", prefs->srcSizeHint);
  1857. if (prefs->targetCBlockSize)
  1858. DISPLAY(" --target-compressed-block-size=%u", (unsigned) prefs->targetCBlockSize);
  1859. DISPLAY("%s", INDEX(compressLiteralsOptions, prefs->literalCompressionMode));
  1860. DISPLAY(" --memory=%u", prefs->memLimit ? prefs->memLimit : 128 MB);
  1861. DISPLAY(" --threads=%d", prefs->nbWorkers);
  1862. DISPLAY("%s", prefs->excludeCompressedFiles ? " --exclude-compressed" : "");
  1863. DISPLAY(" --%scontent-size", prefs->contentSize ? "" : "no-");
  1864. DISPLAY("\n");
  1865. }
  1866. #undef INDEX
  1867. int FIO_compressFilename(FIO_ctx_t* const fCtx, FIO_prefs_t* const prefs, const char* dstFileName,
  1868. const char* srcFileName, const char* dictFileName,
  1869. int compressionLevel, ZSTD_compressionParameters comprParams)
  1870. {
  1871. cRess_t ress = FIO_createCResources(prefs, dictFileName, UTIL_getFileSize(srcFileName), compressionLevel, comprParams);
  1872. int const result = FIO_compressFilename_srcFile(fCtx, prefs, ress, dstFileName, srcFileName, compressionLevel);
  1873. #define DISPLAY_LEVEL_DEFAULT 2
  1874. FIO_freeCResources(&ress);
  1875. return result;
  1876. }
  1877. /* FIO_determineCompressedName() :
  1878. * create a destination filename for compressed srcFileName.
  1879. * @return a pointer to it.
  1880. * This function never returns an error (it may abort() in case of pb)
  1881. */
  1882. static const char*
  1883. FIO_determineCompressedName(const char* srcFileName, const char* outDirName, const char* suffix)
  1884. {
  1885. static size_t dfnbCapacity = 0;
  1886. static char* dstFileNameBuffer = NULL; /* using static allocation : this function cannot be multi-threaded */
  1887. char* outDirFilename = NULL;
  1888. size_t sfnSize = strlen(srcFileName);
  1889. size_t const srcSuffixLen = strlen(suffix);
  1890. if(!strcmp(srcFileName, stdinmark)) {
  1891. return stdoutmark;
  1892. }
  1893. if (outDirName) {
  1894. outDirFilename = FIO_createFilename_fromOutDir(srcFileName, outDirName, srcSuffixLen);
  1895. sfnSize = strlen(outDirFilename);
  1896. assert(outDirFilename != NULL);
  1897. }
  1898. if (dfnbCapacity <= sfnSize+srcSuffixLen+1) {
  1899. /* resize buffer for dstName */
  1900. free(dstFileNameBuffer);
  1901. dfnbCapacity = sfnSize + srcSuffixLen + 30;
  1902. dstFileNameBuffer = (char*)malloc(dfnbCapacity);
  1903. if (!dstFileNameBuffer) {
  1904. EXM_THROW(30, "zstd: %s", strerror(errno));
  1905. }
  1906. }
  1907. assert(dstFileNameBuffer != NULL);
  1908. if (outDirFilename) {
  1909. memcpy(dstFileNameBuffer, outDirFilename, sfnSize);
  1910. free(outDirFilename);
  1911. } else {
  1912. memcpy(dstFileNameBuffer, srcFileName, sfnSize);
  1913. }
  1914. memcpy(dstFileNameBuffer+sfnSize, suffix, srcSuffixLen+1 /* Include terminating null */);
  1915. return dstFileNameBuffer;
  1916. }
  1917. static unsigned long long FIO_getLargestFileSize(const char** inFileNames, unsigned nbFiles)
  1918. {
  1919. size_t i;
  1920. unsigned long long fileSize, maxFileSize = 0;
  1921. for (i = 0; i < nbFiles; i++) {
  1922. fileSize = UTIL_getFileSize(inFileNames[i]);
  1923. maxFileSize = fileSize > maxFileSize ? fileSize : maxFileSize;
  1924. }
  1925. return maxFileSize;
  1926. }
  1927. /* FIO_compressMultipleFilenames() :
  1928. * compress nbFiles files
  1929. * into either one destination (outFileName),
  1930. * or into one file each (outFileName == NULL, but suffix != NULL),
  1931. * or into a destination folder (specified with -O)
  1932. */
  1933. int FIO_compressMultipleFilenames(FIO_ctx_t* const fCtx,
  1934. FIO_prefs_t* const prefs,
  1935. const char** inFileNamesTable,
  1936. const char* outMirroredRootDirName,
  1937. const char* outDirName,
  1938. const char* outFileName, const char* suffix,
  1939. const char* dictFileName, int compressionLevel,
  1940. ZSTD_compressionParameters comprParams)
  1941. {
  1942. int status;
  1943. int error = 0;
  1944. cRess_t ress = FIO_createCResources(prefs, dictFileName,
  1945. FIO_getLargestFileSize(inFileNamesTable, (unsigned)fCtx->nbFilesTotal),
  1946. compressionLevel, comprParams);
  1947. /* init */
  1948. assert(outFileName != NULL || suffix != NULL);
  1949. if (outFileName != NULL) { /* output into a single destination (stdout typically) */
  1950. FILE *dstFile;
  1951. if (FIO_multiFilesConcatWarning(fCtx, prefs, outFileName, 1 /* displayLevelCutoff */)) {
  1952. FIO_freeCResources(&ress);
  1953. return 1;
  1954. }
  1955. dstFile = FIO_openDstFile(fCtx, prefs, NULL, outFileName, DEFAULT_FILE_PERMISSIONS);
  1956. if (dstFile == NULL) { /* could not open outFileName */
  1957. error = 1;
  1958. } else {
  1959. AIO_WritePool_setFile(ress.writeCtx, dstFile);
  1960. for (; fCtx->currFileIdx < fCtx->nbFilesTotal; ++fCtx->currFileIdx) {
  1961. status = FIO_compressFilename_srcFile(fCtx, prefs, ress, outFileName, inFileNamesTable[fCtx->currFileIdx], compressionLevel);
  1962. if (!status) fCtx->nbFilesProcessed++;
  1963. error |= status;
  1964. }
  1965. if (AIO_WritePool_closeFile(ress.writeCtx))
  1966. EXM_THROW(29, "Write error (%s) : cannot properly close %s",
  1967. strerror(errno), outFileName);
  1968. }
  1969. } else {
  1970. if (outMirroredRootDirName)
  1971. UTIL_mirrorSourceFilesDirectories(inFileNamesTable, (unsigned)fCtx->nbFilesTotal, outMirroredRootDirName);
  1972. for (; fCtx->currFileIdx < fCtx->nbFilesTotal; ++fCtx->currFileIdx) {
  1973. const char* const srcFileName = inFileNamesTable[fCtx->currFileIdx];
  1974. const char* dstFileName = NULL;
  1975. if (outMirroredRootDirName) {
  1976. char* validMirroredDirName = UTIL_createMirroredDestDirName(srcFileName, outMirroredRootDirName);
  1977. if (validMirroredDirName) {
  1978. dstFileName = FIO_determineCompressedName(srcFileName, validMirroredDirName, suffix);
  1979. free(validMirroredDirName);
  1980. } else {
  1981. DISPLAYLEVEL(2, "zstd: --output-dir-mirror cannot compress '%s' into '%s' \n", srcFileName, outMirroredRootDirName);
  1982. error=1;
  1983. continue;
  1984. }
  1985. } else {
  1986. dstFileName = FIO_determineCompressedName(srcFileName, outDirName, suffix); /* cannot fail */
  1987. }
  1988. status = FIO_compressFilename_srcFile(fCtx, prefs, ress, dstFileName, srcFileName, compressionLevel);
  1989. if (!status) fCtx->nbFilesProcessed++;
  1990. error |= status;
  1991. }
  1992. if (outDirName)
  1993. FIO_checkFilenameCollisions(inFileNamesTable , (unsigned)fCtx->nbFilesTotal);
  1994. }
  1995. if (FIO_shouldDisplayMultipleFileSummary(fCtx)) {
  1996. UTIL_HumanReadableSize_t hr_isize = UTIL_makeHumanReadableSize((U64) fCtx->totalBytesInput);
  1997. UTIL_HumanReadableSize_t hr_osize = UTIL_makeHumanReadableSize((U64) fCtx->totalBytesOutput);
  1998. DISPLAY_PROGRESS("\r%79s\r", "");
  1999. if (fCtx->totalBytesInput == 0) {
  2000. DISPLAY_SUMMARY("%3d files compressed : (%6.*f%4s => %6.*f%4s)\n",
  2001. fCtx->nbFilesProcessed,
  2002. hr_isize.precision, hr_isize.value, hr_isize.suffix,
  2003. hr_osize.precision, hr_osize.value, hr_osize.suffix);
  2004. } else {
  2005. DISPLAY_SUMMARY("%3d files compressed : %.2f%% (%6.*f%4s => %6.*f%4s)\n",
  2006. fCtx->nbFilesProcessed,
  2007. (double)fCtx->totalBytesOutput/((double)fCtx->totalBytesInput)*100,
  2008. hr_isize.precision, hr_isize.value, hr_isize.suffix,
  2009. hr_osize.precision, hr_osize.value, hr_osize.suffix);
  2010. }
  2011. }
  2012. FIO_freeCResources(&ress);
  2013. return error;
  2014. }
  2015. #endif /* #ifndef ZSTD_NOCOMPRESS */
  2016. #ifndef ZSTD_NODECOMPRESS
  2017. /* **************************************************************************
  2018. * Decompression
  2019. ***************************************************************************/
/* dRess_t :
 * resources used by the decompression functions.
 * Built by FIO_createDResources(), released by FIO_freeDResources().
 */
typedef struct {
    FIO_Dict_t dict;           /* dictionary, set up by FIO_initDict() */
    ZSTD_DStream* dctx;        /* decompression stream, from ZSTD_createDStream() */
    WritePoolCtx_t *writeCtx;  /* write pool, from AIO_WritePool_create() */
    ReadPoolCtx_t *readCtx;    /* read pool, from AIO_ReadPool_create() */
} dRess_t;
  2026. static dRess_t FIO_createDResources(FIO_prefs_t* const prefs, const char* dictFileName)
  2027. {
  2028. int useMMap = prefs->mmapDict == ZSTD_ps_enable;
  2029. int forceNoUseMMap = prefs->mmapDict == ZSTD_ps_disable;
  2030. stat_t statbuf;
  2031. dRess_t ress;
  2032. memset(&statbuf, 0, sizeof(statbuf));
  2033. memset(&ress, 0, sizeof(ress));
  2034. FIO_getDictFileStat(dictFileName, &statbuf);
  2035. if (prefs->patchFromMode){
  2036. U64 const dictSize = UTIL_getFileSizeStat(&statbuf);
  2037. useMMap |= dictSize > prefs->memLimit;
  2038. FIO_adjustMemLimitForPatchFromMode(prefs, dictSize, 0 /* just use the dict size */);
  2039. }
  2040. /* Allocation */
  2041. ress.dctx = ZSTD_createDStream();
  2042. if (ress.dctx==NULL)
  2043. EXM_THROW(60, "Error: %s : can't create ZSTD_DStream", strerror(errno));
  2044. CHECK( ZSTD_DCtx_setMaxWindowSize(ress.dctx, prefs->memLimit) );
  2045. CHECK( ZSTD_DCtx_setParameter(ress.dctx, ZSTD_d_forceIgnoreChecksum, !prefs->checksumFlag));
  2046. /* dictionary */
  2047. {
  2048. FIO_dictBufferType_t dictBufferType = (useMMap && !forceNoUseMMap) ? FIO_mmapDict : FIO_mallocDict;
  2049. FIO_initDict(&ress.dict, dictFileName, prefs, &statbuf, dictBufferType);
  2050. CHECK(ZSTD_DCtx_reset(ress.dctx, ZSTD_reset_session_only) );
  2051. if (prefs->patchFromMode){
  2052. CHECK(ZSTD_DCtx_refPrefix(ress.dctx, ress.dict.dictBuffer, ress.dict.dictBufferSize));
  2053. } else {
  2054. CHECK(ZSTD_DCtx_loadDictionary_byReference(ress.dctx, ress.dict.dictBuffer, ress.dict.dictBufferSize));
  2055. }
  2056. }
  2057. ress.writeCtx = AIO_WritePool_create(prefs, ZSTD_DStreamOutSize());
  2058. ress.readCtx = AIO_ReadPool_create(prefs, ZSTD_DStreamInSize());
  2059. return ress;
  2060. }
  2061. static void FIO_freeDResources(dRess_t ress)
  2062. {
  2063. FIO_freeDict(&(ress.dict));
  2064. CHECK( ZSTD_freeDStream(ress.dctx) );
  2065. AIO_WritePool_free(ress.writeCtx);
  2066. AIO_ReadPool_free(ress.readCtx);
  2067. }
  2068. /* FIO_passThrough() : just copy input into output, for compatibility with gzip -df mode
  2069. * @return : 0 (no error) */
  2070. static int FIO_passThrough(dRess_t *ress)
  2071. {
  2072. size_t const blockSize = MIN(MIN(64 KB, ZSTD_DStreamInSize()), ZSTD_DStreamOutSize());
  2073. IOJob_t *writeJob = AIO_WritePool_acquireJob(ress->writeCtx);
  2074. AIO_ReadPool_fillBuffer(ress->readCtx, blockSize);
  2075. while(ress->readCtx->srcBufferLoaded) {
  2076. size_t writeSize;
  2077. writeSize = MIN(blockSize, ress->readCtx->srcBufferLoaded);
  2078. assert(writeSize <= writeJob->bufferSize);
  2079. memcpy(writeJob->buffer, ress->readCtx->srcBuffer, writeSize);
  2080. writeJob->usedBufferSize = writeSize;
  2081. AIO_WritePool_enqueueAndReacquireWriteJob(&writeJob);
  2082. AIO_ReadPool_consumeBytes(ress->readCtx, writeSize);
  2083. AIO_ReadPool_fillBuffer(ress->readCtx, blockSize);
  2084. }
  2085. assert(ress->readCtx->reachedEof);
  2086. AIO_WritePool_releaseIoJob(writeJob);
  2087. AIO_WritePool_sparseWriteEnd(ress->writeCtx);
  2088. return 0;
  2089. }
  2090. /* FIO_zstdErrorHelp() :
  2091. * detailed error message when requested window size is too large */
  2092. static void
  2093. FIO_zstdErrorHelp(const FIO_prefs_t* const prefs,
  2094. const dRess_t* ress,
  2095. size_t err,
  2096. const char* srcFileName)
  2097. {
  2098. ZSTD_frameHeader header;
  2099. /* Help message only for one specific error */
  2100. if (ZSTD_getErrorCode(err) != ZSTD_error_frameParameter_windowTooLarge)
  2101. return;
  2102. /* Try to decode the frame header */
  2103. err = ZSTD_getFrameHeader(&header, ress->readCtx->srcBuffer, ress->readCtx->srcBufferLoaded);
  2104. if (err == 0) {
  2105. unsigned long long const windowSize = header.windowSize;
  2106. unsigned const windowLog = FIO_highbit64(windowSize) + ((windowSize & (windowSize - 1)) != 0);
  2107. assert(prefs->memLimit > 0);
  2108. DISPLAYLEVEL(1, "%s : Window size larger than maximum : %llu > %u \n",
  2109. srcFileName, windowSize, prefs->memLimit);
  2110. if (windowLog <= ZSTD_WINDOWLOG_MAX) {
  2111. unsigned const windowMB = (unsigned)((windowSize >> 20) + ((windowSize & ((1 MB) - 1)) != 0));
  2112. assert(windowSize < (U64)(1ULL << 52)); /* ensure now overflow for windowMB */
  2113. DISPLAYLEVEL(1, "%s : Use --long=%u or --memory=%uMB \n",
  2114. srcFileName, windowLog, windowMB);
  2115. return;
  2116. } }
  2117. DISPLAYLEVEL(1, "%s : Window log larger than ZSTD_WINDOWLOG_MAX=%u; not supported \n",
  2118. srcFileName, ZSTD_WINDOWLOG_MAX);
  2119. }
  2120. /** FIO_decompressFrame() :
  2121. * @return : size of decoded zstd frame, or an error code
  2122. */
  2123. #define FIO_ERROR_FRAME_DECODING ((unsigned long long)(-2))
  2124. static unsigned long long
  2125. FIO_decompressZstdFrame(FIO_ctx_t* const fCtx, dRess_t* ress,
  2126. const FIO_prefs_t* const prefs,
  2127. const char* srcFileName,
  2128. U64 alreadyDecoded) /* for multi-frames streams */
  2129. {
  2130. U64 frameSize = 0;
  2131. IOJob_t *writeJob = AIO_WritePool_acquireJob(ress->writeCtx);
  2132. /* display last 20 characters only when not --verbose */
  2133. { size_t const srcFileLength = strlen(srcFileName);
  2134. if ((srcFileLength>20) && (g_display_prefs.displayLevel<3))
  2135. srcFileName += srcFileLength-20;
  2136. }
  2137. ZSTD_DCtx_reset(ress->dctx, ZSTD_reset_session_only);
  2138. /* Header loading : ensures ZSTD_getFrameHeader() will succeed */
  2139. AIO_ReadPool_fillBuffer(ress->readCtx, ZSTD_FRAMEHEADERSIZE_MAX);
  2140. /* Main decompression Loop */
  2141. while (1) {
  2142. ZSTD_inBuffer inBuff = setInBuffer( ress->readCtx->srcBuffer, ress->readCtx->srcBufferLoaded, 0 );
  2143. ZSTD_outBuffer outBuff= setOutBuffer( writeJob->buffer, writeJob->bufferSize, 0 );
  2144. size_t const readSizeHint = ZSTD_decompressStream(ress->dctx, &outBuff, &inBuff);
  2145. UTIL_HumanReadableSize_t const hrs = UTIL_makeHumanReadableSize(alreadyDecoded+frameSize);
  2146. if (ZSTD_isError(readSizeHint)) {
  2147. DISPLAYLEVEL(1, "%s : Decoding error (36) : %s \n",
  2148. srcFileName, ZSTD_getErrorName(readSizeHint));
  2149. FIO_zstdErrorHelp(prefs, ress, readSizeHint, srcFileName);
  2150. AIO_WritePool_releaseIoJob(writeJob);
  2151. return FIO_ERROR_FRAME_DECODING;
  2152. }
  2153. /* Write block */
  2154. writeJob->usedBufferSize = outBuff.pos;
  2155. AIO_WritePool_enqueueAndReacquireWriteJob(&writeJob);
  2156. frameSize += outBuff.pos;
  2157. if (fCtx->nbFilesTotal > 1) {
  2158. size_t srcFileNameSize = strlen(srcFileName);
  2159. if (srcFileNameSize > 18) {
  2160. const char* truncatedSrcFileName = srcFileName + srcFileNameSize - 15;
  2161. DISPLAYUPDATE_PROGRESS(
  2162. "\rDecompress: %2u/%2u files. Current: ...%s : %.*f%s... ",
  2163. fCtx->currFileIdx+1, fCtx->nbFilesTotal, truncatedSrcFileName, hrs.precision, hrs.value, hrs.suffix);
  2164. } else {
  2165. DISPLAYUPDATE_PROGRESS("\rDecompress: %2u/%2u files. Current: %s : %.*f%s... ",
  2166. fCtx->currFileIdx+1, fCtx->nbFilesTotal, srcFileName, hrs.precision, hrs.value, hrs.suffix);
  2167. }
  2168. } else {
  2169. DISPLAYUPDATE_PROGRESS("\r%-20.20s : %.*f%s... ",
  2170. srcFileName, hrs.precision, hrs.value, hrs.suffix);
  2171. }
  2172. AIO_ReadPool_consumeBytes(ress->readCtx, inBuff.pos);
  2173. if (readSizeHint == 0) break; /* end of frame */
  2174. /* Fill input buffer */
  2175. { size_t const toDecode = MIN(readSizeHint, ZSTD_DStreamInSize()); /* support large skippable frames */
  2176. if (ress->readCtx->srcBufferLoaded < toDecode) {
  2177. size_t const readSize = AIO_ReadPool_fillBuffer(ress->readCtx, toDecode);
  2178. if (readSize==0) {
  2179. DISPLAYLEVEL(1, "%s : Read error (39) : premature end \n",
  2180. srcFileName);
  2181. AIO_WritePool_releaseIoJob(writeJob);
  2182. return FIO_ERROR_FRAME_DECODING;
  2183. }
  2184. } } }
  2185. AIO_WritePool_releaseIoJob(writeJob);
  2186. AIO_WritePool_sparseWriteEnd(ress->writeCtx);
  2187. return frameSize;
  2188. }
  2189. #ifdef ZSTD_GZDECOMPRESS
  2190. static unsigned long long
  2191. FIO_decompressGzFrame(dRess_t* ress, const char* srcFileName)
  2192. {
  2193. unsigned long long outFileSize = 0;
  2194. z_stream strm;
  2195. int flush = Z_NO_FLUSH;
  2196. int decodingError = 0;
  2197. IOJob_t *writeJob = NULL;
  2198. strm.zalloc = Z_NULL;
  2199. strm.zfree = Z_NULL;
  2200. strm.opaque = Z_NULL;
  2201. strm.next_in = 0;
  2202. strm.avail_in = 0;
  2203. /* see https://www.zlib.net/manual.html */
  2204. if (inflateInit2(&strm, 15 /* maxWindowLogSize */ + 16 /* gzip only */) != Z_OK)
  2205. return FIO_ERROR_FRAME_DECODING;
  2206. writeJob = AIO_WritePool_acquireJob(ress->writeCtx);
  2207. strm.next_out = (Bytef*)writeJob->buffer;
  2208. strm.avail_out = (uInt)writeJob->bufferSize;
  2209. strm.avail_in = (uInt)ress->readCtx->srcBufferLoaded;
  2210. strm.next_in = (z_const unsigned char*)ress->readCtx->srcBuffer;
  2211. for ( ; ; ) {
  2212. int ret;
  2213. if (strm.avail_in == 0) {
  2214. AIO_ReadPool_consumeAndRefill(ress->readCtx);
  2215. if (ress->readCtx->srcBufferLoaded == 0) flush = Z_FINISH;
  2216. strm.next_in = (z_const unsigned char*)ress->readCtx->srcBuffer;
  2217. strm.avail_in = (uInt)ress->readCtx->srcBufferLoaded;
  2218. }
  2219. ret = inflate(&strm, flush);
  2220. if (ret == Z_BUF_ERROR) {
  2221. DISPLAYLEVEL(1, "zstd: %s: premature gz end \n", srcFileName);
  2222. decodingError = 1; break;
  2223. }
  2224. if (ret != Z_OK && ret != Z_STREAM_END) {
  2225. DISPLAYLEVEL(1, "zstd: %s: inflate error %d \n", srcFileName, ret);
  2226. decodingError = 1; break;
  2227. }
  2228. { size_t const decompBytes = writeJob->bufferSize - strm.avail_out;
  2229. if (decompBytes) {
  2230. writeJob->usedBufferSize = decompBytes;
  2231. AIO_WritePool_enqueueAndReacquireWriteJob(&writeJob);
  2232. outFileSize += decompBytes;
  2233. strm.next_out = (Bytef*)writeJob->buffer;
  2234. strm.avail_out = (uInt)writeJob->bufferSize;
  2235. }
  2236. }
  2237. if (ret == Z_STREAM_END) break;
  2238. }
  2239. AIO_ReadPool_consumeBytes(ress->readCtx, ress->readCtx->srcBufferLoaded - strm.avail_in);
  2240. if ( (inflateEnd(&strm) != Z_OK) /* release resources ; error detected */
  2241. && (decodingError==0) ) {
  2242. DISPLAYLEVEL(1, "zstd: %s: inflateEnd error \n", srcFileName);
  2243. decodingError = 1;
  2244. }
  2245. AIO_WritePool_releaseIoJob(writeJob);
  2246. AIO_WritePool_sparseWriteEnd(ress->writeCtx);
  2247. return decodingError ? FIO_ERROR_FRAME_DECODING : outFileSize;
  2248. }
  2249. #endif
  2250. #ifdef ZSTD_LZMADECOMPRESS
  2251. static unsigned long long
  2252. FIO_decompressLzmaFrame(dRess_t* ress,
  2253. const char* srcFileName, int plain_lzma)
  2254. {
  2255. unsigned long long outFileSize = 0;
  2256. lzma_stream strm = LZMA_STREAM_INIT;
  2257. lzma_action action = LZMA_RUN;
  2258. lzma_ret initRet;
  2259. int decodingError = 0;
  2260. IOJob_t *writeJob = NULL;
  2261. strm.next_in = 0;
  2262. strm.avail_in = 0;
  2263. if (plain_lzma) {
  2264. initRet = lzma_alone_decoder(&strm, UINT64_MAX); /* LZMA */
  2265. } else {
  2266. initRet = lzma_stream_decoder(&strm, UINT64_MAX, 0); /* XZ */
  2267. }
  2268. if (initRet != LZMA_OK) {
  2269. DISPLAYLEVEL(1, "zstd: %s: %s error %d \n",
  2270. plain_lzma ? "lzma_alone_decoder" : "lzma_stream_decoder",
  2271. srcFileName, initRet);
  2272. return FIO_ERROR_FRAME_DECODING;
  2273. }
  2274. writeJob = AIO_WritePool_acquireJob(ress->writeCtx);
  2275. strm.next_out = (BYTE*)writeJob->buffer;
  2276. strm.avail_out = writeJob->bufferSize;
  2277. strm.next_in = (BYTE const*)ress->readCtx->srcBuffer;
  2278. strm.avail_in = ress->readCtx->srcBufferLoaded;
  2279. for ( ; ; ) {
  2280. lzma_ret ret;
  2281. if (strm.avail_in == 0) {
  2282. AIO_ReadPool_consumeAndRefill(ress->readCtx);
  2283. if (ress->readCtx->srcBufferLoaded == 0) action = LZMA_FINISH;
  2284. strm.next_in = (BYTE const*)ress->readCtx->srcBuffer;
  2285. strm.avail_in = ress->readCtx->srcBufferLoaded;
  2286. }
  2287. ret = lzma_code(&strm, action);
  2288. if (ret == LZMA_BUF_ERROR) {
  2289. DISPLAYLEVEL(1, "zstd: %s: premature lzma end \n", srcFileName);
  2290. decodingError = 1; break;
  2291. }
  2292. if (ret != LZMA_OK && ret != LZMA_STREAM_END) {
  2293. DISPLAYLEVEL(1, "zstd: %s: lzma_code decoding error %d \n",
  2294. srcFileName, ret);
  2295. decodingError = 1; break;
  2296. }
  2297. { size_t const decompBytes = writeJob->bufferSize - strm.avail_out;
  2298. if (decompBytes) {
  2299. writeJob->usedBufferSize = decompBytes;
  2300. AIO_WritePool_enqueueAndReacquireWriteJob(&writeJob);
  2301. outFileSize += decompBytes;
  2302. strm.next_out = (BYTE*)writeJob->buffer;
  2303. strm.avail_out = writeJob->bufferSize;
  2304. } }
  2305. if (ret == LZMA_STREAM_END) break;
  2306. }
  2307. AIO_ReadPool_consumeBytes(ress->readCtx, ress->readCtx->srcBufferLoaded - strm.avail_in);
  2308. lzma_end(&strm);
  2309. AIO_WritePool_releaseIoJob(writeJob);
  2310. AIO_WritePool_sparseWriteEnd(ress->writeCtx);
  2311. return decodingError ? FIO_ERROR_FRAME_DECODING : outFileSize;
  2312. }
  2313. #endif
  2314. #ifdef ZSTD_LZ4DECOMPRESS
  2315. static unsigned long long
  2316. FIO_decompressLz4Frame(dRess_t* ress, const char* srcFileName)
  2317. {
  2318. unsigned long long filesize = 0;
  2319. LZ4F_errorCode_t nextToLoad = 4;
  2320. LZ4F_decompressionContext_t dCtx;
  2321. LZ4F_errorCode_t const errorCode = LZ4F_createDecompressionContext(&dCtx, LZ4F_VERSION);
  2322. int decodingError = 0;
  2323. IOJob_t *writeJob = NULL;
  2324. if (LZ4F_isError(errorCode)) {
  2325. DISPLAYLEVEL(1, "zstd: failed to create lz4 decompression context \n");
  2326. return FIO_ERROR_FRAME_DECODING;
  2327. }
  2328. writeJob = AIO_WritePool_acquireJob(ress->writeCtx);
  2329. /* Main Loop */
  2330. for (;nextToLoad;) {
  2331. size_t pos = 0;
  2332. size_t decodedBytes = writeJob->bufferSize;
  2333. int fullBufferDecoded = 0;
  2334. /* Read input */
  2335. AIO_ReadPool_fillBuffer(ress->readCtx, nextToLoad);
  2336. if(!ress->readCtx->srcBufferLoaded) break; /* reached end of file */
  2337. while ((pos < ress->readCtx->srcBufferLoaded) || fullBufferDecoded) { /* still to read, or still to flush */
  2338. /* Decode Input (at least partially) */
  2339. size_t remaining = ress->readCtx->srcBufferLoaded - pos;
  2340. decodedBytes = writeJob->bufferSize;
  2341. nextToLoad = LZ4F_decompress(dCtx, writeJob->buffer, &decodedBytes, (char*)(ress->readCtx->srcBuffer)+pos,
  2342. &remaining, NULL);
  2343. if (LZ4F_isError(nextToLoad)) {
  2344. DISPLAYLEVEL(1, "zstd: %s: lz4 decompression error : %s \n",
  2345. srcFileName, LZ4F_getErrorName(nextToLoad));
  2346. decodingError = 1; nextToLoad = 0; break;
  2347. }
  2348. pos += remaining;
  2349. assert(pos <= ress->readCtx->srcBufferLoaded);
  2350. fullBufferDecoded = decodedBytes == writeJob->bufferSize;
  2351. /* Write Block */
  2352. if (decodedBytes) {
  2353. UTIL_HumanReadableSize_t hrs;
  2354. writeJob->usedBufferSize = decodedBytes;
  2355. AIO_WritePool_enqueueAndReacquireWriteJob(&writeJob);
  2356. filesize += decodedBytes;
  2357. hrs = UTIL_makeHumanReadableSize(filesize);
  2358. DISPLAYUPDATE_PROGRESS("\rDecompressed : %.*f%s ", hrs.precision, hrs.value, hrs.suffix);
  2359. }
  2360. if (!nextToLoad) break;
  2361. }
  2362. AIO_ReadPool_consumeBytes(ress->readCtx, pos);
  2363. }
  2364. if (nextToLoad!=0) {
  2365. DISPLAYLEVEL(1, "zstd: %s: unfinished lz4 stream \n", srcFileName);
  2366. decodingError=1;
  2367. }
  2368. LZ4F_freeDecompressionContext(dCtx);
  2369. AIO_WritePool_releaseIoJob(writeJob);
  2370. AIO_WritePool_sparseWriteEnd(ress->writeCtx);
  2371. return decodingError ? FIO_ERROR_FRAME_DECODING : filesize;
  2372. }
  2373. #endif
  2374. /** FIO_decompressFrames() :
  2375. * Find and decode frames inside srcFile
  2376. * srcFile presumed opened and valid
  2377. * @return : 0 : OK
  2378. * 1 : error
  2379. */
  2380. static int FIO_decompressFrames(FIO_ctx_t* const fCtx,
  2381. dRess_t ress, const FIO_prefs_t* const prefs,
  2382. const char* dstFileName, const char* srcFileName)
  2383. {
  2384. unsigned readSomething = 0;
  2385. unsigned long long filesize = 0;
  2386. int passThrough = prefs->passThrough;
  2387. if (passThrough == -1) {
  2388. /* If pass-through mode is not explicitly enabled or disabled,
  2389. * default to the legacy behavior of enabling it if we are writing
  2390. * to stdout with the overwrite flag enabled.
  2391. */
  2392. passThrough = prefs->overwrite && !strcmp(dstFileName, stdoutmark);
  2393. }
  2394. assert(passThrough == 0 || passThrough == 1);
  2395. /* for each frame */
  2396. for ( ; ; ) {
  2397. /* check magic number -> version */
  2398. size_t const toRead = 4;
  2399. const BYTE* buf;
  2400. AIO_ReadPool_fillBuffer(ress.readCtx, toRead);
  2401. buf = (const BYTE*)ress.readCtx->srcBuffer;
  2402. if (ress.readCtx->srcBufferLoaded==0) {
  2403. if (readSomething==0) { /* srcFile is empty (which is invalid) */
  2404. DISPLAYLEVEL(1, "zstd: %s: unexpected end of file \n", srcFileName);
  2405. return 1;
  2406. } /* else, just reached frame boundary */
  2407. break; /* no more input */
  2408. }
  2409. readSomething = 1; /* there is at least 1 byte in srcFile */
  2410. if (ress.readCtx->srcBufferLoaded < toRead) { /* not enough input to check magic number */
  2411. if (passThrough) {
  2412. return FIO_passThrough(&ress);
  2413. }
  2414. DISPLAYLEVEL(1, "zstd: %s: unknown header \n", srcFileName);
  2415. return 1;
  2416. }
  2417. if (ZSTD_isFrame(buf, ress.readCtx->srcBufferLoaded)) {
  2418. unsigned long long const frameSize = FIO_decompressZstdFrame(fCtx, &ress, prefs, srcFileName, filesize);
  2419. if (frameSize == FIO_ERROR_FRAME_DECODING) return 1;
  2420. filesize += frameSize;
  2421. } else if (buf[0] == 31 && buf[1] == 139) { /* gz magic number */
  2422. #ifdef ZSTD_GZDECOMPRESS
  2423. unsigned long long const frameSize = FIO_decompressGzFrame(&ress, srcFileName);
  2424. if (frameSize == FIO_ERROR_FRAME_DECODING) return 1;
  2425. filesize += frameSize;
  2426. #else
  2427. DISPLAYLEVEL(1, "zstd: %s: gzip file cannot be uncompressed (zstd compiled without HAVE_ZLIB) -- ignored \n", srcFileName);
  2428. return 1;
  2429. #endif
  2430. } else if ((buf[0] == 0xFD && buf[1] == 0x37) /* xz magic number */
  2431. || (buf[0] == 0x5D && buf[1] == 0x00)) { /* lzma header (no magic number) */
  2432. #ifdef ZSTD_LZMADECOMPRESS
  2433. unsigned long long const frameSize = FIO_decompressLzmaFrame(&ress, srcFileName, buf[0] != 0xFD);
  2434. if (frameSize == FIO_ERROR_FRAME_DECODING) return 1;
  2435. filesize += frameSize;
  2436. #else
  2437. DISPLAYLEVEL(1, "zstd: %s: xz/lzma file cannot be uncompressed (zstd compiled without HAVE_LZMA) -- ignored \n", srcFileName);
  2438. return 1;
  2439. #endif
  2440. } else if (MEM_readLE32(buf) == LZ4_MAGICNUMBER) {
  2441. #ifdef ZSTD_LZ4DECOMPRESS
  2442. unsigned long long const frameSize = FIO_decompressLz4Frame(&ress, srcFileName);
  2443. if (frameSize == FIO_ERROR_FRAME_DECODING) return 1;
  2444. filesize += frameSize;
  2445. #else
  2446. DISPLAYLEVEL(1, "zstd: %s: lz4 file cannot be uncompressed (zstd compiled without HAVE_LZ4) -- ignored \n", srcFileName);
  2447. return 1;
  2448. #endif
  2449. } else if (passThrough) {
  2450. return FIO_passThrough(&ress);
  2451. } else {
  2452. DISPLAYLEVEL(1, "zstd: %s: unsupported format \n", srcFileName);
  2453. return 1;
  2454. } } /* for each frame */
  2455. /* Final Status */
  2456. fCtx->totalBytesOutput += (size_t)filesize;
  2457. DISPLAY_PROGRESS("\r%79s\r", "");
  2458. if (FIO_shouldDisplayFileSummary(fCtx))
  2459. DISPLAY_SUMMARY("%-20s: %llu bytes \n", srcFileName, filesize);
  2460. return 0;
  2461. }
  2462. /** FIO_decompressDstFile() :
  2463. open `dstFileName`, or pass-through if writeCtx's file is already != 0,
  2464. then start decompression process (FIO_decompressFrames()).
  2465. @return : 0 : OK
  2466. 1 : operation aborted
  2467. */
  2468. static int FIO_decompressDstFile(FIO_ctx_t* const fCtx,
  2469. FIO_prefs_t* const prefs,
  2470. dRess_t ress,
  2471. const char* dstFileName,
  2472. const char* srcFileName,
  2473. const stat_t* srcFileStat)
  2474. {
  2475. int result;
  2476. int releaseDstFile = 0;
  2477. int transferStat = 0;
  2478. int dstFd = 0;
  2479. if ((AIO_WritePool_getFile(ress.writeCtx) == NULL) && (prefs->testMode == 0)) {
  2480. FILE *dstFile;
  2481. int dstFilePermissions = DEFAULT_FILE_PERMISSIONS;
  2482. if ( strcmp(srcFileName, stdinmark) /* special case : don't transfer permissions from stdin */
  2483. && strcmp(dstFileName, stdoutmark)
  2484. && UTIL_isRegularFileStat(srcFileStat) ) {
  2485. transferStat = 1;
  2486. dstFilePermissions = TEMPORARY_FILE_PERMISSIONS;
  2487. }
  2488. releaseDstFile = 1;
  2489. dstFile = FIO_openDstFile(fCtx, prefs, srcFileName, dstFileName, dstFilePermissions);
  2490. if (dstFile==NULL) return 1;
  2491. dstFd = fileno(dstFile);
  2492. AIO_WritePool_setFile(ress.writeCtx, dstFile);
  2493. /* Must only be added after FIO_openDstFile() succeeds.
  2494. * Otherwise we may delete the destination file if it already exists,
  2495. * and the user presses Ctrl-C when asked if they wish to overwrite.
  2496. */
  2497. addHandler(dstFileName);
  2498. }
  2499. result = FIO_decompressFrames(fCtx, ress, prefs, dstFileName, srcFileName);
  2500. if (releaseDstFile) {
  2501. clearHandler();
  2502. if (transferStat) {
  2503. UTIL_setFDStat(dstFd, dstFileName, srcFileStat);
  2504. }
  2505. if (AIO_WritePool_closeFile(ress.writeCtx)) {
  2506. DISPLAYLEVEL(1, "zstd: %s: %s \n", dstFileName, strerror(errno));
  2507. result = 1;
  2508. }
  2509. if (transferStat) {
  2510. UTIL_utime(dstFileName, srcFileStat);
  2511. }
  2512. if ( (result != 0) /* operation failure */
  2513. && strcmp(dstFileName, stdoutmark) /* special case : don't remove() stdout */
  2514. ) {
  2515. FIO_removeFile(dstFileName); /* remove decompression artefact; note: don't do anything special if remove() fails */
  2516. }
  2517. }
  2518. return result;
  2519. }
  2520. /** FIO_decompressSrcFile() :
  2521. Open `srcFileName`, transfer control to decompressDstFile()
  2522. @return : 0 : OK
  2523. 1 : error
  2524. */
  2525. static int FIO_decompressSrcFile(FIO_ctx_t* const fCtx, FIO_prefs_t* const prefs, dRess_t ress, const char* dstFileName, const char* srcFileName)
  2526. {
  2527. FILE* srcFile;
  2528. stat_t srcFileStat;
  2529. int result;
  2530. U64 fileSize = UTIL_FILESIZE_UNKNOWN;
  2531. if (UTIL_isDirectory(srcFileName)) {
  2532. DISPLAYLEVEL(1, "zstd: %s is a directory -- ignored \n", srcFileName);
  2533. return 1;
  2534. }
  2535. srcFile = FIO_openSrcFile(prefs, srcFileName, &srcFileStat);
  2536. if (srcFile==NULL) return 1;
  2537. /* Don't use AsyncIO for small files */
  2538. if (strcmp(srcFileName, stdinmark)) /* Stdin doesn't have stats */
  2539. fileSize = UTIL_getFileSizeStat(&srcFileStat);
  2540. if(fileSize != UTIL_FILESIZE_UNKNOWN && fileSize < ZSTD_BLOCKSIZE_MAX * 3) {
  2541. AIO_ReadPool_setAsync(ress.readCtx, 0);
  2542. AIO_WritePool_setAsync(ress.writeCtx, 0);
  2543. } else {
  2544. AIO_ReadPool_setAsync(ress.readCtx, 1);
  2545. AIO_WritePool_setAsync(ress.writeCtx, 1);
  2546. }
  2547. AIO_ReadPool_setFile(ress.readCtx, srcFile);
  2548. result = FIO_decompressDstFile(fCtx, prefs, ress, dstFileName, srcFileName, &srcFileStat);
  2549. AIO_ReadPool_setFile(ress.readCtx, NULL);
  2550. /* Close file */
  2551. if (fclose(srcFile)) {
  2552. DISPLAYLEVEL(1, "zstd: %s: %s \n", srcFileName, strerror(errno)); /* error should not happen */
  2553. return 1;
  2554. }
  2555. if ( prefs->removeSrcFile /* --rm */
  2556. && (result==0) /* decompression successful */
  2557. && strcmp(srcFileName, stdinmark) ) /* not stdin */ {
  2558. /* We must clear the handler, since after this point calling it would
  2559. * delete both the source and destination files.
  2560. */
  2561. clearHandler();
  2562. if (FIO_removeFile(srcFileName)) {
  2563. /* failed to remove src file */
  2564. DISPLAYLEVEL(1, "zstd: %s: %s \n", srcFileName, strerror(errno));
  2565. return 1;
  2566. } }
  2567. return result;
  2568. }
  2569. int FIO_decompressFilename(FIO_ctx_t* const fCtx, FIO_prefs_t* const prefs,
  2570. const char* dstFileName, const char* srcFileName,
  2571. const char* dictFileName)
  2572. {
  2573. dRess_t const ress = FIO_createDResources(prefs, dictFileName);
  2574. int const decodingError = FIO_decompressSrcFile(fCtx, prefs, ress, dstFileName, srcFileName);
  2575. FIO_freeDResources(ress);
  2576. return decodingError;
  2577. }
  2578. static const char *suffixList[] = {
  2579. ZSTD_EXTENSION,
  2580. TZSTD_EXTENSION,
  2581. #ifndef ZSTD_NODECOMPRESS
  2582. ZSTD_ALT_EXTENSION,
  2583. #endif
  2584. #ifdef ZSTD_GZDECOMPRESS
  2585. GZ_EXTENSION,
  2586. TGZ_EXTENSION,
  2587. #endif
  2588. #ifdef ZSTD_LZMADECOMPRESS
  2589. LZMA_EXTENSION,
  2590. XZ_EXTENSION,
  2591. TXZ_EXTENSION,
  2592. #endif
  2593. #ifdef ZSTD_LZ4DECOMPRESS
  2594. LZ4_EXTENSION,
  2595. TLZ4_EXTENSION,
  2596. #endif
  2597. NULL
  2598. };
  2599. static const char *suffixListStr =
  2600. ZSTD_EXTENSION "/" TZSTD_EXTENSION
  2601. #ifdef ZSTD_GZDECOMPRESS
  2602. "/" GZ_EXTENSION "/" TGZ_EXTENSION
  2603. #endif
  2604. #ifdef ZSTD_LZMADECOMPRESS
  2605. "/" LZMA_EXTENSION "/" XZ_EXTENSION "/" TXZ_EXTENSION
  2606. #endif
  2607. #ifdef ZSTD_LZ4DECOMPRESS
  2608. "/" LZ4_EXTENSION "/" TLZ4_EXTENSION
  2609. #endif
  2610. ;
  2611. /* FIO_determineDstName() :
  2612. * create a destination filename from a srcFileName.
  2613. * @return a pointer to it.
  2614. * @return == NULL if there is an error */
  2615. static const char*
  2616. FIO_determineDstName(const char* srcFileName, const char* outDirName)
  2617. {
  2618. static size_t dfnbCapacity = 0;
  2619. static char* dstFileNameBuffer = NULL; /* using static allocation : this function cannot be multi-threaded */
  2620. size_t dstFileNameEndPos;
  2621. char* outDirFilename = NULL;
  2622. const char* dstSuffix = "";
  2623. size_t dstSuffixLen = 0;
  2624. size_t sfnSize = strlen(srcFileName);
  2625. size_t srcSuffixLen;
  2626. const char* const srcSuffix = strrchr(srcFileName, '.');
  2627. if(!strcmp(srcFileName, stdinmark)) {
  2628. return stdoutmark;
  2629. }
  2630. if (srcSuffix == NULL) {
  2631. DISPLAYLEVEL(1,
  2632. "zstd: %s: unknown suffix (%s expected). "
  2633. "Can't derive the output file name. "
  2634. "Specify it with -o dstFileName. Ignoring.\n",
  2635. srcFileName, suffixListStr);
  2636. return NULL;
  2637. }
  2638. srcSuffixLen = strlen(srcSuffix);
  2639. {
  2640. const char** matchedSuffixPtr;
  2641. for (matchedSuffixPtr = suffixList; *matchedSuffixPtr != NULL; matchedSuffixPtr++) {
  2642. if (!strcmp(*matchedSuffixPtr, srcSuffix)) {
  2643. break;
  2644. }
  2645. }
  2646. /* check suffix is authorized */
  2647. if (sfnSize <= srcSuffixLen || *matchedSuffixPtr == NULL) {
  2648. DISPLAYLEVEL(1,
  2649. "zstd: %s: unknown suffix (%s expected). "
  2650. "Can't derive the output file name. "
  2651. "Specify it with -o dstFileName. Ignoring.\n",
  2652. srcFileName, suffixListStr);
  2653. return NULL;
  2654. }
  2655. if ((*matchedSuffixPtr)[1] == 't') {
  2656. dstSuffix = ".tar";
  2657. dstSuffixLen = strlen(dstSuffix);
  2658. }
  2659. }
  2660. if (outDirName) {
  2661. outDirFilename = FIO_createFilename_fromOutDir(srcFileName, outDirName, 0);
  2662. sfnSize = strlen(outDirFilename);
  2663. assert(outDirFilename != NULL);
  2664. }
  2665. if (dfnbCapacity+srcSuffixLen <= sfnSize+1+dstSuffixLen) {
  2666. /* allocate enough space to write dstFilename into it */
  2667. free(dstFileNameBuffer);
  2668. dfnbCapacity = sfnSize + 20;
  2669. dstFileNameBuffer = (char*)malloc(dfnbCapacity);
  2670. if (dstFileNameBuffer==NULL)
  2671. EXM_THROW(74, "%s : not enough memory for dstFileName",
  2672. strerror(errno));
  2673. }
  2674. /* return dst name == src name truncated from suffix */
  2675. assert(dstFileNameBuffer != NULL);
  2676. dstFileNameEndPos = sfnSize - srcSuffixLen;
  2677. if (outDirFilename) {
  2678. memcpy(dstFileNameBuffer, outDirFilename, dstFileNameEndPos);
  2679. free(outDirFilename);
  2680. } else {
  2681. memcpy(dstFileNameBuffer, srcFileName, dstFileNameEndPos);
  2682. }
  2683. /* The short tar extensions tzst, tgz, txz and tlz4 files should have "tar"
  2684. * extension on decompression. Also writes terminating null. */
  2685. strcpy(dstFileNameBuffer + dstFileNameEndPos, dstSuffix);
  2686. return dstFileNameBuffer;
  2687. /* note : dstFileNameBuffer memory is not going to be free */
  2688. }
  2689. int
  2690. FIO_decompressMultipleFilenames(FIO_ctx_t* const fCtx,
  2691. FIO_prefs_t* const prefs,
  2692. const char** srcNamesTable,
  2693. const char* outMirroredRootDirName,
  2694. const char* outDirName, const char* outFileName,
  2695. const char* dictFileName)
  2696. {
  2697. int status;
  2698. int error = 0;
  2699. dRess_t ress = FIO_createDResources(prefs, dictFileName);
  2700. if (outFileName) {
  2701. if (FIO_multiFilesConcatWarning(fCtx, prefs, outFileName, 1 /* displayLevelCutoff */)) {
  2702. FIO_freeDResources(ress);
  2703. return 1;
  2704. }
  2705. if (!prefs->testMode) {
  2706. FILE* dstFile = FIO_openDstFile(fCtx, prefs, NULL, outFileName, DEFAULT_FILE_PERMISSIONS);
  2707. if (dstFile == 0) EXM_THROW(19, "cannot open %s", outFileName);
  2708. AIO_WritePool_setFile(ress.writeCtx, dstFile);
  2709. }
  2710. for (; fCtx->currFileIdx < fCtx->nbFilesTotal; fCtx->currFileIdx++) {
  2711. status = FIO_decompressSrcFile(fCtx, prefs, ress, outFileName, srcNamesTable[fCtx->currFileIdx]);
  2712. if (!status) fCtx->nbFilesProcessed++;
  2713. error |= status;
  2714. }
  2715. if ((!prefs->testMode) && (AIO_WritePool_closeFile(ress.writeCtx)))
  2716. EXM_THROW(72, "Write error : %s : cannot properly close output file",
  2717. strerror(errno));
  2718. } else {
  2719. if (outMirroredRootDirName)
  2720. UTIL_mirrorSourceFilesDirectories(srcNamesTable, (unsigned)fCtx->nbFilesTotal, outMirroredRootDirName);
  2721. for (; fCtx->currFileIdx < fCtx->nbFilesTotal; fCtx->currFileIdx++) { /* create dstFileName */
  2722. const char* const srcFileName = srcNamesTable[fCtx->currFileIdx];
  2723. const char* dstFileName = NULL;
  2724. if (outMirroredRootDirName) {
  2725. char* validMirroredDirName = UTIL_createMirroredDestDirName(srcFileName, outMirroredRootDirName);
  2726. if (validMirroredDirName) {
  2727. dstFileName = FIO_determineDstName(srcFileName, validMirroredDirName);
  2728. free(validMirroredDirName);
  2729. } else {
  2730. DISPLAYLEVEL(2, "zstd: --output-dir-mirror cannot decompress '%s' into '%s'\n", srcFileName, outMirroredRootDirName);
  2731. }
  2732. } else {
  2733. dstFileName = FIO_determineDstName(srcFileName, outDirName);
  2734. }
  2735. if (dstFileName == NULL) { error=1; continue; }
  2736. status = FIO_decompressSrcFile(fCtx, prefs, ress, dstFileName, srcFileName);
  2737. if (!status) fCtx->nbFilesProcessed++;
  2738. error |= status;
  2739. }
  2740. if (outDirName)
  2741. FIO_checkFilenameCollisions(srcNamesTable , (unsigned)fCtx->nbFilesTotal);
  2742. }
  2743. if (FIO_shouldDisplayMultipleFileSummary(fCtx)) {
  2744. DISPLAY_PROGRESS("\r%79s\r", "");
  2745. DISPLAY_SUMMARY("%d files decompressed : %6llu bytes total \n",
  2746. fCtx->nbFilesProcessed, (unsigned long long)fCtx->totalBytesOutput);
  2747. }
  2748. FIO_freeDResources(ress);
  2749. return error;
  2750. }
  2751. /* **************************************************************************
  2752. * .zst file info (--list command)
  2753. ***************************************************************************/
  2754. typedef struct {
  2755. U64 decompressedSize;
  2756. U64 compressedSize;
  2757. U64 windowSize;
  2758. int numActualFrames;
  2759. int numSkippableFrames;
  2760. int decompUnavailable;
  2761. int usesCheck;
  2762. BYTE checksum[4];
  2763. U32 nbFiles;
  2764. unsigned dictID;
  2765. } fileInfo_t;
  2766. typedef enum {
  2767. info_success=0,
  2768. info_frame_error=1,
  2769. info_not_zstd=2,
  2770. info_file_error=3,
  2771. info_truncated_input=4
  2772. } InfoError;
  2773. #define ERROR_IF(c,n,...) { \
  2774. if (c) { \
  2775. DISPLAYLEVEL(1, __VA_ARGS__); \
  2776. DISPLAYLEVEL(1, " \n"); \
  2777. return n; \
  2778. } \
  2779. }
  2780. static InfoError
  2781. FIO_analyzeFrames(fileInfo_t* info, FILE* const srcFile)
  2782. {
  2783. /* begin analyzing frame */
  2784. for ( ; ; ) {
  2785. BYTE headerBuffer[ZSTD_FRAMEHEADERSIZE_MAX];
  2786. size_t const numBytesRead = fread(headerBuffer, 1, sizeof(headerBuffer), srcFile);
  2787. if (numBytesRead < ZSTD_FRAMEHEADERSIZE_MIN(ZSTD_f_zstd1)) {
  2788. if ( feof(srcFile)
  2789. && (numBytesRead == 0)
  2790. && (info->compressedSize > 0)
  2791. && (info->compressedSize != UTIL_FILESIZE_UNKNOWN) ) {
  2792. unsigned long long file_position = (unsigned long long) LONG_TELL(srcFile);
  2793. unsigned long long file_size = (unsigned long long) info->compressedSize;
  2794. ERROR_IF(file_position != file_size, info_truncated_input,
  2795. "Error: seeked to position %llu, which is beyond file size of %llu\n",
  2796. file_position,
  2797. file_size);
  2798. break; /* correct end of file => success */
  2799. }
  2800. ERROR_IF(feof(srcFile), info_not_zstd, "Error: reached end of file with incomplete frame");
  2801. ERROR_IF(1, info_frame_error, "Error: did not reach end of file but ran out of frames");
  2802. }
  2803. { U32 const magicNumber = MEM_readLE32(headerBuffer);
  2804. /* Zstandard frame */
  2805. if (magicNumber == ZSTD_MAGICNUMBER) {
  2806. ZSTD_frameHeader header;
  2807. U64 const frameContentSize = ZSTD_getFrameContentSize(headerBuffer, numBytesRead);
  2808. if ( frameContentSize == ZSTD_CONTENTSIZE_ERROR
  2809. || frameContentSize == ZSTD_CONTENTSIZE_UNKNOWN ) {
  2810. info->decompUnavailable = 1;
  2811. } else {
  2812. info->decompressedSize += frameContentSize;
  2813. }
  2814. ERROR_IF(ZSTD_getFrameHeader(&header, headerBuffer, numBytesRead) != 0,
  2815. info_frame_error, "Error: could not decode frame header");
  2816. if (info->dictID != 0 && info->dictID != header.dictID) {
  2817. DISPLAY("WARNING: File contains multiple frames with different dictionary IDs. Showing dictID 0 instead");
  2818. info->dictID = 0;
  2819. } else {
  2820. info->dictID = header.dictID;
  2821. }
  2822. info->windowSize = header.windowSize;
  2823. /* move to the end of the frame header */
  2824. { size_t const headerSize = ZSTD_frameHeaderSize(headerBuffer, numBytesRead);
  2825. ERROR_IF(ZSTD_isError(headerSize), info_frame_error, "Error: could not determine frame header size");
  2826. ERROR_IF(fseek(srcFile, ((long)headerSize)-((long)numBytesRead), SEEK_CUR) != 0,
  2827. info_frame_error, "Error: could not move to end of frame header");
  2828. }
  2829. /* skip all blocks in the frame */
  2830. { int lastBlock = 0;
  2831. do {
  2832. BYTE blockHeaderBuffer[3];
  2833. ERROR_IF(fread(blockHeaderBuffer, 1, 3, srcFile) != 3,
  2834. info_frame_error, "Error while reading block header");
  2835. { U32 const blockHeader = MEM_readLE24(blockHeaderBuffer);
  2836. U32 const blockTypeID = (blockHeader >> 1) & 3;
  2837. U32 const isRLE = (blockTypeID == 1);
  2838. U32 const isWrongBlock = (blockTypeID == 3);
  2839. long const blockSize = isRLE ? 1 : (long)(blockHeader >> 3);
  2840. ERROR_IF(isWrongBlock, info_frame_error, "Error: unsupported block type");
  2841. lastBlock = blockHeader & 1;
  2842. ERROR_IF(fseek(srcFile, blockSize, SEEK_CUR) != 0,
  2843. info_frame_error, "Error: could not skip to end of block");
  2844. }
  2845. } while (lastBlock != 1);
  2846. }
  2847. /* check if checksum is used */
  2848. { BYTE const frameHeaderDescriptor = headerBuffer[4];
  2849. int const contentChecksumFlag = (frameHeaderDescriptor & (1 << 2)) >> 2;
  2850. if (contentChecksumFlag) {
  2851. info->usesCheck = 1;
  2852. ERROR_IF(fread(info->checksum, 1, 4, srcFile) != 4,
  2853. info_frame_error, "Error: could not read checksum");
  2854. } }
  2855. info->numActualFrames++;
  2856. }
  2857. /* Skippable frame */
  2858. else if ((magicNumber & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) {
  2859. U32 const frameSize = MEM_readLE32(headerBuffer + 4);
  2860. long const seek = (long)(8 + frameSize - numBytesRead);
  2861. ERROR_IF(LONG_SEEK(srcFile, seek, SEEK_CUR) != 0,
  2862. info_frame_error, "Error: could not find end of skippable frame");
  2863. info->numSkippableFrames++;
  2864. }
  2865. /* unknown content */
  2866. else {
  2867. return info_not_zstd;
  2868. }
  2869. } /* magic number analysis */
  2870. } /* end analyzing frames */
  2871. return info_success;
  2872. }
  2873. static InfoError
  2874. getFileInfo_fileConfirmed(fileInfo_t* info, const char* inFileName)
  2875. {
  2876. InfoError status;
  2877. stat_t srcFileStat;
  2878. FILE* const srcFile = FIO_openSrcFile(NULL, inFileName, &srcFileStat);
  2879. ERROR_IF(srcFile == NULL, info_file_error, "Error: could not open source file %s", inFileName);
  2880. info->compressedSize = UTIL_getFileSizeStat(&srcFileStat);
  2881. status = FIO_analyzeFrames(info, srcFile);
  2882. fclose(srcFile);
  2883. info->nbFiles = 1;
  2884. return status;
  2885. }
  2886. /** getFileInfo() :
  2887. * Reads information from file, stores in *info
  2888. * @return : InfoError status
  2889. */
  2890. static InfoError
  2891. getFileInfo(fileInfo_t* info, const char* srcFileName)
  2892. {
  2893. ERROR_IF(!UTIL_isRegularFile(srcFileName),
  2894. info_file_error, "Error : %s is not a file", srcFileName);
  2895. return getFileInfo_fileConfirmed(info, srcFileName);
  2896. }
  2897. static void
  2898. displayInfo(const char* inFileName, const fileInfo_t* info, int displayLevel)
  2899. {
  2900. UTIL_HumanReadableSize_t const window_hrs = UTIL_makeHumanReadableSize(info->windowSize);
  2901. UTIL_HumanReadableSize_t const compressed_hrs = UTIL_makeHumanReadableSize(info->compressedSize);
  2902. UTIL_HumanReadableSize_t const decompressed_hrs = UTIL_makeHumanReadableSize(info->decompressedSize);
  2903. double const ratio = (info->compressedSize == 0) ? 0 : ((double)info->decompressedSize)/(double)info->compressedSize;
  2904. const char* const checkString = (info->usesCheck ? "XXH64" : "None");
  2905. if (displayLevel <= 2) {
  2906. if (!info->decompUnavailable) {
  2907. DISPLAYOUT("%6d %5d %6.*f%4s %8.*f%4s %5.3f %5s %s\n",
  2908. info->numSkippableFrames + info->numActualFrames,
  2909. info->numSkippableFrames,
  2910. compressed_hrs.precision, compressed_hrs.value, compressed_hrs.suffix,
  2911. decompressed_hrs.precision, decompressed_hrs.value, decompressed_hrs.suffix,
  2912. ratio, checkString, inFileName);
  2913. } else {
  2914. DISPLAYOUT("%6d %5d %6.*f%4s %5s %s\n",
  2915. info->numSkippableFrames + info->numActualFrames,
  2916. info->numSkippableFrames,
  2917. compressed_hrs.precision, compressed_hrs.value, compressed_hrs.suffix,
  2918. checkString, inFileName);
  2919. }
  2920. } else {
  2921. DISPLAYOUT("%s \n", inFileName);
  2922. DISPLAYOUT("# Zstandard Frames: %d\n", info->numActualFrames);
  2923. if (info->numSkippableFrames)
  2924. DISPLAYOUT("# Skippable Frames: %d\n", info->numSkippableFrames);
  2925. DISPLAYOUT("DictID: %u\n", info->dictID);
  2926. DISPLAYOUT("Window Size: %.*f%s (%llu B)\n",
  2927. window_hrs.precision, window_hrs.value, window_hrs.suffix,
  2928. (unsigned long long)info->windowSize);
  2929. DISPLAYOUT("Compressed Size: %.*f%s (%llu B)\n",
  2930. compressed_hrs.precision, compressed_hrs.value, compressed_hrs.suffix,
  2931. (unsigned long long)info->compressedSize);
  2932. if (!info->decompUnavailable) {
  2933. DISPLAYOUT("Decompressed Size: %.*f%s (%llu B)\n",
  2934. decompressed_hrs.precision, decompressed_hrs.value, decompressed_hrs.suffix,
  2935. (unsigned long long)info->decompressedSize);
  2936. DISPLAYOUT("Ratio: %.4f\n", ratio);
  2937. }
  2938. if (info->usesCheck && info->numActualFrames == 1) {
  2939. DISPLAYOUT("Check: %s %02x%02x%02x%02x\n", checkString,
  2940. info->checksum[3], info->checksum[2],
  2941. info->checksum[1], info->checksum[0]
  2942. );
  2943. } else {
  2944. DISPLAYOUT("Check: %s\n", checkString);
  2945. }
  2946. DISPLAYOUT("\n");
  2947. }
  2948. }
  2949. static fileInfo_t FIO_addFInfo(fileInfo_t fi1, fileInfo_t fi2)
  2950. {
  2951. fileInfo_t total;
  2952. memset(&total, 0, sizeof(total));
  2953. total.numActualFrames = fi1.numActualFrames + fi2.numActualFrames;
  2954. total.numSkippableFrames = fi1.numSkippableFrames + fi2.numSkippableFrames;
  2955. total.compressedSize = fi1.compressedSize + fi2.compressedSize;
  2956. total.decompressedSize = fi1.decompressedSize + fi2.decompressedSize;
  2957. total.decompUnavailable = fi1.decompUnavailable | fi2.decompUnavailable;
  2958. total.usesCheck = fi1.usesCheck & fi2.usesCheck;
  2959. total.nbFiles = fi1.nbFiles + fi2.nbFiles;
  2960. return total;
  2961. }
  2962. static int
  2963. FIO_listFile(fileInfo_t* total, const char* inFileName, int displayLevel)
  2964. {
  2965. fileInfo_t info;
  2966. memset(&info, 0, sizeof(info));
  2967. { InfoError const error = getFileInfo(&info, inFileName);
  2968. switch (error) {
  2969. case info_frame_error:
  2970. /* display error, but provide output */
  2971. DISPLAYLEVEL(1, "Error while parsing \"%s\" \n", inFileName);
  2972. break;
  2973. case info_not_zstd:
  2974. DISPLAYOUT("File \"%s\" not compressed by zstd \n", inFileName);
  2975. if (displayLevel > 2) DISPLAYOUT("\n");
  2976. return 1;
  2977. case info_file_error:
  2978. /* error occurred while opening the file */
  2979. if (displayLevel > 2) DISPLAYOUT("\n");
  2980. return 1;
  2981. case info_truncated_input:
  2982. DISPLAYOUT("File \"%s\" is truncated \n", inFileName);
  2983. if (displayLevel > 2) DISPLAYOUT("\n");
  2984. return 1;
  2985. case info_success:
  2986. default:
  2987. break;
  2988. }
  2989. displayInfo(inFileName, &info, displayLevel);
  2990. *total = FIO_addFInfo(*total, info);
  2991. assert(error == info_success || error == info_frame_error);
  2992. return (int)error;
  2993. }
  2994. }
  2995. int FIO_listMultipleFiles(unsigned numFiles, const char** filenameTable, int displayLevel)
  2996. {
  2997. /* ensure no specified input is stdin (needs fseek() capability) */
  2998. { unsigned u;
  2999. for (u=0; u<numFiles;u++) {
  3000. ERROR_IF(!strcmp (filenameTable[u], stdinmark),
  3001. 1, "zstd: --list does not support reading from standard input");
  3002. } }
  3003. if (numFiles == 0) {
  3004. if (!UTIL_isConsole(stdin)) {
  3005. DISPLAYLEVEL(1, "zstd: --list does not support reading from standard input \n");
  3006. }
  3007. DISPLAYLEVEL(1, "No files given \n");
  3008. return 1;
  3009. }
  3010. if (displayLevel <= 2) {
  3011. DISPLAYOUT("Frames Skips Compressed Uncompressed Ratio Check Filename\n");
  3012. }
  3013. { int error = 0;
  3014. fileInfo_t total;
  3015. memset(&total, 0, sizeof(total));
  3016. total.usesCheck = 1;
  3017. /* --list each file, and check for any error */
  3018. { unsigned u;
  3019. for (u=0; u<numFiles;u++) {
  3020. error |= FIO_listFile(&total, filenameTable[u], displayLevel);
  3021. } }
  3022. if (numFiles > 1 && displayLevel <= 2) { /* display total */
  3023. UTIL_HumanReadableSize_t const compressed_hrs = UTIL_makeHumanReadableSize(total.compressedSize);
  3024. UTIL_HumanReadableSize_t const decompressed_hrs = UTIL_makeHumanReadableSize(total.decompressedSize);
  3025. double const ratio = (total.compressedSize == 0) ? 0 : ((double)total.decompressedSize)/(double)total.compressedSize;
  3026. const char* const checkString = (total.usesCheck ? "XXH64" : "");
  3027. DISPLAYOUT("----------------------------------------------------------------- \n");
  3028. if (total.decompUnavailable) {
  3029. DISPLAYOUT("%6d %5d %6.*f%4s %5s %u files\n",
  3030. total.numSkippableFrames + total.numActualFrames,
  3031. total.numSkippableFrames,
  3032. compressed_hrs.precision, compressed_hrs.value, compressed_hrs.suffix,
  3033. checkString, (unsigned)total.nbFiles);
  3034. } else {
  3035. DISPLAYOUT("%6d %5d %6.*f%4s %8.*f%4s %5.3f %5s %u files\n",
  3036. total.numSkippableFrames + total.numActualFrames,
  3037. total.numSkippableFrames,
  3038. compressed_hrs.precision, compressed_hrs.value, compressed_hrs.suffix,
  3039. decompressed_hrs.precision, decompressed_hrs.value, decompressed_hrs.suffix,
  3040. ratio, checkString, (unsigned)total.nbFiles);
  3041. } }
  3042. return error;
  3043. }
  3044. }
  3045. #endif /* #ifndef ZSTD_NODECOMPRESS */