zstdcli.c 68 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494
  1. /*
  2. * Copyright (c) Yann Collet, Facebook, Inc.
  3. * All rights reserved.
  4. *
  5. * This source code is licensed under both the BSD-style license (found in the
  6. * LICENSE file in the root directory of this source tree) and the GPLv2 (found
  7. * in the COPYING file in the root directory of this source tree).
  8. * You may select, at your option, one of the above-listed licenses.
  9. */
  10. /*-************************************
  11. * Tuning parameters
  12. **************************************/
  /* Build-time tunables : each one is only defined here when the build system
   * has not already provided a value (e.g. through -D on the compiler command line). */
  /* Compression level reported as the default for `-#` and `-b#` in the help text. */
  13. #ifndef ZSTDCLI_CLEVEL_DEFAULT
  14. # define ZSTDCLI_CLEVEL_DEFAULT 3
  15. #endif
  /* Highest level advertised for `-#` in the help text; `--ultra` is documented
   * as unlocking the levels above it. */
  16. #ifndef ZSTDCLI_CLEVEL_MAX
  17. # define ZSTDCLI_CLEVEL_MAX 19 /* without using --ultra */
  18. #endif
  /* Default number of threads.
   * NOTE(review): not referenced in the visible part of this file - confirm where it is consumed. */
  19. #ifndef ZSTDCLI_NBTHREADS_DEFAULT
  20. # define ZSTDCLI_NBTHREADS_DEFAULT 1
  21. #endif
  22. /*-************************************
  23. * Dependencies
  24. **************************************/
  25. #include "platform.h" /* IS_CONSOLE, PLATFORM_POSIX_VERSION */
  26. #include "util.h" /* UTIL_HAS_CREATEFILELIST, UTIL_createFileList */
  27. #include <stdlib.h> /* getenv */
  28. #include <string.h> /* strcmp, strlen */
  29. #include <stdio.h> /* fprintf(), stdin, stdout, stderr */
  30. #include <errno.h> /* errno */
  31. #include <assert.h> /* assert */
  32. #include "fileio.h" /* stdinmark, stdoutmark, ZSTD_EXTENSION */
  33. #ifndef ZSTD_NOBENCH
  34. # include "benchzstd.h" /* BMK_benchFiles */
  35. #endif
  36. #ifndef ZSTD_NODICT
  37. # include "dibio.h" /* ZDICT_cover_params_t, DiB_trainFromFiles() */
  38. #endif
  39. #ifndef ZSTD_NOTRACE
  40. # include "zstdcli_trace.h"
  41. #endif
  42. #include "../lib/zstd.h" /* ZSTD_VERSION_STRING, ZSTD_minCLevel, ZSTD_maxCLevel */
  43. /*-************************************
  44. * Constants
  45. **************************************/
  /* Identification strings used to build the banner. */
  46. #define COMPRESSOR_NAME "zstd command line interface"
  /* ZSTD_VERSION may be supplied by the build; otherwise derive it from the library header. */
  47. #ifndef ZSTD_VERSION
  48. # define ZSTD_VERSION "v" ZSTD_VERSION_STRING
  49. #endif
  50. #define AUTHOR "Yann Collet"
  /* Expands to a format string FOLLOWED BY its arguments : it must be used as the
   * complete argument list of a printf-like macro, e.g. DISPLAYOUT(WELCOME_MESSAGE). */
  51. #define WELCOME_MESSAGE "*** %s %i-bits %s, by %s ***\n", COMPRESSOR_NAME, (int)(sizeof(size_t)*8), ZSTD_VERSION, AUTHOR
  /* Alternative program names.
   * NOTE(review): presumably matched against the executable name (see exeNameMatch())
   * to select a compatibility mode - the code doing so is not in the visible part of the file. */
  52. #define ZSTD_ZSTDMT "zstdmt"
  53. #define ZSTD_UNZSTD "unzstd"
  54. #define ZSTD_CAT "zstdcat"
  55. #define ZSTD_ZCAT "zcat"
  56. #define ZSTD_GZ "gzip"
  57. #define ZSTD_GUNZIP "gunzip"
  58. #define ZSTD_GZCAT "gzcat"
  59. #define ZSTD_LZMA "lzma"
  60. #define ZSTD_UNLZMA "unlzma"
  61. #define ZSTD_XZ "xz"
  62. #define ZSTD_UNXZ "unxz"
  63. #define ZSTD_LZ4 "lz4"
  64. #define ZSTD_UNLZ4 "unlz4"
  /* Postfix size multipliers : `110 KB` expands to `110 *(1 <<10)`.
   * They are deliberately NOT parenthesized as a whole, since they must bind to the number on their left. */
  65. #define KB *(1 <<10)
  66. #define MB *(1 <<20)
  67. #define GB *(1U<<30)
  /* Initial value of g_displayLevel. */
  68. #define DISPLAY_LEVEL_DEFAULT 2
  /* Defaults for the dictionary builder; several are echoed in the advanced help text. */
  69. static const char* g_defaultDictName = "dictionary";
  70. static const unsigned g_defaultMaxDictSize = 110 KB;
  71. static const int g_defaultDictCLevel = 3;
  72. static const unsigned g_defaultSelectivityLevel = 9;
  /* Window log advertised as the default for `--long` in the help text. */
  73. static const unsigned g_defaultMaxWindowLog = 27;
  /* 9999 is used as a "not set by the user" marker for parameters where 0 is a legal value. */
  74. #define OVERLAP_LOG_DEFAULT 9999
  75. #define LDM_PARAM_DEFAULT 9999 /* Default for parameters where 0 is valid */
  /* File-scope advanced parameters; parseCompressionParameters() overwrites them
   * when the matching key (overlapLog, ldmHashLog, ldmMinMatch, ldmHashRateLog,
   * ldmBucketSizeLog or their short aliases) is present. */
  76. static U32 g_overlapLog = OVERLAP_LOG_DEFAULT;
  77. static U32 g_ldmHashLog = 0;
  78. static U32 g_ldmMinMatch = 0;
  79. static U32 g_ldmHashRateLog = LDM_PARAM_DEFAULT;
  80. static U32 g_ldmBucketSizeLog = LDM_PARAM_DEFAULT;
  /* Default `accel` value installed by defaultFastCoverParams(). */
  81. #define DEFAULT_ACCEL 1
  /* Dictionary training algorithm selector.
   * NOTE(review): presumably maps to --train-cover / --train-fastcover / --train-legacy - confirm in main(). */
  82. typedef enum { cover, fastCover, legacy } dictType;
  83. /*-************************************
  84. * Display Macros
  85. **************************************/
  /* DISPLAY_F     : printf-style output to an explicit stream.
   * DISPLAYOUT    : same, to stdout.
   * DISPLAY       : same, to stderr.
   * DISPLAYLEVEL  : prints to stderr only when g_displayLevel is at least `l`.
   *                 It expands to a braced block (not do/while(0)), so avoid using it
   *                 as the unbraced body of an `if` that is followed by `else`. */
  86. #define DISPLAY_F(f, ...) fprintf((f), __VA_ARGS__)
  87. #define DISPLAYOUT(...) DISPLAY_F(stdout, __VA_ARGS__)
  88. #define DISPLAY(...) DISPLAY_F(stderr, __VA_ARGS__)
  89. #define DISPLAYLEVEL(l, ...) { if (g_displayLevel>=l) { DISPLAY(__VA_ARGS__); } }
  /* Current verbosity, consulted by DISPLAYLEVEL() and badusage(). */
  90. static int g_displayLevel = DISPLAY_LEVEL_DEFAULT; /* 0 : no display, 1: errors, 2 : + result + interaction + warnings, 3 : + progression, 4 : + information */
  91. /*-************************************
  92. * Check Version (when CLI linked to dynamic library)
  93. **************************************/
  94. /* Due to usage of experimental symbols and capabilities by the CLI,
  95. * the CLI must be linked against a dynamic library of same version */
  96. static void checkLibVersion(void)
  97. {
  98. if (strcmp(ZSTD_VERSION_STRING, ZSTD_versionString())) {
  99. DISPLAYLEVEL(1, "Error : incorrect library version (expecting : %s ; actual : %s ) \n",
  100. ZSTD_VERSION_STRING, ZSTD_versionString());
  101. DISPLAYLEVEL(1, "Please update library to version %s, or use stand-alone zstd binary \n",
  102. ZSTD_VERSION_STRING);
  103. exit(1);
  104. }
  105. }
  106. /*-************************************
  107. * Command Line
  108. **************************************/
  109. /* print help either in `stderr` or `stdout` depending on originating request
  110. * error (badusage) => stderr
  111. * help (usage_advanced) => stdout
  112. */
  113. static void usage(FILE* f, const char* programName)
  114. {
  115. DISPLAY_F(f, "Usage : \n");
  116. DISPLAY_F(f, " %s [args] [FILE(s)] [-o file] \n", programName);
  117. DISPLAY_F(f, "\n");
  118. DISPLAY_F(f, "FILE : a filename \n");
  119. DISPLAY_F(f, " with no FILE, or when FILE is - , read standard input\n");
  120. DISPLAY_F(f, "Arguments : \n");
  121. #ifndef ZSTD_NOCOMPRESS
  122. DISPLAY_F(f, " -# : # compression level (1-%d, default: %d) \n", ZSTDCLI_CLEVEL_MAX, ZSTDCLI_CLEVEL_DEFAULT);
  123. #endif
  124. #ifndef ZSTD_NODECOMPRESS
  125. DISPLAY_F(f, " -d : decompression \n");
  126. #endif
  127. DISPLAY_F(f, " -D DICT: use DICT as Dictionary for compression or decompression \n");
  128. DISPLAY_F(f, " -o file: result stored into `file` (only 1 output file) \n");
  129. DISPLAY_F(f, " -f : disable input and output checks. Allows overwriting existing files,\n");
  130. DISPLAY_F(f, " input from console, output to stdout, operating on links,\n");
  131. DISPLAY_F(f, " block devices, etc.\n");
  132. DISPLAY_F(f, "--rm : remove source file(s) after successful de/compression \n");
  133. DISPLAY_F(f, " -k : preserve source file(s) (default) \n");
  134. DISPLAY_F(f, " -h/-H : display help/long help and exit \n");
  135. }
  136. static void usage_advanced(const char* programName)
  137. {
  138. DISPLAYOUT(WELCOME_MESSAGE);
  139. usage(stdout, programName);
  140. DISPLAYOUT( "\n");
  141. DISPLAYOUT( "Advanced arguments : \n");
  142. DISPLAYOUT( " -V : display Version number and exit \n");
  143. DISPLAYOUT( " -c : write to standard output (even if it is the console) \n");
  144. DISPLAYOUT( " -v : verbose mode; specify multiple times to increase verbosity \n");
  145. DISPLAYOUT( " -q : suppress warnings; specify twice to suppress errors too \n");
  146. DISPLAYOUT( "--[no-]progress : forcibly display, or never display the progress counter.\n");
  147. DISPLAYOUT( " note: any (de)compressed output to terminal will mix with progress counter text. \n");
  148. #ifdef UTIL_HAS_CREATEFILELIST
  149. DISPLAYOUT( " -r : operate recursively on directories \n");
  150. DISPLAYOUT( "--filelist FILE : read list of files to operate upon from FILE \n");
  151. DISPLAYOUT( "--output-dir-flat DIR : processed files are stored into DIR \n");
  152. #endif
  153. #ifdef UTIL_HAS_MIRRORFILELIST
  154. DISPLAYOUT( "--output-dir-mirror DIR : processed files are stored into DIR respecting original directory structure \n");
  155. #endif
  156. #ifndef ZSTD_NOCOMPRESS
  157. DISPLAYOUT( "--[no-]check : during compression, add XXH64 integrity checksum to frame (default: enabled)");
  158. #ifndef ZSTD_NODECOMPRESS
  159. DISPLAYOUT( ". If specified with -d, decompressor will ignore/validate checksums in compressed frame (default: validate).");
  160. #endif
  161. #else
  162. #ifdef ZSTD_NOCOMPRESS
  163. DISPLAYOUT( "--[no-]check : during decompression, ignore/validate checksums in compressed frame (default: validate).");
  164. #endif
  165. #endif /* ZSTD_NOCOMPRESS */
  166. #ifndef ZSTD_NOTRACE
  167. DISPLAYOUT( "\n");
  168. DISPLAYOUT( "--trace FILE : log tracing information to FILE.");
  169. #endif
  170. DISPLAYOUT( "\n");
  171. DISPLAYOUT( "-- : All arguments after \"--\" are treated as files \n");
  172. #ifndef ZSTD_NOCOMPRESS
  173. DISPLAYOUT( "\n");
  174. DISPLAYOUT( "Advanced compression arguments : \n");
  175. DISPLAYOUT( "--ultra : enable levels beyond %i, up to %i (requires more memory) \n", ZSTDCLI_CLEVEL_MAX, ZSTD_maxCLevel());
  176. DISPLAYOUT( "--long[=#]: enable long distance matching with given window log (default: %u) \n", g_defaultMaxWindowLog);
  177. DISPLAYOUT( "--fast[=#]: switch to very fast compression levels (default: %u) \n", 1);
  178. DISPLAYOUT( "--adapt : dynamically adapt compression level to I/O conditions \n");
  179. DISPLAYOUT( "--[no-]row-match-finder : force enable/disable usage of fast row-based matchfinder for greedy, lazy, and lazy2 strategies \n");
  180. DISPLAYOUT( "--patch-from=FILE : specify the file to be used as a reference point for zstd's diff engine. \n");
  181. # ifdef ZSTD_MULTITHREAD
  182. DISPLAYOUT( " -T# : spawns # compression threads (default: 1, 0==# cores) \n");
  183. DISPLAYOUT( " -B# : select size of each job (default: 0==automatic) \n");
  184. DISPLAYOUT( "--single-thread : use a single thread for both I/O and compression (result slightly different than -T1) \n");
  185. DISPLAYOUT( "--auto-threads={physical,logical} (default: physical} : use either physical cores or logical cores as default when specifying -T0 \n");
  186. DISPLAYOUT( "--rsyncable : compress using a rsync-friendly method (-B sets block size) \n");
  187. # endif
  188. DISPLAYOUT( "--exclude-compressed: only compress files that are not already compressed \n");
  189. DISPLAYOUT( "--stream-size=# : specify size of streaming input from `stdin` \n");
  190. DISPLAYOUT( "--size-hint=# optimize compression parameters for streaming input of approximately this size \n");
  191. DISPLAYOUT( "--target-compressed-block-size=# : generate compressed block of approximately targeted size \n");
  192. DISPLAYOUT( "--no-dictID : don't write dictID into header (dictionary compression only) \n");
  193. DISPLAYOUT( "--[no-]compress-literals : force (un)compressed literals \n");
  194. DISPLAYOUT( "--format=zstd : compress files to the .zst format (default) \n");
  195. #ifdef ZSTD_GZCOMPRESS
  196. DISPLAYOUT( "--format=gzip : compress files to the .gz format \n");
  197. #endif
  198. #ifdef ZSTD_LZMACOMPRESS
  199. DISPLAYOUT( "--format=xz : compress files to the .xz format \n");
  200. DISPLAYOUT( "--format=lzma : compress files to the .lzma format \n");
  201. #endif
  202. #ifdef ZSTD_LZ4COMPRESS
  203. DISPLAYOUT( "--format=lz4 : compress files to the .lz4 format \n");
  204. #endif
  205. #endif /* !ZSTD_NOCOMPRESS */
  206. #ifndef ZSTD_NODECOMPRESS
  207. DISPLAYOUT( "\n");
  208. DISPLAYOUT( "Advanced decompression arguments : \n");
  209. DISPLAYOUT( " -l : print information about zstd compressed files \n");
  210. DISPLAYOUT( "--test : test compressed file integrity \n");
  211. DISPLAYOUT( " -M# : Set a memory usage limit for decompression \n");
  212. # if ZSTD_SPARSE_DEFAULT
  213. DISPLAYOUT( "--[no-]sparse : sparse mode (default: enabled on file, disabled on stdout) \n");
  214. # else
  215. DISPLAYOUT( "--[no-]sparse : sparse mode (default: disabled) \n");
  216. # endif
  217. #endif /* ZSTD_NODECOMPRESS */
  218. #ifndef ZSTD_NODICT
  219. DISPLAYOUT( "\n");
  220. DISPLAYOUT( "Dictionary builder : \n");
  221. DISPLAYOUT( "--train ## : create a dictionary from a training set of files \n");
  222. DISPLAYOUT( "--train-cover[=k=#,d=#,steps=#,split=#,shrink[=#]] : use the cover algorithm with optional args \n");
  223. DISPLAYOUT( "--train-fastcover[=k=#,d=#,f=#,steps=#,split=#,accel=#,shrink[=#]] : use the fast cover algorithm with optional args \n");
  224. DISPLAYOUT( "--train-legacy[=s=#] : use the legacy algorithm with selectivity (default: %u) \n", g_defaultSelectivityLevel);
  225. DISPLAYOUT( " -o DICT : DICT is dictionary name (default: %s) \n", g_defaultDictName);
  226. DISPLAYOUT( "--maxdict=# : limit dictionary to specified size (default: %u) \n", g_defaultMaxDictSize);
  227. DISPLAYOUT( "--dictID=# : force dictionary ID to specified value (default: random) \n");
  228. #endif
  229. #ifndef ZSTD_NOBENCH
  230. DISPLAYOUT( "\n");
  231. DISPLAYOUT( "Benchmark arguments : \n");
  232. DISPLAYOUT( " -b# : benchmark file(s), using # compression level (default: %d) \n", ZSTDCLI_CLEVEL_DEFAULT);
  233. DISPLAYOUT( " -e# : test all compression levels successively from -b# to -e# (default: 1) \n");
  234. DISPLAYOUT( " -i# : minimum evaluation time in seconds (default: 3s) \n");
  235. DISPLAYOUT( " -B# : cut file into independent blocks of size # (default: no block) \n");
  236. DISPLAYOUT( " -S : output one benchmark result per input file (default: consolidated result) \n");
  237. DISPLAYOUT( "--priority=rt : set process priority to real-time \n");
  238. #endif
  239. }
  240. static void badusage(const char* programName)
  241. {
  242. DISPLAYLEVEL(1, "Incorrect parameters \n");
  243. if (g_displayLevel >= 2) usage(stderr, programName);
  244. }
  /*! waitEnter() :
   *  Prints a prompt on stderr, then blocks until one character can be read from stdin.
   *  The character itself is discarded. */
  static void waitEnter(void)
  {
      DISPLAY("Press enter to continue... \n");
      (void)getchar();   /* value intentionally ignored */
  }
  /*! lastNameFromPath() :
   *  @return : pointer, inside `path`, to the component following the last '/'
   *            and then the last '\\' (windows) found in what remains.
   *            Returns `path` itself when it contains no separator. */
  static const char* lastNameFromPath(const char* path)
  {
      const char* base = path;
      const char* separator;

      separator = strrchr(base, '/');
      if (separator != NULL) base = separator + 1;

      separator = strrchr(base, '\\');   /* windows */
      if (separator != NULL) base = separator + 1;

      return base;
  }
  /*! exeNameMatch() :
   *  Checks whether `exeName` is `test`, optionally followed by an extension :
   *  `exeName` must start with `test`, and the character right after it
   *  must be either the end of string or a '.'.
   *  @return : 1 on match, 0 otherwise */
  static int exeNameMatch(const char* exeName, const char* test)
  {
      size_t const testLen = strlen(test);
      char following;

      if (strncmp(exeName, test, testLen) != 0)
          return 0;

      /* safe to read : the first testLen characters of exeName are non-null */
      following = exeName[testLen];
      return (following == '\0') || (following == '.');
  }
  /*! errorOut() :
   *  Prints `msg` on stderr, then terminates the program with exit code 1.
   *  Does not return. */
  static void errorOut(const char* msg)
  {
      DISPLAY("%s \n", msg);
      exit(1);
  }
  /*! readU32FromCharChecked() :
   *  Parses a decimal unsigned value at `*stringPtr`, optionally followed by a
   *  size suffix : K, KB, KiB (x1024) or M, MB, MiB (x1024x1024).
   *  An input without any leading digit parses as 0.
   * @return 0 on success : the result is stored in `*value` and `*stringPtr`
   *           is advanced to the first character which was not consumed.
   * @return 1 if the value overflows `unsigned` : `*value` is left untouched and
   *           `*stringPtr` stops on the digit or suffix that triggered the overflow. */
  static int readU32FromCharChecked(const char** stringPtr, unsigned* value)
  {
      unsigned const maxBeforeTimes10 = ((unsigned)(-1)) / 10;
      unsigned const maxBeforeShift10 = ((unsigned)(-1)) >> 10;
      const char* cursor = *stringPtr;
      unsigned acc = 0;
      int overflow = 0;

      /* digits : `break` leaves cursor on the offending digit */
      for ( ; (*cursor >= '0') && (*cursor <= '9'); cursor++) {
          unsigned const previous = acc;
          if (acc > maxBeforeTimes10) { overflow = 1; break; }
          acc = (acc * 10) + (unsigned)(*cursor - '0');
          if (acc < previous) { overflow = 1; break; }   /* wrapped around */
      }

      /* optional suffix : one 10-bit shift for K, two for M */
      if (!overflow && ((*cursor == 'K') || (*cursor == 'M'))) {
          int const nbShifts = (*cursor == 'M') ? 2 : 1;
          int s;
          for (s = 0; s < nbShifts; s++) {
              if (acc > maxBeforeShift10) { overflow = 1; break; }
              acc <<= 10;
          }
          if (!overflow) {
              cursor++;                        /* skip `K` or `M` */
              if (*cursor == 'i') cursor++;
              if (*cursor == 'B') cursor++;
          }
      }

      *stringPtr = cursor;
      if (overflow) return 1;
      *value = acc;
      return 0;
  }
  /*! readU32FromChar() :
   *  Convenience wrapper around readU32FromCharChecked().
   * @return : unsigned value read from `*stringPtr`
   *           (K, KB, KiB, M, MB and MiB suffixes are interpreted).
   *  `*stringPtr` is advanced to the position where reading stopped.
   *  Note : the program is terminated through errorOut() if the value overflows. */
  static unsigned readU32FromChar(const char** stringPtr) {
      unsigned parsed;
      int const overflowed = readU32FromCharChecked(stringPtr, &parsed);
      if (overflowed) {
          errorOut("error: numeric value overflows 32-bit unsigned int");
      }
      return parsed;
  }
  /*! readIntFromChar() :
   *  Reads an optional '-' sign, then an unsigned magnitude using
   *  readU32FromCharChecked() (K, KB, KiB, M, MB and MiB suffixes are interpreted).
   * @return : signed integer value read from `*stringPtr`.
   *  `*stringPtr` is advanced to the position where reading stopped.
   *  Note : the program is terminated through errorOut() when the magnitude
   *         overflows `unsigned`, OR when the signed result does not fit in `int`.
   *         Without the second check, magnitudes above INT_MAX would be silently
   *         converted into unrelated (typically negative) values. */
  static int readIntFromChar(const char** stringPtr) {
      static const char errorMsg[] = "error: numeric value overflows 32-bit int";
      /* largest positive int, assuming `int` and `unsigned` share the same width
       * (avoids a dependency on <limits.h>) */
      unsigned const maxPositive = ((unsigned)(-1)) >> 1;
      unsigned magnitude;
      int negative = 0;

      if (**stringPtr=='-') {
          (*stringPtr)++;
          negative = 1;
      }
      if (readU32FromCharChecked(stringPtr, &magnitude)) { errorOut(errorMsg); }

      if (!negative) {
          if (magnitude > maxPositive) { errorOut(errorMsg); }
          return (int)magnitude;
      }

      /* negative : one more value is representable (the most negative int) */
      if (magnitude > maxPositive + 1) { errorOut(errorMsg); }
      if (magnitude == maxPositive + 1) {
          return -(int)maxPositive - 1;   /* cannot be produced by negating a positive int */
      }
      return -(int)magnitude;
  }
  /*! readSizeTFromCharChecked() :
   *  Same parser as readU32FromCharChecked(), producing a `size_t` :
   *  decimal digits optionally followed by K, KB, KiB (x1024) or M, MB, MiB (x1024x1024).
   *  An input without any leading digit parses as 0.
   * @return 0 on success : the result is stored in `*value` and `*stringPtr`
   *           is advanced to the first character which was not consumed.
   * @return 1 if the value overflows `size_t` : `*value` is left untouched and
   *           `*stringPtr` stops on the digit or suffix that triggered the overflow. */
  static int readSizeTFromCharChecked(const char** stringPtr, size_t* value)
  {
      size_t const maxBeforeTimes10 = ((size_t)(-1)) / 10;
      size_t const maxBeforeShift10 = ((size_t)(-1)) >> 10;
      const char* cursor = *stringPtr;
      size_t acc = 0;
      int overflow = 0;

      /* digits : `break` leaves cursor on the offending digit */
      for ( ; (*cursor >= '0') && (*cursor <= '9'); cursor++) {
          size_t const previous = acc;
          if (acc > maxBeforeTimes10) { overflow = 1; break; }
          acc = (acc * 10) + (size_t)(*cursor - '0');
          if (acc < previous) { overflow = 1; break; }   /* wrapped around */
      }

      /* optional suffix : one 10-bit shift for K, two for M */
      if (!overflow && ((*cursor == 'K') || (*cursor == 'M'))) {
          int const nbShifts = (*cursor == 'M') ? 2 : 1;
          int s;
          for (s = 0; s < nbShifts; s++) {
              if (acc > maxBeforeShift10) { overflow = 1; break; }
              acc <<= 10;
          }
          if (!overflow) {
              cursor++;                        /* skip `K` or `M` */
              if (*cursor == 'i') cursor++;
              if (*cursor == 'B') cursor++;
          }
      }

      *stringPtr = cursor;
      if (overflow) return 1;
      *value = acc;
      return 0;
  }
  /*! readSizeTFromChar() :
   *  Convenience wrapper around readSizeTFromCharChecked().
   * @return : size_t value read from `*stringPtr`
   *           (K, KB, KiB, M, MB and MiB suffixes are interpreted).
   *  `*stringPtr` is advanced to the position where reading stopped.
   *  Note : the program is terminated through errorOut() if the value overflows. */
  static size_t readSizeTFromChar(const char** stringPtr) {
      size_t parsed;
      int const overflowed = readSizeTFromCharChecked(stringPtr, &parsed);
      if (overflowed) {
          errorOut("error: numeric value overflows size_t");
      }
      return parsed;
  }
  /** longCommandWArg() :
   *  Tests whether *stringPtr starts with longCommand.
   *  @return 1 on match, and moves *stringPtr just past longCommand.
   *  @return 0 otherwise, leaving *stringPtr unchanged.
   */
  static int longCommandWArg(const char** stringPtr, const char* longCommand)
  {
      size_t const prefixLen = strlen(longCommand);

      if (strncmp(*stringPtr, longCommand, prefixLen) != 0)
          return 0;

      *stringPtr += prefixLen;
      return 1;
  }
  385. #ifndef ZSTD_NODICT
  /* Value given to `shrinkDictMaxRegression` by the default (fast)cover parameters,
   * and when "shrink" is requested without an explicit "=#" value. */
  386. static const unsigned kDefaultRegression = 1;
  387. /**
  388. * parseCoverParameters() :
  389. * reads cover parameters from *stringPtr (e.g. "--train-cover=k=48,d=8,steps=32") into *params
  390. * @return 1 means that cover parameters were correct
  391. * @return 0 in case of malformed parameters
  392. */
  393. static unsigned parseCoverParameters(const char* stringPtr, ZDICT_cover_params_t* params)
  394. {
  395. memset(params, 0, sizeof(*params));
  396. for (; ;) {
  397. if (longCommandWArg(&stringPtr, "k=")) { params->k = readU32FromChar(&stringPtr); if (stringPtr[0]==',') { stringPtr++; continue; } else break; }
  398. if (longCommandWArg(&stringPtr, "d=")) { params->d = readU32FromChar(&stringPtr); if (stringPtr[0]==',') { stringPtr++; continue; } else break; }
  399. if (longCommandWArg(&stringPtr, "steps=")) { params->steps = readU32FromChar(&stringPtr); if (stringPtr[0]==',') { stringPtr++; continue; } else break; }
  400. if (longCommandWArg(&stringPtr, "split=")) {
  401. unsigned splitPercentage = readU32FromChar(&stringPtr);
  402. params->splitPoint = (double)splitPercentage / 100.0;
  403. if (stringPtr[0]==',') { stringPtr++; continue; } else break;
  404. }
  405. if (longCommandWArg(&stringPtr, "shrink")) {
  406. params->shrinkDictMaxRegression = kDefaultRegression;
  407. params->shrinkDict = 1;
  408. if (stringPtr[0]=='=') {
  409. stringPtr++;
  410. params->shrinkDictMaxRegression = readU32FromChar(&stringPtr);
  411. }
  412. if (stringPtr[0]==',') {
  413. stringPtr++;
  414. continue;
  415. }
  416. else break;
  417. }
  418. return 0;
  419. }
  420. if (stringPtr[0] != 0) return 0;
  421. DISPLAYLEVEL(4, "cover: k=%u\nd=%u\nsteps=%u\nsplit=%u\nshrink%u\n", params->k, params->d, params->steps, (unsigned)(params->splitPoint * 100), params->shrinkDictMaxRegression);
  422. return 1;
  423. }
  424. /**
  425. * parseFastCoverParameters() :
  426. * reads fastcover parameters from *stringPtr (e.g. "--train-fastcover=k=48,d=8,f=20,steps=32,accel=2") into *params
  427. * @return 1 means that fastcover parameters were correct
  428. * @return 0 in case of malformed parameters
  429. */
  430. static unsigned parseFastCoverParameters(const char* stringPtr, ZDICT_fastCover_params_t* params)
  431. {
  432. memset(params, 0, sizeof(*params));
  433. for (; ;) {
  434. if (longCommandWArg(&stringPtr, "k=")) { params->k = readU32FromChar(&stringPtr); if (stringPtr[0]==',') { stringPtr++; continue; } else break; }
  435. if (longCommandWArg(&stringPtr, "d=")) { params->d = readU32FromChar(&stringPtr); if (stringPtr[0]==',') { stringPtr++; continue; } else break; }
  436. if (longCommandWArg(&stringPtr, "f=")) { params->f = readU32FromChar(&stringPtr); if (stringPtr[0]==',') { stringPtr++; continue; } else break; }
  437. if (longCommandWArg(&stringPtr, "steps=")) { params->steps = readU32FromChar(&stringPtr); if (stringPtr[0]==',') { stringPtr++; continue; } else break; }
  438. if (longCommandWArg(&stringPtr, "accel=")) { params->accel = readU32FromChar(&stringPtr); if (stringPtr[0]==',') { stringPtr++; continue; } else break; }
  439. if (longCommandWArg(&stringPtr, "split=")) {
  440. unsigned splitPercentage = readU32FromChar(&stringPtr);
  441. params->splitPoint = (double)splitPercentage / 100.0;
  442. if (stringPtr[0]==',') { stringPtr++; continue; } else break;
  443. }
  444. if (longCommandWArg(&stringPtr, "shrink")) {
  445. params->shrinkDictMaxRegression = kDefaultRegression;
  446. params->shrinkDict = 1;
  447. if (stringPtr[0]=='=') {
  448. stringPtr++;
  449. params->shrinkDictMaxRegression = readU32FromChar(&stringPtr);
  450. }
  451. if (stringPtr[0]==',') {
  452. stringPtr++;
  453. continue;
  454. }
  455. else break;
  456. }
  457. return 0;
  458. }
  459. if (stringPtr[0] != 0) return 0;
  460. DISPLAYLEVEL(4, "cover: k=%u\nd=%u\nf=%u\nsteps=%u\nsplit=%u\naccel=%u\nshrink=%u\n", params->k, params->d, params->f, params->steps, (unsigned)(params->splitPoint * 100), params->accel, params->shrinkDictMaxRegression);
  461. return 1;
  462. }
  /**
   * parseLegacyParameters() :
   * reads the legacy dictionary builder parameter from stringPtr
   * (e.g. "selectivity=8" or "s=8", i.e. the part following "--train-legacy=") into *selectivity.
   * *selectivity is written as soon as a valid key is found, even if the
   * string is later rejected because of trailing characters.
   * @return 1 means that legacy dictionary builder parameters were correct
   * @return 0 in case of malformed parameters
   */
  static unsigned parseLegacyParameters(const char* stringPtr, unsigned* selectivity)
  {
      int const keyFound = longCommandWArg(&stringPtr, "s=")
                        || longCommandWArg(&stringPtr, "selectivity=");
      if (!keyFound) return 0;

      *selectivity = readU32FromChar(&stringPtr);
      if (stringPtr[0] != 0) return 0;   /* nothing may follow the value */

      DISPLAYLEVEL(4, "legacy: selectivity=%u\n", *selectivity);
      return 1;
  }
  477. static ZDICT_cover_params_t defaultCoverParams(void)
  478. {
  479. ZDICT_cover_params_t params;
  480. memset(&params, 0, sizeof(params));
  481. params.d = 8;
  482. params.steps = 4;
  483. params.splitPoint = 1.0;
  484. params.shrinkDict = 0;
  485. params.shrinkDictMaxRegression = kDefaultRegression;
  486. return params;
  487. }
  488. static ZDICT_fastCover_params_t defaultFastCoverParams(void)
  489. {
  490. ZDICT_fastCover_params_t params;
  491. memset(&params, 0, sizeof(params));
  492. params.d = 8;
  493. params.f = 20;
  494. params.steps = 4;
  495. params.splitPoint = 0.75; /* different from default splitPoint of cover */
  496. params.accel = DEFAULT_ACCEL;
  497. params.shrinkDict = 0;
  498. params.shrinkDictMaxRegression = kDefaultRegression;
  499. return params;
  500. }
  501. #endif
  /** parseAdaptParameters() :
   * reads adaptation limits from stringPtr, a ','-separated list of "min=#" and/or "max=#"
   * (e.g. "min=1,max=19"), and stores them into *adaptMinPtr and *adaptMaxPtr.
   * Both adaptMinPtr and adaptMaxPtr must point to already initialized values :
   * a limit which is absent from the string keeps its previous value,
   * and limits may have been updated even when parsing ultimately fails.
   * @return 1 means that parsing was successful,
   * @return 0 in case of malformed parameters, or when the resulting min is larger than max
   */
  static unsigned parseAdaptParameters(const char* stringPtr, int* adaptMinPtr, int* adaptMaxPtr)
  {
      for ( ; ;) {
          if (longCommandWArg(&stringPtr, "min=")) {
              *adaptMinPtr = readIntFromChar(&stringPtr);
          } else if (longCommandWArg(&stringPtr, "max=")) {
              *adaptMaxPtr = readIntFromChar(&stringPtr);
          } else {
              DISPLAYLEVEL(4, "invalid compression parameter \n");
              return 0;
          }
          /* a ',' announces another key; anything else ends the list */
          if (stringPtr[0] != ',') break;
          stringPtr++;
      }
      if (stringPtr[0] != 0) return 0;   /* check the end of string */
      if (*adaptMinPtr > *adaptMaxPtr) {
          DISPLAYLEVEL(4, "incoherent adaptation limits \n");
          return 0;
      }
      return 1;
  }
  524. /** parseCompressionParameters() :
  525. * reads compression parameters from *stringPtr (e.g. "--zstd=wlog=23,clog=23,hlog=22,slog=6,mml=3,tlen=48,strat=6") into *params
  526. * @return 1 means that compression parameters were correct
  527. * @return 0 in case of malformed parameters
  528. */
  529. static unsigned parseCompressionParameters(const char* stringPtr, ZSTD_compressionParameters* params)
  530. {
  531. for ( ; ;) {
  532. if (longCommandWArg(&stringPtr, "windowLog=") || longCommandWArg(&stringPtr, "wlog=")) { params->windowLog = readU32FromChar(&stringPtr); if (stringPtr[0]==',') { stringPtr++; continue; } else break; }
  533. if (longCommandWArg(&stringPtr, "chainLog=") || longCommandWArg(&stringPtr, "clog=")) { params->chainLog = readU32FromChar(&stringPtr); if (stringPtr[0]==',') { stringPtr++; continue; } else break; }
  534. if (longCommandWArg(&stringPtr, "hashLog=") || longCommandWArg(&stringPtr, "hlog=")) { params->hashLog = readU32FromChar(&stringPtr); if (stringPtr[0]==',') { stringPtr++; continue; } else break; }
  535. if (longCommandWArg(&stringPtr, "searchLog=") || longCommandWArg(&stringPtr, "slog=")) { params->searchLog = readU32FromChar(&stringPtr); if (stringPtr[0]==',') { stringPtr++; continue; } else break; }
  536. if (longCommandWArg(&stringPtr, "minMatch=") || longCommandWArg(&stringPtr, "mml=")) { params->minMatch = readU32FromChar(&stringPtr); if (stringPtr[0]==',') { stringPtr++; continue; } else break; }
  537. if (longCommandWArg(&stringPtr, "targetLength=") || longCommandWArg(&stringPtr, "tlen=")) { params->targetLength = readU32FromChar(&stringPtr); if (stringPtr[0]==',') { stringPtr++; continue; } else break; }
  538. if (longCommandWArg(&stringPtr, "strategy=") || longCommandWArg(&stringPtr, "strat=")) { params->strategy = (ZSTD_strategy)(readU32FromChar(&stringPtr)); if (stringPtr[0]==',') { stringPtr++; continue; } else break; }
  539. if (longCommandWArg(&stringPtr, "overlapLog=") || longCommandWArg(&stringPtr, "ovlog=")) { g_overlapLog = readU32FromChar(&stringPtr); if (stringPtr[0]==',') { stringPtr++; continue; } else break; }
  540. if (longCommandWArg(&stringPtr, "ldmHashLog=") || longCommandWArg(&stringPtr, "lhlog=")) { g_ldmHashLog = readU32FromChar(&stringPtr); if (stringPtr[0]==',') { stringPtr++; continue; } else break; }
  541. if (longCommandWArg(&stringPtr, "ldmMinMatch=") || longCommandWArg(&stringPtr, "lmml=")) { g_ldmMinMatch = readU32FromChar(&stringPtr); if (stringPtr[0]==',') { stringPtr++; continue; } else break; }
  542. if (longCommandWArg(&stringPtr, "ldmBucketSizeLog=") || longCommandWArg(&stringPtr, "lblog=")) { g_ldmBucketSizeLog = readU32FromChar(&stringPtr); if (stringPtr[0]==',') { stringPtr++; continue; } else break; }
  543. if (longCommandWArg(&stringPtr, "ldmHashRateLog=") || longCommandWArg(&stringPtr, "lhrlog=")) { g_ldmHashRateLog = readU32FromChar(&stringPtr); if (stringPtr[0]==',') { stringPtr++; continue; } else break; }
  544. DISPLAYLEVEL(4, "invalid compression parameter \n");
  545. return 0;
  546. }
  547. DISPLAYLEVEL(4, "windowLog=%d, chainLog=%d, hashLog=%d, searchLog=%d \n", params->windowLog, params->chainLog, params->hashLog, params->searchLog);
  548. DISPLAYLEVEL(4, "minMatch=%d, targetLength=%d, strategy=%d \n", params->minMatch, params->targetLength, params->strategy);
  549. if (stringPtr[0] != 0) return 0; /* check the end of string */
  550. return 1;
  551. }
  552. static void printVersion(void)
  553. {
  554. if (g_displayLevel < DISPLAY_LEVEL_DEFAULT) {
  555. DISPLAYOUT("%s\n", ZSTD_VERSION_STRING);
  556. return;
  557. }
  558. DISPLAYOUT(WELCOME_MESSAGE);
  559. if (g_displayLevel >= 3) {
  560. /* format support */
  561. DISPLAYOUT("*** supports: zstd");
  562. #if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT>0) && (ZSTD_LEGACY_SUPPORT<8)
  563. DISPLAYOUT(", zstd legacy v0.%d+", ZSTD_LEGACY_SUPPORT);
  564. #endif
  565. #ifdef ZSTD_GZCOMPRESS
  566. DISPLAYOUT(", gzip");
  567. #endif
  568. #ifdef ZSTD_LZ4COMPRESS
  569. DISPLAYOUT(", lz4");
  570. #endif
  571. #ifdef ZSTD_LZMACOMPRESS
  572. DISPLAYOUT(", lzma, xz ");
  573. #endif
  574. DISPLAYOUT("\n");
  575. if (g_displayLevel >= 4) {
  576. /* posix support */
  577. #ifdef _POSIX_C_SOURCE
  578. DISPLAYOUT("_POSIX_C_SOURCE defined: %ldL\n", (long) _POSIX_C_SOURCE);
  579. #endif
  580. #ifdef _POSIX_VERSION
  581. DISPLAYOUT("_POSIX_VERSION defined: %ldL \n", (long) _POSIX_VERSION);
  582. #endif
  583. #ifdef PLATFORM_POSIX_VERSION
  584. DISPLAYOUT("PLATFORM_POSIX_VERSION defined: %ldL\n", (long) PLATFORM_POSIX_VERSION);
  585. #endif
  586. } }
  587. }
  588. #define ZSTD_NB_STRATEGIES 9
  589. static const char* ZSTD_strategyMap[ZSTD_NB_STRATEGIES + 1] = { "", "ZSTD_fast",
  590. "ZSTD_dfast", "ZSTD_greedy", "ZSTD_lazy", "ZSTD_lazy2", "ZSTD_btlazy2",
  591. "ZSTD_btopt", "ZSTD_btultra", "ZSTD_btultra2"};
  592. #ifndef ZSTD_NOCOMPRESS
  593. static void printDefaultCParams(const char* filename, const char* dictFileName, int cLevel) {
  594. unsigned long long fileSize = UTIL_getFileSize(filename);
  595. const size_t dictSize = dictFileName != NULL ? (size_t)UTIL_getFileSize(dictFileName) : 0;
  596. const ZSTD_compressionParameters cParams = ZSTD_getCParams(cLevel, fileSize, dictSize);
  597. if (fileSize != UTIL_FILESIZE_UNKNOWN) DISPLAY("%s (%u bytes)\n", filename, (unsigned)fileSize);
  598. else DISPLAY("%s (src size unknown)\n", filename);
  599. DISPLAY(" - windowLog : %u\n", cParams.windowLog);
  600. DISPLAY(" - chainLog : %u\n", cParams.chainLog);
  601. DISPLAY(" - hashLog : %u\n", cParams.hashLog);
  602. DISPLAY(" - searchLog : %u\n", cParams.searchLog);
  603. DISPLAY(" - minMatch : %u\n", cParams.minMatch);
  604. DISPLAY(" - targetLength : %u\n", cParams.targetLength);
  605. assert(cParams.strategy < ZSTD_NB_STRATEGIES + 1);
  606. DISPLAY(" - strategy : %s (%u)\n", ZSTD_strategyMap[(int)cParams.strategy], (unsigned)cParams.strategy);
  607. }
  608. static void printActualCParams(const char* filename, const char* dictFileName, int cLevel, const ZSTD_compressionParameters* cParams) {
  609. unsigned long long fileSize = UTIL_getFileSize(filename);
  610. const size_t dictSize = dictFileName != NULL ? (size_t)UTIL_getFileSize(dictFileName) : 0;
  611. ZSTD_compressionParameters actualCParams = ZSTD_getCParams(cLevel, fileSize, dictSize);
  612. assert(g_displayLevel >= 4);
  613. actualCParams.windowLog = cParams->windowLog == 0 ? actualCParams.windowLog : cParams->windowLog;
  614. actualCParams.chainLog = cParams->chainLog == 0 ? actualCParams.chainLog : cParams->chainLog;
  615. actualCParams.hashLog = cParams->hashLog == 0 ? actualCParams.hashLog : cParams->hashLog;
  616. actualCParams.searchLog = cParams->searchLog == 0 ? actualCParams.searchLog : cParams->searchLog;
  617. actualCParams.minMatch = cParams->minMatch == 0 ? actualCParams.minMatch : cParams->minMatch;
  618. actualCParams.targetLength = cParams->targetLength == 0 ? actualCParams.targetLength : cParams->targetLength;
  619. actualCParams.strategy = cParams->strategy == 0 ? actualCParams.strategy : cParams->strategy;
  620. DISPLAY("--zstd=wlog=%d,clog=%d,hlog=%d,slog=%d,mml=%d,tlen=%d,strat=%d\n",
  621. actualCParams.windowLog, actualCParams.chainLog, actualCParams.hashLog, actualCParams.searchLog,
  622. actualCParams.minMatch, actualCParams.targetLength, actualCParams.strategy);
  623. }
  624. #endif
  625. /* Environment variables for parameter setting */
  626. #define ENV_CLEVEL "ZSTD_CLEVEL"
  627. #define ENV_NBTHREADS "ZSTD_NBTHREADS" /* takes lower precedence than directly specifying -T# in the CLI */
  628. /* pick up environment variable */
  629. static int init_cLevel(void) {
  630. const char* const env = getenv(ENV_CLEVEL);
  631. if (env != NULL) {
  632. const char* ptr = env;
  633. int sign = 1;
  634. if (*ptr == '-') {
  635. sign = -1;
  636. ptr++;
  637. } else if (*ptr == '+') {
  638. ptr++;
  639. }
  640. if ((*ptr>='0') && (*ptr<='9')) {
  641. unsigned absLevel;
  642. if (readU32FromCharChecked(&ptr, &absLevel)) {
  643. DISPLAYLEVEL(2, "Ignore environment variable setting %s=%s: numeric value too large \n", ENV_CLEVEL, env);
  644. return ZSTDCLI_CLEVEL_DEFAULT;
  645. } else if (*ptr == 0) {
  646. return sign * (int)absLevel;
  647. } }
  648. DISPLAYLEVEL(2, "Ignore environment variable setting %s=%s: not a valid integer value \n", ENV_CLEVEL, env);
  649. }
  650. return ZSTDCLI_CLEVEL_DEFAULT;
  651. }
  652. #ifdef ZSTD_MULTITHREAD
  653. static unsigned init_nbThreads(void) {
  654. const char* const env = getenv(ENV_NBTHREADS);
  655. if (env != NULL) {
  656. const char* ptr = env;
  657. if ((*ptr>='0') && (*ptr<='9')) {
  658. unsigned nbThreads;
  659. if (readU32FromCharChecked(&ptr, &nbThreads)) {
  660. DISPLAYLEVEL(2, "Ignore environment variable setting %s=%s: numeric value too large \n", ENV_NBTHREADS, env);
  661. return ZSTDCLI_NBTHREADS_DEFAULT;
  662. } else if (*ptr == 0) {
  663. return nbThreads;
  664. }
  665. }
  666. DISPLAYLEVEL(2, "Ignore environment variable setting %s=%s: not a valid unsigned value \n", ENV_NBTHREADS, env);
  667. }
  668. return ZSTDCLI_NBTHREADS_DEFAULT;
  669. }
  670. #endif
  671. #define NEXT_FIELD(ptr) { \
  672. if (*argument == '=') { \
  673. ptr = ++argument; \
  674. argument += strlen(ptr); \
  675. } else { \
  676. argNb++; \
  677. if (argNb >= argCount) { \
  678. DISPLAY("error: missing command argument \n"); \
  679. CLEAN_RETURN(1); \
  680. } \
  681. ptr = argv[argNb]; \
  682. assert(ptr != NULL); \
  683. if (ptr[0]=='-') { \
  684. DISPLAY("error: command cannot be separated from its argument by another command \n"); \
  685. CLEAN_RETURN(1); \
  686. } } }
  687. #define NEXT_UINT32(val32) { \
  688. const char* __nb; \
  689. NEXT_FIELD(__nb); \
  690. val32 = readU32FromChar(&__nb); \
  691. }
  692. typedef enum { zom_compress, zom_decompress, zom_test, zom_bench, zom_train, zom_list } zstd_operation_mode;
  693. #define CLEAN_RETURN(i) { operationResult = (i); goto _end; }
  694. #ifdef ZSTD_NOCOMPRESS
  695. /* symbols from compression library are not defined and should not be invoked */
  696. # define MINCLEVEL -99
  697. # define MAXCLEVEL 22
  698. #else
  699. # define MINCLEVEL ZSTD_minCLevel()
  700. # define MAXCLEVEL ZSTD_maxCLevel()
  701. #endif
  702. int main(int argCount, const char* argv[])
  703. {
  704. int argNb,
  705. followLinks = 0,
  706. allowBlockDevices = 0,
  707. forceStdin = 0,
  708. forceStdout = 0,
  709. hasStdout = 0,
  710. ldmFlag = 0,
  711. main_pause = 0,
  712. nbWorkers = 0,
  713. adapt = 0,
  714. useRowMatchFinder = 0,
  715. adaptMin = MINCLEVEL,
  716. adaptMax = MAXCLEVEL,
  717. rsyncable = 0,
  718. nextArgumentsAreFiles = 0,
  719. operationResult = 0,
  720. separateFiles = 0,
  721. setRealTimePrio = 0,
  722. singleThread = 0,
  723. #ifdef ZSTD_MULTITHREAD
  724. defaultLogicalCores = 0,
  725. #endif
  726. showDefaultCParams = 0,
  727. ultra=0,
  728. contentSize=1;
  729. double compressibility = 0.5;
  730. unsigned bench_nbSeconds = 3; /* would be better if this value was synchronized from bench */
  731. size_t blockSize = 0;
  732. FIO_prefs_t* const prefs = FIO_createPreferences();
  733. FIO_ctx_t* const fCtx = FIO_createContext();
  734. zstd_operation_mode operation = zom_compress;
  735. ZSTD_compressionParameters compressionParams;
  736. int cLevel = init_cLevel();
  737. int cLevelLast = MINCLEVEL - 1; /* lower than minimum */
  738. unsigned recursive = 0;
  739. unsigned memLimit = 0;
  740. FileNamesTable* filenames = UTIL_allocateFileNamesTable((size_t)argCount); /* argCount >= 1 */
  741. FileNamesTable* file_of_names = UTIL_allocateFileNamesTable((size_t)argCount); /* argCount >= 1 */
  742. const char* programName = argv[0];
  743. const char* outFileName = NULL;
  744. const char* outDirName = NULL;
  745. const char* outMirroredDirName = NULL;
  746. const char* dictFileName = NULL;
  747. const char* patchFromDictFileName = NULL;
  748. const char* suffix = ZSTD_EXTENSION;
  749. unsigned maxDictSize = g_defaultMaxDictSize;
  750. unsigned dictID = 0;
  751. size_t streamSrcSize = 0;
  752. size_t targetCBlockSize = 0;
  753. size_t srcSizeHint = 0;
  754. int dictCLevel = g_defaultDictCLevel;
  755. unsigned dictSelect = g_defaultSelectivityLevel;
  756. #ifndef ZSTD_NODICT
  757. ZDICT_cover_params_t coverParams = defaultCoverParams();
  758. ZDICT_fastCover_params_t fastCoverParams = defaultFastCoverParams();
  759. dictType dict = fastCover;
  760. #endif
  761. #ifndef ZSTD_NOBENCH
  762. BMK_advancedParams_t benchParams = BMK_initAdvancedParams();
  763. #endif
  764. ZSTD_paramSwitch_e literalCompressionMode = ZSTD_ps_auto;
  765. /* init */
  766. checkLibVersion();
  767. (void)recursive; (void)cLevelLast; /* not used when ZSTD_NOBENCH set */
  768. (void)memLimit;
  769. assert(argCount >= 1);
  770. if ((filenames==NULL) || (file_of_names==NULL)) { DISPLAY("zstd: allocation error \n"); exit(1); }
  771. programName = lastNameFromPath(programName);
  772. #ifdef ZSTD_MULTITHREAD
  773. nbWorkers = init_nbThreads();
  774. #endif
  775. /* preset behaviors */
  776. if (exeNameMatch(programName, ZSTD_ZSTDMT)) nbWorkers=0, singleThread=0;
  777. if (exeNameMatch(programName, ZSTD_UNZSTD)) operation=zom_decompress;
  778. if (exeNameMatch(programName, ZSTD_CAT)) { operation=zom_decompress; FIO_overwriteMode(prefs); forceStdout=1; followLinks=1; outFileName=stdoutmark; g_displayLevel=1; } /* supports multiple formats */
  779. if (exeNameMatch(programName, ZSTD_ZCAT)) { operation=zom_decompress; FIO_overwriteMode(prefs); forceStdout=1; followLinks=1; outFileName=stdoutmark; g_displayLevel=1; } /* behave like zcat, also supports multiple formats */
  780. if (exeNameMatch(programName, ZSTD_GZ)) { suffix = GZ_EXTENSION; FIO_setCompressionType(prefs, FIO_gzipCompression); FIO_setRemoveSrcFile(prefs, 1); } /* behave like gzip */
  781. if (exeNameMatch(programName, ZSTD_GUNZIP)) { operation=zom_decompress; FIO_setRemoveSrcFile(prefs, 1); } /* behave like gunzip, also supports multiple formats */
  782. if (exeNameMatch(programName, ZSTD_GZCAT)) { operation=zom_decompress; FIO_overwriteMode(prefs); forceStdout=1; followLinks=1; outFileName=stdoutmark; g_displayLevel=1; } /* behave like gzcat, also supports multiple formats */
  783. if (exeNameMatch(programName, ZSTD_LZMA)) { suffix = LZMA_EXTENSION; FIO_setCompressionType(prefs, FIO_lzmaCompression); FIO_setRemoveSrcFile(prefs, 1); } /* behave like lzma */
  784. if (exeNameMatch(programName, ZSTD_UNLZMA)) { operation=zom_decompress; FIO_setCompressionType(prefs, FIO_lzmaCompression); FIO_setRemoveSrcFile(prefs, 1); } /* behave like unlzma, also supports multiple formats */
  785. if (exeNameMatch(programName, ZSTD_XZ)) { suffix = XZ_EXTENSION; FIO_setCompressionType(prefs, FIO_xzCompression); FIO_setRemoveSrcFile(prefs, 1); } /* behave like xz */
  786. if (exeNameMatch(programName, ZSTD_UNXZ)) { operation=zom_decompress; FIO_setCompressionType(prefs, FIO_xzCompression); FIO_setRemoveSrcFile(prefs, 1); } /* behave like unxz, also supports multiple formats */
  787. if (exeNameMatch(programName, ZSTD_LZ4)) { suffix = LZ4_EXTENSION; FIO_setCompressionType(prefs, FIO_lz4Compression); } /* behave like lz4 */
  788. if (exeNameMatch(programName, ZSTD_UNLZ4)) { operation=zom_decompress; FIO_setCompressionType(prefs, FIO_lz4Compression); } /* behave like unlz4, also supports multiple formats */
  789. memset(&compressionParams, 0, sizeof(compressionParams));
  790. /* init crash handler */
  791. FIO_addAbortHandler();
  792. /* command switches */
  793. for (argNb=1; argNb<argCount; argNb++) {
  794. const char* argument = argv[argNb];
  795. if (!argument) continue; /* Protection if argument empty */
  796. if (nextArgumentsAreFiles) {
  797. UTIL_refFilename(filenames, argument);
  798. continue;
  799. }
  800. /* "-" means stdin/stdout */
  801. if (!strcmp(argument, "-")){
  802. UTIL_refFilename(filenames, stdinmark);
  803. continue;
  804. }
  805. /* Decode commands (note : aggregated commands are allowed) */
  806. if (argument[0]=='-') {
  807. if (argument[1]=='-') {
  808. /* long commands (--long-word) */
  809. if (!strcmp(argument, "--")) { nextArgumentsAreFiles=1; continue; } /* only file names allowed from now on */
  810. if (!strcmp(argument, "--list")) { operation=zom_list; continue; }
  811. if (!strcmp(argument, "--compress")) { operation=zom_compress; continue; }
  812. if (!strcmp(argument, "--decompress")) { operation=zom_decompress; continue; }
  813. if (!strcmp(argument, "--uncompress")) { operation=zom_decompress; continue; }
  814. if (!strcmp(argument, "--force")) { FIO_overwriteMode(prefs); forceStdin=1; forceStdout=1; followLinks=1; allowBlockDevices=1; continue; }
  815. if (!strcmp(argument, "--version")) { printVersion(); CLEAN_RETURN(0); }
  816. if (!strcmp(argument, "--help")) { usage_advanced(programName); CLEAN_RETURN(0); }
  817. if (!strcmp(argument, "--verbose")) { g_displayLevel++; continue; }
  818. if (!strcmp(argument, "--quiet")) { g_displayLevel--; continue; }
  819. if (!strcmp(argument, "--stdout")) { forceStdout=1; outFileName=stdoutmark; g_displayLevel-=(g_displayLevel==2); continue; }
  820. if (!strcmp(argument, "--ultra")) { ultra=1; continue; }
  821. if (!strcmp(argument, "--check")) { FIO_setChecksumFlag(prefs, 2); continue; }
  822. if (!strcmp(argument, "--no-check")) { FIO_setChecksumFlag(prefs, 0); continue; }
  823. if (!strcmp(argument, "--sparse")) { FIO_setSparseWrite(prefs, 2); continue; }
  824. if (!strcmp(argument, "--no-sparse")) { FIO_setSparseWrite(prefs, 0); continue; }
  825. if (!strcmp(argument, "--test")) { operation=zom_test; continue; }
  826. if (!strcmp(argument, "--train")) { operation=zom_train; if (outFileName==NULL) outFileName=g_defaultDictName; continue; }
  827. if (!strcmp(argument, "--no-dictID")) { FIO_setDictIDFlag(prefs, 0); continue; }
  828. if (!strcmp(argument, "--keep")) { FIO_setRemoveSrcFile(prefs, 0); continue; }
  829. if (!strcmp(argument, "--rm")) { FIO_setRemoveSrcFile(prefs, 1); continue; }
  830. if (!strcmp(argument, "--priority=rt")) { setRealTimePrio = 1; continue; }
  831. if (!strcmp(argument, "--show-default-cparams")) { showDefaultCParams = 1; continue; }
  832. if (!strcmp(argument, "--content-size")) { contentSize = 1; continue; }
  833. if (!strcmp(argument, "--no-content-size")) { contentSize = 0; continue; }
  834. if (!strcmp(argument, "--adapt")) { adapt = 1; continue; }
  835. if (!strcmp(argument, "--no-row-match-finder")) { useRowMatchFinder = 1; continue; }
  836. if (!strcmp(argument, "--row-match-finder")) { useRowMatchFinder = 2; continue; }
  837. if (longCommandWArg(&argument, "--adapt=")) { adapt = 1; if (!parseAdaptParameters(argument, &adaptMin, &adaptMax)) { badusage(programName); CLEAN_RETURN(1); } continue; }
  838. if (!strcmp(argument, "--single-thread")) { nbWorkers = 0; singleThread = 1; continue; }
  839. if (!strcmp(argument, "--format=zstd")) { suffix = ZSTD_EXTENSION; FIO_setCompressionType(prefs, FIO_zstdCompression); continue; }
  840. #ifdef ZSTD_GZCOMPRESS
  841. if (!strcmp(argument, "--format=gzip")) { suffix = GZ_EXTENSION; FIO_setCompressionType(prefs, FIO_gzipCompression); continue; }
  842. #endif
  843. #ifdef ZSTD_LZMACOMPRESS
  844. if (!strcmp(argument, "--format=lzma")) { suffix = LZMA_EXTENSION; FIO_setCompressionType(prefs, FIO_lzmaCompression); continue; }
  845. if (!strcmp(argument, "--format=xz")) { suffix = XZ_EXTENSION; FIO_setCompressionType(prefs, FIO_xzCompression); continue; }
  846. #endif
  847. #ifdef ZSTD_LZ4COMPRESS
  848. if (!strcmp(argument, "--format=lz4")) { suffix = LZ4_EXTENSION; FIO_setCompressionType(prefs, FIO_lz4Compression); continue; }
  849. #endif
  850. if (!strcmp(argument, "--rsyncable")) { rsyncable = 1; continue; }
  851. if (!strcmp(argument, "--compress-literals")) { literalCompressionMode = ZSTD_ps_enable; continue; }
  852. if (!strcmp(argument, "--no-compress-literals")) { literalCompressionMode = ZSTD_ps_disable; continue; }
  853. if (!strcmp(argument, "--no-progress")) { FIO_setProgressSetting(FIO_ps_never); continue; }
  854. if (!strcmp(argument, "--progress")) { FIO_setProgressSetting(FIO_ps_always); continue; }
  855. if (!strcmp(argument, "--exclude-compressed")) { FIO_setExcludeCompressedFile(prefs, 1); continue; }
  856. /* long commands with arguments */
  857. #ifndef ZSTD_NODICT
  858. if (longCommandWArg(&argument, "--train-cover")) {
  859. operation = zom_train;
  860. if (outFileName == NULL)
  861. outFileName = g_defaultDictName;
  862. dict = cover;
  863. /* Allow optional arguments following an = */
  864. if (*argument == 0) { memset(&coverParams, 0, sizeof(coverParams)); }
  865. else if (*argument++ != '=') { badusage(programName); CLEAN_RETURN(1); }
  866. else if (!parseCoverParameters(argument, &coverParams)) { badusage(programName); CLEAN_RETURN(1); }
  867. continue;
  868. }
  869. if (longCommandWArg(&argument, "--train-fastcover")) {
  870. operation = zom_train;
  871. if (outFileName == NULL)
  872. outFileName = g_defaultDictName;
  873. dict = fastCover;
  874. /* Allow optional arguments following an = */
  875. if (*argument == 0) { memset(&fastCoverParams, 0, sizeof(fastCoverParams)); }
  876. else if (*argument++ != '=') { badusage(programName); CLEAN_RETURN(1); }
  877. else if (!parseFastCoverParameters(argument, &fastCoverParams)) { badusage(programName); CLEAN_RETURN(1); }
  878. continue;
  879. }
  880. if (longCommandWArg(&argument, "--train-legacy")) {
  881. operation = zom_train;
  882. if (outFileName == NULL)
  883. outFileName = g_defaultDictName;
  884. dict = legacy;
  885. /* Allow optional arguments following an = */
  886. if (*argument == 0) { continue; }
  887. else if (*argument++ != '=') { badusage(programName); CLEAN_RETURN(1); }
  888. else if (!parseLegacyParameters(argument, &dictSelect)) { badusage(programName); CLEAN_RETURN(1); }
  889. continue;
  890. }
  891. #endif
  892. if (longCommandWArg(&argument, "--threads")) { NEXT_UINT32(nbWorkers); continue; }
  893. if (longCommandWArg(&argument, "--memlimit")) { NEXT_UINT32(memLimit); continue; }
  894. if (longCommandWArg(&argument, "--memory")) { NEXT_UINT32(memLimit); continue; }
  895. if (longCommandWArg(&argument, "--memlimit-decompress")) { NEXT_UINT32(memLimit); continue; }
  896. if (longCommandWArg(&argument, "--block-size=")) { blockSize = readSizeTFromChar(&argument); continue; }
  897. if (longCommandWArg(&argument, "--maxdict")) { NEXT_UINT32(maxDictSize); continue; }
  898. if (longCommandWArg(&argument, "--dictID")) { NEXT_UINT32(dictID); continue; }
  899. if (longCommandWArg(&argument, "--zstd=")) { if (!parseCompressionParameters(argument, &compressionParams)) { badusage(programName); CLEAN_RETURN(1); } continue; }
  900. if (longCommandWArg(&argument, "--stream-size=")) { streamSrcSize = readSizeTFromChar(&argument); continue; }
  901. if (longCommandWArg(&argument, "--target-compressed-block-size=")) { targetCBlockSize = readSizeTFromChar(&argument); continue; }
  902. if (longCommandWArg(&argument, "--size-hint=")) { srcSizeHint = readSizeTFromChar(&argument); continue; }
  903. if (longCommandWArg(&argument, "--output-dir-flat")) { NEXT_FIELD(outDirName); continue; }
  904. #ifdef ZSTD_MULTITHREAD
  905. if (longCommandWArg(&argument, "--auto-threads")) {
  906. const char* threadDefault = NULL;
  907. NEXT_FIELD(threadDefault);
  908. if (strcmp(threadDefault, "logical") == 0)
  909. defaultLogicalCores = 1;
  910. continue;
  911. }
  912. #endif
  913. #ifdef UTIL_HAS_MIRRORFILELIST
  914. if (longCommandWArg(&argument, "--output-dir-mirror")) { NEXT_FIELD(outMirroredDirName); continue; }
  915. #endif
  916. #ifndef ZSTD_NOTRACE
  917. if (longCommandWArg(&argument, "--trace")) { char const* traceFile; NEXT_FIELD(traceFile); TRACE_enable(traceFile); continue; }
  918. #endif
  919. if (longCommandWArg(&argument, "--patch-from")) { NEXT_FIELD(patchFromDictFileName); continue; }
  920. if (longCommandWArg(&argument, "--long")) {
  921. unsigned ldmWindowLog = 0;
  922. ldmFlag = 1;
  923. /* Parse optional window log */
  924. if (*argument == '=') {
  925. ++argument;
  926. ldmWindowLog = readU32FromChar(&argument);
  927. } else if (*argument != 0) {
  928. /* Invalid character following --long */
  929. badusage(programName);
  930. CLEAN_RETURN(1);
  931. }
  932. /* Only set windowLog if not already set by --zstd */
  933. if (compressionParams.windowLog == 0)
  934. compressionParams.windowLog = ldmWindowLog;
  935. continue;
  936. }
  937. #ifndef ZSTD_NOCOMPRESS /* linking ZSTD_minCLevel() requires compression support */
  938. if (longCommandWArg(&argument, "--fast")) {
  939. /* Parse optional acceleration factor */
  940. if (*argument == '=') {
  941. U32 const maxFast = (U32)-ZSTD_minCLevel();
  942. U32 fastLevel;
  943. ++argument;
  944. fastLevel = readU32FromChar(&argument);
  945. if (fastLevel > maxFast) fastLevel = maxFast;
  946. if (fastLevel) {
  947. dictCLevel = cLevel = -(int)fastLevel;
  948. } else {
  949. badusage(programName);
  950. CLEAN_RETURN(1);
  951. }
  952. } else if (*argument != 0) {
  953. /* Invalid character following --fast */
  954. badusage(programName);
  955. CLEAN_RETURN(1);
  956. } else {
  957. cLevel = -1; /* default for --fast */
  958. }
  959. continue;
  960. }
  961. #endif
  962. if (longCommandWArg(&argument, "--filelist")) {
  963. const char* listName;
  964. NEXT_FIELD(listName);
  965. UTIL_refFilename(file_of_names, listName);
  966. continue;
  967. }
  968. /* fall-through, will trigger bad_usage() later on */
  969. }
  970. argument++;
  971. while (argument[0]!=0) {
  972. #ifndef ZSTD_NOCOMPRESS
  973. /* compression Level */
  974. if ((*argument>='0') && (*argument<='9')) {
  975. dictCLevel = cLevel = (int)readU32FromChar(&argument);
  976. continue;
  977. }
  978. #endif
  979. switch(argument[0])
  980. {
  981. /* Display help */
  982. case 'V': printVersion(); CLEAN_RETURN(0); /* Version Only */
  983. case 'H':
  984. case 'h': usage_advanced(programName); CLEAN_RETURN(0);
  985. /* Compress */
  986. case 'z': operation=zom_compress; argument++; break;
  987. /* Decoding */
  988. case 'd':
  989. #ifndef ZSTD_NOBENCH
  990. benchParams.mode = BMK_decodeOnly;
  991. if (operation==zom_bench) { argument++; break; } /* benchmark decode (hidden option) */
  992. #endif
  993. operation=zom_decompress; argument++; break;
  994. /* Force stdout, even if stdout==console */
  995. case 'c': forceStdout=1; outFileName=stdoutmark; argument++; break;
  996. /* Use file content as dictionary */
  997. case 'D': argument++; NEXT_FIELD(dictFileName); break;
  998. /* Overwrite */
  999. case 'f': FIO_overwriteMode(prefs); forceStdin=1; forceStdout=1; followLinks=1; allowBlockDevices=1; argument++; break;
  1000. /* Verbose mode */
  1001. case 'v': g_displayLevel++; argument++; break;
  1002. /* Quiet mode */
  1003. case 'q': g_displayLevel--; argument++; break;
  1004. /* keep source file (default) */
  1005. case 'k': FIO_setRemoveSrcFile(prefs, 0); argument++; break;
  1006. /* Checksum */
  1007. case 'C': FIO_setChecksumFlag(prefs, 2); argument++; break;
  1008. /* test compressed file */
  1009. case 't': operation=zom_test; argument++; break;
  1010. /* destination file name */
  1011. case 'o': argument++; NEXT_FIELD(outFileName); break;
  1012. /* limit memory */
  1013. case 'M':
  1014. argument++;
  1015. memLimit = readU32FromChar(&argument);
  1016. break;
  1017. case 'l': operation=zom_list; argument++; break;
  1018. #ifdef UTIL_HAS_CREATEFILELIST
  1019. /* recursive */
  1020. case 'r': recursive=1; argument++; break;
  1021. #endif
  1022. #ifndef ZSTD_NOBENCH
  1023. /* Benchmark */
  1024. case 'b':
  1025. operation=zom_bench;
  1026. argument++;
  1027. break;
  1028. /* range bench (benchmark only) */
  1029. case 'e':
  1030. /* compression Level */
  1031. argument++;
  1032. cLevelLast = (int)readU32FromChar(&argument);
  1033. break;
  1034. /* Modify Nb Iterations (benchmark only) */
  1035. case 'i':
  1036. argument++;
  1037. bench_nbSeconds = readU32FromChar(&argument);
  1038. break;
  1039. /* cut input into blocks (benchmark only) */
  1040. case 'B':
  1041. argument++;
  1042. blockSize = readU32FromChar(&argument);
  1043. break;
  1044. /* benchmark files separately (hidden option) */
  1045. case 'S':
  1046. argument++;
  1047. separateFiles = 1;
  1048. break;
  1049. #endif /* ZSTD_NOBENCH */
  1050. /* nb of threads (hidden option) */
  1051. case 'T':
  1052. argument++;
  1053. nbWorkers = (int)readU32FromChar(&argument);
  1054. break;
  1055. /* Dictionary Selection level */
  1056. case 's':
  1057. argument++;
  1058. dictSelect = readU32FromChar(&argument);
  1059. break;
  1060. /* Pause at the end (-p) or set an additional param (-p#) (hidden option) */
  1061. case 'p': argument++;
  1062. #ifndef ZSTD_NOBENCH
  1063. if ((*argument>='0') && (*argument<='9')) {
  1064. benchParams.additionalParam = (int)readU32FromChar(&argument);
  1065. } else
  1066. #endif
  1067. main_pause=1;
  1068. break;
  1069. /* Select compressibility of synthetic sample */
  1070. case 'P':
  1071. argument++;
  1072. compressibility = (double)readU32FromChar(&argument) / 100;
  1073. break;
  1074. /* unknown command */
  1075. default : badusage(programName); CLEAN_RETURN(1);
  1076. }
  1077. }
  1078. continue;
  1079. } /* if (argument[0]=='-') */
  1080. /* none of the above : add filename to list */
  1081. UTIL_refFilename(filenames, argument);
  1082. }
  1083. /* Welcome message (if verbose) */
  1084. DISPLAYLEVEL(3, WELCOME_MESSAGE);
  1085. #ifdef ZSTD_MULTITHREAD
  1086. if ((nbWorkers==0) && (!singleThread)) {
  1087. /* automatically set # workers based on # of reported cpus */
  1088. if (defaultLogicalCores) {
  1089. nbWorkers = UTIL_countLogicalCores();
  1090. DISPLAYLEVEL(3, "Note: %d logical core(s) detected \n", nbWorkers);
  1091. } else {
  1092. nbWorkers = UTIL_countPhysicalCores();
  1093. DISPLAYLEVEL(3, "Note: %d physical core(s) detected \n", nbWorkers);
  1094. }
  1095. }
  1096. #else
  1097. (void)singleThread; (void)nbWorkers;
  1098. #endif
  1099. g_utilDisplayLevel = g_displayLevel;
  1100. #ifdef UTIL_HAS_CREATEFILELIST
  1101. if (!followLinks) {
  1102. unsigned u, fileNamesNb;
  1103. unsigned const nbFilenames = (unsigned)filenames->tableSize;
  1104. for (u=0, fileNamesNb=0; u<nbFilenames; u++) {
  1105. if ( UTIL_isLink(filenames->fileNames[u])
  1106. && !UTIL_isFIFO(filenames->fileNames[u])
  1107. ) {
  1108. DISPLAYLEVEL(2, "Warning : %s is a symbolic link, ignoring \n", filenames->fileNames[u]);
  1109. } else {
  1110. filenames->fileNames[fileNamesNb++] = filenames->fileNames[u];
  1111. } }
  1112. if (fileNamesNb == 0 && nbFilenames > 0) /* all names are eliminated */
  1113. CLEAN_RETURN(1);
  1114. filenames->tableSize = fileNamesNb;
  1115. } /* if (!followLinks) */
  1116. /* read names from a file */
  1117. if (file_of_names->tableSize) {
  1118. size_t const nbFileLists = file_of_names->tableSize;
  1119. size_t flNb;
  1120. for (flNb=0; flNb < nbFileLists; flNb++) {
  1121. FileNamesTable* const fnt = UTIL_createFileNamesTable_fromFileName(file_of_names->fileNames[flNb]);
  1122. if (fnt==NULL) {
  1123. DISPLAYLEVEL(1, "zstd: error reading %s \n", file_of_names->fileNames[flNb]);
  1124. CLEAN_RETURN(1);
  1125. }
  1126. filenames = UTIL_mergeFileNamesTable(filenames, fnt);
  1127. }
  1128. }
  1129. if (recursive) { /* at this stage, filenameTable is a list of paths, which can contain both files and directories */
  1130. UTIL_expandFNT(&filenames, followLinks);
  1131. }
  1132. #else
  1133. (void)followLinks;
  1134. #endif
  1135. if (operation == zom_list) {
  1136. #ifndef ZSTD_NODECOMPRESS
  1137. int const ret = FIO_listMultipleFiles((unsigned)filenames->tableSize, filenames->fileNames, g_displayLevel);
  1138. CLEAN_RETURN(ret);
  1139. #else
  1140. DISPLAY("file information is not supported \n");
  1141. CLEAN_RETURN(1);
  1142. #endif
  1143. }
  1144. /* Check if benchmark is selected */
  1145. if (operation==zom_bench) {
  1146. #ifndef ZSTD_NOBENCH
  1147. benchParams.blockSize = blockSize;
  1148. benchParams.nbWorkers = nbWorkers;
  1149. benchParams.realTime = (unsigned)setRealTimePrio;
  1150. benchParams.nbSeconds = bench_nbSeconds;
  1151. benchParams.ldmFlag = ldmFlag;
  1152. benchParams.ldmMinMatch = (int)g_ldmMinMatch;
  1153. benchParams.ldmHashLog = (int)g_ldmHashLog;
  1154. benchParams.useRowMatchFinder = useRowMatchFinder;
  1155. if (g_ldmBucketSizeLog != LDM_PARAM_DEFAULT) {
  1156. benchParams.ldmBucketSizeLog = (int)g_ldmBucketSizeLog;
  1157. }
  1158. if (g_ldmHashRateLog != LDM_PARAM_DEFAULT) {
  1159. benchParams.ldmHashRateLog = (int)g_ldmHashRateLog;
  1160. }
  1161. benchParams.literalCompressionMode = literalCompressionMode;
  1162. if (cLevel > ZSTD_maxCLevel()) cLevel = ZSTD_maxCLevel();
  1163. if (cLevelLast > ZSTD_maxCLevel()) cLevelLast = ZSTD_maxCLevel();
  1164. if (cLevelLast < cLevel) cLevelLast = cLevel;
  1165. if (cLevelLast > cLevel)
  1166. DISPLAYLEVEL(3, "Benchmarking levels from %d to %d\n", cLevel, cLevelLast);
  1167. if (filenames->tableSize > 0) {
  1168. if(separateFiles) {
  1169. unsigned i;
  1170. for(i = 0; i < filenames->tableSize; i++) {
  1171. int c;
  1172. DISPLAYLEVEL(3, "Benchmarking %s \n", filenames->fileNames[i]);
  1173. for(c = cLevel; c <= cLevelLast; c++) {
  1174. BMK_benchFilesAdvanced(&filenames->fileNames[i], 1, dictFileName, c, &compressionParams, g_displayLevel, &benchParams);
  1175. } }
  1176. } else {
  1177. for(; cLevel <= cLevelLast; cLevel++) {
  1178. BMK_benchFilesAdvanced(filenames->fileNames, (unsigned)filenames->tableSize, dictFileName, cLevel, &compressionParams, g_displayLevel, &benchParams);
  1179. } }
  1180. } else {
  1181. for(; cLevel <= cLevelLast; cLevel++) {
  1182. BMK_syntheticTest(cLevel, compressibility, &compressionParams, g_displayLevel, &benchParams);
  1183. } }
  1184. #else
  1185. (void)bench_nbSeconds; (void)blockSize; (void)setRealTimePrio; (void)separateFiles; (void)compressibility;
  1186. #endif
  1187. goto _end;
  1188. }
  1189. /* Check if dictionary builder is selected */
  1190. if (operation==zom_train) {
  1191. #ifndef ZSTD_NODICT
  1192. ZDICT_params_t zParams;
  1193. zParams.compressionLevel = dictCLevel;
  1194. zParams.notificationLevel = (unsigned)g_displayLevel;
  1195. zParams.dictID = dictID;
  1196. if (dict == cover) {
  1197. int const optimize = !coverParams.k || !coverParams.d;
  1198. coverParams.nbThreads = (unsigned)nbWorkers;
  1199. coverParams.zParams = zParams;
  1200. operationResult = DiB_trainFromFiles(outFileName, maxDictSize, filenames->fileNames, (int)filenames->tableSize, blockSize, NULL, &coverParams, NULL, optimize, memLimit);
  1201. } else if (dict == fastCover) {
  1202. int const optimize = !fastCoverParams.k || !fastCoverParams.d;
  1203. fastCoverParams.nbThreads = (unsigned)nbWorkers;
  1204. fastCoverParams.zParams = zParams;
  1205. operationResult = DiB_trainFromFiles(outFileName, maxDictSize, filenames->fileNames, (int)filenames->tableSize, blockSize, NULL, NULL, &fastCoverParams, optimize, memLimit);
  1206. } else {
  1207. ZDICT_legacy_params_t dictParams;
  1208. memset(&dictParams, 0, sizeof(dictParams));
  1209. dictParams.selectivityLevel = dictSelect;
  1210. dictParams.zParams = zParams;
  1211. operationResult = DiB_trainFromFiles(outFileName, maxDictSize, filenames->fileNames, (int)filenames->tableSize, blockSize, &dictParams, NULL, NULL, 0, memLimit);
  1212. }
  1213. #else
  1214. (void)dictCLevel; (void)dictSelect; (void)dictID; (void)maxDictSize; /* not used when ZSTD_NODICT set */
  1215. DISPLAYLEVEL(1, "training mode not available \n");
  1216. operationResult = 1;
  1217. #endif
  1218. goto _end;
  1219. }
  1220. #ifndef ZSTD_NODECOMPRESS
  1221. if (operation==zom_test) { FIO_setTestMode(prefs, 1); outFileName=nulmark; FIO_setRemoveSrcFile(prefs, 0); } /* test mode */
  1222. #endif
  1223. /* No input filename ==> use stdin and stdout */
  1224. if (filenames->tableSize == 0) UTIL_refFilename(filenames, stdinmark);
  1225. if (!strcmp(filenames->fileNames[0], stdinmark) && !outFileName)
  1226. outFileName = stdoutmark; /* when input is stdin, default output is stdout */
  1227. /* Check if input/output defined as console; trigger an error in this case */
  1228. if (!forceStdin
  1229. && !strcmp(filenames->fileNames[0], stdinmark)
  1230. && IS_CONSOLE(stdin) ) {
  1231. DISPLAYLEVEL(1, "stdin is a console, aborting\n");
  1232. CLEAN_RETURN(1);
  1233. }
  1234. if ( outFileName && !strcmp(outFileName, stdoutmark)
  1235. && IS_CONSOLE(stdout)
  1236. && !strcmp(filenames->fileNames[0], stdinmark)
  1237. && !forceStdout
  1238. && operation!=zom_decompress ) {
  1239. DISPLAYLEVEL(1, "stdout is a console, aborting\n");
  1240. CLEAN_RETURN(1);
  1241. }
  1242. #ifndef ZSTD_NOCOMPRESS
  1243. /* check compression level limits */
  1244. { int const maxCLevel = ultra ? ZSTD_maxCLevel() : ZSTDCLI_CLEVEL_MAX;
  1245. if (cLevel > maxCLevel) {
  1246. DISPLAYLEVEL(2, "Warning : compression level higher than max, reduced to %i \n", maxCLevel);
  1247. cLevel = maxCLevel;
  1248. } }
  1249. #endif
  1250. if (showDefaultCParams) {
  1251. if (operation == zom_decompress) {
  1252. DISPLAY("error : can't use --show-default-cparams in decomrpession mode \n");
  1253. CLEAN_RETURN(1);
  1254. }
  1255. }
  1256. if (dictFileName != NULL && patchFromDictFileName != NULL) {
  1257. DISPLAY("error : can't use -D and --patch-from=# at the same time \n");
  1258. CLEAN_RETURN(1);
  1259. }
  1260. if (patchFromDictFileName != NULL && filenames->tableSize > 1) {
  1261. DISPLAY("error : can't use --patch-from=# on multiple files \n");
  1262. CLEAN_RETURN(1);
  1263. }
  1264. /* No status message in pipe mode (stdin - stdout) */
  1265. hasStdout = outFileName && !strcmp(outFileName,stdoutmark);
  1266. if ((hasStdout || !IS_CONSOLE(stderr)) && (g_displayLevel==2)) g_displayLevel=1;
  1267. /* IO Stream/File */
  1268. FIO_setHasStdoutOutput(fCtx, hasStdout);
  1269. FIO_setNbFilesTotal(fCtx, (int)filenames->tableSize);
  1270. FIO_determineHasStdinInput(fCtx, filenames);
  1271. FIO_setNotificationLevel(g_displayLevel);
  1272. FIO_setAllowBlockDevices(prefs, allowBlockDevices);
  1273. FIO_setPatchFromMode(prefs, patchFromDictFileName != NULL);
  1274. if (memLimit == 0) {
  1275. if (compressionParams.windowLog == 0) {
  1276. memLimit = (U32)1 << g_defaultMaxWindowLog;
  1277. } else {
  1278. memLimit = (U32)1 << (compressionParams.windowLog & 31);
  1279. } }
  1280. if (patchFromDictFileName != NULL)
  1281. dictFileName = patchFromDictFileName;
  1282. FIO_setMemLimit(prefs, memLimit);
  1283. if (operation==zom_compress) {
  1284. #ifndef ZSTD_NOCOMPRESS
  1285. FIO_setContentSize(prefs, contentSize);
  1286. FIO_setNbWorkers(prefs, nbWorkers);
  1287. FIO_setBlockSize(prefs, (int)blockSize);
  1288. if (g_overlapLog!=OVERLAP_LOG_DEFAULT) FIO_setOverlapLog(prefs, (int)g_overlapLog);
  1289. FIO_setLdmFlag(prefs, (unsigned)ldmFlag);
  1290. FIO_setLdmHashLog(prefs, (int)g_ldmHashLog);
  1291. FIO_setLdmMinMatch(prefs, (int)g_ldmMinMatch);
  1292. if (g_ldmBucketSizeLog != LDM_PARAM_DEFAULT) FIO_setLdmBucketSizeLog(prefs, (int)g_ldmBucketSizeLog);
  1293. if (g_ldmHashRateLog != LDM_PARAM_DEFAULT) FIO_setLdmHashRateLog(prefs, (int)g_ldmHashRateLog);
  1294. FIO_setAdaptiveMode(prefs, (unsigned)adapt);
  1295. FIO_setUseRowMatchFinder(prefs, useRowMatchFinder);
  1296. FIO_setAdaptMin(prefs, adaptMin);
  1297. FIO_setAdaptMax(prefs, adaptMax);
  1298. FIO_setRsyncable(prefs, rsyncable);
  1299. FIO_setStreamSrcSize(prefs, streamSrcSize);
  1300. FIO_setTargetCBlockSize(prefs, targetCBlockSize);
  1301. FIO_setSrcSizeHint(prefs, srcSizeHint);
  1302. FIO_setLiteralCompressionMode(prefs, literalCompressionMode);
  1303. if (adaptMin > cLevel) cLevel = adaptMin;
  1304. if (adaptMax < cLevel) cLevel = adaptMax;
  1305. /* Compare strategies constant with the ground truth */
  1306. { ZSTD_bounds strategyBounds = ZSTD_cParam_getBounds(ZSTD_c_strategy);
  1307. assert(ZSTD_NB_STRATEGIES == strategyBounds.upperBound);
  1308. (void)strategyBounds; }
  1309. if (showDefaultCParams || g_displayLevel >= 4) {
  1310. size_t fileNb;
  1311. for (fileNb = 0; fileNb < (size_t)filenames->tableSize; fileNb++) {
  1312. if (showDefaultCParams)
  1313. printDefaultCParams(filenames->fileNames[fileNb], dictFileName, cLevel);
  1314. if (g_displayLevel >= 4)
  1315. printActualCParams(filenames->fileNames[fileNb], dictFileName, cLevel, &compressionParams);
  1316. }
  1317. }
  1318. if (g_displayLevel >= 4)
  1319. FIO_displayCompressionParameters(prefs);
  1320. if ((filenames->tableSize==1) && outFileName)
  1321. operationResult = FIO_compressFilename(fCtx, prefs, outFileName, filenames->fileNames[0], dictFileName, cLevel, compressionParams);
  1322. else
  1323. operationResult = FIO_compressMultipleFilenames(fCtx, prefs, filenames->fileNames, outMirroredDirName, outDirName, outFileName, suffix, dictFileName, cLevel, compressionParams);
  1324. #else
  1325. (void)contentSize; (void)suffix; (void)adapt; (void)rsyncable; (void)ultra; (void)cLevel; (void)ldmFlag; (void)literalCompressionMode; (void)targetCBlockSize; (void)streamSrcSize; (void)srcSizeHint; (void)ZSTD_strategyMap; (void)useRowMatchFinder; /* not used when ZSTD_NOCOMPRESS set */
  1326. DISPLAY("Compression not supported \n");
  1327. #endif
  1328. } else { /* decompression or test */
  1329. #ifndef ZSTD_NODECOMPRESS
  1330. if (filenames->tableSize == 1 && outFileName) {
  1331. operationResult = FIO_decompressFilename(fCtx, prefs, outFileName, filenames->fileNames[0], dictFileName);
  1332. } else {
  1333. operationResult = FIO_decompressMultipleFilenames(fCtx, prefs, filenames->fileNames, outMirroredDirName, outDirName, outFileName, dictFileName);
  1334. }
  1335. #else
  1336. DISPLAY("Decompression not supported \n");
  1337. #endif
  1338. }
  1339. _end:
  1340. FIO_freePreferences(prefs);
  1341. FIO_freeContext(fCtx);
  1342. if (main_pause) waitEnter();
  1343. UTIL_freeFileNamesTable(filenames);
  1344. UTIL_freeFileNamesTable(file_of_names);
  1345. #ifndef ZSTD_NOTRACE
  1346. TRACE_finish();
  1347. #endif
  1348. return operationResult;
  1349. }