fileio.c 127 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
7277827792780278127822783278427852786278727882789279027912792279327942795279627972798279928002801280228032804280528062807280828092810281128122813281428152816281728182819282028212822282328242825282628272828282928302831283228332834283528362837283828392840284128422843284428452846284728482849285028512852285328542855285628572858285928602861286228632864286528662867286828692870287128722873287428752876287728782879288028812882288328842885288628872888288928902891289228932894289528962897289828992900290129022903290429052906290729082909291029112912291329142915291629172918291929202921292229232924292529262927292829292930293129322933293429352936293729382939294029412942294329442945294629472948294929502951295229532954295529562957295829592960296129622963296429652966296729682969297029712972297329742975297629772978297929802981298229832984298529862987298829892990299129922993299429952996299729982999300030013002300330043005300630073008300930103011301230133014301530163017301830193020302130223023302430253026302730283029303030313032303330343035303630373038303930403041304230433044304530463047304830493050305130523053305430553056305730583059306030613062306330643065306630673068306930703071307230733074307530763077307830793080308130823083308430853086308730883089309030913092309330943095309630973098309931003101310231033104310531063107310831093110311131123113311431153116311731183119312031213122312331243125312631273128312931303131313231333134313531363137313831393140314131423143314431453146314731483149315031513152315331543155315631573158315931603161316231633164316531663167316831693170317131723173317431753176317731783179318031813182318331843185318631873188318931903191319231933194319531963197319831993200320132023203320432053206320732083209321032113212321332143215321632173218321932203221
  1. /*
  2. * Copyright (c) Yann Collet, Facebook, Inc.
  3. * All rights reserved.
  4. *
  5. * This source code is licensed under both the BSD-style license (found in the
  6. * LICENSE file in the root directory of this source tree) and the GPLv2 (found
  7. * in the COPYING file in the root directory of this source tree).
  8. * You may select, at your option, one of the above-listed licenses.
  9. */
  10. /* *************************************
  11. * Compiler Options
  12. ***************************************/
  13. #ifdef _MSC_VER /* Visual */
  14. # pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */
  15. # pragma warning(disable : 4204) /* non-constant aggregate initializer */
  16. #endif
  17. #if defined(__MINGW32__) && !defined(_POSIX_SOURCE)
  18. # define _POSIX_SOURCE 1 /* disable %llu warnings with MinGW on Windows */
  19. #endif
  20. /*-*************************************
  21. * Includes
  22. ***************************************/
  23. #include "platform.h" /* Large Files support, SET_BINARY_MODE */
  24. #include "util.h" /* UTIL_getFileSize, UTIL_isRegularFile, UTIL_isSameFile */
  25. #include <stdio.h> /* fprintf, open, fdopen, fread, _fileno, stdin, stdout */
  26. #include <stdlib.h> /* malloc, free */
  27. #include <string.h> /* strcmp, strlen */
  28. #include <fcntl.h> /* O_WRONLY */
  29. #include <assert.h>
  30. #include <errno.h> /* errno */
  31. #include <limits.h> /* INT_MAX */
  32. #include <signal.h>
  33. #include "timefn.h" /* UTIL_getTime, UTIL_clockSpanMicro */
  34. #if defined (_MSC_VER)
  35. # include <sys/stat.h>
  36. # include <io.h>
  37. #endif
  38. #include "../lib/common/mem.h" /* U32, U64 */
  39. #include "fileio.h"
  40. #define ZSTD_STATIC_LINKING_ONLY /* ZSTD_magicNumber, ZSTD_frameHeaderSize_max */
  41. #include "../lib/zstd.h"
  42. #include "../lib/zstd_errors.h" /* ZSTD_error_frameParameter_windowTooLarge */
  43. #if defined(ZSTD_GZCOMPRESS) || defined(ZSTD_GZDECOMPRESS)
  44. # error #include <zlib.h>
  45. # if !defined(z_const)
  46. # define z_const
  47. # endif
  48. #endif
  49. #if defined(ZSTD_LZMACOMPRESS) || defined(ZSTD_LZMADECOMPRESS)
  50. # error #include <lzma.h>
  51. #endif
  52. #define LZ4_MAGICNUMBER 0x184D2204
  53. #if defined(ZSTD_LZ4COMPRESS) || defined(ZSTD_LZ4DECOMPRESS)
  54. # define LZ4F_ENABLE_OBSOLETE_ENUMS
  55. # error #include <lz4frame.h>
  56. # error #include <lz4.h>
  57. #endif
  58. /*-*************************************
  59. * Constants
  60. ***************************************/
  61. #define ADAPT_WINDOWLOG_DEFAULT 23 /* 8 MB */
  62. #define DICTSIZE_MAX (32 MB) /* protection against large input (attack scenario) */
  63. #define FNSPACE 30
  64. /* Default file permissions 0666 (modulated by umask) */
  65. #if !defined(_WIN32)
  66. /* These macros aren't defined on windows. */
  67. #define DEFAULT_FILE_PERMISSIONS (S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP|S_IROTH|S_IWOTH)
  68. #else
  69. #define DEFAULT_FILE_PERMISSIONS (0666)
  70. #endif
  71. /*-*************************************
  72. * Macros
  73. ***************************************/
  74. #define KB *(1 <<10)
  75. #define MB *(1 <<20)
  76. #define GB *(1U<<30)
  77. #undef MAX
  78. #define MAX(a,b) ((a)>(b) ? (a) : (b))
  79. struct FIO_display_prefs_s {
  80. int displayLevel; /* 0 : no display; 1: errors; 2: + result + interaction + warnings; 3: + progression; 4: + information */
  81. FIO_progressSetting_e progressSetting;
  82. };
  83. static FIO_display_prefs_t g_display_prefs = {2, FIO_ps_auto};
  84. #define DISPLAY(...) fprintf(stderr, __VA_ARGS__)
  85. #define DISPLAYOUT(...) fprintf(stdout, __VA_ARGS__)
  86. #define DISPLAYLEVEL(l, ...) { if (g_display_prefs.displayLevel>=l) { DISPLAY(__VA_ARGS__); } }
  87. static const U64 g_refreshRate = SEC_TO_MICRO / 6;
  88. static UTIL_time_t g_displayClock = UTIL_TIME_INITIALIZER;
  89. #define READY_FOR_UPDATE() ((g_display_prefs.progressSetting != FIO_ps_never) && UTIL_clockSpanMicro(g_displayClock) > g_refreshRate)
  90. #define DELAY_NEXT_UPDATE() { g_displayClock = UTIL_getTime(); }
  91. #define DISPLAYUPDATE(l, ...) { \
  92. if (g_display_prefs.displayLevel>=l && (g_display_prefs.progressSetting != FIO_ps_never)) { \
  93. if (READY_FOR_UPDATE() || (g_display_prefs.displayLevel>=4)) { \
  94. DELAY_NEXT_UPDATE(); \
  95. DISPLAY(__VA_ARGS__); \
  96. if (g_display_prefs.displayLevel>=4) fflush(stderr); \
  97. } } }
  98. #undef MIN /* in case it would be already defined */
  99. #define MIN(a,b) ((a) < (b) ? (a) : (b))
  100. #define EXM_THROW(error, ...) \
  101. { \
  102. DISPLAYLEVEL(1, "zstd: "); \
  103. DISPLAYLEVEL(5, "Error defined at %s, line %i : \n", __FILE__, __LINE__); \
  104. DISPLAYLEVEL(1, "error %i : ", error); \
  105. DISPLAYLEVEL(1, __VA_ARGS__); \
  106. DISPLAYLEVEL(1, " \n"); \
  107. exit(error); \
  108. }
  109. #define CHECK_V(v, f) \
  110. v = f; \
  111. if (ZSTD_isError(v)) { \
  112. DISPLAYLEVEL(5, "%s \n", #f); \
  113. EXM_THROW(11, "%s", ZSTD_getErrorName(v)); \
  114. }
  115. #define CHECK(f) { size_t err; CHECK_V(err, f); }
  116. /*-************************************
  117. * Signal (Ctrl-C trapping)
  118. **************************************/
  119. static const char* g_artefact = NULL;
  120. static void INThandler(int sig)
  121. {
  122. assert(sig==SIGINT); (void)sig;
  123. #if !defined(_MSC_VER)
  124. signal(sig, SIG_IGN); /* this invocation generates a buggy warning in Visual Studio */
  125. #endif
  126. if (g_artefact) {
  127. assert(UTIL_isRegularFile(g_artefact));
  128. remove(g_artefact);
  129. }
  130. DISPLAY("\n");
  131. exit(2);
  132. }
  133. static void addHandler(char const* dstFileName)
  134. {
  135. if (UTIL_isRegularFile(dstFileName)) {
  136. g_artefact = dstFileName;
  137. signal(SIGINT, INThandler);
  138. } else {
  139. g_artefact = NULL;
  140. }
  141. }
  142. /* Idempotent */
  143. static void clearHandler(void)
  144. {
  145. if (g_artefact) signal(SIGINT, SIG_DFL);
  146. g_artefact = NULL;
  147. }
  148. /*-*********************************************************
  149. * Termination signal trapping (Print debug stack trace)
  150. ***********************************************************/
  151. #if defined(__has_feature) && !defined(BACKTRACE_ENABLE) /* Clang compiler */
  152. # if (__has_feature(address_sanitizer))
  153. # define BACKTRACE_ENABLE 0
  154. # endif /* __has_feature(address_sanitizer) */
  155. #elif defined(__SANITIZE_ADDRESS__) && !defined(BACKTRACE_ENABLE) /* GCC compiler */
  156. # define BACKTRACE_ENABLE 0
  157. #endif
  158. #if !defined(BACKTRACE_ENABLE)
  159. /* automatic detector : backtrace enabled by default on linux+glibc and osx */
  160. # if (defined(__linux__) && (defined(__GLIBC__) && !defined(__UCLIBC__))) \
  161. || (defined(__APPLE__) && defined(__MACH__))
  162. # define BACKTRACE_ENABLE 1
  163. # else
  164. # define BACKTRACE_ENABLE 0
  165. # endif
  166. #endif
  167. /* note : after this point, BACKTRACE_ENABLE is necessarily defined */
  168. #if BACKTRACE_ENABLE
  169. #include <execinfo.h> /* backtrace, backtrace_symbols */
  170. #define MAX_STACK_FRAMES 50
  171. static void ABRThandler(int sig) {
  172. const char* name;
  173. void* addrlist[MAX_STACK_FRAMES];
  174. char** symbollist;
  175. int addrlen, i;
  176. switch (sig) {
  177. case SIGABRT: name = "SIGABRT"; break;
  178. case SIGFPE: name = "SIGFPE"; break;
  179. case SIGILL: name = "SIGILL"; break;
  180. case SIGINT: name = "SIGINT"; break;
  181. case SIGSEGV: name = "SIGSEGV"; break;
  182. default: name = "UNKNOWN";
  183. }
  184. DISPLAY("Caught %s signal, printing stack:\n", name);
  185. /* Retrieve current stack addresses. */
  186. addrlen = backtrace(addrlist, MAX_STACK_FRAMES);
  187. if (addrlen == 0) {
  188. DISPLAY("\n");
  189. return;
  190. }
  191. /* Create readable strings to each frame. */
  192. symbollist = backtrace_symbols(addrlist, addrlen);
  193. /* Print the stack trace, excluding calls handling the signal. */
  194. for (i = ZSTD_START_SYMBOLLIST_FRAME; i < addrlen; i++) {
  195. DISPLAY("%s\n", symbollist[i]);
  196. }
  197. free(symbollist);
  198. /* Reset and raise the signal so default handler runs. */
  199. signal(sig, SIG_DFL);
  200. raise(sig);
  201. }
  202. #endif
  203. void FIO_addAbortHandler()
  204. {
  205. #if BACKTRACE_ENABLE
  206. signal(SIGABRT, ABRThandler);
  207. signal(SIGFPE, ABRThandler);
  208. signal(SIGILL, ABRThandler);
  209. signal(SIGSEGV, ABRThandler);
  210. signal(SIGBUS, ABRThandler);
  211. #endif
  212. }
  213. /*-************************************************************
  214. * Avoid fseek()'s 2GiB barrier with MSVC, macOS, *BSD, MinGW
  215. ***************************************************************/
  216. #if defined(_MSC_VER) && _MSC_VER >= 1400
  217. # define LONG_SEEK _fseeki64
  218. # define LONG_TELL _ftelli64
  219. #elif !defined(__64BIT__) && (PLATFORM_POSIX_VERSION >= 200112L) /* No point defining Large file for 64 bit */
  220. # define LONG_SEEK fseeko
  221. # define LONG_TELL ftello
  222. #elif defined(__MINGW32__) && !defined(__STRICT_ANSI__) && !defined(__NO_MINGW_LFS) && defined(__MSVCRT__)
  223. # define LONG_SEEK fseeko64
  224. # define LONG_TELL ftello64
  225. #elif defined(_WIN32) && !defined(__DJGPP__)
  226. # include <windows.h>
  227. static int LONG_SEEK(FILE* file, __int64 offset, int origin) {
  228. LARGE_INTEGER off;
  229. DWORD method;
  230. off.QuadPart = offset;
  231. if (origin == SEEK_END)
  232. method = FILE_END;
  233. else if (origin == SEEK_CUR)
  234. method = FILE_CURRENT;
  235. else
  236. method = FILE_BEGIN;
  237. if (SetFilePointerEx((HANDLE) _get_osfhandle(_fileno(file)), off, NULL, method))
  238. return 0;
  239. else
  240. return -1;
  241. }
  242. static __int64 LONG_TELL(FILE* file) {
  243. LARGE_INTEGER off, newOff;
  244. off.QuadPart = 0;
  245. newOff.QuadPart = 0;
  246. SetFilePointerEx((HANDLE) _get_osfhandle(_fileno(file)), off, &newOff, FILE_CURRENT);
  247. return newOff.QuadPart;
  248. }
  249. #else
  250. # define LONG_SEEK fseek
  251. # define LONG_TELL ftell
  252. #endif
  253. /*-*************************************
  254. * Parameters: FIO_prefs_t
  255. ***************************************/
  256. /* typedef'd to FIO_prefs_t within fileio.h */
  257. struct FIO_prefs_s {
  258. /* Algorithm preferences */
  259. FIO_compressionType_t compressionType;
  260. U32 sparseFileSupport; /* 0: no sparse allowed; 1: auto (file yes, stdout no); 2: force sparse */
  261. int dictIDFlag;
  262. int checksumFlag;
  263. int blockSize;
  264. int overlapLog;
  265. U32 adaptiveMode;
  266. U32 useRowMatchFinder;
  267. int rsyncable;
  268. int minAdaptLevel;
  269. int maxAdaptLevel;
  270. int ldmFlag;
  271. int ldmHashLog;
  272. int ldmMinMatch;
  273. int ldmBucketSizeLog;
  274. int ldmHashRateLog;
  275. size_t streamSrcSize;
  276. size_t targetCBlockSize;
  277. int srcSizeHint;
  278. int testMode;
  279. ZSTD_paramSwitch_e literalCompressionMode;
  280. /* IO preferences */
  281. U32 removeSrcFile;
  282. U32 overwrite;
  283. /* Computation resources preferences */
  284. unsigned memLimit;
  285. int nbWorkers;
  286. int excludeCompressedFiles;
  287. int patchFromMode;
  288. int contentSize;
  289. int allowBlockDevices;
  290. };
  291. /*-*************************************
  292. * Parameters: FIO_ctx_t
  293. ***************************************/
  294. /* typedef'd to FIO_ctx_t within fileio.h */
  295. struct FIO_ctx_s {
  296. /* file i/o info */
  297. int nbFilesTotal;
  298. int hasStdinInput;
  299. int hasStdoutOutput;
  300. /* file i/o state */
  301. int currFileIdx;
  302. int nbFilesProcessed;
  303. size_t totalBytesInput;
  304. size_t totalBytesOutput;
  305. };
  306. /*-*************************************
  307. * Parameters: Initialization
  308. ***************************************/
  309. #define FIO_OVERLAP_LOG_NOTSET 9999
  310. #define FIO_LDM_PARAM_NOTSET 9999
  311. FIO_prefs_t* FIO_createPreferences(void)
  312. {
  313. FIO_prefs_t* const ret = (FIO_prefs_t*)malloc(sizeof(FIO_prefs_t));
  314. if (!ret) EXM_THROW(21, "Allocation error : not enough memory");
  315. ret->compressionType = FIO_zstdCompression;
  316. ret->overwrite = 0;
  317. ret->sparseFileSupport = ZSTD_SPARSE_DEFAULT;
  318. ret->dictIDFlag = 1;
  319. ret->checksumFlag = 1;
  320. ret->removeSrcFile = 0;
  321. ret->memLimit = 0;
  322. ret->nbWorkers = 1;
  323. ret->blockSize = 0;
  324. ret->overlapLog = FIO_OVERLAP_LOG_NOTSET;
  325. ret->adaptiveMode = 0;
  326. ret->rsyncable = 0;
  327. ret->minAdaptLevel = -50; /* initializing this value requires a constant, so ZSTD_minCLevel() doesn't work */
  328. ret->maxAdaptLevel = 22; /* initializing this value requires a constant, so ZSTD_maxCLevel() doesn't work */
  329. ret->ldmFlag = 0;
  330. ret->ldmHashLog = 0;
  331. ret->ldmMinMatch = 0;
  332. ret->ldmBucketSizeLog = FIO_LDM_PARAM_NOTSET;
  333. ret->ldmHashRateLog = FIO_LDM_PARAM_NOTSET;
  334. ret->streamSrcSize = 0;
  335. ret->targetCBlockSize = 0;
  336. ret->srcSizeHint = 0;
  337. ret->testMode = 0;
  338. ret->literalCompressionMode = ZSTD_ps_auto;
  339. ret->excludeCompressedFiles = 0;
  340. ret->allowBlockDevices = 0;
  341. return ret;
  342. }
  343. FIO_ctx_t* FIO_createContext(void)
  344. {
  345. FIO_ctx_t* const ret = (FIO_ctx_t*)malloc(sizeof(FIO_ctx_t));
  346. if (!ret) EXM_THROW(21, "Allocation error : not enough memory");
  347. ret->currFileIdx = 0;
  348. ret->hasStdinInput = 0;
  349. ret->hasStdoutOutput = 0;
  350. ret->nbFilesTotal = 1;
  351. ret->nbFilesProcessed = 0;
  352. ret->totalBytesInput = 0;
  353. ret->totalBytesOutput = 0;
  354. return ret;
  355. }
  356. void FIO_freePreferences(FIO_prefs_t* const prefs)
  357. {
  358. free(prefs);
  359. }
  360. void FIO_freeContext(FIO_ctx_t* const fCtx)
  361. {
  362. free(fCtx);
  363. }
  364. /*-*************************************
  365. * Parameters: Display Options
  366. ***************************************/
  367. void FIO_setNotificationLevel(int level) { g_display_prefs.displayLevel=level; }
  368. void FIO_setProgressSetting(FIO_progressSetting_e setting) { g_display_prefs.progressSetting = setting; }
  369. /*-*************************************
  370. * Parameters: Setters
  371. ***************************************/
  372. /* FIO_prefs_t functions */
  373. void FIO_setCompressionType(FIO_prefs_t* const prefs, FIO_compressionType_t compressionType) { prefs->compressionType = compressionType; }
  374. void FIO_overwriteMode(FIO_prefs_t* const prefs) { prefs->overwrite = 1; }
  375. void FIO_setSparseWrite(FIO_prefs_t* const prefs, unsigned sparse) { prefs->sparseFileSupport = sparse; }
  376. void FIO_setDictIDFlag(FIO_prefs_t* const prefs, int dictIDFlag) { prefs->dictIDFlag = dictIDFlag; }
  377. void FIO_setChecksumFlag(FIO_prefs_t* const prefs, int checksumFlag) { prefs->checksumFlag = checksumFlag; }
  378. void FIO_setRemoveSrcFile(FIO_prefs_t* const prefs, unsigned flag) { prefs->removeSrcFile = (flag>0); }
  379. void FIO_setMemLimit(FIO_prefs_t* const prefs, unsigned memLimit) { prefs->memLimit = memLimit; }
  380. void FIO_setNbWorkers(FIO_prefs_t* const prefs, int nbWorkers) {
  381. #ifndef ZSTD_MULTITHREAD
  382. if (nbWorkers > 0) DISPLAYLEVEL(2, "Note : multi-threading is disabled \n");
  383. #endif
  384. prefs->nbWorkers = nbWorkers;
  385. }
  386. void FIO_setExcludeCompressedFile(FIO_prefs_t* const prefs, int excludeCompressedFiles) { prefs->excludeCompressedFiles = excludeCompressedFiles; }
  387. void FIO_setAllowBlockDevices(FIO_prefs_t* const prefs, int allowBlockDevices) { prefs->allowBlockDevices = allowBlockDevices; }
  388. void FIO_setBlockSize(FIO_prefs_t* const prefs, int blockSize) {
  389. if (blockSize && prefs->nbWorkers==0)
  390. DISPLAYLEVEL(2, "Setting block size is useless in single-thread mode \n");
  391. prefs->blockSize = blockSize;
  392. }
  393. void FIO_setOverlapLog(FIO_prefs_t* const prefs, int overlapLog){
  394. if (overlapLog && prefs->nbWorkers==0)
  395. DISPLAYLEVEL(2, "Setting overlapLog is useless in single-thread mode \n");
  396. prefs->overlapLog = overlapLog;
  397. }
  398. void FIO_setAdaptiveMode(FIO_prefs_t* const prefs, unsigned adapt) {
  399. if ((adapt>0) && (prefs->nbWorkers==0))
  400. EXM_THROW(1, "Adaptive mode is not compatible with single thread mode \n");
  401. prefs->adaptiveMode = adapt;
  402. }
  403. void FIO_setUseRowMatchFinder(FIO_prefs_t* const prefs, int useRowMatchFinder) {
  404. prefs->useRowMatchFinder = useRowMatchFinder;
  405. }
  406. void FIO_setRsyncable(FIO_prefs_t* const prefs, int rsyncable) {
  407. if ((rsyncable>0) && (prefs->nbWorkers==0))
  408. EXM_THROW(1, "Rsyncable mode is not compatible with single thread mode \n");
  409. prefs->rsyncable = rsyncable;
  410. }
  411. void FIO_setStreamSrcSize(FIO_prefs_t* const prefs, size_t streamSrcSize) {
  412. prefs->streamSrcSize = streamSrcSize;
  413. }
  414. void FIO_setTargetCBlockSize(FIO_prefs_t* const prefs, size_t targetCBlockSize) {
  415. prefs->targetCBlockSize = targetCBlockSize;
  416. }
  417. void FIO_setSrcSizeHint(FIO_prefs_t* const prefs, size_t srcSizeHint) {
  418. prefs->srcSizeHint = (int)MIN((size_t)INT_MAX, srcSizeHint);
  419. }
  420. void FIO_setTestMode(FIO_prefs_t* const prefs, int testMode) {
  421. prefs->testMode = (testMode!=0);
  422. }
  423. void FIO_setLiteralCompressionMode(
  424. FIO_prefs_t* const prefs,
  425. ZSTD_paramSwitch_e mode) {
  426. prefs->literalCompressionMode = mode;
  427. }
  428. void FIO_setAdaptMin(FIO_prefs_t* const prefs, int minCLevel)
  429. {
  430. #ifndef ZSTD_NOCOMPRESS
  431. assert(minCLevel >= ZSTD_minCLevel());
  432. #endif
  433. prefs->minAdaptLevel = minCLevel;
  434. }
  435. void FIO_setAdaptMax(FIO_prefs_t* const prefs, int maxCLevel)
  436. {
  437. prefs->maxAdaptLevel = maxCLevel;
  438. }
  439. void FIO_setLdmFlag(FIO_prefs_t* const prefs, unsigned ldmFlag) {
  440. prefs->ldmFlag = (ldmFlag>0);
  441. }
  442. void FIO_setLdmHashLog(FIO_prefs_t* const prefs, int ldmHashLog) {
  443. prefs->ldmHashLog = ldmHashLog;
  444. }
  445. void FIO_setLdmMinMatch(FIO_prefs_t* const prefs, int ldmMinMatch) {
  446. prefs->ldmMinMatch = ldmMinMatch;
  447. }
  448. void FIO_setLdmBucketSizeLog(FIO_prefs_t* const prefs, int ldmBucketSizeLog) {
  449. prefs->ldmBucketSizeLog = ldmBucketSizeLog;
  450. }
  451. void FIO_setLdmHashRateLog(FIO_prefs_t* const prefs, int ldmHashRateLog) {
  452. prefs->ldmHashRateLog = ldmHashRateLog;
  453. }
  454. void FIO_setPatchFromMode(FIO_prefs_t* const prefs, int value)
  455. {
  456. prefs->patchFromMode = value != 0;
  457. }
  458. void FIO_setContentSize(FIO_prefs_t* const prefs, int value)
  459. {
  460. prefs->contentSize = value != 0;
  461. }
  462. /* FIO_ctx_t functions */
  463. void FIO_setHasStdoutOutput(FIO_ctx_t* const fCtx, int value) {
  464. fCtx->hasStdoutOutput = value;
  465. }
  466. void FIO_setNbFilesTotal(FIO_ctx_t* const fCtx, int value)
  467. {
  468. fCtx->nbFilesTotal = value;
  469. }
  470. void FIO_determineHasStdinInput(FIO_ctx_t* const fCtx, const FileNamesTable* const filenames) {
  471. size_t i = 0;
  472. for ( ; i < filenames->tableSize; ++i) {
  473. if (!strcmp(stdinmark, filenames->fileNames[i])) {
  474. fCtx->hasStdinInput = 1;
  475. return;
  476. }
  477. }
  478. }
  479. /*-*************************************
  480. * Functions
  481. ***************************************/
  482. /** FIO_removeFile() :
  483. * @result : Unlink `fileName`, even if it's read-only */
  484. static int FIO_removeFile(const char* path)
  485. {
  486. stat_t statbuf;
  487. if (!UTIL_stat(path, &statbuf)) {
  488. DISPLAYLEVEL(2, "zstd: Failed to stat %s while trying to remove it\n", path);
  489. return 0;
  490. }
  491. if (!UTIL_isRegularFileStat(&statbuf)) {
  492. DISPLAYLEVEL(2, "zstd: Refusing to remove non-regular file %s\n", path);
  493. return 0;
  494. }
  495. #if defined(_WIN32) || defined(WIN32)
  496. /* windows doesn't allow remove read-only files,
  497. * so try to make it writable first */
  498. if (!(statbuf.st_mode & _S_IWRITE)) {
  499. UTIL_chmod(path, &statbuf, _S_IWRITE);
  500. }
  501. #endif
  502. return remove(path);
  503. }
  504. /** FIO_openSrcFile() :
  505. * condition : `srcFileName` must be non-NULL. `prefs` may be NULL.
  506. * @result : FILE* to `srcFileName`, or NULL if it fails */
  507. static FILE* FIO_openSrcFile(const FIO_prefs_t* const prefs, const char* srcFileName)
  508. {
  509. stat_t statbuf;
  510. int allowBlockDevices = prefs != NULL ? prefs->allowBlockDevices : 0;
  511. assert(srcFileName != NULL);
  512. if (!strcmp (srcFileName, stdinmark)) {
  513. DISPLAYLEVEL(4,"Using stdin for input \n");
  514. SET_BINARY_MODE(stdin);
  515. return stdin;
  516. }
  517. if (!UTIL_stat(srcFileName, &statbuf)) {
  518. DISPLAYLEVEL(1, "zstd: can't stat %s : %s -- ignored \n",
  519. srcFileName, strerror(errno));
  520. return NULL;
  521. }
  522. if (!UTIL_isRegularFileStat(&statbuf)
  523. && !UTIL_isFIFOStat(&statbuf)
  524. && !(allowBlockDevices && UTIL_isBlockDevStat(&statbuf))
  525. ) {
  526. DISPLAYLEVEL(1, "zstd: %s is not a regular file -- ignored \n",
  527. srcFileName);
  528. return NULL;
  529. }
  530. { FILE* const f = fopen(srcFileName, "rb");
  531. if (f == NULL)
  532. DISPLAYLEVEL(1, "zstd: %s: %s \n", srcFileName, strerror(errno));
  533. return f;
  534. }
  535. }
  536. /** FIO_openDstFile() :
  537. * condition : `dstFileName` must be non-NULL.
  538. * @result : FILE* to `dstFileName`, or NULL if it fails */
  539. static FILE*
  540. FIO_openDstFile(FIO_ctx_t* fCtx, FIO_prefs_t* const prefs,
  541. const char* srcFileName, const char* dstFileName,
  542. const int mode)
  543. {
  544. if (prefs->testMode) return NULL; /* do not open file in test mode */
  545. assert(dstFileName != NULL);
  546. if (!strcmp (dstFileName, stdoutmark)) {
  547. DISPLAYLEVEL(4,"Using stdout for output \n");
  548. SET_BINARY_MODE(stdout);
  549. if (prefs->sparseFileSupport == 1) {
  550. prefs->sparseFileSupport = 0;
  551. DISPLAYLEVEL(4, "Sparse File Support is automatically disabled on stdout ; try --sparse \n");
  552. }
  553. return stdout;
  554. }
  555. /* ensure dst is not the same as src */
  556. if (srcFileName != NULL && UTIL_isSameFile(srcFileName, dstFileName)) {
  557. DISPLAYLEVEL(1, "zstd: Refusing to open an output file which will overwrite the input file \n");
  558. return NULL;
  559. }
  560. if (prefs->sparseFileSupport == 1) {
  561. prefs->sparseFileSupport = ZSTD_SPARSE_DEFAULT;
  562. }
  563. if (UTIL_isRegularFile(dstFileName)) {
  564. /* Check if destination file already exists */
  565. #if !defined(_WIN32)
  566. /* this test does not work on Windows :
  567. * `NUL` and `nul` are detected as regular files */
  568. if (!strcmp(dstFileName, nulmark)) {
  569. EXM_THROW(40, "%s is unexpectedly categorized as a regular file",
  570. dstFileName);
  571. }
  572. #endif
  573. if (!prefs->overwrite) {
  574. if (g_display_prefs.displayLevel <= 1) {
  575. /* No interaction possible */
  576. DISPLAY("zstd: %s already exists; not overwritten \n",
  577. dstFileName);
  578. return NULL;
  579. }
  580. DISPLAY("zstd: %s already exists; ", dstFileName);
  581. if (UTIL_requireUserConfirmation("overwrite (y/n) ? ", "Not overwritten \n", "yY", fCtx->hasStdinInput))
  582. return NULL;
  583. }
  584. /* need to unlink */
  585. FIO_removeFile(dstFileName);
  586. }
  587. {
  588. #if defined(_WIN32)
  589. /* Windows requires opening the file as a "binary" file to avoid
  590. * mangling. This macro doesn't exist on unix. */
  591. const int openflags = O_WRONLY|O_CREAT|O_TRUNC|O_BINARY;
  592. const int fd = _open(dstFileName, openflags, mode);
  593. FILE* f = NULL;
  594. if (fd != -1) {
  595. f = _fdopen(fd, "wb");
  596. }
  597. #else
  598. const int openflags = O_WRONLY|O_CREAT|O_TRUNC;
  599. const int fd = open(dstFileName, openflags, mode);
  600. FILE* f = NULL;
  601. if (fd != -1) {
  602. f = fdopen(fd, "wb");
  603. }
  604. #endif
  605. if (f == NULL) {
  606. DISPLAYLEVEL(1, "zstd: %s: %s\n", dstFileName, strerror(errno));
  607. }
  608. return f;
  609. }
  610. }
  611. /*! FIO_createDictBuffer() :
  612. * creates a buffer, pointed by `*bufferPtr`,
  613. * loads `filename` content into it, up to DICTSIZE_MAX bytes.
  614. * @return : loaded size
  615. * if fileName==NULL, returns 0 and a NULL pointer
  616. */
  617. static size_t FIO_createDictBuffer(void** bufferPtr, const char* fileName, FIO_prefs_t* const prefs)
  618. {
  619. FILE* fileHandle;
  620. U64 fileSize;
  621. stat_t statbuf;
  622. assert(bufferPtr != NULL);
  623. *bufferPtr = NULL;
  624. if (fileName == NULL) return 0;
  625. DISPLAYLEVEL(4,"Loading %s as dictionary \n", fileName);
  626. if (!UTIL_stat(fileName, &statbuf)) {
  627. EXM_THROW(31, "Stat failed on dictionary file %s: %s", fileName, strerror(errno));
  628. }
  629. if (!UTIL_isRegularFileStat(&statbuf)) {
  630. EXM_THROW(32, "Dictionary %s must be a regular file.", fileName);
  631. }
  632. fileHandle = fopen(fileName, "rb");
  633. if (fileHandle == NULL) {
  634. EXM_THROW(33, "Couldn't open dictionary %s: %s", fileName, strerror(errno));
  635. }
  636. fileSize = UTIL_getFileSizeStat(&statbuf);
  637. {
  638. size_t const dictSizeMax = prefs->patchFromMode ? prefs->memLimit : DICTSIZE_MAX;
  639. if (fileSize > dictSizeMax) {
  640. EXM_THROW(34, "Dictionary file %s is too large (> %u bytes)",
  641. fileName, (unsigned)dictSizeMax); /* avoid extreme cases */
  642. }
  643. }
  644. *bufferPtr = malloc((size_t)fileSize);
  645. if (*bufferPtr==NULL) EXM_THROW(34, "%s", strerror(errno));
  646. { size_t const readSize = fread(*bufferPtr, 1, (size_t)fileSize, fileHandle);
  647. if (readSize != fileSize) {
  648. EXM_THROW(35, "Error reading dictionary file %s : %s",
  649. fileName, strerror(errno));
  650. }
  651. }
  652. fclose(fileHandle);
  653. return (size_t)fileSize;
  654. }
  655. /* FIO_checkFilenameCollisions() :
  656. * Checks for and warns if there are any files that would have the same output path
  657. */
  658. int FIO_checkFilenameCollisions(const char** filenameTable, unsigned nbFiles) {
  659. const char **filenameTableSorted, *prevElem, *filename;
  660. unsigned u;
  661. filenameTableSorted = (const char**) malloc(sizeof(char*) * nbFiles);
  662. if (!filenameTableSorted) {
  663. DISPLAY("Unable to malloc new str array, not checking for name collisions\n");
  664. return 1;
  665. }
  666. for (u = 0; u < nbFiles; ++u) {
  667. filename = strrchr(filenameTable[u], PATH_SEP);
  668. if (filename == NULL) {
  669. filenameTableSorted[u] = filenameTable[u];
  670. } else {
  671. filenameTableSorted[u] = filename+1;
  672. }
  673. }
  674. qsort((void*)filenameTableSorted, nbFiles, sizeof(char*), UTIL_compareStr);
  675. prevElem = filenameTableSorted[0];
  676. for (u = 1; u < nbFiles; ++u) {
  677. if (strcmp(prevElem, filenameTableSorted[u]) == 0) {
  678. DISPLAY("WARNING: Two files have same filename: %s\n", prevElem);
  679. }
  680. prevElem = filenameTableSorted[u];
  681. }
  682. free((void*)filenameTableSorted);
  683. return 0;
  684. }
  /* extractFilename() :
   * @return : pointer, inside `path`, to the character following the last
   *           occurrence of `separator`, or `path` itself when `separator`
   *           does not appear in it. */
  static const char*
  extractFilename(const char* path, char separator)
  {
      const char* const lastSep = strrchr(path, separator);
      return (lastSep != NULL) ? lastSep + 1 : path;
  }
  /* FIO_createFilename_fromOutDir() :
   * Takes a source file name and specified output directory, and
   * allocates memory for and returns a pointer to final path.
   * The result is `outDirName`, a separator (added only when `outDirName` does
   * not already end with one), then the file name part of `path`.
   * The allocation is zero-filled and reserves `suffixLen` extra bytes plus the
   * terminating null, so the caller can append a suffix; the caller frees it.
   * An empty `outDirName` yields just the file name (no separator is added),
   * instead of reading before the start of the string.
   * This function never returns an error (it may abort() in case of pb)
   */
  static char*
  FIO_createFilename_fromOutDir(const char* path, const char* outDirName, const size_t suffixLen)
  {
      const char* filenameStart;
      char separator;
      char* result;
      size_t dirLen;
      size_t nameLen;
      size_t pos;

  #if defined(_MSC_VER) || defined(__MINGW32__) || defined (__MSVCRT__) /* windows support */
      separator = '\\';
  #else
      separator = '/';
  #endif

      filenameStart = extractFilename(path, separator);
  #if defined(_MSC_VER) || defined(__MINGW32__) || defined (__MSVCRT__) /* windows support */
      filenameStart = extractFilename(filenameStart, '/');  /* sometimes, '/' separator is also used on Windows (mingw+msys2) */
  #endif

      dirLen = strlen(outDirName);
      nameLen = strlen(filenameStart);

      /* room for : directory + separator + name + suffix + '\0' */
      result = (char*) calloc(1, dirLen + 1 + nameLen + suffixLen + 1);
      if (!result) {
          EXM_THROW(30, "zstd: FIO_createFilename_fromOutDir: %s", strerror(errno));
      }

      memcpy(result, outDirName, dirLen);
      pos = dirLen;
      /* dirLen > 0 guards the look-back at the last directory character */
      if (dirLen > 0 && outDirName[dirLen-1] != separator) {
          result[pos++] = separator;
      }
      memcpy(result + pos, filenameStart, nameLen);

      return result;
  }
  /* FIO_highbit64() :
   * @return : 0-based position of the most significant bit set in `v`
   *           (for instance 0 for v==1, and 63 when the top bit is set).
   * note : only meaningful for v > 0 ; this is checked by an assert().
   */
  static unsigned FIO_highbit64(unsigned long long v)
  {
      unsigned pos;
      assert(v != 0);
      /* count how many right shifts are needed before the value is exhausted */
      for (pos = 0; (v >>= 1) != 0; pos++) {
      }
      return pos;
  }
  737. static void FIO_adjustMemLimitForPatchFromMode(FIO_prefs_t* const prefs,
  738. unsigned long long const dictSize,
  739. unsigned long long const maxSrcFileSize)
  740. {
  741. unsigned long long maxSize = MAX(prefs->memLimit, MAX(dictSize, maxSrcFileSize));
  742. unsigned const maxWindowSize = (1U << ZSTD_WINDOWLOG_MAX);
  743. if (maxSize == UTIL_FILESIZE_UNKNOWN)
  744. EXM_THROW(42, "Using --patch-from with stdin requires --stream-size");
  745. assert(maxSize != UTIL_FILESIZE_UNKNOWN);
  746. if (maxSize > maxWindowSize)
  747. EXM_THROW(42, "Can't handle files larger than %u GB\n", maxWindowSize/(1 GB));
  748. FIO_setMemLimit(prefs, (unsigned)maxSize);
  749. }
  750. /* FIO_removeMultiFilesWarning() :
  751. * Returns 1 if the console should abort, 0 if console should proceed.
  752. * This function handles logic when processing multiple files with -o, displaying the appropriate warnings/prompts.
  753. *
  754. * If -f is specified, or there is just 1 file, zstd will always proceed as usual.
  755. * If --rm is specified, there will be a prompt asking for user confirmation.
  756. * If -f is specified with --rm, zstd will proceed as usual
  757. * If -q is specified with --rm, zstd will abort pre-emptively
  758. * If neither flag is specified, zstd will prompt the user for confirmation to proceed.
  759. * If --rm is not specified, then zstd will print a warning to the user (which can be silenced with -q).
  760. * However, if the output is stdout, we will always abort rather than displaying the warning prompt.
  761. */
  762. static int FIO_removeMultiFilesWarning(FIO_ctx_t* const fCtx, const FIO_prefs_t* const prefs, const char* outFileName, int displayLevelCutoff)
  763. {
  764. int error = 0;
  765. if (fCtx->nbFilesTotal > 1 && !prefs->overwrite) {
  766. if (g_display_prefs.displayLevel <= displayLevelCutoff) {
  767. if (prefs->removeSrcFile) {
  768. DISPLAYLEVEL(1, "zstd: Aborting... not deleting files and processing into dst: %s\n", outFileName);
  769. error = 1;
  770. }
  771. } else {
  772. if (!strcmp(outFileName, stdoutmark)) {
  773. DISPLAYLEVEL(2, "zstd: WARNING: all input files will be processed and concatenated into stdout. \n");
  774. } else {
  775. DISPLAYLEVEL(2, "zstd: WARNING: all input files will be processed and concatenated into a single output file: %s \n", outFileName);
  776. }
  777. DISPLAYLEVEL(2, "The concatenated output CANNOT regenerate the original directory tree. \n")
  778. if (prefs->removeSrcFile) {
  779. if (fCtx->hasStdoutOutput) {
  780. DISPLAYLEVEL(1, "Aborting. Use -f if you really want to delete the files and output to stdout\n");
  781. error = 1;
  782. } else {
  783. error = g_display_prefs.displayLevel > displayLevelCutoff && UTIL_requireUserConfirmation("This is a destructive operation. Proceed? (y/n): ", "Aborting...", "yY", fCtx->hasStdinInput);
  784. }
  785. }
  786. }
  787. }
  788. return error;
  789. }
  790. #ifndef ZSTD_NOCOMPRESS
  791. /* **********************************************************************
  792. * Compression
  793. ************************************************************************/
  794. typedef struct {
  795. FILE* srcFile;
  796. FILE* dstFile;
  797. void* srcBuffer;
  798. size_t srcBufferSize;
  799. void* dstBuffer;
  800. size_t dstBufferSize;
  801. void* dictBuffer;
  802. size_t dictBufferSize;
  803. const char* dictFileName;
  804. ZSTD_CStream* cctx;
  805. } cRess_t;
  806. /** ZSTD_cycleLog() :
  807. * condition for correct operation : hashLog > 1 */
  808. static U32 ZSTD_cycleLog(U32 hashLog, ZSTD_strategy strat)
  809. {
  810. U32 const btScale = ((U32)strat >= (U32)ZSTD_btlazy2);
  811. assert(hashLog > 1);
  812. return hashLog - btScale;
  813. }
  814. static void FIO_adjustParamsForPatchFromMode(FIO_prefs_t* const prefs,
  815. ZSTD_compressionParameters* comprParams,
  816. unsigned long long const dictSize,
  817. unsigned long long const maxSrcFileSize,
  818. int cLevel)
  819. {
  820. unsigned const fileWindowLog = FIO_highbit64(maxSrcFileSize) + 1;
  821. ZSTD_compressionParameters const cParams = ZSTD_getCParams(cLevel, (size_t)maxSrcFileSize, (size_t)dictSize);
  822. FIO_adjustMemLimitForPatchFromMode(prefs, dictSize, maxSrcFileSize);
  823. if (fileWindowLog > ZSTD_WINDOWLOG_MAX)
  824. DISPLAYLEVEL(1, "Max window log exceeded by file (compression ratio will suffer)\n");
  825. comprParams->windowLog = MAX(ZSTD_WINDOWLOG_MIN, MIN(ZSTD_WINDOWLOG_MAX, fileWindowLog));
  826. if (fileWindowLog > ZSTD_cycleLog(cParams.chainLog, cParams.strategy)) {
  827. if (!prefs->ldmFlag)
  828. DISPLAYLEVEL(1, "long mode automatically triggered\n");
  829. FIO_setLdmFlag(prefs, 1);
  830. }
  831. if (cParams.strategy >= ZSTD_btopt) {
  832. DISPLAYLEVEL(1, "[Optimal parser notes] Consider the following to improve patch size at the cost of speed:\n");
  833. DISPLAYLEVEL(1, "- Use --single-thread mode in the zstd cli\n");
  834. DISPLAYLEVEL(1, "- Set a larger targetLength (eg. --zstd=targetLength=4096)\n");
  835. DISPLAYLEVEL(1, "- Set a larger chainLog (eg. --zstd=chainLog=%u)\n", ZSTD_CHAINLOG_MAX);
  836. DISPLAYLEVEL(1, "Also consider playing around with searchLog and hashLog\n");
  837. }
  838. }
  839. static cRess_t FIO_createCResources(FIO_prefs_t* const prefs,
  840. const char* dictFileName, unsigned long long const maxSrcFileSize,
  841. int cLevel, ZSTD_compressionParameters comprParams) {
  842. cRess_t ress;
  843. memset(&ress, 0, sizeof(ress));
  844. DISPLAYLEVEL(6, "FIO_createCResources \n");
  845. ress.cctx = ZSTD_createCCtx();
  846. if (ress.cctx == NULL)
  847. EXM_THROW(30, "allocation error (%s): can't create ZSTD_CCtx",
  848. strerror(errno));
  849. ress.srcBufferSize = ZSTD_CStreamInSize();
  850. ress.srcBuffer = malloc(ress.srcBufferSize);
  851. ress.dstBufferSize = ZSTD_CStreamOutSize();
  852. /* need to update memLimit before calling createDictBuffer
  853. * because of memLimit check inside it */
  854. if (prefs->patchFromMode) {
  855. unsigned long long const ssSize = (unsigned long long)prefs->streamSrcSize;
  856. FIO_adjustParamsForPatchFromMode(prefs, &comprParams, UTIL_getFileSize(dictFileName), ssSize > 0 ? ssSize : maxSrcFileSize, cLevel);
  857. }
  858. ress.dstBuffer = malloc(ress.dstBufferSize);
  859. ress.dictBufferSize = FIO_createDictBuffer(&ress.dictBuffer, dictFileName, prefs); /* works with dictFileName==NULL */
  860. if (!ress.srcBuffer || !ress.dstBuffer)
  861. EXM_THROW(31, "allocation error : not enough memory");
  862. /* Advanced parameters, including dictionary */
  863. if (dictFileName && (ress.dictBuffer==NULL))
  864. EXM_THROW(32, "allocation error : can't create dictBuffer");
  865. ress.dictFileName = dictFileName;
  866. if (prefs->adaptiveMode && !prefs->ldmFlag && !comprParams.windowLog)
  867. comprParams.windowLog = ADAPT_WINDOWLOG_DEFAULT;
  868. CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_contentSizeFlag, prefs->contentSize) ); /* always enable content size when available (note: supposed to be default) */
  869. CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_dictIDFlag, prefs->dictIDFlag) );
  870. CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_checksumFlag, prefs->checksumFlag) );
  871. /* compression level */
  872. CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_compressionLevel, cLevel) );
  873. /* max compressed block size */
  874. CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_targetCBlockSize, (int)prefs->targetCBlockSize) );
  875. /* source size hint */
  876. CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_srcSizeHint, (int)prefs->srcSizeHint) );
  877. /* long distance matching */
  878. CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_enableLongDistanceMatching, prefs->ldmFlag) );
  879. CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_ldmHashLog, prefs->ldmHashLog) );
  880. CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_ldmMinMatch, prefs->ldmMinMatch) );
  881. if (prefs->ldmBucketSizeLog != FIO_LDM_PARAM_NOTSET) {
  882. CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_ldmBucketSizeLog, prefs->ldmBucketSizeLog) );
  883. }
  884. if (prefs->ldmHashRateLog != FIO_LDM_PARAM_NOTSET) {
  885. CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_ldmHashRateLog, prefs->ldmHashRateLog) );
  886. }
  887. CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_useRowMatchFinder, prefs->useRowMatchFinder));
  888. /* compression parameters */
  889. CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_windowLog, (int)comprParams.windowLog) );
  890. CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_chainLog, (int)comprParams.chainLog) );
  891. CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_hashLog, (int)comprParams.hashLog) );
  892. CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_searchLog, (int)comprParams.searchLog) );
  893. CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_minMatch, (int)comprParams.minMatch) );
  894. CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_targetLength, (int)comprParams.targetLength) );
  895. CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_strategy, (int)comprParams.strategy) );
  896. CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_literalCompressionMode, (int)prefs->literalCompressionMode) );
  897. CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_enableDedicatedDictSearch, 1) );
  898. /* multi-threading */
  899. #ifdef ZSTD_MULTITHREAD
  900. DISPLAYLEVEL(5,"set nb workers = %u \n", prefs->nbWorkers);
  901. CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_nbWorkers, prefs->nbWorkers) );
  902. CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_jobSize, prefs->blockSize) );
  903. if (prefs->overlapLog != FIO_OVERLAP_LOG_NOTSET) {
  904. DISPLAYLEVEL(3,"set overlapLog = %u \n", prefs->overlapLog);
  905. CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_overlapLog, prefs->overlapLog) );
  906. }
  907. CHECK( ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_rsyncable, prefs->rsyncable) );
  908. #endif
  909. /* dictionary */
  910. if (prefs->patchFromMode) {
  911. CHECK( ZSTD_CCtx_refPrefix(ress.cctx, ress.dictBuffer, ress.dictBufferSize) );
  912. } else {
  913. CHECK( ZSTD_CCtx_loadDictionary(ress.cctx, ress.dictBuffer, ress.dictBufferSize) );
  914. }
  915. return ress;
  916. }
  917. static void FIO_freeCResources(const cRess_t* const ress)
  918. {
  919. free(ress->srcBuffer);
  920. free(ress->dstBuffer);
  921. free(ress->dictBuffer);
  922. ZSTD_freeCStream(ress->cctx); /* never fails */
  923. }
  924. #ifdef ZSTD_GZCOMPRESS
  925. static unsigned long long
  926. FIO_compressGzFrame(const cRess_t* ress, /* buffers & handlers are used, but not changed */
  927. const char* srcFileName, U64 const srcFileSize,
  928. int compressionLevel, U64* readsize)
  929. {
  930. unsigned long long inFileSize = 0, outFileSize = 0;
  931. z_stream strm;
  932. if (compressionLevel > Z_BEST_COMPRESSION)
  933. compressionLevel = Z_BEST_COMPRESSION;
  934. strm.zalloc = Z_NULL;
  935. strm.zfree = Z_NULL;
  936. strm.opaque = Z_NULL;
  937. { int const ret = deflateInit2(&strm, compressionLevel, Z_DEFLATED,
  938. 15 /* maxWindowLogSize */ + 16 /* gzip only */,
  939. 8, Z_DEFAULT_STRATEGY); /* see http://www.zlib.net/manual.html */
  940. if (ret != Z_OK) {
  941. EXM_THROW(71, "zstd: %s: deflateInit2 error %d \n", srcFileName, ret);
  942. } }
  943. strm.next_in = 0;
  944. strm.avail_in = 0;
  945. strm.next_out = (Bytef*)ress->dstBuffer;
  946. strm.avail_out = (uInt)ress->dstBufferSize;
  947. while (1) {
  948. int ret;
  949. if (strm.avail_in == 0) {
  950. size_t const inSize = fread(ress->srcBuffer, 1, ress->srcBufferSize, ress->srcFile);
  951. if (inSize == 0) break;
  952. inFileSize += inSize;
  953. strm.next_in = (z_const unsigned char*)ress->srcBuffer;
  954. strm.avail_in = (uInt)inSize;
  955. }
  956. ret = deflate(&strm, Z_NO_FLUSH);
  957. if (ret != Z_OK)
  958. EXM_THROW(72, "zstd: %s: deflate error %d \n", srcFileName, ret);
  959. { size_t const cSize = ress->dstBufferSize - strm.avail_out;
  960. if (cSize) {
  961. if (fwrite(ress->dstBuffer, 1, cSize, ress->dstFile) != cSize)
  962. EXM_THROW(73, "Write error : cannot write to output file : %s ", strerror(errno));
  963. outFileSize += cSize;
  964. strm.next_out = (Bytef*)ress->dstBuffer;
  965. strm.avail_out = (uInt)ress->dstBufferSize;
  966. } }
  967. if (srcFileSize == UTIL_FILESIZE_UNKNOWN) {
  968. DISPLAYUPDATE(2, "\rRead : %u MB ==> %.2f%% ",
  969. (unsigned)(inFileSize>>20),
  970. (double)outFileSize/inFileSize*100)
  971. } else {
  972. DISPLAYUPDATE(2, "\rRead : %u / %u MB ==> %.2f%% ",
  973. (unsigned)(inFileSize>>20), (unsigned)(srcFileSize>>20),
  974. (double)outFileSize/inFileSize*100);
  975. } }
  976. while (1) {
  977. int const ret = deflate(&strm, Z_FINISH);
  978. { size_t const cSize = ress->dstBufferSize - strm.avail_out;
  979. if (cSize) {
  980. if (fwrite(ress->dstBuffer, 1, cSize, ress->dstFile) != cSize)
  981. EXM_THROW(75, "Write error : %s ", strerror(errno));
  982. outFileSize += cSize;
  983. strm.next_out = (Bytef*)ress->dstBuffer;
  984. strm.avail_out = (uInt)ress->dstBufferSize;
  985. } }
  986. if (ret == Z_STREAM_END) break;
  987. if (ret != Z_BUF_ERROR)
  988. EXM_THROW(77, "zstd: %s: deflate error %d \n", srcFileName, ret);
  989. }
  990. { int const ret = deflateEnd(&strm);
  991. if (ret != Z_OK) {
  992. EXM_THROW(79, "zstd: %s: deflateEnd error %d \n", srcFileName, ret);
  993. } }
  994. *readsize = inFileSize;
  995. return outFileSize;
  996. }
  997. #endif
  998. #ifdef ZSTD_LZMACOMPRESS
  999. static unsigned long long
  1000. FIO_compressLzmaFrame(cRess_t* ress,
  1001. const char* srcFileName, U64 const srcFileSize,
  1002. int compressionLevel, U64* readsize, int plain_lzma)
  1003. {
  1004. unsigned long long inFileSize = 0, outFileSize = 0;
  1005. lzma_stream strm = LZMA_STREAM_INIT;
  1006. lzma_action action = LZMA_RUN;
  1007. lzma_ret ret;
  1008. if (compressionLevel < 0) compressionLevel = 0;
  1009. if (compressionLevel > 9) compressionLevel = 9;
  1010. if (plain_lzma) {
  1011. lzma_options_lzma opt_lzma;
  1012. if (lzma_lzma_preset(&opt_lzma, compressionLevel))
  1013. EXM_THROW(81, "zstd: %s: lzma_lzma_preset error", srcFileName);
  1014. ret = lzma_alone_encoder(&strm, &opt_lzma); /* LZMA */
  1015. if (ret != LZMA_OK)
  1016. EXM_THROW(82, "zstd: %s: lzma_alone_encoder error %d", srcFileName, ret);
  1017. } else {
  1018. ret = lzma_easy_encoder(&strm, compressionLevel, LZMA_CHECK_CRC64); /* XZ */
  1019. if (ret != LZMA_OK)
  1020. EXM_THROW(83, "zstd: %s: lzma_easy_encoder error %d", srcFileName, ret);
  1021. }
  1022. strm.next_in = 0;
  1023. strm.avail_in = 0;
  1024. strm.next_out = (BYTE*)ress->dstBuffer;
  1025. strm.avail_out = ress->dstBufferSize;
  1026. while (1) {
  1027. if (strm.avail_in == 0) {
  1028. size_t const inSize = fread(ress->srcBuffer, 1, ress->srcBufferSize, ress->srcFile);
  1029. if (inSize == 0) action = LZMA_FINISH;
  1030. inFileSize += inSize;
  1031. strm.next_in = (BYTE const*)ress->srcBuffer;
  1032. strm.avail_in = inSize;
  1033. }
  1034. ret = lzma_code(&strm, action);
  1035. if (ret != LZMA_OK && ret != LZMA_STREAM_END)
  1036. EXM_THROW(84, "zstd: %s: lzma_code encoding error %d", srcFileName, ret);
  1037. { size_t const compBytes = ress->dstBufferSize - strm.avail_out;
  1038. if (compBytes) {
  1039. if (fwrite(ress->dstBuffer, 1, compBytes, ress->dstFile) != compBytes)
  1040. EXM_THROW(85, "Write error : %s", strerror(errno));
  1041. outFileSize += compBytes;
  1042. strm.next_out = (BYTE*)ress->dstBuffer;
  1043. strm.avail_out = ress->dstBufferSize;
  1044. } }
  1045. if (srcFileSize == UTIL_FILESIZE_UNKNOWN)
  1046. DISPLAYUPDATE(2, "\rRead : %u MB ==> %.2f%%",
  1047. (unsigned)(inFileSize>>20),
  1048. (double)outFileSize/inFileSize*100)
  1049. else
  1050. DISPLAYUPDATE(2, "\rRead : %u / %u MB ==> %.2f%%",
  1051. (unsigned)(inFileSize>>20), (unsigned)(srcFileSize>>20),
  1052. (double)outFileSize/inFileSize*100);
  1053. if (ret == LZMA_STREAM_END) break;
  1054. }
  1055. lzma_end(&strm);
  1056. *readsize = inFileSize;
  1057. return outFileSize;
  1058. }
  1059. #endif
  1060. #ifdef ZSTD_LZ4COMPRESS
  1061. #if LZ4_VERSION_NUMBER <= 10600
  1062. #define LZ4F_blockLinked blockLinked
  1063. #define LZ4F_max64KB max64KB
  1064. #endif
  /* FIO_LZ4_GetBlockSize_FromBlockId() :
   * @return : 2^(8 + 2*id), the block size in bytes associated with block id `id`
   *           (for instance 65536 for id 4). */
  static int FIO_LZ4_GetBlockSize_FromBlockId (int id)
  {
      int const sizeLog = 8 + (id * 2);
      return 1 << sizeLog;
  }
  1066. static unsigned long long
  1067. FIO_compressLz4Frame(cRess_t* ress,
  1068. const char* srcFileName, U64 const srcFileSize,
  1069. int compressionLevel, int checksumFlag,
  1070. U64* readsize)
  1071. {
  1072. const size_t blockSize = FIO_LZ4_GetBlockSize_FromBlockId(LZ4F_max64KB);
  1073. unsigned long long inFileSize = 0, outFileSize = 0;
  1074. LZ4F_preferences_t prefs;
  1075. LZ4F_compressionContext_t ctx;
  1076. LZ4F_errorCode_t const errorCode = LZ4F_createCompressionContext(&ctx, LZ4F_VERSION);
  1077. if (LZ4F_isError(errorCode))
  1078. EXM_THROW(31, "zstd: failed to create lz4 compression context");
  1079. memset(&prefs, 0, sizeof(prefs));
  1080. assert(blockSize <= ress->srcBufferSize);
  1081. prefs.autoFlush = 1;
  1082. prefs.compressionLevel = compressionLevel;
  1083. prefs.frameInfo.blockMode = LZ4F_blockLinked;
  1084. prefs.frameInfo.blockSizeID = LZ4F_max64KB;
  1085. prefs.frameInfo.contentChecksumFlag = (contentChecksum_t)checksumFlag;
  1086. #if LZ4_VERSION_NUMBER >= 10600
  1087. prefs.frameInfo.contentSize = (srcFileSize==UTIL_FILESIZE_UNKNOWN) ? 0 : srcFileSize;
  1088. #endif
  1089. assert(LZ4F_compressBound(blockSize, &prefs) <= ress->dstBufferSize);
  1090. {
  1091. size_t readSize;
  1092. size_t headerSize = LZ4F_compressBegin(ctx, ress->dstBuffer, ress->dstBufferSize, &prefs);
  1093. if (LZ4F_isError(headerSize))
  1094. EXM_THROW(33, "File header generation failed : %s",
  1095. LZ4F_getErrorName(headerSize));
  1096. if (fwrite(ress->dstBuffer, 1, headerSize, ress->dstFile) != headerSize)
  1097. EXM_THROW(34, "Write error : %s (cannot write header)", strerror(errno));
  1098. outFileSize += headerSize;
  1099. /* Read first block */
  1100. readSize = fread(ress->srcBuffer, (size_t)1, (size_t)blockSize, ress->srcFile);
  1101. inFileSize += readSize;
  1102. /* Main Loop */
  1103. while (readSize>0) {
  1104. size_t const outSize = LZ4F_compressUpdate(ctx,
  1105. ress->dstBuffer, ress->dstBufferSize,
  1106. ress->srcBuffer, readSize, NULL);
  1107. if (LZ4F_isError(outSize))
  1108. EXM_THROW(35, "zstd: %s: lz4 compression failed : %s",
  1109. srcFileName, LZ4F_getErrorName(outSize));
  1110. outFileSize += outSize;
  1111. if (srcFileSize == UTIL_FILESIZE_UNKNOWN) {
  1112. DISPLAYUPDATE(2, "\rRead : %u MB ==> %.2f%%",
  1113. (unsigned)(inFileSize>>20),
  1114. (double)outFileSize/inFileSize*100)
  1115. } else {
  1116. DISPLAYUPDATE(2, "\rRead : %u / %u MB ==> %.2f%%",
  1117. (unsigned)(inFileSize>>20), (unsigned)(srcFileSize>>20),
  1118. (double)outFileSize/inFileSize*100);
  1119. }
  1120. /* Write Block */
  1121. { size_t const sizeCheck = fwrite(ress->dstBuffer, 1, outSize, ress->dstFile);
  1122. if (sizeCheck != outSize)
  1123. EXM_THROW(36, "Write error : %s", strerror(errno));
  1124. }
  1125. /* Read next block */
  1126. readSize = fread(ress->srcBuffer, (size_t)1, (size_t)blockSize, ress->srcFile);
  1127. inFileSize += readSize;
  1128. }
  1129. if (ferror(ress->srcFile)) EXM_THROW(37, "Error reading %s ", srcFileName);
  1130. /* End of Stream mark */
  1131. headerSize = LZ4F_compressEnd(ctx, ress->dstBuffer, ress->dstBufferSize, NULL);
  1132. if (LZ4F_isError(headerSize))
  1133. EXM_THROW(38, "zstd: %s: lz4 end of file generation failed : %s",
  1134. srcFileName, LZ4F_getErrorName(headerSize));
  1135. { size_t const sizeCheck = fwrite(ress->dstBuffer, 1, headerSize, ress->dstFile);
  1136. if (sizeCheck != headerSize)
  1137. EXM_THROW(39, "Write error : %s (cannot write end of stream)",
  1138. strerror(errno));
  1139. }
  1140. outFileSize += headerSize;
  1141. }
  1142. *readsize = inFileSize;
  1143. LZ4F_freeCompressionContext(ctx);
  1144. return outFileSize;
  1145. }
  1146. #endif
  1147. static unsigned long long
  1148. FIO_compressZstdFrame(FIO_ctx_t* const fCtx,
  1149. FIO_prefs_t* const prefs,
  1150. const cRess_t* ressPtr,
  1151. const char* srcFileName, U64 fileSize,
  1152. int compressionLevel, U64* readsize)
  1153. {
  1154. cRess_t const ress = *ressPtr;
  1155. FILE* const srcFile = ress.srcFile;
  1156. FILE* const dstFile = ress.dstFile;
  1157. U64 compressedfilesize = 0;
  1158. ZSTD_EndDirective directive = ZSTD_e_continue;
  1159. U64 pledgedSrcSize = ZSTD_CONTENTSIZE_UNKNOWN;
  1160. /* stats */
  1161. ZSTD_frameProgression previous_zfp_update = { 0, 0, 0, 0, 0, 0 };
  1162. ZSTD_frameProgression previous_zfp_correction = { 0, 0, 0, 0, 0, 0 };
  1163. typedef enum { noChange, slower, faster } speedChange_e;
  1164. speedChange_e speedChange = noChange;
  1165. unsigned flushWaiting = 0;
  1166. unsigned inputPresented = 0;
  1167. unsigned inputBlocked = 0;
  1168. unsigned lastJobID = 0;
  1169. UTIL_HumanReadableSize_t const file_hrs = UTIL_makeHumanReadableSize(fileSize);
  1170. DISPLAYLEVEL(6, "compression using zstd format \n");
  1171. /* init */
  1172. if (fileSize != UTIL_FILESIZE_UNKNOWN) {
  1173. pledgedSrcSize = fileSize;
  1174. CHECK(ZSTD_CCtx_setPledgedSrcSize(ress.cctx, fileSize));
  1175. } else if (prefs->streamSrcSize > 0) {
  1176. /* unknown source size; use the declared stream size */
  1177. pledgedSrcSize = prefs->streamSrcSize;
  1178. CHECK( ZSTD_CCtx_setPledgedSrcSize(ress.cctx, prefs->streamSrcSize) );
  1179. }
  1180. {
  1181. int windowLog;
  1182. UTIL_HumanReadableSize_t windowSize;
  1183. CHECK(ZSTD_CCtx_getParameter(ress.cctx, ZSTD_c_windowLog, &windowLog));
  1184. if (windowLog == 0) {
  1185. const ZSTD_compressionParameters cParams = ZSTD_getCParams(compressionLevel, fileSize, 0);
  1186. windowLog = cParams.windowLog;
  1187. }
  1188. windowSize = UTIL_makeHumanReadableSize(MAX(1ULL, MIN(1ULL << windowLog, pledgedSrcSize)));
  1189. DISPLAYLEVEL(4, "Decompression will require %.*f%s of memory\n", windowSize.precision, windowSize.value, windowSize.suffix);
  1190. }
  1191. (void)srcFileName;
  1192. /* Main compression loop */
  1193. do {
  1194. size_t stillToFlush;
  1195. /* Fill input Buffer */
  1196. size_t const inSize = fread(ress.srcBuffer, (size_t)1, ress.srcBufferSize, srcFile);
  1197. ZSTD_inBuffer inBuff = { ress.srcBuffer, inSize, 0 };
  1198. DISPLAYLEVEL(6, "fread %u bytes from source \n", (unsigned)inSize);
  1199. *readsize += inSize;
  1200. if ((inSize == 0) || (*readsize == fileSize))
  1201. directive = ZSTD_e_end;
  1202. stillToFlush = 1;
  1203. while ((inBuff.pos != inBuff.size) /* input buffer must be entirely ingested */
  1204. || (directive == ZSTD_e_end && stillToFlush != 0) ) {
  1205. size_t const oldIPos = inBuff.pos;
  1206. ZSTD_outBuffer outBuff = { ress.dstBuffer, ress.dstBufferSize, 0 };
  1207. size_t const toFlushNow = ZSTD_toFlushNow(ress.cctx);
  1208. CHECK_V(stillToFlush, ZSTD_compressStream2(ress.cctx, &outBuff, &inBuff, directive));
  1209. /* count stats */
  1210. inputPresented++;
  1211. if (oldIPos == inBuff.pos) inputBlocked++; /* input buffer is full and can't take any more : input speed is faster than consumption rate */
  1212. if (!toFlushNow) flushWaiting = 1;
  1213. /* Write compressed stream */
  1214. DISPLAYLEVEL(6, "ZSTD_compress_generic(end:%u) => input pos(%u)<=(%u)size ; output generated %u bytes \n",
  1215. (unsigned)directive, (unsigned)inBuff.pos, (unsigned)inBuff.size, (unsigned)outBuff.pos);
  1216. if (outBuff.pos) {
  1217. size_t const sizeCheck = fwrite(ress.dstBuffer, 1, outBuff.pos, dstFile);
  1218. if (sizeCheck != outBuff.pos)
  1219. EXM_THROW(25, "Write error : %s (cannot write compressed block)",
  1220. strerror(errno));
  1221. compressedfilesize += outBuff.pos;
  1222. }
  1223. /* display notification; and adapt compression level */
  1224. if (READY_FOR_UPDATE()) {
  1225. ZSTD_frameProgression const zfp = ZSTD_getFrameProgression(ress.cctx);
  1226. double const cShare = (double)zfp.produced / (double)(zfp.consumed + !zfp.consumed/*avoid div0*/) * 100;
  1227. UTIL_HumanReadableSize_t const buffered_hrs = UTIL_makeHumanReadableSize(zfp.ingested - zfp.consumed);
  1228. UTIL_HumanReadableSize_t const consumed_hrs = UTIL_makeHumanReadableSize(zfp.consumed);
  1229. UTIL_HumanReadableSize_t const produced_hrs = UTIL_makeHumanReadableSize(zfp.produced);
  1230. /* display progress notifications */
  1231. if (g_display_prefs.displayLevel >= 3) {
  1232. DISPLAYUPDATE(3, "\r(L%i) Buffered :%6.*f%4s - Consumed :%6.*f%4s - Compressed :%6.*f%4s => %.2f%% ",
  1233. compressionLevel,
  1234. buffered_hrs.precision, buffered_hrs.value, buffered_hrs.suffix,
  1235. consumed_hrs.precision, consumed_hrs.value, consumed_hrs.suffix,
  1236. produced_hrs.precision, produced_hrs.value, produced_hrs.suffix,
  1237. cShare );
  1238. } else if (g_display_prefs.displayLevel >= 2 || g_display_prefs.progressSetting == FIO_ps_always) {
  1239. /* Require level 2 or forcibly displayed progress counter for summarized updates */
  1240. DISPLAYLEVEL(1, "\r%79s\r", ""); /* Clear out the current displayed line */
  1241. if (fCtx->nbFilesTotal > 1) {
  1242. size_t srcFileNameSize = strlen(srcFileName);
  1243. /* Ensure that the string we print is roughly the same size each time */
  1244. if (srcFileNameSize > 18) {
  1245. const char* truncatedSrcFileName = srcFileName + srcFileNameSize - 15;
  1246. DISPLAYLEVEL(1, "Compress: %u/%u files. Current: ...%s ",
  1247. fCtx->currFileIdx+1, fCtx->nbFilesTotal, truncatedSrcFileName);
  1248. } else {
  1249. DISPLAYLEVEL(1, "Compress: %u/%u files. Current: %*s ",
  1250. fCtx->currFileIdx+1, fCtx->nbFilesTotal, (int)(18-srcFileNameSize), srcFileName);
  1251. }
  1252. }
  1253. DISPLAYLEVEL(1, "Read:%6.*f%4s ", consumed_hrs.precision, consumed_hrs.value, consumed_hrs.suffix);
  1254. if (fileSize != UTIL_FILESIZE_UNKNOWN)
  1255. DISPLAYLEVEL(2, "/%6.*f%4s", file_hrs.precision, file_hrs.value, file_hrs.suffix);
  1256. DISPLAYLEVEL(1, " ==> %2.f%%", cShare);
  1257. DELAY_NEXT_UPDATE();
  1258. }
  1259. /* adaptive mode : statistics measurement and speed correction */
  1260. if (prefs->adaptiveMode) {
  1261. /* check output speed */
  1262. if (zfp.currentJobID > 1) { /* only possible if nbWorkers >= 1 */
  1263. unsigned long long newlyProduced = zfp.produced - previous_zfp_update.produced;
  1264. unsigned long long newlyFlushed = zfp.flushed - previous_zfp_update.flushed;
  1265. assert(zfp.produced >= previous_zfp_update.produced);
  1266. assert(prefs->nbWorkers >= 1);
  1267. /* test if compression is blocked
  1268. * either because output is slow and all buffers are full
  1269. * or because input is slow and no job can start while waiting for at least one buffer to be filled.
  1270. * note : exclude starting part, since currentJobID > 1 */
  1271. if ( (zfp.consumed == previous_zfp_update.consumed) /* no data compressed : no data available, or no more buffer to compress to, OR compression is really slow (compression of a single block is slower than update rate)*/
  1272. && (zfp.nbActiveWorkers == 0) /* confirmed : no compression ongoing */
  1273. ) {
  1274. DISPLAYLEVEL(6, "all buffers full : compression stopped => slow down \n")
  1275. speedChange = slower;
  1276. }
  1277. previous_zfp_update = zfp;
  1278. if ( (newlyProduced > (newlyFlushed * 9 / 8)) /* compression produces more data than output can flush (though production can be spiky, due to work unit : (N==4)*block sizes) */
  1279. && (flushWaiting == 0) /* flush speed was never slowed by lack of production, so it's operating at max capacity */
  1280. ) {
  1281. DISPLAYLEVEL(6, "compression faster than flush (%llu > %llu), and flushed was never slowed down by lack of production => slow down \n", newlyProduced, newlyFlushed);
  1282. speedChange = slower;
  1283. }
  1284. flushWaiting = 0;
  1285. }
  1286. /* course correct only if there is at least one new job completed */
  1287. if (zfp.currentJobID > lastJobID) {
  1288. DISPLAYLEVEL(6, "compression level adaptation check \n")
  1289. /* check input speed */
  1290. if (zfp.currentJobID > (unsigned)(prefs->nbWorkers+1)) { /* warm up period, to fill all workers */
  1291. if (inputBlocked <= 0) {
  1292. DISPLAYLEVEL(6, "input is never blocked => input is slower than ingestion \n");
  1293. speedChange = slower;
  1294. } else if (speedChange == noChange) {
  1295. unsigned long long newlyIngested = zfp.ingested - previous_zfp_correction.ingested;
  1296. unsigned long long newlyConsumed = zfp.consumed - previous_zfp_correction.consumed;
  1297. unsigned long long newlyProduced = zfp.produced - previous_zfp_correction.produced;
  1298. unsigned long long newlyFlushed = zfp.flushed - previous_zfp_correction.flushed;
  1299. previous_zfp_correction = zfp;
  1300. assert(inputPresented > 0);
  1301. DISPLAYLEVEL(6, "input blocked %u/%u(%.2f) - ingested:%u vs %u:consumed - flushed:%u vs %u:produced \n",
  1302. inputBlocked, inputPresented, (double)inputBlocked/inputPresented*100,
  1303. (unsigned)newlyIngested, (unsigned)newlyConsumed,
  1304. (unsigned)newlyFlushed, (unsigned)newlyProduced);
  1305. if ( (inputBlocked > inputPresented / 8) /* input is waiting often, because input buffers is full : compression or output too slow */
  1306. && (newlyFlushed * 33 / 32 > newlyProduced) /* flush everything that is produced */
  1307. && (newlyIngested * 33 / 32 > newlyConsumed) /* input speed as fast or faster than compression speed */
  1308. ) {
  1309. DISPLAYLEVEL(6, "recommend faster as in(%llu) >= (%llu)comp(%llu) <= out(%llu) \n",
  1310. newlyIngested, newlyConsumed, newlyProduced, newlyFlushed);
  1311. speedChange = faster;
  1312. }
  1313. }
  1314. inputBlocked = 0;
  1315. inputPresented = 0;
  1316. }
  1317. if (speedChange == slower) {
  1318. DISPLAYLEVEL(6, "slower speed , higher compression \n")
  1319. compressionLevel ++;
  1320. if (compressionLevel > ZSTD_maxCLevel()) compressionLevel = ZSTD_maxCLevel();
  1321. if (compressionLevel > prefs->maxAdaptLevel) compressionLevel = prefs->maxAdaptLevel;
  1322. compressionLevel += (compressionLevel == 0); /* skip 0 */
  1323. ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_compressionLevel, compressionLevel);
  1324. }
  1325. if (speedChange == faster) {
  1326. DISPLAYLEVEL(6, "faster speed , lighter compression \n")
  1327. compressionLevel --;
  1328. if (compressionLevel < prefs->minAdaptLevel) compressionLevel = prefs->minAdaptLevel;
  1329. compressionLevel -= (compressionLevel == 0); /* skip 0 */
  1330. ZSTD_CCtx_setParameter(ress.cctx, ZSTD_c_compressionLevel, compressionLevel);
  1331. }
  1332. speedChange = noChange;
  1333. lastJobID = zfp.currentJobID;
  1334. } /* if (zfp.currentJobID > lastJobID) */
  1335. } /* if (g_adaptiveMode) */
  1336. } /* if (READY_FOR_UPDATE()) */
  1337. } /* while ((inBuff.pos != inBuff.size) */
  1338. } while (directive != ZSTD_e_end);
  1339. if (ferror(srcFile)) {
  1340. EXM_THROW(26, "Read error : I/O error");
  1341. }
  1342. if (fileSize != UTIL_FILESIZE_UNKNOWN && *readsize != fileSize) {
  1343. EXM_THROW(27, "Read error : Incomplete read : %llu / %llu B",
  1344. (unsigned long long)*readsize, (unsigned long long)fileSize);
  1345. }
  1346. return compressedfilesize;
  1347. }
  1348. /*! FIO_compressFilename_internal() :
  1349. * same as FIO_compressFilename_extRess(), with `ress.desFile` already opened.
  1350. * @return : 0 : compression completed correctly,
  1351. * 1 : missing or pb opening srcFileName
  1352. */
  1353. static int
  1354. FIO_compressFilename_internal(FIO_ctx_t* const fCtx,
  1355. FIO_prefs_t* const prefs,
  1356. cRess_t ress,
  1357. const char* dstFileName, const char* srcFileName,
  1358. int compressionLevel)
  1359. {
  1360. UTIL_time_t const timeStart = UTIL_getTime();
  1361. clock_t const cpuStart = clock();
  1362. U64 readsize = 0;
  1363. U64 compressedfilesize = 0;
  1364. U64 const fileSize = UTIL_getFileSize(srcFileName);
  1365. DISPLAYLEVEL(5, "%s: %llu bytes \n", srcFileName, (unsigned long long)fileSize);
  1366. /* compression format selection */
  1367. switch (prefs->compressionType) {
  1368. default:
  1369. case FIO_zstdCompression:
  1370. compressedfilesize = FIO_compressZstdFrame(fCtx, prefs, &ress, srcFileName, fileSize, compressionLevel, &readsize);
  1371. break;
  1372. case FIO_gzipCompression:
  1373. #ifdef ZSTD_GZCOMPRESS
  1374. compressedfilesize = FIO_compressGzFrame(&ress, srcFileName, fileSize, compressionLevel, &readsize);
  1375. #else
  1376. (void)compressionLevel;
  1377. EXM_THROW(20, "zstd: %s: file cannot be compressed as gzip (zstd compiled without ZSTD_GZCOMPRESS) -- ignored \n",
  1378. srcFileName);
  1379. #endif
  1380. break;
  1381. case FIO_xzCompression:
  1382. case FIO_lzmaCompression:
  1383. #ifdef ZSTD_LZMACOMPRESS
  1384. compressedfilesize = FIO_compressLzmaFrame(&ress, srcFileName, fileSize, compressionLevel, &readsize, prefs->compressionType==FIO_lzmaCompression);
  1385. #else
  1386. (void)compressionLevel;
  1387. EXM_THROW(20, "zstd: %s: file cannot be compressed as xz/lzma (zstd compiled without ZSTD_LZMACOMPRESS) -- ignored \n",
  1388. srcFileName);
  1389. #endif
  1390. break;
  1391. case FIO_lz4Compression:
  1392. #ifdef ZSTD_LZ4COMPRESS
  1393. compressedfilesize = FIO_compressLz4Frame(&ress, srcFileName, fileSize, compressionLevel, prefs->checksumFlag, &readsize);
  1394. #else
  1395. (void)compressionLevel;
  1396. EXM_THROW(20, "zstd: %s: file cannot be compressed as lz4 (zstd compiled without ZSTD_LZ4COMPRESS) -- ignored \n",
  1397. srcFileName);
  1398. #endif
  1399. break;
  1400. }
  1401. /* Status */
  1402. fCtx->totalBytesInput += (size_t)readsize;
  1403. fCtx->totalBytesOutput += (size_t)compressedfilesize;
  1404. DISPLAYLEVEL(2, "\r%79s\r", "");
  1405. if (g_display_prefs.displayLevel >= 2 &&
  1406. !fCtx->hasStdoutOutput &&
  1407. (g_display_prefs.displayLevel >= 3 || fCtx->nbFilesTotal <= 1)) {
  1408. UTIL_HumanReadableSize_t hr_isize = UTIL_makeHumanReadableSize((U64) readsize);
  1409. UTIL_HumanReadableSize_t hr_osize = UTIL_makeHumanReadableSize((U64) compressedfilesize);
  1410. if (readsize == 0) {
  1411. DISPLAYLEVEL(2,"%-20s : (%6.*f%4s => %6.*f%4s, %s) \n",
  1412. srcFileName,
  1413. hr_isize.precision, hr_isize.value, hr_isize.suffix,
  1414. hr_osize.precision, hr_osize.value, hr_osize.suffix,
  1415. dstFileName);
  1416. } else {
  1417. DISPLAYLEVEL(2,"%-20s :%6.2f%% (%6.*f%4s => %6.*f%4s, %s) \n",
  1418. srcFileName,
  1419. (double)compressedfilesize / (double)readsize * 100,
  1420. hr_isize.precision, hr_isize.value, hr_isize.suffix,
  1421. hr_osize.precision, hr_osize.value, hr_osize.suffix,
  1422. dstFileName);
  1423. }
  1424. }
  1425. /* Elapsed Time and CPU Load */
  1426. { clock_t const cpuEnd = clock();
  1427. double const cpuLoad_s = (double)(cpuEnd - cpuStart) / CLOCKS_PER_SEC;
  1428. U64 const timeLength_ns = UTIL_clockSpanNano(timeStart);
  1429. double const timeLength_s = (double)timeLength_ns / 1000000000;
  1430. double const cpuLoad_pct = (cpuLoad_s / timeLength_s) * 100;
  1431. DISPLAYLEVEL(4, "%-20s : Completed in %.2f sec (cpu load : %.0f%%)\n",
  1432. srcFileName, timeLength_s, cpuLoad_pct);
  1433. }
  1434. return 0;
  1435. }
  1436. /*! FIO_compressFilename_dstFile() :
  1437. * open dstFileName, or pass-through if ress.dstFile != NULL,
  1438. * then start compression with FIO_compressFilename_internal().
  1439. * Manages source removal (--rm) and file permissions transfer.
  1440. * note : ress.srcFile must be != NULL,
  1441. * so reach this function through FIO_compressFilename_srcFile().
  1442. * @return : 0 : compression completed correctly,
  1443. * 1 : pb
  1444. */
  1445. static int FIO_compressFilename_dstFile(FIO_ctx_t* const fCtx,
  1446. FIO_prefs_t* const prefs,
  1447. cRess_t ress,
  1448. const char* dstFileName,
  1449. const char* srcFileName,
  1450. int compressionLevel)
  1451. {
  1452. int closeDstFile = 0;
  1453. int result;
  1454. stat_t statbuf;
  1455. int transferMTime = 0;
  1456. assert(ress.srcFile != NULL);
  1457. if (ress.dstFile == NULL) {
  1458. int dstFilePermissions = DEFAULT_FILE_PERMISSIONS;
  1459. if ( strcmp (srcFileName, stdinmark)
  1460. && strcmp (dstFileName, stdoutmark)
  1461. && UTIL_stat(srcFileName, &statbuf)
  1462. && UTIL_isRegularFileStat(&statbuf) ) {
  1463. dstFilePermissions = statbuf.st_mode;
  1464. transferMTime = 1;
  1465. }
  1466. closeDstFile = 1;
  1467. DISPLAYLEVEL(6, "FIO_compressFilename_dstFile: opening dst: %s \n", dstFileName);
  1468. ress.dstFile = FIO_openDstFile(fCtx, prefs, srcFileName, dstFileName, dstFilePermissions);
  1469. if (ress.dstFile==NULL) return 1; /* could not open dstFileName */
  1470. /* Must only be added after FIO_openDstFile() succeeds.
  1471. * Otherwise we may delete the destination file if it already exists,
  1472. * and the user presses Ctrl-C when asked if they wish to overwrite.
  1473. */
  1474. addHandler(dstFileName);
  1475. }
  1476. result = FIO_compressFilename_internal(fCtx, prefs, ress, dstFileName, srcFileName, compressionLevel);
  1477. if (closeDstFile) {
  1478. FILE* const dstFile = ress.dstFile;
  1479. ress.dstFile = NULL;
  1480. clearHandler();
  1481. DISPLAYLEVEL(6, "FIO_compressFilename_dstFile: closing dst: %s \n", dstFileName);
  1482. if (fclose(dstFile)) { /* error closing dstFile */
  1483. DISPLAYLEVEL(1, "zstd: %s: %s \n", dstFileName, strerror(errno));
  1484. result=1;
  1485. }
  1486. if (transferMTime) {
  1487. UTIL_utime(dstFileName, &statbuf);
  1488. }
  1489. if ( (result != 0) /* operation failure */
  1490. && strcmp(dstFileName, stdoutmark) /* special case : don't remove() stdout */
  1491. ) {
  1492. FIO_removeFile(dstFileName); /* remove compression artefact; note don't do anything special if remove() fails */
  1493. }
  1494. }
  1495. return result;
  1496. }
  1497. /* List used to compare file extensions (used with --exclude-compressed flag)
  1498. * Different from the suffixList and should only apply to ZSTD compress operationResult
  1499. */
  1500. static const char *compressedFileExtensions[] = {
  1501. ZSTD_EXTENSION,
  1502. TZSTD_EXTENSION,
  1503. GZ_EXTENSION,
  1504. TGZ_EXTENSION,
  1505. LZMA_EXTENSION,
  1506. XZ_EXTENSION,
  1507. TXZ_EXTENSION,
  1508. LZ4_EXTENSION,
  1509. TLZ4_EXTENSION,
  1510. NULL
  1511. };
  1512. /*! FIO_compressFilename_srcFile() :
  1513. * @return : 0 : compression completed correctly,
  1514. * 1 : missing or pb opening srcFileName
  1515. */
  1516. static int
  1517. FIO_compressFilename_srcFile(FIO_ctx_t* const fCtx,
  1518. FIO_prefs_t* const prefs,
  1519. cRess_t ress,
  1520. const char* dstFileName,
  1521. const char* srcFileName,
  1522. int compressionLevel)
  1523. {
  1524. int result;
  1525. DISPLAYLEVEL(6, "FIO_compressFilename_srcFile: %s \n", srcFileName);
  1526. /* ensure src is not a directory */
  1527. if (UTIL_isDirectory(srcFileName)) {
  1528. DISPLAYLEVEL(1, "zstd: %s is a directory -- ignored \n", srcFileName);
  1529. return 1;
  1530. }
  1531. /* ensure src is not the same as dict (if present) */
  1532. if (ress.dictFileName != NULL && UTIL_isSameFile(srcFileName, ress.dictFileName)) {
  1533. DISPLAYLEVEL(1, "zstd: cannot use %s as an input file and dictionary \n", srcFileName);
  1534. return 1;
  1535. }
  1536. /* Check if "srcFile" is compressed. Only done if --exclude-compressed flag is used
  1537. * YES => ZSTD will skip compression of the file and will return 0.
  1538. * NO => ZSTD will resume with compress operation.
  1539. */
  1540. if (prefs->excludeCompressedFiles == 1 && UTIL_isCompressedFile(srcFileName, compressedFileExtensions)) {
  1541. DISPLAYLEVEL(4, "File is already compressed : %s \n", srcFileName);
  1542. return 0;
  1543. }
  1544. ress.srcFile = FIO_openSrcFile(prefs, srcFileName);
  1545. if (ress.srcFile == NULL) return 1; /* srcFile could not be opened */
  1546. result = FIO_compressFilename_dstFile(fCtx, prefs, ress, dstFileName, srcFileName, compressionLevel);
  1547. fclose(ress.srcFile);
  1548. ress.srcFile = NULL;
  1549. if ( prefs->removeSrcFile /* --rm */
  1550. && result == 0 /* success */
  1551. && strcmp(srcFileName, stdinmark) /* exception : don't erase stdin */
  1552. ) {
  1553. /* We must clear the handler, since after this point calling it would
  1554. * delete both the source and destination files.
  1555. */
  1556. clearHandler();
  1557. if (FIO_removeFile(srcFileName))
  1558. EXM_THROW(1, "zstd: %s: %s", srcFileName, strerror(errno));
  1559. }
  1560. return result;
  1561. }
  1562. static const char* checked_index(const char* options[], size_t length, size_t index) {
  1563. assert(index < length);
  1564. // Necessary to avoid warnings since -O3 will omit the above `assert`
  1565. (void) length;
  1566. return options[index];
  1567. }
  1568. #define INDEX(options, index) checked_index((options), sizeof(options) / sizeof(char*), (index))
  1569. void FIO_displayCompressionParameters(const FIO_prefs_t* prefs) {
  1570. static const char* formatOptions[5] = {ZSTD_EXTENSION, GZ_EXTENSION, XZ_EXTENSION,
  1571. LZMA_EXTENSION, LZ4_EXTENSION};
  1572. static const char* sparseOptions[3] = {" --no-sparse", "", " --sparse"};
  1573. static const char* checkSumOptions[3] = {" --no-check", "", " --check"};
  1574. static const char* rowMatchFinderOptions[3] = {"", " --no-row-match-finder", " --row-match-finder"};
  1575. static const char* compressLiteralsOptions[3] = {"", " --compress-literals", " --no-compress-literals"};
  1576. assert(g_display_prefs.displayLevel >= 4);
  1577. DISPLAY("--format=%s", formatOptions[prefs->compressionType]);
  1578. DISPLAY("%s", INDEX(sparseOptions, prefs->sparseFileSupport));
  1579. DISPLAY("%s", prefs->dictIDFlag ? "" : " --no-dictID");
  1580. DISPLAY("%s", INDEX(checkSumOptions, prefs->checksumFlag));
  1581. DISPLAY(" --block-size=%d", prefs->blockSize);
  1582. if (prefs->adaptiveMode)
  1583. DISPLAY(" --adapt=min=%d,max=%d", prefs->minAdaptLevel, prefs->maxAdaptLevel);
  1584. DISPLAY("%s", INDEX(rowMatchFinderOptions, prefs->useRowMatchFinder));
  1585. DISPLAY("%s", prefs->rsyncable ? " --rsyncable" : "");
  1586. if (prefs->streamSrcSize)
  1587. DISPLAY(" --stream-size=%u", (unsigned) prefs->streamSrcSize);
  1588. if (prefs->srcSizeHint)
  1589. DISPLAY(" --size-hint=%d", prefs->srcSizeHint);
  1590. if (prefs->targetCBlockSize)
  1591. DISPLAY(" --target-compressed-block-size=%u", (unsigned) prefs->targetCBlockSize);
  1592. DISPLAY("%s", INDEX(compressLiteralsOptions, prefs->literalCompressionMode));
  1593. DISPLAY(" --memory=%u", prefs->memLimit ? prefs->memLimit : 128 MB);
  1594. DISPLAY(" --threads=%d", prefs->nbWorkers);
  1595. DISPLAY("%s", prefs->excludeCompressedFiles ? " --exclude-compressed" : "");
  1596. DISPLAY(" --%scontent-size", prefs->contentSize ? "" : "no-");
  1597. DISPLAY("\n");
  1598. }
  1599. #undef INDEX
  1600. int FIO_compressFilename(FIO_ctx_t* const fCtx, FIO_prefs_t* const prefs, const char* dstFileName,
  1601. const char* srcFileName, const char* dictFileName,
  1602. int compressionLevel, ZSTD_compressionParameters comprParams)
  1603. {
  1604. cRess_t const ress = FIO_createCResources(prefs, dictFileName, UTIL_getFileSize(srcFileName), compressionLevel, comprParams);
  1605. int const result = FIO_compressFilename_srcFile(fCtx, prefs, ress, dstFileName, srcFileName, compressionLevel);
  1606. #define DISPLAY_LEVEL_DEFAULT 2
  1607. FIO_freeCResources(&ress);
  1608. return result;
  1609. }
  1610. /* FIO_determineCompressedName() :
  1611. * create a destination filename for compressed srcFileName.
  1612. * @return a pointer to it.
  1613. * This function never returns an error (it may abort() in case of pb)
  1614. */
  1615. static const char*
  1616. FIO_determineCompressedName(const char* srcFileName, const char* outDirName, const char* suffix)
  1617. {
  1618. static size_t dfnbCapacity = 0;
  1619. static char* dstFileNameBuffer = NULL; /* using static allocation : this function cannot be multi-threaded */
  1620. char* outDirFilename = NULL;
  1621. size_t sfnSize = strlen(srcFileName);
  1622. size_t const srcSuffixLen = strlen(suffix);
  1623. if (outDirName) {
  1624. outDirFilename = FIO_createFilename_fromOutDir(srcFileName, outDirName, srcSuffixLen);
  1625. sfnSize = strlen(outDirFilename);
  1626. assert(outDirFilename != NULL);
  1627. }
  1628. if (dfnbCapacity <= sfnSize+srcSuffixLen+1) {
  1629. /* resize buffer for dstName */
  1630. free(dstFileNameBuffer);
  1631. dfnbCapacity = sfnSize + srcSuffixLen + 30;
  1632. dstFileNameBuffer = (char*)malloc(dfnbCapacity);
  1633. if (!dstFileNameBuffer) {
  1634. EXM_THROW(30, "zstd: %s", strerror(errno));
  1635. }
  1636. }
  1637. assert(dstFileNameBuffer != NULL);
  1638. if (outDirFilename) {
  1639. memcpy(dstFileNameBuffer, outDirFilename, sfnSize);
  1640. free(outDirFilename);
  1641. } else {
  1642. memcpy(dstFileNameBuffer, srcFileName, sfnSize);
  1643. }
  1644. memcpy(dstFileNameBuffer+sfnSize, suffix, srcSuffixLen+1 /* Include terminating null */);
  1645. return dstFileNameBuffer;
  1646. }
  1647. static unsigned long long FIO_getLargestFileSize(const char** inFileNames, unsigned nbFiles)
  1648. {
  1649. size_t i;
  1650. unsigned long long fileSize, maxFileSize = 0;
  1651. for (i = 0; i < nbFiles; i++) {
  1652. fileSize = UTIL_getFileSize(inFileNames[i]);
  1653. maxFileSize = fileSize > maxFileSize ? fileSize : maxFileSize;
  1654. }
  1655. return maxFileSize;
  1656. }
  1657. /* FIO_compressMultipleFilenames() :
  1658. * compress nbFiles files
  1659. * into either one destination (outFileName),
  1660. * or into one file each (outFileName == NULL, but suffix != NULL),
  1661. * or into a destination folder (specified with -O)
  1662. */
  1663. int FIO_compressMultipleFilenames(FIO_ctx_t* const fCtx,
  1664. FIO_prefs_t* const prefs,
  1665. const char** inFileNamesTable,
  1666. const char* outMirroredRootDirName,
  1667. const char* outDirName,
  1668. const char* outFileName, const char* suffix,
  1669. const char* dictFileName, int compressionLevel,
  1670. ZSTD_compressionParameters comprParams)
  1671. {
  1672. int status;
  1673. int error = 0;
  1674. cRess_t ress = FIO_createCResources(prefs, dictFileName,
  1675. FIO_getLargestFileSize(inFileNamesTable, (unsigned)fCtx->nbFilesTotal),
  1676. compressionLevel, comprParams);
  1677. /* init */
  1678. assert(outFileName != NULL || suffix != NULL);
  1679. if (outFileName != NULL) { /* output into a single destination (stdout typically) */
  1680. if (FIO_removeMultiFilesWarning(fCtx, prefs, outFileName, 1 /* displayLevelCutoff */)) {
  1681. FIO_freeCResources(&ress);
  1682. return 1;
  1683. }
  1684. ress.dstFile = FIO_openDstFile(fCtx, prefs, NULL, outFileName, DEFAULT_FILE_PERMISSIONS);
  1685. if (ress.dstFile == NULL) { /* could not open outFileName */
  1686. error = 1;
  1687. } else {
  1688. for (; fCtx->currFileIdx < fCtx->nbFilesTotal; ++fCtx->currFileIdx) {
  1689. status = FIO_compressFilename_srcFile(fCtx, prefs, ress, outFileName, inFileNamesTable[fCtx->currFileIdx], compressionLevel);
  1690. if (!status) fCtx->nbFilesProcessed++;
  1691. error |= status;
  1692. }
  1693. if (fclose(ress.dstFile))
  1694. EXM_THROW(29, "Write error (%s) : cannot properly close %s",
  1695. strerror(errno), outFileName);
  1696. ress.dstFile = NULL;
  1697. }
  1698. } else {
  1699. if (outMirroredRootDirName)
  1700. UTIL_mirrorSourceFilesDirectories(inFileNamesTable, (unsigned)fCtx->nbFilesTotal, outMirroredRootDirName);
  1701. for (; fCtx->currFileIdx < fCtx->nbFilesTotal; ++fCtx->currFileIdx) {
  1702. const char* const srcFileName = inFileNamesTable[fCtx->currFileIdx];
  1703. const char* dstFileName = NULL;
  1704. if (outMirroredRootDirName) {
  1705. char* validMirroredDirName = UTIL_createMirroredDestDirName(srcFileName, outMirroredRootDirName);
  1706. if (validMirroredDirName) {
  1707. dstFileName = FIO_determineCompressedName(srcFileName, validMirroredDirName, suffix);
  1708. free(validMirroredDirName);
  1709. } else {
  1710. DISPLAYLEVEL(2, "zstd: --output-dir-mirror cannot compress '%s' into '%s' \n", srcFileName, outMirroredRootDirName);
  1711. error=1;
  1712. continue;
  1713. }
  1714. } else {
  1715. dstFileName = FIO_determineCompressedName(srcFileName, outDirName, suffix); /* cannot fail */
  1716. }
  1717. status = FIO_compressFilename_srcFile(fCtx, prefs, ress, dstFileName, srcFileName, compressionLevel);
  1718. if (!status) fCtx->nbFilesProcessed++;
  1719. error |= status;
  1720. }
  1721. if (outDirName)
  1722. FIO_checkFilenameCollisions(inFileNamesTable , (unsigned)fCtx->nbFilesTotal);
  1723. }
  1724. if (fCtx->nbFilesProcessed >= 1 && fCtx->nbFilesTotal > 1 && fCtx->totalBytesInput != 0) {
  1725. UTIL_HumanReadableSize_t hr_isize = UTIL_makeHumanReadableSize((U64) fCtx->totalBytesInput);
  1726. UTIL_HumanReadableSize_t hr_osize = UTIL_makeHumanReadableSize((U64) fCtx->totalBytesOutput);
  1727. DISPLAYLEVEL(2, "\r%79s\r", "");
  1728. DISPLAYLEVEL(2, "%3d files compressed :%.2f%% (%6.*f%4s => %6.*f%4s)\n",
  1729. fCtx->nbFilesProcessed,
  1730. (double)fCtx->totalBytesOutput/((double)fCtx->totalBytesInput)*100,
  1731. hr_isize.precision, hr_isize.value, hr_isize.suffix,
  1732. hr_osize.precision, hr_osize.value, hr_osize.suffix);
  1733. }
  1734. FIO_freeCResources(&ress);
  1735. return error;
  1736. }
  1737. #endif /* #ifndef ZSTD_NOCOMPRESS */
  1738. #ifndef ZSTD_NODECOMPRESS
  1739. /* **************************************************************************
  1740. * Decompression
  1741. ***************************************************************************/
  1742. typedef struct {
  1743. void* srcBuffer;
  1744. size_t srcBufferSize;
  1745. size_t srcBufferLoaded;
  1746. void* dstBuffer;
  1747. size_t dstBufferSize;
  1748. ZSTD_DStream* dctx;
  1749. FILE* dstFile;
  1750. } dRess_t;
  1751. static dRess_t FIO_createDResources(FIO_prefs_t* const prefs, const char* dictFileName)
  1752. {
  1753. dRess_t ress;
  1754. memset(&ress, 0, sizeof(ress));
  1755. if (prefs->patchFromMode)
  1756. FIO_adjustMemLimitForPatchFromMode(prefs, UTIL_getFileSize(dictFileName), 0 /* just use the dict size */);
  1757. /* Allocation */
  1758. ress.dctx = ZSTD_createDStream();
  1759. if (ress.dctx==NULL)
  1760. EXM_THROW(60, "Error: %s : can't create ZSTD_DStream", strerror(errno));
  1761. CHECK( ZSTD_DCtx_setMaxWindowSize(ress.dctx, prefs->memLimit) );
  1762. CHECK( ZSTD_DCtx_setParameter(ress.dctx, ZSTD_d_forceIgnoreChecksum, !prefs->checksumFlag));
  1763. ress.srcBufferSize = ZSTD_DStreamInSize();
  1764. ress.srcBuffer = malloc(ress.srcBufferSize);
  1765. ress.dstBufferSize = ZSTD_DStreamOutSize();
  1766. ress.dstBuffer = malloc(ress.dstBufferSize);
  1767. if (!ress.srcBuffer || !ress.dstBuffer)
  1768. EXM_THROW(61, "Allocation error : not enough memory");
  1769. /* dictionary */
  1770. { void* dictBuffer;
  1771. size_t const dictBufferSize = FIO_createDictBuffer(&dictBuffer, dictFileName, prefs);
  1772. CHECK( ZSTD_initDStream_usingDict(ress.dctx, dictBuffer, dictBufferSize) );
  1773. free(dictBuffer);
  1774. }
  1775. return ress;
  1776. }
  1777. static void FIO_freeDResources(dRess_t ress)
  1778. {
  1779. CHECK( ZSTD_freeDStream(ress.dctx) );
  1780. free(ress.srcBuffer);
  1781. free(ress.dstBuffer);
  1782. }
  1783. /** FIO_fwriteSparse() :
  1784. * @return : storedSkips,
  1785. * argument for next call to FIO_fwriteSparse() or FIO_fwriteSparseEnd() */
  1786. static unsigned
  1787. FIO_fwriteSparse(FILE* file,
  1788. const void* buffer, size_t bufferSize,
  1789. const FIO_prefs_t* const prefs,
  1790. unsigned storedSkips)
  1791. {
  1792. const size_t* const bufferT = (const size_t*)buffer; /* Buffer is supposed malloc'ed, hence aligned on size_t */
  1793. size_t bufferSizeT = bufferSize / sizeof(size_t);
  1794. const size_t* const bufferTEnd = bufferT + bufferSizeT;
  1795. const size_t* ptrT = bufferT;
  1796. static const size_t segmentSizeT = (32 KB) / sizeof(size_t); /* check every 32 KB */
  1797. if (prefs->testMode) return 0; /* do not output anything in test mode */
  1798. if (!prefs->sparseFileSupport) { /* normal write */
  1799. size_t const sizeCheck = fwrite(buffer, 1, bufferSize, file);
  1800. if (sizeCheck != bufferSize)
  1801. EXM_THROW(70, "Write error : cannot write decoded block : %s",
  1802. strerror(errno));
  1803. return 0;
  1804. }
  1805. /* avoid int overflow */
  1806. if (storedSkips > 1 GB) {
  1807. if (LONG_SEEK(file, 1 GB, SEEK_CUR) != 0)
  1808. EXM_THROW(91, "1 GB skip error (sparse file support)");
  1809. storedSkips -= 1 GB;
  1810. }
  1811. while (ptrT < bufferTEnd) {
  1812. size_t nb0T;
  1813. /* adjust last segment if < 32 KB */
  1814. size_t seg0SizeT = segmentSizeT;
  1815. if (seg0SizeT > bufferSizeT) seg0SizeT = bufferSizeT;
  1816. bufferSizeT -= seg0SizeT;
  1817. /* count leading zeroes */
  1818. for (nb0T=0; (nb0T < seg0SizeT) && (ptrT[nb0T] == 0); nb0T++) ;
  1819. storedSkips += (unsigned)(nb0T * sizeof(size_t));
  1820. if (nb0T != seg0SizeT) { /* not all 0s */
  1821. size_t const nbNon0ST = seg0SizeT - nb0T;
  1822. /* skip leading zeros */
  1823. if (LONG_SEEK(file, storedSkips, SEEK_CUR) != 0)
  1824. EXM_THROW(92, "Sparse skip error ; try --no-sparse");
  1825. storedSkips = 0;
  1826. /* write the rest */
  1827. if (fwrite(ptrT + nb0T, sizeof(size_t), nbNon0ST, file) != nbNon0ST)
  1828. EXM_THROW(93, "Write error : cannot write decoded block : %s",
  1829. strerror(errno));
  1830. }
  1831. ptrT += seg0SizeT;
  1832. }
  1833. { static size_t const maskT = sizeof(size_t)-1;
  1834. if (bufferSize & maskT) {
  1835. /* size not multiple of sizeof(size_t) : implies end of block */
  1836. const char* const restStart = (const char*)bufferTEnd;
  1837. const char* restPtr = restStart;
  1838. const char* const restEnd = (const char*)buffer + bufferSize;
  1839. assert(restEnd > restStart && restEnd < restStart + sizeof(size_t));
  1840. for ( ; (restPtr < restEnd) && (*restPtr == 0); restPtr++) ;
  1841. storedSkips += (unsigned) (restPtr - restStart);
  1842. if (restPtr != restEnd) {
  1843. /* not all remaining bytes are 0 */
  1844. size_t const restSize = (size_t)(restEnd - restPtr);
  1845. if (LONG_SEEK(file, storedSkips, SEEK_CUR) != 0)
  1846. EXM_THROW(92, "Sparse skip error ; try --no-sparse");
  1847. if (fwrite(restPtr, 1, restSize, file) != restSize)
  1848. EXM_THROW(95, "Write error : cannot write end of decoded block : %s",
  1849. strerror(errno));
  1850. storedSkips = 0;
  1851. } } }
  1852. return storedSkips;
  1853. }
  1854. static void
  1855. FIO_fwriteSparseEnd(const FIO_prefs_t* const prefs, FILE* file, unsigned storedSkips)
  1856. {
  1857. if (prefs->testMode) assert(storedSkips == 0);
  1858. if (storedSkips>0) {
  1859. assert(prefs->sparseFileSupport > 0); /* storedSkips>0 implies sparse support is enabled */
  1860. (void)prefs; /* assert can be disabled, in which case prefs becomes unused */
  1861. if (LONG_SEEK(file, storedSkips-1, SEEK_CUR) != 0)
  1862. EXM_THROW(69, "Final skip error (sparse file support)");
  1863. /* last zero must be explicitly written,
  1864. * so that skipped ones get implicitly translated as zero by FS */
  1865. { const char lastZeroByte[1] = { 0 };
  1866. if (fwrite(lastZeroByte, 1, 1, file) != 1)
  1867. EXM_THROW(69, "Write error : cannot write last zero : %s", strerror(errno));
  1868. } }
  1869. }
  1870. /** FIO_passThrough() : just copy input into output, for compatibility with gzip -df mode
  1871. @return : 0 (no error) */
  1872. static int FIO_passThrough(const FIO_prefs_t* const prefs,
  1873. FILE* foutput, FILE* finput,
  1874. void* buffer, size_t bufferSize,
  1875. size_t alreadyLoaded)
  1876. {
  1877. size_t const blockSize = MIN(64 KB, bufferSize);
  1878. size_t readFromInput;
  1879. unsigned storedSkips = 0;
  1880. /* assumption : ress->srcBufferLoaded bytes already loaded and stored within buffer */
  1881. { size_t const sizeCheck = fwrite(buffer, 1, alreadyLoaded, foutput);
  1882. if (sizeCheck != alreadyLoaded) {
  1883. DISPLAYLEVEL(1, "Pass-through write error : %s\n", strerror(errno));
  1884. return 1;
  1885. } }
  1886. do {
  1887. readFromInput = fread(buffer, 1, blockSize, finput);
  1888. storedSkips = FIO_fwriteSparse(foutput, buffer, readFromInput, prefs, storedSkips);
  1889. } while (readFromInput == blockSize);
  1890. if (ferror(finput)) {
  1891. DISPLAYLEVEL(1, "Pass-through read error : %s\n", strerror(errno));
  1892. return 1;
  1893. }
  1894. assert(feof(finput));
  1895. FIO_fwriteSparseEnd(prefs, foutput, storedSkips);
  1896. return 0;
  1897. }
  1898. /* FIO_zstdErrorHelp() :
  1899. * detailed error message when requested window size is too large */
  1900. static void
  1901. FIO_zstdErrorHelp(const FIO_prefs_t* const prefs,
  1902. const dRess_t* ress,
  1903. size_t err, const char* srcFileName)
  1904. {
  1905. ZSTD_frameHeader header;
  1906. /* Help message only for one specific error */
  1907. if (ZSTD_getErrorCode(err) != ZSTD_error_frameParameter_windowTooLarge)
  1908. return;
  1909. /* Try to decode the frame header */
  1910. err = ZSTD_getFrameHeader(&header, ress->srcBuffer, ress->srcBufferLoaded);
  1911. if (err == 0) {
  1912. unsigned long long const windowSize = header.windowSize;
  1913. unsigned const windowLog = FIO_highbit64(windowSize) + ((windowSize & (windowSize - 1)) != 0);
  1914. assert(prefs->memLimit > 0);
  1915. DISPLAYLEVEL(1, "%s : Window size larger than maximum : %llu > %u \n",
  1916. srcFileName, windowSize, prefs->memLimit);
  1917. if (windowLog <= ZSTD_WINDOWLOG_MAX) {
  1918. unsigned const windowMB = (unsigned)((windowSize >> 20) + ((windowSize & ((1 MB) - 1)) != 0));
  1919. assert(windowSize < (U64)(1ULL << 52)); /* ensure now overflow for windowMB */
  1920. DISPLAYLEVEL(1, "%s : Use --long=%u or --memory=%uMB \n",
  1921. srcFileName, windowLog, windowMB);
  1922. return;
  1923. } }
  1924. DISPLAYLEVEL(1, "%s : Window log larger than ZSTD_WINDOWLOG_MAX=%u; not supported \n",
  1925. srcFileName, ZSTD_WINDOWLOG_MAX);
  1926. }
  1927. /** FIO_decompressFrame() :
  1928. * @return : size of decoded zstd frame, or an error code
  1929. */
  1930. #define FIO_ERROR_FRAME_DECODING ((unsigned long long)(-2))
  1931. static unsigned long long
  1932. FIO_decompressZstdFrame(FIO_ctx_t* const fCtx, dRess_t* ress, FILE* finput,
  1933. const FIO_prefs_t* const prefs,
  1934. const char* srcFileName,
  1935. U64 alreadyDecoded) /* for multi-frames streams */
  1936. {
  1937. U64 frameSize = 0;
  1938. U32 storedSkips = 0;
  1939. /* display last 20 characters only */
  1940. { size_t const srcFileLength = strlen(srcFileName);
  1941. if (srcFileLength>20) srcFileName += srcFileLength-20;
  1942. }
  1943. ZSTD_DCtx_reset(ress->dctx, ZSTD_reset_session_only);
  1944. /* Header loading : ensures ZSTD_getFrameHeader() will succeed */
  1945. { size_t const toDecode = ZSTD_FRAMEHEADERSIZE_MAX;
  1946. if (ress->srcBufferLoaded < toDecode) {
  1947. size_t const toRead = toDecode - ress->srcBufferLoaded;
  1948. void* const startPosition = (char*)ress->srcBuffer + ress->srcBufferLoaded;
  1949. ress->srcBufferLoaded += fread(startPosition, 1, toRead, finput);
  1950. } }
  1951. /* Main decompression Loop */
  1952. while (1) {
  1953. ZSTD_inBuffer inBuff = { ress->srcBuffer, ress->srcBufferLoaded, 0 };
  1954. ZSTD_outBuffer outBuff= { ress->dstBuffer, ress->dstBufferSize, 0 };
  1955. size_t const readSizeHint = ZSTD_decompressStream(ress->dctx, &outBuff, &inBuff);
  1956. const int displayLevel = (g_display_prefs.progressSetting == FIO_ps_always) ? 1 : 2;
  1957. UTIL_HumanReadableSize_t const hrs = UTIL_makeHumanReadableSize(alreadyDecoded+frameSize);
  1958. if (ZSTD_isError(readSizeHint)) {
  1959. DISPLAYLEVEL(1, "%s : Decoding error (36) : %s \n",
  1960. srcFileName, ZSTD_getErrorName(readSizeHint));
  1961. FIO_zstdErrorHelp(prefs, ress, readSizeHint, srcFileName);
  1962. return FIO_ERROR_FRAME_DECODING;
  1963. }
  1964. /* Write block */
  1965. storedSkips = FIO_fwriteSparse(ress->dstFile, ress->dstBuffer, outBuff.pos, prefs, storedSkips);
  1966. frameSize += outBuff.pos;
  1967. if (fCtx->nbFilesTotal > 1) {
  1968. size_t srcFileNameSize = strlen(srcFileName);
  1969. if (srcFileNameSize > 18) {
  1970. const char* truncatedSrcFileName = srcFileName + srcFileNameSize - 15;
  1971. DISPLAYUPDATE(displayLevel, "\rDecompress: %2u/%2u files. Current: ...%s : %.*f%s... ",
  1972. fCtx->currFileIdx+1, fCtx->nbFilesTotal, truncatedSrcFileName, hrs.precision, hrs.value, hrs.suffix);
  1973. } else {
  1974. DISPLAYUPDATE(displayLevel, "\rDecompress: %2u/%2u files. Current: %s : %.*f%s... ",
  1975. fCtx->currFileIdx+1, fCtx->nbFilesTotal, srcFileName, hrs.precision, hrs.value, hrs.suffix);
  1976. }
  1977. } else {
  1978. DISPLAYUPDATE(displayLevel, "\r%-20.20s : %.*f%s... ",
  1979. srcFileName, hrs.precision, hrs.value, hrs.suffix);
  1980. }
  1981. if (inBuff.pos > 0) {
  1982. memmove(ress->srcBuffer, (char*)ress->srcBuffer + inBuff.pos, inBuff.size - inBuff.pos);
  1983. ress->srcBufferLoaded -= inBuff.pos;
  1984. }
  1985. if (readSizeHint == 0) break; /* end of frame */
  1986. /* Fill input buffer */
  1987. { size_t const toDecode = MIN(readSizeHint, ress->srcBufferSize); /* support large skippable frames */
  1988. if (ress->srcBufferLoaded < toDecode) {
  1989. size_t const toRead = toDecode - ress->srcBufferLoaded; /* > 0 */
  1990. void* const startPosition = (char*)ress->srcBuffer + ress->srcBufferLoaded;
  1991. size_t const readSize = fread(startPosition, 1, toRead, finput);
  1992. if (readSize==0) {
  1993. DISPLAYLEVEL(1, "%s : Read error (39) : premature end \n",
  1994. srcFileName);
  1995. return FIO_ERROR_FRAME_DECODING;
  1996. }
  1997. ress->srcBufferLoaded += readSize;
  1998. } } }
  1999. FIO_fwriteSparseEnd(prefs, ress->dstFile, storedSkips);
  2000. return frameSize;
  2001. }
  2002. #ifdef ZSTD_GZDECOMPRESS
  2003. static unsigned long long
  2004. FIO_decompressGzFrame(dRess_t* ress, FILE* srcFile,
  2005. const FIO_prefs_t* const prefs,
  2006. const char* srcFileName)
  2007. {
  2008. unsigned long long outFileSize = 0;
  2009. z_stream strm;
  2010. int flush = Z_NO_FLUSH;
  2011. int decodingError = 0;
  2012. unsigned storedSkips = 0;
  2013. strm.zalloc = Z_NULL;
  2014. strm.zfree = Z_NULL;
  2015. strm.opaque = Z_NULL;
  2016. strm.next_in = 0;
  2017. strm.avail_in = 0;
  2018. /* see http://www.zlib.net/manual.html */
  2019. if (inflateInit2(&strm, 15 /* maxWindowLogSize */ + 16 /* gzip only */) != Z_OK)
  2020. return FIO_ERROR_FRAME_DECODING;
  2021. strm.next_out = (Bytef*)ress->dstBuffer;
  2022. strm.avail_out = (uInt)ress->dstBufferSize;
  2023. strm.avail_in = (uInt)ress->srcBufferLoaded;
  2024. strm.next_in = (z_const unsigned char*)ress->srcBuffer;
  2025. for ( ; ; ) {
  2026. int ret;
  2027. if (strm.avail_in == 0) {
  2028. ress->srcBufferLoaded = fread(ress->srcBuffer, 1, ress->srcBufferSize, srcFile);
  2029. if (ress->srcBufferLoaded == 0) flush = Z_FINISH;
  2030. strm.next_in = (z_const unsigned char*)ress->srcBuffer;
  2031. strm.avail_in = (uInt)ress->srcBufferLoaded;
  2032. }
  2033. ret = inflate(&strm, flush);
  2034. if (ret == Z_BUF_ERROR) {
  2035. DISPLAYLEVEL(1, "zstd: %s: premature gz end \n", srcFileName);
  2036. decodingError = 1; break;
  2037. }
  2038. if (ret != Z_OK && ret != Z_STREAM_END) {
  2039. DISPLAYLEVEL(1, "zstd: %s: inflate error %d \n", srcFileName, ret);
  2040. decodingError = 1; break;
  2041. }
  2042. { size_t const decompBytes = ress->dstBufferSize - strm.avail_out;
  2043. if (decompBytes) {
  2044. storedSkips = FIO_fwriteSparse(ress->dstFile, ress->dstBuffer, decompBytes, prefs, storedSkips);
  2045. outFileSize += decompBytes;
  2046. strm.next_out = (Bytef*)ress->dstBuffer;
  2047. strm.avail_out = (uInt)ress->dstBufferSize;
  2048. }
  2049. }
  2050. if (ret == Z_STREAM_END) break;
  2051. }
  2052. if (strm.avail_in > 0)
  2053. memmove(ress->srcBuffer, strm.next_in, strm.avail_in);
  2054. ress->srcBufferLoaded = strm.avail_in;
  2055. if ( (inflateEnd(&strm) != Z_OK) /* release resources ; error detected */
  2056. && (decodingError==0) ) {
  2057. DISPLAYLEVEL(1, "zstd: %s: inflateEnd error \n", srcFileName);
  2058. decodingError = 1;
  2059. }
  2060. FIO_fwriteSparseEnd(prefs, ress->dstFile, storedSkips);
  2061. return decodingError ? FIO_ERROR_FRAME_DECODING : outFileSize;
  2062. }
  2063. #endif
  2064. #ifdef ZSTD_LZMADECOMPRESS
  2065. static unsigned long long
  2066. FIO_decompressLzmaFrame(dRess_t* ress, FILE* srcFile,
  2067. const FIO_prefs_t* const prefs,
  2068. const char* srcFileName, int plain_lzma)
  2069. {
  2070. unsigned long long outFileSize = 0;
  2071. lzma_stream strm = LZMA_STREAM_INIT;
  2072. lzma_action action = LZMA_RUN;
  2073. lzma_ret initRet;
  2074. int decodingError = 0;
  2075. unsigned storedSkips = 0;
  2076. strm.next_in = 0;
  2077. strm.avail_in = 0;
  2078. if (plain_lzma) {
  2079. initRet = lzma_alone_decoder(&strm, UINT64_MAX); /* LZMA */
  2080. } else {
  2081. initRet = lzma_stream_decoder(&strm, UINT64_MAX, 0); /* XZ */
  2082. }
  2083. if (initRet != LZMA_OK) {
  2084. DISPLAYLEVEL(1, "zstd: %s: %s error %d \n",
  2085. plain_lzma ? "lzma_alone_decoder" : "lzma_stream_decoder",
  2086. srcFileName, initRet);
  2087. return FIO_ERROR_FRAME_DECODING;
  2088. }
  2089. strm.next_out = (BYTE*)ress->dstBuffer;
  2090. strm.avail_out = ress->dstBufferSize;
  2091. strm.next_in = (BYTE const*)ress->srcBuffer;
  2092. strm.avail_in = ress->srcBufferLoaded;
  2093. for ( ; ; ) {
  2094. lzma_ret ret;
  2095. if (strm.avail_in == 0) {
  2096. ress->srcBufferLoaded = fread(ress->srcBuffer, 1, ress->srcBufferSize, srcFile);
  2097. if (ress->srcBufferLoaded == 0) action = LZMA_FINISH;
  2098. strm.next_in = (BYTE const*)ress->srcBuffer;
  2099. strm.avail_in = ress->srcBufferLoaded;
  2100. }
  2101. ret = lzma_code(&strm, action);
  2102. if (ret == LZMA_BUF_ERROR) {
  2103. DISPLAYLEVEL(1, "zstd: %s: premature lzma end \n", srcFileName);
  2104. decodingError = 1; break;
  2105. }
  2106. if (ret != LZMA_OK && ret != LZMA_STREAM_END) {
  2107. DISPLAYLEVEL(1, "zstd: %s: lzma_code decoding error %d \n",
  2108. srcFileName, ret);
  2109. decodingError = 1; break;
  2110. }
  2111. { size_t const decompBytes = ress->dstBufferSize - strm.avail_out;
  2112. if (decompBytes) {
  2113. storedSkips = FIO_fwriteSparse(ress->dstFile, ress->dstBuffer, decompBytes, prefs, storedSkips);
  2114. outFileSize += decompBytes;
  2115. strm.next_out = (BYTE*)ress->dstBuffer;
  2116. strm.avail_out = ress->dstBufferSize;
  2117. } }
  2118. if (ret == LZMA_STREAM_END) break;
  2119. }
  2120. if (strm.avail_in > 0)
  2121. memmove(ress->srcBuffer, strm.next_in, strm.avail_in);
  2122. ress->srcBufferLoaded = strm.avail_in;
  2123. lzma_end(&strm);
  2124. FIO_fwriteSparseEnd(prefs, ress->dstFile, storedSkips);
  2125. return decodingError ? FIO_ERROR_FRAME_DECODING : outFileSize;
  2126. }
  2127. #endif
  2128. #ifdef ZSTD_LZ4DECOMPRESS
  2129. static unsigned long long
  2130. FIO_decompressLz4Frame(dRess_t* ress, FILE* srcFile,
  2131. const FIO_prefs_t* const prefs,
  2132. const char* srcFileName)
  2133. {
  2134. unsigned long long filesize = 0;
  2135. LZ4F_errorCode_t nextToLoad;
  2136. LZ4F_decompressionContext_t dCtx;
  2137. LZ4F_errorCode_t const errorCode = LZ4F_createDecompressionContext(&dCtx, LZ4F_VERSION);
  2138. int decodingError = 0;
  2139. unsigned storedSkips = 0;
  2140. if (LZ4F_isError(errorCode)) {
  2141. DISPLAYLEVEL(1, "zstd: failed to create lz4 decompression context \n");
  2142. return FIO_ERROR_FRAME_DECODING;
  2143. }
  2144. /* Init feed with magic number (already consumed from FILE* sFile) */
  2145. { size_t inSize = 4;
  2146. size_t outSize= 0;
  2147. MEM_writeLE32(ress->srcBuffer, LZ4_MAGICNUMBER);
  2148. nextToLoad = LZ4F_decompress(dCtx, ress->dstBuffer, &outSize, ress->srcBuffer, &inSize, NULL);
  2149. if (LZ4F_isError(nextToLoad)) {
  2150. DISPLAYLEVEL(1, "zstd: %s: lz4 header error : %s \n",
  2151. srcFileName, LZ4F_getErrorName(nextToLoad));
  2152. LZ4F_freeDecompressionContext(dCtx);
  2153. return FIO_ERROR_FRAME_DECODING;
  2154. } }
  2155. /* Main Loop */
  2156. for (;nextToLoad;) {
  2157. size_t readSize;
  2158. size_t pos = 0;
  2159. size_t decodedBytes = ress->dstBufferSize;
  2160. /* Read input */
  2161. if (nextToLoad > ress->srcBufferSize) nextToLoad = ress->srcBufferSize;
  2162. readSize = fread(ress->srcBuffer, 1, nextToLoad, srcFile);
  2163. if (!readSize) break; /* reached end of file or stream */
  2164. while ((pos < readSize) || (decodedBytes == ress->dstBufferSize)) { /* still to read, or still to flush */
  2165. /* Decode Input (at least partially) */
  2166. size_t remaining = readSize - pos;
  2167. decodedBytes = ress->dstBufferSize;
  2168. nextToLoad = LZ4F_decompress(dCtx, ress->dstBuffer, &decodedBytes, (char*)(ress->srcBuffer)+pos, &remaining, NULL);
  2169. if (LZ4F_isError(nextToLoad)) {
  2170. DISPLAYLEVEL(1, "zstd: %s: lz4 decompression error : %s \n",
  2171. srcFileName, LZ4F_getErrorName(nextToLoad));
  2172. decodingError = 1; nextToLoad = 0; break;
  2173. }
  2174. pos += remaining;
  2175. /* Write Block */
  2176. if (decodedBytes) {
  2177. UTIL_HumanReadableSize_t hrs;
  2178. storedSkips = FIO_fwriteSparse(ress->dstFile, ress->dstBuffer, decodedBytes, prefs, storedSkips);
  2179. filesize += decodedBytes;
  2180. hrs = UTIL_makeHumanReadableSize(filesize);
  2181. DISPLAYUPDATE(2, "\rDecompressed : %.*f%s ", hrs.precision, hrs.value, hrs.suffix);
  2182. }
  2183. if (!nextToLoad) break;
  2184. }
  2185. }
  2186. /* can be out because readSize == 0, which could be an fread() error */
  2187. if (ferror(srcFile)) {
  2188. DISPLAYLEVEL(1, "zstd: %s: read error \n", srcFileName);
  2189. decodingError=1;
  2190. }
  2191. if (nextToLoad!=0) {
  2192. DISPLAYLEVEL(1, "zstd: %s: unfinished lz4 stream \n", srcFileName);
  2193. decodingError=1;
  2194. }
  2195. LZ4F_freeDecompressionContext(dCtx);
  2196. ress->srcBufferLoaded = 0; /* LZ4F will reach exact frame boundary */
  2197. FIO_fwriteSparseEnd(prefs, ress->dstFile, storedSkips);
  2198. return decodingError ? FIO_ERROR_FRAME_DECODING : filesize;
  2199. }
  2200. #endif
  2201. /** FIO_decompressFrames() :
  2202. * Find and decode frames inside srcFile
  2203. * srcFile presumed opened and valid
  2204. * @return : 0 : OK
  2205. * 1 : error
  2206. */
  2207. static int FIO_decompressFrames(FIO_ctx_t* const fCtx,
  2208. dRess_t ress, FILE* srcFile,
  2209. const FIO_prefs_t* const prefs,
  2210. const char* dstFileName, const char* srcFileName)
  2211. {
  2212. unsigned readSomething = 0;
  2213. unsigned long long filesize = 0;
  2214. assert(srcFile != NULL);
  2215. /* for each frame */
  2216. for ( ; ; ) {
  2217. /* check magic number -> version */
  2218. size_t const toRead = 4;
  2219. const BYTE* const buf = (const BYTE*)ress.srcBuffer;
  2220. if (ress.srcBufferLoaded < toRead) /* load up to 4 bytes for header */
  2221. ress.srcBufferLoaded += fread((char*)ress.srcBuffer + ress.srcBufferLoaded,
  2222. (size_t)1, toRead - ress.srcBufferLoaded, srcFile);
  2223. if (ress.srcBufferLoaded==0) {
  2224. if (readSomething==0) { /* srcFile is empty (which is invalid) */
  2225. DISPLAYLEVEL(1, "zstd: %s: unexpected end of file \n", srcFileName);
  2226. return 1;
  2227. } /* else, just reached frame boundary */
  2228. break; /* no more input */
  2229. }
  2230. readSomething = 1; /* there is at least 1 byte in srcFile */
  2231. if (ress.srcBufferLoaded < toRead) {
  2232. DISPLAYLEVEL(1, "zstd: %s: unknown header \n", srcFileName);
  2233. return 1;
  2234. }
  2235. if (ZSTD_isFrame(buf, ress.srcBufferLoaded)) {
  2236. unsigned long long const frameSize = FIO_decompressZstdFrame(fCtx, &ress, srcFile, prefs, srcFileName, filesize);
  2237. if (frameSize == FIO_ERROR_FRAME_DECODING) return 1;
  2238. filesize += frameSize;
  2239. } else if (buf[0] == 31 && buf[1] == 139) { /* gz magic number */
  2240. #ifdef ZSTD_GZDECOMPRESS
  2241. unsigned long long const frameSize = FIO_decompressGzFrame(&ress, srcFile, prefs, srcFileName);
  2242. if (frameSize == FIO_ERROR_FRAME_DECODING) return 1;
  2243. filesize += frameSize;
  2244. #else
  2245. DISPLAYLEVEL(1, "zstd: %s: gzip file cannot be uncompressed (zstd compiled without HAVE_ZLIB) -- ignored \n", srcFileName);
  2246. return 1;
  2247. #endif
  2248. } else if ((buf[0] == 0xFD && buf[1] == 0x37) /* xz magic number */
  2249. || (buf[0] == 0x5D && buf[1] == 0x00)) { /* lzma header (no magic number) */
  2250. #ifdef ZSTD_LZMADECOMPRESS
  2251. unsigned long long const frameSize = FIO_decompressLzmaFrame(&ress, srcFile, prefs, srcFileName, buf[0] != 0xFD);
  2252. if (frameSize == FIO_ERROR_FRAME_DECODING) return 1;
  2253. filesize += frameSize;
  2254. #else
  2255. DISPLAYLEVEL(1, "zstd: %s: xz/lzma file cannot be uncompressed (zstd compiled without HAVE_LZMA) -- ignored \n", srcFileName);
  2256. return 1;
  2257. #endif
  2258. } else if (MEM_readLE32(buf) == LZ4_MAGICNUMBER) {
  2259. #ifdef ZSTD_LZ4DECOMPRESS
  2260. unsigned long long const frameSize = FIO_decompressLz4Frame(&ress, srcFile, prefs, srcFileName);
  2261. if (frameSize == FIO_ERROR_FRAME_DECODING) return 1;
  2262. filesize += frameSize;
  2263. #else
  2264. DISPLAYLEVEL(1, "zstd: %s: lz4 file cannot be uncompressed (zstd compiled without HAVE_LZ4) -- ignored \n", srcFileName);
  2265. return 1;
  2266. #endif
  2267. } else if ((prefs->overwrite) && !strcmp (dstFileName, stdoutmark)) { /* pass-through mode */
  2268. return FIO_passThrough(prefs,
  2269. ress.dstFile, srcFile,
  2270. ress.srcBuffer, ress.srcBufferSize,
  2271. ress.srcBufferLoaded);
  2272. } else {
  2273. DISPLAYLEVEL(1, "zstd: %s: unsupported format \n", srcFileName);
  2274. return 1;
  2275. } } /* for each frame */
  2276. /* Final Status */
  2277. fCtx->totalBytesOutput += (size_t)filesize;
  2278. DISPLAYLEVEL(2, "\r%79s\r", "");
  2279. /* No status message in pipe mode (stdin - stdout) or multi-files mode */
  2280. if ((g_display_prefs.displayLevel >= 2 && fCtx->nbFilesTotal <= 1) ||
  2281. g_display_prefs.displayLevel >= 3 ||
  2282. g_display_prefs.progressSetting == FIO_ps_always) {
  2283. DISPLAYLEVEL(1, "\r%-20s: %llu bytes \n", srcFileName, filesize);
  2284. }
  2285. return 0;
  2286. }
  2287. /** FIO_decompressDstFile() :
  2288. open `dstFileName`,
  2289. or path-through if ress.dstFile is already != 0,
  2290. then start decompression process (FIO_decompressFrames()).
  2291. @return : 0 : OK
  2292. 1 : operation aborted
  2293. */
  2294. static int FIO_decompressDstFile(FIO_ctx_t* const fCtx,
  2295. FIO_prefs_t* const prefs,
  2296. dRess_t ress, FILE* srcFile,
  2297. const char* dstFileName, const char* srcFileName)
  2298. {
  2299. int result;
  2300. stat_t statbuf;
  2301. int releaseDstFile = 0;
  2302. int transferMTime = 0;
  2303. if ((ress.dstFile == NULL) && (prefs->testMode==0)) {
  2304. int dstFilePermissions = DEFAULT_FILE_PERMISSIONS;
  2305. if ( strcmp(srcFileName, stdinmark) /* special case : don't transfer permissions from stdin */
  2306. && strcmp(dstFileName, stdoutmark)
  2307. && UTIL_stat(srcFileName, &statbuf)
  2308. && UTIL_isRegularFileStat(&statbuf) ) {
  2309. dstFilePermissions = statbuf.st_mode;
  2310. transferMTime = 1;
  2311. }
  2312. releaseDstFile = 1;
  2313. ress.dstFile = FIO_openDstFile(fCtx, prefs, srcFileName, dstFileName, dstFilePermissions);
  2314. if (ress.dstFile==NULL) return 1;
  2315. /* Must only be added after FIO_openDstFile() succeeds.
  2316. * Otherwise we may delete the destination file if it already exists,
  2317. * and the user presses Ctrl-C when asked if they wish to overwrite.
  2318. */
  2319. addHandler(dstFileName);
  2320. }
  2321. result = FIO_decompressFrames(fCtx, ress, srcFile, prefs, dstFileName, srcFileName);
  2322. if (releaseDstFile) {
  2323. FILE* const dstFile = ress.dstFile;
  2324. clearHandler();
  2325. ress.dstFile = NULL;
  2326. if (fclose(dstFile)) {
  2327. DISPLAYLEVEL(1, "zstd: %s: %s \n", dstFileName, strerror(errno));
  2328. result = 1;
  2329. }
  2330. if (transferMTime) {
  2331. UTIL_utime(dstFileName, &statbuf);
  2332. }
  2333. if ( (result != 0) /* operation failure */
  2334. && strcmp(dstFileName, stdoutmark) /* special case : don't remove() stdout */
  2335. ) {
  2336. FIO_removeFile(dstFileName); /* remove decompression artefact; note: don't do anything special if remove() fails */
  2337. }
  2338. }
  2339. return result;
  2340. }
  2341. /** FIO_decompressSrcFile() :
  2342. Open `srcFileName`, transfer control to decompressDstFile()
  2343. @return : 0 : OK
  2344. 1 : error
  2345. */
  2346. static int FIO_decompressSrcFile(FIO_ctx_t* const fCtx, FIO_prefs_t* const prefs, dRess_t ress, const char* dstFileName, const char* srcFileName)
  2347. {
  2348. FILE* srcFile;
  2349. int result;
  2350. if (UTIL_isDirectory(srcFileName)) {
  2351. DISPLAYLEVEL(1, "zstd: %s is a directory -- ignored \n", srcFileName);
  2352. return 1;
  2353. }
  2354. srcFile = FIO_openSrcFile(prefs, srcFileName);
  2355. if (srcFile==NULL) return 1;
  2356. ress.srcBufferLoaded = 0;
  2357. result = FIO_decompressDstFile(fCtx, prefs, ress, srcFile, dstFileName, srcFileName);
  2358. /* Close file */
  2359. if (fclose(srcFile)) {
  2360. DISPLAYLEVEL(1, "zstd: %s: %s \n", srcFileName, strerror(errno)); /* error should not happen */
  2361. return 1;
  2362. }
  2363. if ( prefs->removeSrcFile /* --rm */
  2364. && (result==0) /* decompression successful */
  2365. && strcmp(srcFileName, stdinmark) ) /* not stdin */ {
  2366. /* We must clear the handler, since after this point calling it would
  2367. * delete both the source and destination files.
  2368. */
  2369. clearHandler();
  2370. if (FIO_removeFile(srcFileName)) {
  2371. /* failed to remove src file */
  2372. DISPLAYLEVEL(1, "zstd: %s: %s \n", srcFileName, strerror(errno));
  2373. return 1;
  2374. } }
  2375. return result;
  2376. }
  2377. int FIO_decompressFilename(FIO_ctx_t* const fCtx, FIO_prefs_t* const prefs,
  2378. const char* dstFileName, const char* srcFileName,
  2379. const char* dictFileName)
  2380. {
  2381. dRess_t const ress = FIO_createDResources(prefs, dictFileName);
  2382. int const decodingError = FIO_decompressSrcFile(fCtx, prefs, ress, dstFileName, srcFileName);
  2383. FIO_freeDResources(ress);
  2384. return decodingError;
  2385. }
  2386. static const char *suffixList[] = {
  2387. ZSTD_EXTENSION,
  2388. TZSTD_EXTENSION,
  2389. #ifndef ZSTD_NODECOMPRESS
  2390. ZSTD_ALT_EXTENSION,
  2391. #endif
  2392. #ifdef ZSTD_GZDECOMPRESS
  2393. GZ_EXTENSION,
  2394. TGZ_EXTENSION,
  2395. #endif
  2396. #ifdef ZSTD_LZMADECOMPRESS
  2397. LZMA_EXTENSION,
  2398. XZ_EXTENSION,
  2399. TXZ_EXTENSION,
  2400. #endif
  2401. #ifdef ZSTD_LZ4DECOMPRESS
  2402. LZ4_EXTENSION,
  2403. TLZ4_EXTENSION,
  2404. #endif
  2405. NULL
  2406. };
  2407. static const char *suffixListStr =
  2408. ZSTD_EXTENSION "/" TZSTD_EXTENSION
  2409. #ifdef ZSTD_GZDECOMPRESS
  2410. "/" GZ_EXTENSION "/" TGZ_EXTENSION
  2411. #endif
  2412. #ifdef ZSTD_LZMADECOMPRESS
  2413. "/" LZMA_EXTENSION "/" XZ_EXTENSION "/" TXZ_EXTENSION
  2414. #endif
  2415. #ifdef ZSTD_LZ4DECOMPRESS
  2416. "/" LZ4_EXTENSION "/" TLZ4_EXTENSION
  2417. #endif
  2418. ;
  2419. /* FIO_determineDstName() :
  2420. * create a destination filename from a srcFileName.
  2421. * @return a pointer to it.
  2422. * @return == NULL if there is an error */
  2423. static const char*
  2424. FIO_determineDstName(const char* srcFileName, const char* outDirName)
  2425. {
  2426. static size_t dfnbCapacity = 0;
  2427. static char* dstFileNameBuffer = NULL; /* using static allocation : this function cannot be multi-threaded */
  2428. size_t dstFileNameEndPos;
  2429. char* outDirFilename = NULL;
  2430. const char* dstSuffix = "";
  2431. size_t dstSuffixLen = 0;
  2432. size_t sfnSize = strlen(srcFileName);
  2433. size_t srcSuffixLen;
  2434. const char* const srcSuffix = strrchr(srcFileName, '.');
  2435. if (srcSuffix == NULL) {
  2436. DISPLAYLEVEL(1,
  2437. "zstd: %s: unknown suffix (%s expected). "
  2438. "Can't derive the output file name. "
  2439. "Specify it with -o dstFileName. Ignoring.\n",
  2440. srcFileName, suffixListStr);
  2441. return NULL;
  2442. }
  2443. srcSuffixLen = strlen(srcSuffix);
  2444. {
  2445. const char** matchedSuffixPtr;
  2446. for (matchedSuffixPtr = suffixList; *matchedSuffixPtr != NULL; matchedSuffixPtr++) {
  2447. if (!strcmp(*matchedSuffixPtr, srcSuffix)) {
  2448. break;
  2449. }
  2450. }
  2451. /* check suffix is authorized */
  2452. if (sfnSize <= srcSuffixLen || *matchedSuffixPtr == NULL) {
  2453. DISPLAYLEVEL(1,
  2454. "zstd: %s: unknown suffix (%s expected). "
  2455. "Can't derive the output file name. "
  2456. "Specify it with -o dstFileName. Ignoring.\n",
  2457. srcFileName, suffixListStr);
  2458. return NULL;
  2459. }
  2460. if ((*matchedSuffixPtr)[1] == 't') {
  2461. dstSuffix = ".tar";
  2462. dstSuffixLen = strlen(dstSuffix);
  2463. }
  2464. }
  2465. if (outDirName) {
  2466. outDirFilename = FIO_createFilename_fromOutDir(srcFileName, outDirName, 0);
  2467. sfnSize = strlen(outDirFilename);
  2468. assert(outDirFilename != NULL);
  2469. }
  2470. if (dfnbCapacity+srcSuffixLen <= sfnSize+1+dstSuffixLen) {
  2471. /* allocate enough space to write dstFilename into it */
  2472. free(dstFileNameBuffer);
  2473. dfnbCapacity = sfnSize + 20;
  2474. dstFileNameBuffer = (char*)malloc(dfnbCapacity);
  2475. if (dstFileNameBuffer==NULL)
  2476. EXM_THROW(74, "%s : not enough memory for dstFileName",
  2477. strerror(errno));
  2478. }
  2479. /* return dst name == src name truncated from suffix */
  2480. assert(dstFileNameBuffer != NULL);
  2481. dstFileNameEndPos = sfnSize - srcSuffixLen;
  2482. if (outDirFilename) {
  2483. memcpy(dstFileNameBuffer, outDirFilename, dstFileNameEndPos);
  2484. free(outDirFilename);
  2485. } else {
  2486. memcpy(dstFileNameBuffer, srcFileName, dstFileNameEndPos);
  2487. }
  2488. /* The short tar extensions tzst, tgz, txz and tlz4 files should have "tar"
  2489. * extension on decompression. Also writes terminating null. */
  2490. strcpy(dstFileNameBuffer + dstFileNameEndPos, dstSuffix);
  2491. return dstFileNameBuffer;
  2492. /* note : dstFileNameBuffer memory is not going to be free */
  2493. }
  2494. int
  2495. FIO_decompressMultipleFilenames(FIO_ctx_t* const fCtx,
  2496. FIO_prefs_t* const prefs,
  2497. const char** srcNamesTable,
  2498. const char* outMirroredRootDirName,
  2499. const char* outDirName, const char* outFileName,
  2500. const char* dictFileName)
  2501. {
  2502. int status;
  2503. int error = 0;
  2504. dRess_t ress = FIO_createDResources(prefs, dictFileName);
  2505. if (outFileName) {
  2506. if (FIO_removeMultiFilesWarning(fCtx, prefs, outFileName, 1 /* displayLevelCutoff */)) {
  2507. FIO_freeDResources(ress);
  2508. return 1;
  2509. }
  2510. if (!prefs->testMode) {
  2511. ress.dstFile = FIO_openDstFile(fCtx, prefs, NULL, outFileName, DEFAULT_FILE_PERMISSIONS);
  2512. if (ress.dstFile == 0) EXM_THROW(19, "cannot open %s", outFileName);
  2513. }
  2514. for (; fCtx->currFileIdx < fCtx->nbFilesTotal; fCtx->currFileIdx++) {
  2515. status = FIO_decompressSrcFile(fCtx, prefs, ress, outFileName, srcNamesTable[fCtx->currFileIdx]);
  2516. if (!status) fCtx->nbFilesProcessed++;
  2517. error |= status;
  2518. }
  2519. if ((!prefs->testMode) && (fclose(ress.dstFile)))
  2520. EXM_THROW(72, "Write error : %s : cannot properly close output file",
  2521. strerror(errno));
  2522. } else {
  2523. if (outMirroredRootDirName)
  2524. UTIL_mirrorSourceFilesDirectories(srcNamesTable, (unsigned)fCtx->nbFilesTotal, outMirroredRootDirName);
  2525. for (; fCtx->currFileIdx < fCtx->nbFilesTotal; fCtx->currFileIdx++) { /* create dstFileName */
  2526. const char* const srcFileName = srcNamesTable[fCtx->currFileIdx];
  2527. const char* dstFileName = NULL;
  2528. if (outMirroredRootDirName) {
  2529. char* validMirroredDirName = UTIL_createMirroredDestDirName(srcFileName, outMirroredRootDirName);
  2530. if (validMirroredDirName) {
  2531. dstFileName = FIO_determineDstName(srcFileName, validMirroredDirName);
  2532. free(validMirroredDirName);
  2533. } else {
  2534. DISPLAYLEVEL(2, "zstd: --output-dir-mirror cannot decompress '%s' into '%s'\n", srcFileName, outMirroredRootDirName);
  2535. }
  2536. } else {
  2537. dstFileName = FIO_determineDstName(srcFileName, outDirName);
  2538. }
  2539. if (dstFileName == NULL) { error=1; continue; }
  2540. status = FIO_decompressSrcFile(fCtx, prefs, ress, dstFileName, srcFileName);
  2541. if (!status) fCtx->nbFilesProcessed++;
  2542. error |= status;
  2543. }
  2544. if (outDirName)
  2545. FIO_checkFilenameCollisions(srcNamesTable , (unsigned)fCtx->nbFilesTotal);
  2546. }
  2547. if (fCtx->nbFilesProcessed >= 1 && fCtx->nbFilesTotal > 1 && fCtx->totalBytesOutput != 0)
  2548. DISPLAYLEVEL(2, "%d files decompressed : %6zu bytes total \n", fCtx->nbFilesProcessed, fCtx->totalBytesOutput);
  2549. FIO_freeDResources(ress);
  2550. return error;
  2551. }
  2552. /* **************************************************************************
  2553. * .zst file info (--list command)
  2554. ***************************************************************************/
  2555. typedef struct {
  2556. U64 decompressedSize;
  2557. U64 compressedSize;
  2558. U64 windowSize;
  2559. int numActualFrames;
  2560. int numSkippableFrames;
  2561. int decompUnavailable;
  2562. int usesCheck;
  2563. U32 nbFiles;
  2564. } fileInfo_t;
  2565. typedef enum {
  2566. info_success=0,
  2567. info_frame_error=1,
  2568. info_not_zstd=2,
  2569. info_file_error=3,
  2570. info_truncated_input=4,
  2571. } InfoError;
  /* ERROR_IF() :
   * When condition `c` is true, print the printf-style message (followed by
   * an end of line) at display level 1, then make the enclosing function
   * return `n`.
   * The body is wrapped in do { } while (0) so that `ERROR_IF(...);` expands
   * to exactly one statement, and therefore remains correct when used as the
   * body of an unbraced if / else. Invocations must end with a semicolon. */
  #define ERROR_IF(c,n,...) do {        \
      if (c) {                          \
          DISPLAYLEVEL(1, __VA_ARGS__); \
          DISPLAYLEVEL(1, " \n");       \
          return n;                     \
      }                                 \
  } while (0)
  2579. static InfoError
  2580. FIO_analyzeFrames(fileInfo_t* info, FILE* const srcFile)
  2581. {
  2582. /* begin analyzing frame */
  2583. for ( ; ; ) {
  2584. BYTE headerBuffer[ZSTD_FRAMEHEADERSIZE_MAX];
  2585. size_t const numBytesRead = fread(headerBuffer, 1, sizeof(headerBuffer), srcFile);
  2586. if (numBytesRead < ZSTD_FRAMEHEADERSIZE_MIN(ZSTD_f_zstd1)) {
  2587. if ( feof(srcFile)
  2588. && (numBytesRead == 0)
  2589. && (info->compressedSize > 0)
  2590. && (info->compressedSize != UTIL_FILESIZE_UNKNOWN) ) {
  2591. unsigned long long file_position = (unsigned long long) LONG_TELL(srcFile);
  2592. unsigned long long file_size = (unsigned long long) info->compressedSize;
  2593. ERROR_IF(file_position != file_size, info_truncated_input,
  2594. "Error: seeked to position %llu, which is beyond file size of %llu\n",
  2595. file_position,
  2596. file_size);
  2597. break; /* correct end of file => success */
  2598. }
  2599. ERROR_IF(feof(srcFile), info_not_zstd, "Error: reached end of file with incomplete frame");
  2600. ERROR_IF(1, info_frame_error, "Error: did not reach end of file but ran out of frames");
  2601. }
  2602. { U32 const magicNumber = MEM_readLE32(headerBuffer);
  2603. /* Zstandard frame */
  2604. if (magicNumber == ZSTD_MAGICNUMBER) {
  2605. ZSTD_frameHeader header;
  2606. U64 const frameContentSize = ZSTD_getFrameContentSize(headerBuffer, numBytesRead);
  2607. if ( frameContentSize == ZSTD_CONTENTSIZE_ERROR
  2608. || frameContentSize == ZSTD_CONTENTSIZE_UNKNOWN ) {
  2609. info->decompUnavailable = 1;
  2610. } else {
  2611. info->decompressedSize += frameContentSize;
  2612. }
  2613. ERROR_IF(ZSTD_getFrameHeader(&header, headerBuffer, numBytesRead) != 0,
  2614. info_frame_error, "Error: could not decode frame header");
  2615. info->windowSize = header.windowSize;
  2616. /* move to the end of the frame header */
  2617. { size_t const headerSize = ZSTD_frameHeaderSize(headerBuffer, numBytesRead);
  2618. ERROR_IF(ZSTD_isError(headerSize), info_frame_error, "Error: could not determine frame header size");
  2619. ERROR_IF(fseek(srcFile, ((long)headerSize)-((long)numBytesRead), SEEK_CUR) != 0,
  2620. info_frame_error, "Error: could not move to end of frame header");
  2621. }
  2622. /* skip all blocks in the frame */
  2623. { int lastBlock = 0;
  2624. do {
  2625. BYTE blockHeaderBuffer[3];
  2626. ERROR_IF(fread(blockHeaderBuffer, 1, 3, srcFile) != 3,
  2627. info_frame_error, "Error while reading block header");
  2628. { U32 const blockHeader = MEM_readLE24(blockHeaderBuffer);
  2629. U32 const blockTypeID = (blockHeader >> 1) & 3;
  2630. U32 const isRLE = (blockTypeID == 1);
  2631. U32 const isWrongBlock = (blockTypeID == 3);
  2632. long const blockSize = isRLE ? 1 : (long)(blockHeader >> 3);
  2633. ERROR_IF(isWrongBlock, info_frame_error, "Error: unsupported block type");
  2634. lastBlock = blockHeader & 1;
  2635. ERROR_IF(fseek(srcFile, blockSize, SEEK_CUR) != 0,
  2636. info_frame_error, "Error: could not skip to end of block");
  2637. }
  2638. } while (lastBlock != 1);
  2639. }
  2640. /* check if checksum is used */
  2641. { BYTE const frameHeaderDescriptor = headerBuffer[4];
  2642. int const contentChecksumFlag = (frameHeaderDescriptor & (1 << 2)) >> 2;
  2643. if (contentChecksumFlag) {
  2644. info->usesCheck = 1;
  2645. ERROR_IF(fseek(srcFile, 4, SEEK_CUR) != 0,
  2646. info_frame_error, "Error: could not skip past checksum");
  2647. } }
  2648. info->numActualFrames++;
  2649. }
  2650. /* Skippable frame */
  2651. else if ((magicNumber & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) {
  2652. U32 const frameSize = MEM_readLE32(headerBuffer + 4);
  2653. long const seek = (long)(8 + frameSize - numBytesRead);
  2654. ERROR_IF(LONG_SEEK(srcFile, seek, SEEK_CUR) != 0,
  2655. info_frame_error, "Error: could not find end of skippable frame");
  2656. info->numSkippableFrames++;
  2657. }
  2658. /* unknown content */
  2659. else {
  2660. return info_not_zstd;
  2661. }
  2662. } /* magic number analysis */
  2663. } /* end analyzing frames */
  2664. return info_success;
  2665. }
  2666. static InfoError
  2667. getFileInfo_fileConfirmed(fileInfo_t* info, const char* inFileName)
  2668. {
  2669. InfoError status;
  2670. FILE* const srcFile = FIO_openSrcFile(NULL, inFileName);
  2671. ERROR_IF(srcFile == NULL, info_file_error, "Error: could not open source file %s", inFileName);
  2672. info->compressedSize = UTIL_getFileSize(inFileName);
  2673. status = FIO_analyzeFrames(info, srcFile);
  2674. fclose(srcFile);
  2675. info->nbFiles = 1;
  2676. return status;
  2677. }
  2678. /** getFileInfo() :
  2679. * Reads information from file, stores in *info
  2680. * @return : InfoError status
  2681. */
  2682. static InfoError
  2683. getFileInfo(fileInfo_t* info, const char* srcFileName)
  2684. {
  2685. ERROR_IF(!UTIL_isRegularFile(srcFileName),
  2686. info_file_error, "Error : %s is not a file", srcFileName);
  2687. return getFileInfo_fileConfirmed(info, srcFileName);
  2688. }
  2689. static void
  2690. displayInfo(const char* inFileName, const fileInfo_t* info, int displayLevel)
  2691. {
  2692. UTIL_HumanReadableSize_t const window_hrs = UTIL_makeHumanReadableSize(info->windowSize);
  2693. UTIL_HumanReadableSize_t const compressed_hrs = UTIL_makeHumanReadableSize(info->compressedSize);
  2694. UTIL_HumanReadableSize_t const decompressed_hrs = UTIL_makeHumanReadableSize(info->decompressedSize);
  2695. double const ratio = (info->compressedSize == 0) ? 0 : ((double)info->decompressedSize)/(double)info->compressedSize;
  2696. const char* const checkString = (info->usesCheck ? "XXH64" : "None");
  2697. if (displayLevel <= 2) {
  2698. if (!info->decompUnavailable) {
  2699. DISPLAYOUT("%6d %5d %6.*f%4s %8.*f%4s %5.3f %5s %s\n",
  2700. info->numSkippableFrames + info->numActualFrames,
  2701. info->numSkippableFrames,
  2702. compressed_hrs.precision, compressed_hrs.value, compressed_hrs.suffix,
  2703. decompressed_hrs.precision, decompressed_hrs.value, decompressed_hrs.suffix,
  2704. ratio, checkString, inFileName);
  2705. } else {
  2706. DISPLAYOUT("%6d %5d %6.*f%4s %5s %s\n",
  2707. info->numSkippableFrames + info->numActualFrames,
  2708. info->numSkippableFrames,
  2709. compressed_hrs.precision, compressed_hrs.value, compressed_hrs.suffix,
  2710. checkString, inFileName);
  2711. }
  2712. } else {
  2713. DISPLAYOUT("%s \n", inFileName);
  2714. DISPLAYOUT("# Zstandard Frames: %d\n", info->numActualFrames);
  2715. if (info->numSkippableFrames)
  2716. DISPLAYOUT("# Skippable Frames: %d\n", info->numSkippableFrames);
  2717. DISPLAYOUT("Window Size: %.*f%s (%llu B)\n",
  2718. window_hrs.precision, window_hrs.value, window_hrs.suffix,
  2719. (unsigned long long)info->windowSize);
  2720. DISPLAYOUT("Compressed Size: %.*f%s (%llu B)\n",
  2721. compressed_hrs.precision, compressed_hrs.value, compressed_hrs.suffix,
  2722. (unsigned long long)info->compressedSize);
  2723. if (!info->decompUnavailable) {
  2724. DISPLAYOUT("Decompressed Size: %.*f%s (%llu B)\n",
  2725. decompressed_hrs.precision, decompressed_hrs.value, decompressed_hrs.suffix,
  2726. (unsigned long long)info->decompressedSize);
  2727. DISPLAYOUT("Ratio: %.4f\n", ratio);
  2728. }
  2729. DISPLAYOUT("Check: %s\n", checkString);
  2730. DISPLAYOUT("\n");
  2731. }
  2732. }
  2733. static fileInfo_t FIO_addFInfo(fileInfo_t fi1, fileInfo_t fi2)
  2734. {
  2735. fileInfo_t total;
  2736. memset(&total, 0, sizeof(total));
  2737. total.numActualFrames = fi1.numActualFrames + fi2.numActualFrames;
  2738. total.numSkippableFrames = fi1.numSkippableFrames + fi2.numSkippableFrames;
  2739. total.compressedSize = fi1.compressedSize + fi2.compressedSize;
  2740. total.decompressedSize = fi1.decompressedSize + fi2.decompressedSize;
  2741. total.decompUnavailable = fi1.decompUnavailable | fi2.decompUnavailable;
  2742. total.usesCheck = fi1.usesCheck & fi2.usesCheck;
  2743. total.nbFiles = fi1.nbFiles + fi2.nbFiles;
  2744. return total;
  2745. }
  2746. static int
  2747. FIO_listFile(fileInfo_t* total, const char* inFileName, int displayLevel)
  2748. {
  2749. fileInfo_t info;
  2750. memset(&info, 0, sizeof(info));
  2751. { InfoError const error = getFileInfo(&info, inFileName);
  2752. switch (error) {
  2753. case info_frame_error:
  2754. /* display error, but provide output */
  2755. DISPLAYLEVEL(1, "Error while parsing \"%s\" \n", inFileName);
  2756. break;
  2757. case info_not_zstd:
  2758. DISPLAYOUT("File \"%s\" not compressed by zstd \n", inFileName);
  2759. if (displayLevel > 2) DISPLAYOUT("\n");
  2760. return 1;
  2761. case info_file_error:
  2762. /* error occurred while opening the file */
  2763. if (displayLevel > 2) DISPLAYOUT("\n");
  2764. return 1;
  2765. case info_truncated_input:
  2766. DISPLAYOUT("File \"%s\" is truncated \n", inFileName);
  2767. if (displayLevel > 2) DISPLAYOUT("\n");
  2768. return 1;
  2769. case info_success:
  2770. default:
  2771. break;
  2772. }
  2773. displayInfo(inFileName, &info, displayLevel);
  2774. *total = FIO_addFInfo(*total, info);
  2775. assert(error == info_success || error == info_frame_error);
  2776. return (int)error;
  2777. }
  2778. }
  2779. int FIO_listMultipleFiles(unsigned numFiles, const char** filenameTable, int displayLevel)
  2780. {
  2781. /* ensure no specified input is stdin (needs fseek() capability) */
  2782. { unsigned u;
  2783. for (u=0; u<numFiles;u++) {
  2784. ERROR_IF(!strcmp (filenameTable[u], stdinmark),
  2785. 1, "zstd: --list does not support reading from standard input");
  2786. } }
  2787. if (numFiles == 0) {
  2788. if (!IS_CONSOLE(stdin)) {
  2789. DISPLAYLEVEL(1, "zstd: --list does not support reading from standard input \n");
  2790. }
  2791. DISPLAYLEVEL(1, "No files given \n");
  2792. return 1;
  2793. }
  2794. if (displayLevel <= 2) {
  2795. DISPLAYOUT("Frames Skips Compressed Uncompressed Ratio Check Filename\n");
  2796. }
  2797. { int error = 0;
  2798. fileInfo_t total;
  2799. memset(&total, 0, sizeof(total));
  2800. total.usesCheck = 1;
  2801. /* --list each file, and check for any error */
  2802. { unsigned u;
  2803. for (u=0; u<numFiles;u++) {
  2804. error |= FIO_listFile(&total, filenameTable[u], displayLevel);
  2805. } }
  2806. if (numFiles > 1 && displayLevel <= 2) { /* display total */
  2807. UTIL_HumanReadableSize_t const compressed_hrs = UTIL_makeHumanReadableSize(total.compressedSize);
  2808. UTIL_HumanReadableSize_t const decompressed_hrs = UTIL_makeHumanReadableSize(total.decompressedSize);
  2809. double const ratio = (total.compressedSize == 0) ? 0 : ((double)total.decompressedSize)/(double)total.compressedSize;
  2810. const char* const checkString = (total.usesCheck ? "XXH64" : "");
  2811. DISPLAYOUT("----------------------------------------------------------------- \n");
  2812. if (total.decompUnavailable) {
  2813. DISPLAYOUT("%6d %5d %6.*f%4s %5s %u files\n",
  2814. total.numSkippableFrames + total.numActualFrames,
  2815. total.numSkippableFrames,
  2816. compressed_hrs.precision, compressed_hrs.value, compressed_hrs.suffix,
  2817. checkString, (unsigned)total.nbFiles);
  2818. } else {
  2819. DISPLAYOUT("%6d %5d %6.*f%4s %8.*f%4s %5.3f %5s %u files\n",
  2820. total.numSkippableFrames + total.numActualFrames,
  2821. total.numSkippableFrames,
  2822. compressed_hrs.precision, compressed_hrs.value, compressed_hrs.suffix,
  2823. decompressed_hrs.precision, decompressed_hrs.value, decompressed_hrs.suffix,
  2824. ratio, checkString, (unsigned)total.nbFiles);
  2825. } }
  2826. return error;
  2827. }
  2828. }
  2829. #endif /* #ifndef ZSTD_NODECOMPRESS */