blocksort.c 30 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094
  1. /*-------------------------------------------------------------*/
  2. /*--- Block sorting machinery ---*/
  3. /*--- blocksort.c ---*/
  4. /*-------------------------------------------------------------*/
  5. /* ------------------------------------------------------------------
  6. This file is part of bzip2/libbzip2, a program and library for
  7. lossless, block-sorting data compression.
  8. bzip2/libbzip2 version 1.0.8 of 13 July 2019
  9. Copyright (C) 1996-2019 Julian Seward <jseward@acm.org>
  10. Please read the WARNING, DISCLAIMER and PATENTS sections in the
  11. README file.
  12. This program is released under the terms of the license contained
  13. in the file LICENSE.
  14. ------------------------------------------------------------------ */
  15. #include "bzlib_private.h"
  16. /*---------------------------------------------*/
  17. /*--- Fallback O(N log(N)^2) sorting ---*/
  18. /*--- algorithm, for repetitive blocks ---*/
  19. /*---------------------------------------------*/
  20. /*---------------------------------------------*/
  21. static
  22. __inline__
  23. void fallbackSimpleSort ( UInt32* fmap,
  24. UInt32* eclass,
  25. Int32 lo,
  26. Int32 hi )
  27. {
  28. Int32 i, j, tmp;
  29. UInt32 ec_tmp;
  30. if (lo == hi) return;
  31. if (hi - lo > 3) {
  32. for ( i = hi-4; i >= lo; i-- ) {
  33. tmp = fmap[i];
  34. ec_tmp = eclass[tmp];
  35. for ( j = i+4; j <= hi && ec_tmp > eclass[fmap[j]]; j += 4 )
  36. fmap[j-4] = fmap[j];
  37. fmap[j-4] = tmp;
  38. }
  39. }
  40. for ( i = hi-1; i >= lo; i-- ) {
  41. tmp = fmap[i];
  42. ec_tmp = eclass[tmp];
  43. for ( j = i+1; j <= hi && ec_tmp > eclass[fmap[j]]; j++ )
  44. fmap[j-1] = fmap[j];
  45. fmap[j-1] = tmp;
  46. }
  47. }
  48. /*---------------------------------------------*/
  49. #define fswap(zz1, zz2) \
  50. { Int32 zztmp = zz1; zz1 = zz2; zz2 = zztmp; }
  51. #define fvswap(zzp1, zzp2, zzn) \
  52. { \
  53. Int32 yyp1 = (zzp1); \
  54. Int32 yyp2 = (zzp2); \
  55. Int32 yyn = (zzn); \
  56. while (yyn > 0) { \
  57. fswap(fmap[yyp1], fmap[yyp2]); \
  58. yyp1++; yyp2++; yyn--; \
  59. } \
  60. }
  61. #define fmin(a,b) ((a) < (b)) ? (a) : (b)
  62. #define fpush(lz,hz) { stackLo[sp] = lz; \
  63. stackHi[sp] = hz; \
  64. sp++; }
  65. #define fpop(lz,hz) { sp--; \
  66. lz = stackLo[sp]; \
  67. hz = stackHi[sp]; }
  68. #define FALLBACK_QSORT_SMALL_THRESH 10
  69. #define FALLBACK_QSORT_STACK_SIZE 100
  70. static
  71. void fallbackQSort3 ( UInt32* fmap,
  72. UInt32* eclass,
  73. Int32 loSt,
  74. Int32 hiSt )
  75. {
  76. Int32 unLo, unHi, ltLo, gtHi, n, m;
  77. Int32 sp, lo, hi;
  78. UInt32 med, r, r3;
  79. Int32 stackLo[FALLBACK_QSORT_STACK_SIZE];
  80. Int32 stackHi[FALLBACK_QSORT_STACK_SIZE];
  81. r = 0;
  82. sp = 0;
  83. fpush ( loSt, hiSt );
  84. while (sp > 0) {
  85. AssertH ( sp < FALLBACK_QSORT_STACK_SIZE - 1, 1004 );
  86. fpop ( lo, hi );
  87. if (hi - lo < FALLBACK_QSORT_SMALL_THRESH) {
  88. fallbackSimpleSort ( fmap, eclass, lo, hi );
  89. continue;
  90. }
  91. /* Random partitioning. Median of 3 sometimes fails to
  92. avoid bad cases. Median of 9 seems to help but
  93. looks rather expensive. This too seems to work but
  94. is cheaper. Guidance for the magic constants
  95. 7621 and 32768 is taken from Sedgewick's algorithms
  96. book, chapter 35.
  97. */
  98. r = ((r * 7621) + 1) % 32768;
  99. r3 = r % 3;
  100. if (r3 == 0) med = eclass[fmap[lo]]; else
  101. if (r3 == 1) med = eclass[fmap[(lo+hi)>>1]]; else
  102. med = eclass[fmap[hi]];
  103. unLo = ltLo = lo;
  104. unHi = gtHi = hi;
  105. while (1) {
  106. while (1) {
  107. if (unLo > unHi) break;
  108. n = (Int32)eclass[fmap[unLo]] - (Int32)med;
  109. if (n == 0) {
  110. fswap(fmap[unLo], fmap[ltLo]);
  111. ltLo++; unLo++;
  112. continue;
  113. };
  114. if (n > 0) break;
  115. unLo++;
  116. }
  117. while (1) {
  118. if (unLo > unHi) break;
  119. n = (Int32)eclass[fmap[unHi]] - (Int32)med;
  120. if (n == 0) {
  121. fswap(fmap[unHi], fmap[gtHi]);
  122. gtHi--; unHi--;
  123. continue;
  124. };
  125. if (n < 0) break;
  126. unHi--;
  127. }
  128. if (unLo > unHi) break;
  129. fswap(fmap[unLo], fmap[unHi]); unLo++; unHi--;
  130. }
  131. AssertD ( unHi == unLo-1, "fallbackQSort3(2)" );
  132. if (gtHi < ltLo) continue;
  133. n = fmin(ltLo-lo, unLo-ltLo); fvswap(lo, unLo-n, n);
  134. m = fmin(hi-gtHi, gtHi-unHi); fvswap(unLo, hi-m+1, m);
  135. n = lo + unLo - ltLo - 1;
  136. m = hi - (gtHi - unHi) + 1;
  137. if (n - lo > hi - m) {
  138. fpush ( lo, n );
  139. fpush ( m, hi );
  140. } else {
  141. fpush ( m, hi );
  142. fpush ( lo, n );
  143. }
  144. }
  145. }
  146. #undef fmin
  147. #undef fpush
  148. #undef fpop
  149. #undef fswap
  150. #undef fvswap
  151. #undef FALLBACK_QSORT_SMALL_THRESH
  152. #undef FALLBACK_QSORT_STACK_SIZE
  153. /*---------------------------------------------*/
  154. /* Pre:
  155. nblock > 0
  156. eclass exists for [0 .. nblock-1]
  157. ((UChar*)eclass) [0 .. nblock-1] holds block
  158. ptr exists for [0 .. nblock-1]
  159. Post:
  160. ((UChar*)eclass) [0 .. nblock-1] holds block
  161. All other areas of eclass destroyed
  162. fmap [0 .. nblock-1] holds sorted order
  163. bhtab [ 0 .. 2+(nblock/32) ] destroyed
  164. */
  165. #define SET_BH(zz) bhtab[(zz) >> 5] |= ((UInt32)1 << ((zz) & 31))
  166. #define CLEAR_BH(zz) bhtab[(zz) >> 5] &= ~((UInt32)1 << ((zz) & 31))
  167. #define ISSET_BH(zz) (bhtab[(zz) >> 5] & ((UInt32)1 << ((zz) & 31)))
  168. #define WORD_BH(zz) bhtab[(zz) >> 5]
  169. #define UNALIGNED_BH(zz) ((zz) & 0x01f)
  170. static
  171. void fallbackSort ( UInt32* fmap,
  172. UInt32* eclass,
  173. UInt32* bhtab,
  174. Int32 nblock,
  175. Int32 verb )
  176. {
  177. Int32 ftab[257];
  178. Int32 ftabCopy[256];
  179. Int32 H, i, j, k, l, r, cc, cc1;
  180. Int32 nNotDone;
  181. Int32 nBhtab;
  182. UChar* eclass8 = (UChar*)eclass;
  183. /*--
  184. Initial 1-char radix sort to generate
  185. initial fmap and initial BH bits.
  186. --*/
  187. if (verb >= 4)
  188. VPrintf0 ( " bucket sorting ...\n" );
  189. for (i = 0; i < 257; i++) ftab[i] = 0;
  190. for (i = 0; i < nblock; i++) ftab[eclass8[i]]++;
  191. for (i = 0; i < 256; i++) ftabCopy[i] = ftab[i];
  192. for (i = 1; i < 257; i++) ftab[i] += ftab[i-1];
  193. for (i = 0; i < nblock; i++) {
  194. j = eclass8[i];
  195. k = ftab[j] - 1;
  196. ftab[j] = k;
  197. fmap[k] = i;
  198. }
  199. nBhtab = 2 + (nblock / 32);
  200. for (i = 0; i < nBhtab; i++) bhtab[i] = 0;
  201. for (i = 0; i < 256; i++) SET_BH(ftab[i]);
  202. /*--
  203. Inductively refine the buckets. Kind-of an
  204. "exponential radix sort" (!), inspired by the
  205. Manber-Myers suffix array construction algorithm.
  206. --*/
  207. /*-- set sentinel bits for block-end detection --*/
  208. for (i = 0; i < 32; i++) {
  209. SET_BH(nblock + 2*i);
  210. CLEAR_BH(nblock + 2*i + 1);
  211. }
  212. /*-- the log(N) loop --*/
  213. H = 1;
  214. while (1) {
  215. if (verb >= 4)
  216. VPrintf1 ( " depth %6d has ", H );
  217. j = 0;
  218. for (i = 0; i < nblock; i++) {
  219. if (ISSET_BH(i)) j = i;
  220. k = fmap[i] - H; if (k < 0) k += nblock;
  221. eclass[k] = j;
  222. }
  223. nNotDone = 0;
  224. r = -1;
  225. while (1) {
  226. /*-- find the next non-singleton bucket --*/
  227. k = r + 1;
  228. while (ISSET_BH(k) && UNALIGNED_BH(k)) k++;
  229. if (ISSET_BH(k)) {
  230. while (WORD_BH(k) == 0xffffffff) k += 32;
  231. while (ISSET_BH(k)) k++;
  232. }
  233. l = k - 1;
  234. if (l >= nblock) break;
  235. while (!ISSET_BH(k) && UNALIGNED_BH(k)) k++;
  236. if (!ISSET_BH(k)) {
  237. while (WORD_BH(k) == 0x00000000) k += 32;
  238. while (!ISSET_BH(k)) k++;
  239. }
  240. r = k - 1;
  241. if (r >= nblock) break;
  242. /*-- now [l, r] bracket current bucket --*/
  243. if (r > l) {
  244. nNotDone += (r - l + 1);
  245. fallbackQSort3 ( fmap, eclass, l, r );
  246. /*-- scan bucket and generate header bits-- */
  247. cc = -1;
  248. for (i = l; i <= r; i++) {
  249. cc1 = eclass[fmap[i]];
  250. if (cc != cc1) { SET_BH(i); cc = cc1; };
  251. }
  252. }
  253. }
  254. if (verb >= 4)
  255. VPrintf1 ( "%6d unresolved strings\n", nNotDone );
  256. H *= 2;
  257. if (H > nblock || nNotDone == 0) break;
  258. }
  259. /*--
  260. Reconstruct the original block in
  261. eclass8 [0 .. nblock-1], since the
  262. previous phase destroyed it.
  263. --*/
  264. if (verb >= 4)
  265. VPrintf0 ( " reconstructing block ...\n" );
  266. j = 0;
  267. for (i = 0; i < nblock; i++) {
  268. while (ftabCopy[j] == 0) j++;
  269. ftabCopy[j]--;
  270. eclass8[fmap[i]] = (UChar)j;
  271. }
  272. AssertH ( j < 256, 1005 );
  273. }
  274. #undef SET_BH
  275. #undef CLEAR_BH
  276. #undef ISSET_BH
  277. #undef WORD_BH
  278. #undef UNALIGNED_BH
  279. /*---------------------------------------------*/
  280. /*--- The main, O(N^2 log(N)) sorting ---*/
  281. /*--- algorithm. Faster for "normal" ---*/
  282. /*--- non-repetitive blocks. ---*/
  283. /*---------------------------------------------*/
  284. /*---------------------------------------------*/
  285. static
  286. __inline__
  287. Bool mainGtU ( UInt32 i1,
  288. UInt32 i2,
  289. UChar* block,
  290. UInt16* quadrant,
  291. UInt32 nblock,
  292. Int32* budget )
  293. {
  294. Int32 k;
  295. UChar c1, c2;
  296. UInt16 s1, s2;
  297. AssertD ( i1 != i2, "mainGtU" );
  298. /* 1 */
  299. c1 = block[i1]; c2 = block[i2];
  300. if (c1 != c2) return (c1 > c2);
  301. i1++; i2++;
  302. /* 2 */
  303. c1 = block[i1]; c2 = block[i2];
  304. if (c1 != c2) return (c1 > c2);
  305. i1++; i2++;
  306. /* 3 */
  307. c1 = block[i1]; c2 = block[i2];
  308. if (c1 != c2) return (c1 > c2);
  309. i1++; i2++;
  310. /* 4 */
  311. c1 = block[i1]; c2 = block[i2];
  312. if (c1 != c2) return (c1 > c2);
  313. i1++; i2++;
  314. /* 5 */
  315. c1 = block[i1]; c2 = block[i2];
  316. if (c1 != c2) return (c1 > c2);
  317. i1++; i2++;
  318. /* 6 */
  319. c1 = block[i1]; c2 = block[i2];
  320. if (c1 != c2) return (c1 > c2);
  321. i1++; i2++;
  322. /* 7 */
  323. c1 = block[i1]; c2 = block[i2];
  324. if (c1 != c2) return (c1 > c2);
  325. i1++; i2++;
  326. /* 8 */
  327. c1 = block[i1]; c2 = block[i2];
  328. if (c1 != c2) return (c1 > c2);
  329. i1++; i2++;
  330. /* 9 */
  331. c1 = block[i1]; c2 = block[i2];
  332. if (c1 != c2) return (c1 > c2);
  333. i1++; i2++;
  334. /* 10 */
  335. c1 = block[i1]; c2 = block[i2];
  336. if (c1 != c2) return (c1 > c2);
  337. i1++; i2++;
  338. /* 11 */
  339. c1 = block[i1]; c2 = block[i2];
  340. if (c1 != c2) return (c1 > c2);
  341. i1++; i2++;
  342. /* 12 */
  343. c1 = block[i1]; c2 = block[i2];
  344. if (c1 != c2) return (c1 > c2);
  345. i1++; i2++;
  346. k = nblock + 8;
  347. do {
  348. /* 1 */
  349. c1 = block[i1]; c2 = block[i2];
  350. if (c1 != c2) return (c1 > c2);
  351. s1 = quadrant[i1]; s2 = quadrant[i2];
  352. if (s1 != s2) return (s1 > s2);
  353. i1++; i2++;
  354. /* 2 */
  355. c1 = block[i1]; c2 = block[i2];
  356. if (c1 != c2) return (c1 > c2);
  357. s1 = quadrant[i1]; s2 = quadrant[i2];
  358. if (s1 != s2) return (s1 > s2);
  359. i1++; i2++;
  360. /* 3 */
  361. c1 = block[i1]; c2 = block[i2];
  362. if (c1 != c2) return (c1 > c2);
  363. s1 = quadrant[i1]; s2 = quadrant[i2];
  364. if (s1 != s2) return (s1 > s2);
  365. i1++; i2++;
  366. /* 4 */
  367. c1 = block[i1]; c2 = block[i2];
  368. if (c1 != c2) return (c1 > c2);
  369. s1 = quadrant[i1]; s2 = quadrant[i2];
  370. if (s1 != s2) return (s1 > s2);
  371. i1++; i2++;
  372. /* 5 */
  373. c1 = block[i1]; c2 = block[i2];
  374. if (c1 != c2) return (c1 > c2);
  375. s1 = quadrant[i1]; s2 = quadrant[i2];
  376. if (s1 != s2) return (s1 > s2);
  377. i1++; i2++;
  378. /* 6 */
  379. c1 = block[i1]; c2 = block[i2];
  380. if (c1 != c2) return (c1 > c2);
  381. s1 = quadrant[i1]; s2 = quadrant[i2];
  382. if (s1 != s2) return (s1 > s2);
  383. i1++; i2++;
  384. /* 7 */
  385. c1 = block[i1]; c2 = block[i2];
  386. if (c1 != c2) return (c1 > c2);
  387. s1 = quadrant[i1]; s2 = quadrant[i2];
  388. if (s1 != s2) return (s1 > s2);
  389. i1++; i2++;
  390. /* 8 */
  391. c1 = block[i1]; c2 = block[i2];
  392. if (c1 != c2) return (c1 > c2);
  393. s1 = quadrant[i1]; s2 = quadrant[i2];
  394. if (s1 != s2) return (s1 > s2);
  395. i1++; i2++;
  396. if (i1 >= nblock) i1 -= nblock;
  397. if (i2 >= nblock) i2 -= nblock;
  398. k -= 8;
  399. (*budget)--;
  400. }
  401. while (k >= 0);
  402. return False;
  403. }
  404. /*---------------------------------------------*/
  405. /*--
  406. Knuth's increments seem to work better
  407. than Incerpi-Sedgewick here. Possibly
  408. because the number of elems to sort is
  409. usually small, typically <= 20.
  410. --*/
  411. static
  412. Int32 incs[14] = { 1, 4, 13, 40, 121, 364, 1093, 3280,
  413. 9841, 29524, 88573, 265720,
  414. 797161, 2391484 };
  415. static
  416. void mainSimpleSort ( UInt32* ptr,
  417. UChar* block,
  418. UInt16* quadrant,
  419. Int32 nblock,
  420. Int32 lo,
  421. Int32 hi,
  422. Int32 d,
  423. Int32* budget )
  424. {
  425. Int32 i, j, h, bigN, hp;
  426. UInt32 v;
  427. bigN = hi - lo + 1;
  428. if (bigN < 2) return;
  429. hp = 0;
  430. while (incs[hp] < bigN) hp++;
  431. hp--;
  432. for (; hp >= 0; hp--) {
  433. h = incs[hp];
  434. i = lo + h;
  435. while (True) {
  436. /*-- copy 1 --*/
  437. if (i > hi) break;
  438. v = ptr[i];
  439. j = i;
  440. while ( mainGtU (
  441. ptr[j-h]+d, v+d, block, quadrant, nblock, budget
  442. ) ) {
  443. ptr[j] = ptr[j-h];
  444. j = j - h;
  445. if (j <= (lo + h - 1)) break;
  446. }
  447. ptr[j] = v;
  448. i++;
  449. /*-- copy 2 --*/
  450. if (i > hi) break;
  451. v = ptr[i];
  452. j = i;
  453. while ( mainGtU (
  454. ptr[j-h]+d, v+d, block, quadrant, nblock, budget
  455. ) ) {
  456. ptr[j] = ptr[j-h];
  457. j = j - h;
  458. if (j <= (lo + h - 1)) break;
  459. }
  460. ptr[j] = v;
  461. i++;
  462. /*-- copy 3 --*/
  463. if (i > hi) break;
  464. v = ptr[i];
  465. j = i;
  466. while ( mainGtU (
  467. ptr[j-h]+d, v+d, block, quadrant, nblock, budget
  468. ) ) {
  469. ptr[j] = ptr[j-h];
  470. j = j - h;
  471. if (j <= (lo + h - 1)) break;
  472. }
  473. ptr[j] = v;
  474. i++;
  475. if (*budget < 0) return;
  476. }
  477. }
  478. }
  479. /*---------------------------------------------*/
  480. /*--
  481. The following is an implementation of
  482. an elegant 3-way quicksort for strings,
  483. described in a paper "Fast Algorithms for
  484. Sorting and Searching Strings", by Robert
  485. Sedgewick and Jon L. Bentley.
  486. --*/
  487. #define mswap(zz1, zz2) \
  488. { Int32 zztmp = zz1; zz1 = zz2; zz2 = zztmp; }
  489. #define mvswap(zzp1, zzp2, zzn) \
  490. { \
  491. Int32 yyp1 = (zzp1); \
  492. Int32 yyp2 = (zzp2); \
  493. Int32 yyn = (zzn); \
  494. while (yyn > 0) { \
  495. mswap(ptr[yyp1], ptr[yyp2]); \
  496. yyp1++; yyp2++; yyn--; \
  497. } \
  498. }
  499. static
  500. __inline__
  501. UChar mmed3 ( UChar a, UChar b, UChar c )
  502. {
  503. UChar t;
  504. if (a > b) { t = a; a = b; b = t; };
  505. if (b > c) {
  506. b = c;
  507. if (a > b) b = a;
  508. }
  509. return b;
  510. }
  511. #define mmin(a,b) ((a) < (b)) ? (a) : (b)
  512. #define mpush(lz,hz,dz) { stackLo[sp] = lz; \
  513. stackHi[sp] = hz; \
  514. stackD [sp] = dz; \
  515. sp++; }
  516. #define mpop(lz,hz,dz) { sp--; \
  517. lz = stackLo[sp]; \
  518. hz = stackHi[sp]; \
  519. dz = stackD [sp]; }
  520. #define mnextsize(az) (nextHi[az]-nextLo[az])
  521. #define mnextswap(az,bz) \
  522. { Int32 tz; \
  523. tz = nextLo[az]; nextLo[az] = nextLo[bz]; nextLo[bz] = tz; \
  524. tz = nextHi[az]; nextHi[az] = nextHi[bz]; nextHi[bz] = tz; \
  525. tz = nextD [az]; nextD [az] = nextD [bz]; nextD [bz] = tz; }
  526. #define MAIN_QSORT_SMALL_THRESH 20
  527. #define MAIN_QSORT_DEPTH_THRESH (BZ_N_RADIX + BZ_N_QSORT)
  528. #define MAIN_QSORT_STACK_SIZE 100
  529. static
  530. void mainQSort3 ( UInt32* ptr,
  531. UChar* block,
  532. UInt16* quadrant,
  533. Int32 nblock,
  534. Int32 loSt,
  535. Int32 hiSt,
  536. Int32 dSt,
  537. Int32* budget )
  538. {
  539. Int32 unLo, unHi, ltLo, gtHi, n, m, med;
  540. Int32 sp, lo, hi, d;
  541. Int32 stackLo[MAIN_QSORT_STACK_SIZE];
  542. Int32 stackHi[MAIN_QSORT_STACK_SIZE];
  543. Int32 stackD [MAIN_QSORT_STACK_SIZE];
  544. Int32 nextLo[3];
  545. Int32 nextHi[3];
  546. Int32 nextD [3];
  547. sp = 0;
  548. mpush ( loSt, hiSt, dSt );
  549. while (sp > 0) {
  550. AssertH ( sp < MAIN_QSORT_STACK_SIZE - 2, 1001 );
  551. mpop ( lo, hi, d );
  552. if (hi - lo < MAIN_QSORT_SMALL_THRESH ||
  553. d > MAIN_QSORT_DEPTH_THRESH) {
  554. mainSimpleSort ( ptr, block, quadrant, nblock, lo, hi, d, budget );
  555. if (*budget < 0) return;
  556. continue;
  557. }
  558. med = (Int32)
  559. mmed3 ( block[ptr[ lo ]+d],
  560. block[ptr[ hi ]+d],
  561. block[ptr[ (lo+hi)>>1 ]+d] );
  562. unLo = ltLo = lo;
  563. unHi = gtHi = hi;
  564. while (True) {
  565. while (True) {
  566. if (unLo > unHi) break;
  567. n = ((Int32)block[ptr[unLo]+d]) - med;
  568. if (n == 0) {
  569. mswap(ptr[unLo], ptr[ltLo]);
  570. ltLo++; unLo++; continue;
  571. };
  572. if (n > 0) break;
  573. unLo++;
  574. }
  575. while (True) {
  576. if (unLo > unHi) break;
  577. n = ((Int32)block[ptr[unHi]+d]) - med;
  578. if (n == 0) {
  579. mswap(ptr[unHi], ptr[gtHi]);
  580. gtHi--; unHi--; continue;
  581. };
  582. if (n < 0) break;
  583. unHi--;
  584. }
  585. if (unLo > unHi) break;
  586. mswap(ptr[unLo], ptr[unHi]); unLo++; unHi--;
  587. }
  588. AssertD ( unHi == unLo-1, "mainQSort3(2)" );
  589. if (gtHi < ltLo) {
  590. mpush(lo, hi, d+1 );
  591. continue;
  592. }
  593. n = mmin(ltLo-lo, unLo-ltLo); mvswap(lo, unLo-n, n);
  594. m = mmin(hi-gtHi, gtHi-unHi); mvswap(unLo, hi-m+1, m);
  595. n = lo + unLo - ltLo - 1;
  596. m = hi - (gtHi - unHi) + 1;
  597. nextLo[0] = lo; nextHi[0] = n; nextD[0] = d;
  598. nextLo[1] = m; nextHi[1] = hi; nextD[1] = d;
  599. nextLo[2] = n+1; nextHi[2] = m-1; nextD[2] = d+1;
  600. if (mnextsize(0) < mnextsize(1)) mnextswap(0,1);
  601. if (mnextsize(1) < mnextsize(2)) mnextswap(1,2);
  602. if (mnextsize(0) < mnextsize(1)) mnextswap(0,1);
  603. AssertD (mnextsize(0) >= mnextsize(1), "mainQSort3(8)" );
  604. AssertD (mnextsize(1) >= mnextsize(2), "mainQSort3(9)" );
  605. mpush (nextLo[0], nextHi[0], nextD[0]);
  606. mpush (nextLo[1], nextHi[1], nextD[1]);
  607. mpush (nextLo[2], nextHi[2], nextD[2]);
  608. }
  609. }
  610. #undef mswap
  611. #undef mvswap
  612. #undef mpush
  613. #undef mpop
  614. #undef mmin
  615. #undef mnextsize
  616. #undef mnextswap
  617. #undef MAIN_QSORT_SMALL_THRESH
  618. #undef MAIN_QSORT_DEPTH_THRESH
  619. #undef MAIN_QSORT_STACK_SIZE
  620. /*---------------------------------------------*/
  621. /* Pre:
  622. nblock > N_OVERSHOOT
  623. block32 exists for [0 .. nblock-1 +N_OVERSHOOT]
  624. ((UChar*)block32) [0 .. nblock-1] holds block
  625. ptr exists for [0 .. nblock-1]
  626. Post:
  627. ((UChar*)block32) [0 .. nblock-1] holds block
  628. All other areas of block32 destroyed
  629. ftab [0 .. 65536 ] destroyed
  630. ptr [0 .. nblock-1] holds sorted order
  631. if (*budget < 0), sorting was abandoned
  632. */
  633. #define BIGFREQ(b) (ftab[((b)+1) << 8] - ftab[(b) << 8])
  634. #define SETMASK (1 << 21)
  635. #define CLEARMASK (~(SETMASK))
  636. static
  637. void mainSort ( UInt32* ptr,
  638. UChar* block,
  639. UInt16* quadrant,
  640. UInt32* ftab,
  641. Int32 nblock,
  642. Int32 verb,
  643. Int32* budget )
  644. {
  645. Int32 i, j, k, ss, sb;
  646. Int32 runningOrder[256];
  647. Bool bigDone[256];
  648. Int32 copyStart[256];
  649. Int32 copyEnd [256];
  650. UChar c1;
  651. Int32 numQSorted;
  652. UInt16 s;
  653. if (verb >= 4) VPrintf0 ( " main sort initialise ...\n" );
  654. /*-- set up the 2-byte frequency table --*/
  655. for (i = 65536; i >= 0; i--) ftab[i] = 0;
  656. j = block[0] << 8;
  657. i = nblock-1;
  658. for (; i >= 3; i -= 4) {
  659. quadrant[i] = 0;
  660. j = (j >> 8) | ( ((UInt16)block[i]) << 8);
  661. ftab[j]++;
  662. quadrant[i-1] = 0;
  663. j = (j >> 8) | ( ((UInt16)block[i-1]) << 8);
  664. ftab[j]++;
  665. quadrant[i-2] = 0;
  666. j = (j >> 8) | ( ((UInt16)block[i-2]) << 8);
  667. ftab[j]++;
  668. quadrant[i-3] = 0;
  669. j = (j >> 8) | ( ((UInt16)block[i-3]) << 8);
  670. ftab[j]++;
  671. }
  672. for (; i >= 0; i--) {
  673. quadrant[i] = 0;
  674. j = (j >> 8) | ( ((UInt16)block[i]) << 8);
  675. ftab[j]++;
  676. }
  677. /*-- (emphasises close relationship of block & quadrant) --*/
  678. for (i = 0; i < BZ_N_OVERSHOOT; i++) {
  679. block [nblock+i] = block[i];
  680. quadrant[nblock+i] = 0;
  681. }
  682. if (verb >= 4) VPrintf0 ( " bucket sorting ...\n" );
  683. /*-- Complete the initial radix sort --*/
  684. for (i = 1; i <= 65536; i++) ftab[i] += ftab[i-1];
  685. s = block[0] << 8;
  686. i = nblock-1;
  687. for (; i >= 3; i -= 4) {
  688. s = (s >> 8) | (block[i] << 8);
  689. j = ftab[s] -1;
  690. ftab[s] = j;
  691. ptr[j] = i;
  692. s = (s >> 8) | (block[i-1] << 8);
  693. j = ftab[s] -1;
  694. ftab[s] = j;
  695. ptr[j] = i-1;
  696. s = (s >> 8) | (block[i-2] << 8);
  697. j = ftab[s] -1;
  698. ftab[s] = j;
  699. ptr[j] = i-2;
  700. s = (s >> 8) | (block[i-3] << 8);
  701. j = ftab[s] -1;
  702. ftab[s] = j;
  703. ptr[j] = i-3;
  704. }
  705. for (; i >= 0; i--) {
  706. s = (s >> 8) | (block[i] << 8);
  707. j = ftab[s] -1;
  708. ftab[s] = j;
  709. ptr[j] = i;
  710. }
  711. /*--
  712. Now ftab contains the first loc of every small bucket.
  713. Calculate the running order, from smallest to largest
  714. big bucket.
  715. --*/
  716. for (i = 0; i <= 255; i++) {
  717. bigDone [i] = False;
  718. runningOrder[i] = i;
  719. }
  720. {
  721. Int32 vv;
  722. Int32 h = 1;
  723. do h = 3 * h + 1; while (h <= 256);
  724. do {
  725. h = h / 3;
  726. for (i = h; i <= 255; i++) {
  727. vv = runningOrder[i];
  728. j = i;
  729. while ( BIGFREQ(runningOrder[j-h]) > BIGFREQ(vv) ) {
  730. runningOrder[j] = runningOrder[j-h];
  731. j = j - h;
  732. if (j <= (h - 1)) goto zero;
  733. }
  734. zero:
  735. runningOrder[j] = vv;
  736. }
  737. } while (h != 1);
  738. }
  739. /*--
  740. The main sorting loop.
  741. --*/
  742. numQSorted = 0;
  743. for (i = 0; i <= 255; i++) {
  744. /*--
  745. Process big buckets, starting with the least full.
  746. Basically this is a 3-step process in which we call
  747. mainQSort3 to sort the small buckets [ss, j], but
  748. also make a big effort to avoid the calls if we can.
  749. --*/
  750. ss = runningOrder[i];
  751. /*--
  752. Step 1:
  753. Complete the big bucket [ss] by quicksorting
  754. any unsorted small buckets [ss, j], for j != ss.
  755. Hopefully previous pointer-scanning phases have already
  756. completed many of the small buckets [ss, j], so
  757. we don't have to sort them at all.
  758. --*/
  759. for (j = 0; j <= 255; j++) {
  760. if (j != ss) {
  761. sb = (ss << 8) + j;
  762. if ( ! (ftab[sb] & SETMASK) ) {
  763. Int32 lo = ftab[sb] & CLEARMASK;
  764. Int32 hi = (ftab[sb+1] & CLEARMASK) - 1;
  765. if (hi > lo) {
  766. if (verb >= 4)
  767. VPrintf4 ( " qsort [0x%x, 0x%x] "
  768. "done %d this %d\n",
  769. ss, j, numQSorted, hi - lo + 1 );
  770. mainQSort3 (
  771. ptr, block, quadrant, nblock,
  772. lo, hi, BZ_N_RADIX, budget
  773. );
  774. numQSorted += (hi - lo + 1);
  775. if (*budget < 0) return;
  776. }
  777. }
  778. ftab[sb] |= SETMASK;
  779. }
  780. }
  781. AssertH ( !bigDone[ss], 1006 );
  782. /*--
  783. Step 2:
  784. Now scan this big bucket [ss] so as to synthesise the
  785. sorted order for small buckets [t, ss] for all t,
  786. including, magically, the bucket [ss,ss] too.
  787. This will avoid doing Real Work in subsequent Step 1's.
  788. --*/
  789. {
  790. for (j = 0; j <= 255; j++) {
  791. copyStart[j] = ftab[(j << 8) + ss] & CLEARMASK;
  792. copyEnd [j] = (ftab[(j << 8) + ss + 1] & CLEARMASK) - 1;
  793. }
  794. for (j = ftab[ss << 8] & CLEARMASK; j < copyStart[ss]; j++) {
  795. k = ptr[j]-1; if (k < 0) k += nblock;
  796. c1 = block[k];
  797. if (!bigDone[c1])
  798. ptr[ copyStart[c1]++ ] = k;
  799. }
  800. for (j = (ftab[(ss+1) << 8] & CLEARMASK) - 1; j > copyEnd[ss]; j--) {
  801. k = ptr[j]-1; if (k < 0) k += nblock;
  802. c1 = block[k];
  803. if (!bigDone[c1])
  804. ptr[ copyEnd[c1]-- ] = k;
  805. }
  806. }
  807. AssertH ( (copyStart[ss]-1 == copyEnd[ss])
  808. ||
  809. /* Extremely rare case missing in bzip2-1.0.0 and 1.0.1.
  810. Necessity for this case is demonstrated by compressing
  811. a sequence of approximately 48.5 million of character
  812. 251; 1.0.0/1.0.1 will then die here. */
  813. (copyStart[ss] == 0 && copyEnd[ss] == nblock-1),
  814. 1007 )
  815. for (j = 0; j <= 255; j++) ftab[(j << 8) + ss] |= SETMASK;
  816. /*--
  817. Step 3:
  818. The [ss] big bucket is now done. Record this fact,
  819. and update the quadrant descriptors. Remember to
  820. update quadrants in the overshoot area too, if
  821. necessary. The "if (i < 255)" test merely skips
  822. this updating for the last bucket processed, since
  823. updating for the last bucket is pointless.
  824. The quadrant array provides a way to incrementally
  825. cache sort orderings, as they appear, so as to
  826. make subsequent comparisons in fullGtU() complete
  827. faster. For repetitive blocks this makes a big
  828. difference (but not big enough to be able to avoid
  829. the fallback sorting mechanism, exponential radix sort).
  830. The precise meaning is: at all times:
  831. for 0 <= i < nblock and 0 <= j <= nblock
  832. if block[i] != block[j],
  833. then the relative values of quadrant[i] and
  834. quadrant[j] are meaningless.
  835. else {
  836. if quadrant[i] < quadrant[j]
  837. then the string starting at i lexicographically
  838. precedes the string starting at j
  839. else if quadrant[i] > quadrant[j]
  840. then the string starting at j lexicographically
  841. precedes the string starting at i
  842. else
  843. the relative ordering of the strings starting
  844. at i and j has not yet been determined.
  845. }
  846. --*/
  847. bigDone[ss] = True;
  848. if (i < 255) {
  849. Int32 bbStart = ftab[ss << 8] & CLEARMASK;
  850. Int32 bbSize = (ftab[(ss+1) << 8] & CLEARMASK) - bbStart;
  851. Int32 shifts = 0;
  852. while ((bbSize >> shifts) > 65534) shifts++;
  853. for (j = bbSize-1; j >= 0; j--) {
  854. Int32 a2update = ptr[bbStart + j];
  855. UInt16 qVal = (UInt16)(j >> shifts);
  856. quadrant[a2update] = qVal;
  857. if (a2update < BZ_N_OVERSHOOT)
  858. quadrant[a2update + nblock] = qVal;
  859. }
  860. AssertH ( ((bbSize-1) >> shifts) <= 65535, 1002 );
  861. }
  862. }
  863. if (verb >= 4)
  864. VPrintf3 ( " %d pointers, %d sorted, %d scanned\n",
  865. nblock, numQSorted, nblock - numQSorted );
  866. }
  867. #undef BIGFREQ
  868. #undef SETMASK
  869. #undef CLEARMASK
  870. /*---------------------------------------------*/
  871. /* Pre:
  872. nblock > 0
  873. arr2 exists for [0 .. nblock-1 +N_OVERSHOOT]
  874. ((UChar*)arr2) [0 .. nblock-1] holds block
  875. arr1 exists for [0 .. nblock-1]
  876. Post:
  877. ((UChar*)arr2) [0 .. nblock-1] holds block
  878. All other areas of block destroyed
  879. ftab [ 0 .. 65536 ] destroyed
  880. arr1 [0 .. nblock-1] holds sorted order
  881. */
  882. void BZ2_blockSort ( EState* s )
  883. {
  884. UInt32* ptr = s->ptr;
  885. UChar* block = s->block;
  886. UInt32* ftab = s->ftab;
  887. Int32 nblock = s->nblock;
  888. Int32 verb = s->verbosity;
  889. Int32 wfact = s->workFactor;
  890. UInt16* quadrant;
  891. Int32 budget;
  892. Int32 budgetInit;
  893. Int32 i;
  894. if (nblock < 10000) {
  895. fallbackSort ( s->arr1, s->arr2, ftab, nblock, verb );
  896. } else {
  897. /* Calculate the location for quadrant, remembering to get
  898. the alignment right. Assumes that &(block[0]) is at least
  899. 2-byte aligned -- this should be ok since block is really
  900. the first section of arr2.
  901. */
  902. i = nblock+BZ_N_OVERSHOOT;
  903. if (i & 1) i++;
  904. quadrant = (UInt16*)(&(block[i]));
  905. /* (wfact-1) / 3 puts the default-factor-30
  906. transition point at very roughly the same place as
  907. with v0.1 and v0.9.0.
  908. Not that it particularly matters any more, since the
  909. resulting compressed stream is now the same regardless
  910. of whether or not we use the main sort or fallback sort.
  911. */
  912. if (wfact < 1 ) wfact = 1;
  913. if (wfact > 100) wfact = 100;
  914. budgetInit = nblock * ((wfact-1) / 3);
  915. budget = budgetInit;
  916. mainSort ( ptr, block, quadrant, ftab, nblock, verb, &budget );
  917. if (verb >= 3)
  918. VPrintf3 ( " %d work, %d block, ratio %5.2f\n",
  919. budgetInit - budget,
  920. nblock,
  921. (float)(budgetInit - budget) /
  922. (float)(nblock==0 ? 1 : nblock) );
  923. if (budget < 0) {
  924. if (verb >= 2)
  925. VPrintf0 ( " too repetitive; using fallback"
  926. " sorting algorithm\n" );
  927. fallbackSort ( s->arr1, s->arr2, ftab, nblock, verb );
  928. }
  929. }
  930. s->origPtr = -1;
  931. for (i = 0; i < s->nblock; i++)
  932. if (ptr[i] == 0)
  933. { s->origPtr = i; break; };
  934. AssertH( s->origPtr != -1, 1003 );
  935. }
  936. /*-------------------------------------------------------------*/
  937. /*--- end blocksort.c ---*/
  938. /*-------------------------------------------------------------*/