12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094 |
- /*-------------------------------------------------------------*/
- /*--- Block sorting machinery ---*/
- /*--- blocksort.c ---*/
- /*-------------------------------------------------------------*/
- /* ------------------------------------------------------------------
- This file is part of bzip2/libbzip2, a program and library for
- lossless, block-sorting data compression.
- bzip2/libbzip2 version 1.0.8 of 13 July 2019
- Copyright (C) 1996-2019 Julian Seward <jseward@acm.org>
- Please read the WARNING, DISCLAIMER and PATENTS sections in the
- README file.
- This program is released under the terms of the license contained
- in the file LICENSE.
- ------------------------------------------------------------------ */
- #include "bzlib_private.h"
- /*---------------------------------------------*/
- /*--- Fallback O(N log(N)^2) sorting ---*/
- /*--- algorithm, for repetitive blocks ---*/
- /*---------------------------------------------*/
- /*---------------------------------------------*/
- static
- __inline__
- void fallbackSimpleSort ( UInt32* fmap,
- UInt32* eclass,
- Int32 lo,
- Int32 hi )
- {
- Int32 i, j, tmp;
- UInt32 ec_tmp;
- if (lo == hi) return;
- if (hi - lo > 3) {
- for ( i = hi-4; i >= lo; i-- ) {
- tmp = fmap[i];
- ec_tmp = eclass[tmp];
- for ( j = i+4; j <= hi && ec_tmp > eclass[fmap[j]]; j += 4 )
- fmap[j-4] = fmap[j];
- fmap[j-4] = tmp;
- }
- }
- for ( i = hi-1; i >= lo; i-- ) {
- tmp = fmap[i];
- ec_tmp = eclass[tmp];
- for ( j = i+1; j <= hi && ec_tmp > eclass[fmap[j]]; j++ )
- fmap[j-1] = fmap[j];
- fmap[j-1] = tmp;
- }
- }
- /*---------------------------------------------*/
- #define fswap(zz1, zz2) \
- { Int32 zztmp = zz1; zz1 = zz2; zz2 = zztmp; }
- #define fvswap(zzp1, zzp2, zzn) \
- { \
- Int32 yyp1 = (zzp1); \
- Int32 yyp2 = (zzp2); \
- Int32 yyn = (zzn); \
- while (yyn > 0) { \
- fswap(fmap[yyp1], fmap[yyp2]); \
- yyp1++; yyp2++; yyn--; \
- } \
- }
- #define fmin(a,b) ((a) < (b)) ? (a) : (b)
- #define fpush(lz,hz) { stackLo[sp] = lz; \
- stackHi[sp] = hz; \
- sp++; }
- #define fpop(lz,hz) { sp--; \
- lz = stackLo[sp]; \
- hz = stackHi[sp]; }
- #define FALLBACK_QSORT_SMALL_THRESH 10
- #define FALLBACK_QSORT_STACK_SIZE 100
- static
- void fallbackQSort3 ( UInt32* fmap,
- UInt32* eclass,
- Int32 loSt,
- Int32 hiSt )
- {
- Int32 unLo, unHi, ltLo, gtHi, n, m;
- Int32 sp, lo, hi;
- UInt32 med, r, r3;
- Int32 stackLo[FALLBACK_QSORT_STACK_SIZE];
- Int32 stackHi[FALLBACK_QSORT_STACK_SIZE];
- r = 0;
- sp = 0;
- fpush ( loSt, hiSt );
- while (sp > 0) {
- AssertH ( sp < FALLBACK_QSORT_STACK_SIZE - 1, 1004 );
- fpop ( lo, hi );
- if (hi - lo < FALLBACK_QSORT_SMALL_THRESH) {
- fallbackSimpleSort ( fmap, eclass, lo, hi );
- continue;
- }
- /* Random partitioning. Median of 3 sometimes fails to
- avoid bad cases. Median of 9 seems to help but
- looks rather expensive. This too seems to work but
- is cheaper. Guidance for the magic constants
- 7621 and 32768 is taken from Sedgewick's algorithms
- book, chapter 35.
- */
- r = ((r * 7621) + 1) % 32768;
- r3 = r % 3;
- if (r3 == 0) med = eclass[fmap[lo]]; else
- if (r3 == 1) med = eclass[fmap[(lo+hi)>>1]]; else
- med = eclass[fmap[hi]];
- unLo = ltLo = lo;
- unHi = gtHi = hi;
- while (1) {
- while (1) {
- if (unLo > unHi) break;
- n = (Int32)eclass[fmap[unLo]] - (Int32)med;
- if (n == 0) {
- fswap(fmap[unLo], fmap[ltLo]);
- ltLo++; unLo++;
- continue;
- };
- if (n > 0) break;
- unLo++;
- }
- while (1) {
- if (unLo > unHi) break;
- n = (Int32)eclass[fmap[unHi]] - (Int32)med;
- if (n == 0) {
- fswap(fmap[unHi], fmap[gtHi]);
- gtHi--; unHi--;
- continue;
- };
- if (n < 0) break;
- unHi--;
- }
- if (unLo > unHi) break;
- fswap(fmap[unLo], fmap[unHi]); unLo++; unHi--;
- }
- AssertD ( unHi == unLo-1, "fallbackQSort3(2)" );
- if (gtHi < ltLo) continue;
- n = fmin(ltLo-lo, unLo-ltLo); fvswap(lo, unLo-n, n);
- m = fmin(hi-gtHi, gtHi-unHi); fvswap(unLo, hi-m+1, m);
- n = lo + unLo - ltLo - 1;
- m = hi - (gtHi - unHi) + 1;
- if (n - lo > hi - m) {
- fpush ( lo, n );
- fpush ( m, hi );
- } else {
- fpush ( m, hi );
- fpush ( lo, n );
- }
- }
- }
- #undef fmin
- #undef fpush
- #undef fpop
- #undef fswap
- #undef fvswap
- #undef FALLBACK_QSORT_SMALL_THRESH
- #undef FALLBACK_QSORT_STACK_SIZE
- /*---------------------------------------------*/
- /* Pre:
- nblock > 0
- eclass exists for [0 .. nblock-1]
- ((UChar*)eclass) [0 .. nblock-1] holds block
- ptr exists for [0 .. nblock-1]
- Post:
- ((UChar*)eclass) [0 .. nblock-1] holds block
- All other areas of eclass destroyed
- fmap [0 .. nblock-1] holds sorted order
- bhtab [ 0 .. 2+(nblock/32) ] destroyed
- */
- #define SET_BH(zz) bhtab[(zz) >> 5] |= ((UInt32)1 << ((zz) & 31))
- #define CLEAR_BH(zz) bhtab[(zz) >> 5] &= ~((UInt32)1 << ((zz) & 31))
- #define ISSET_BH(zz) (bhtab[(zz) >> 5] & ((UInt32)1 << ((zz) & 31)))
- #define WORD_BH(zz) bhtab[(zz) >> 5]
- #define UNALIGNED_BH(zz) ((zz) & 0x01f)
- static
- void fallbackSort ( UInt32* fmap,
- UInt32* eclass,
- UInt32* bhtab,
- Int32 nblock,
- Int32 verb )
- {
- Int32 ftab[257];
- Int32 ftabCopy[256];
- Int32 H, i, j, k, l, r, cc, cc1;
- Int32 nNotDone;
- Int32 nBhtab;
- UChar* eclass8 = (UChar*)eclass;
- /*--
- Initial 1-char radix sort to generate
- initial fmap and initial BH bits.
- --*/
- if (verb >= 4)
- VPrintf0 ( " bucket sorting ...\n" );
- for (i = 0; i < 257; i++) ftab[i] = 0;
- for (i = 0; i < nblock; i++) ftab[eclass8[i]]++;
- for (i = 0; i < 256; i++) ftabCopy[i] = ftab[i];
- for (i = 1; i < 257; i++) ftab[i] += ftab[i-1];
- for (i = 0; i < nblock; i++) {
- j = eclass8[i];
- k = ftab[j] - 1;
- ftab[j] = k;
- fmap[k] = i;
- }
- nBhtab = 2 + (nblock / 32);
- for (i = 0; i < nBhtab; i++) bhtab[i] = 0;
- for (i = 0; i < 256; i++) SET_BH(ftab[i]);
- /*--
- Inductively refine the buckets. Kind-of an
- "exponential radix sort" (!), inspired by the
- Manber-Myers suffix array construction algorithm.
- --*/
- /*-- set sentinel bits for block-end detection --*/
- for (i = 0; i < 32; i++) {
- SET_BH(nblock + 2*i);
- CLEAR_BH(nblock + 2*i + 1);
- }
- /*-- the log(N) loop --*/
- H = 1;
- while (1) {
- if (verb >= 4)
- VPrintf1 ( " depth %6d has ", H );
- j = 0;
- for (i = 0; i < nblock; i++) {
- if (ISSET_BH(i)) j = i;
- k = fmap[i] - H; if (k < 0) k += nblock;
- eclass[k] = j;
- }
- nNotDone = 0;
- r = -1;
- while (1) {
- /*-- find the next non-singleton bucket --*/
- k = r + 1;
- while (ISSET_BH(k) && UNALIGNED_BH(k)) k++;
- if (ISSET_BH(k)) {
- while (WORD_BH(k) == 0xffffffff) k += 32;
- while (ISSET_BH(k)) k++;
- }
- l = k - 1;
- if (l >= nblock) break;
- while (!ISSET_BH(k) && UNALIGNED_BH(k)) k++;
- if (!ISSET_BH(k)) {
- while (WORD_BH(k) == 0x00000000) k += 32;
- while (!ISSET_BH(k)) k++;
- }
- r = k - 1;
- if (r >= nblock) break;
- /*-- now [l, r] bracket current bucket --*/
- if (r > l) {
- nNotDone += (r - l + 1);
- fallbackQSort3 ( fmap, eclass, l, r );
- /*-- scan bucket and generate header bits-- */
- cc = -1;
- for (i = l; i <= r; i++) {
- cc1 = eclass[fmap[i]];
- if (cc != cc1) { SET_BH(i); cc = cc1; };
- }
- }
- }
- if (verb >= 4)
- VPrintf1 ( "%6d unresolved strings\n", nNotDone );
- H *= 2;
- if (H > nblock || nNotDone == 0) break;
- }
- /*--
- Reconstruct the original block in
- eclass8 [0 .. nblock-1], since the
- previous phase destroyed it.
- --*/
- if (verb >= 4)
- VPrintf0 ( " reconstructing block ...\n" );
- j = 0;
- for (i = 0; i < nblock; i++) {
- while (ftabCopy[j] == 0) j++;
- ftabCopy[j]--;
- eclass8[fmap[i]] = (UChar)j;
- }
- AssertH ( j < 256, 1005 );
- }
- #undef SET_BH
- #undef CLEAR_BH
- #undef ISSET_BH
- #undef WORD_BH
- #undef UNALIGNED_BH
- /*---------------------------------------------*/
- /*--- The main, O(N^2 log(N)) sorting ---*/
- /*--- algorithm. Faster for "normal" ---*/
- /*--- non-repetitive blocks. ---*/
- /*---------------------------------------------*/
- /*---------------------------------------------*/
- static
- __inline__
- Bool mainGtU ( UInt32 i1,
- UInt32 i2,
- UChar* block,
- UInt16* quadrant,
- UInt32 nblock,
- Int32* budget )
- {
- Int32 k;
- UChar c1, c2;
- UInt16 s1, s2;
- AssertD ( i1 != i2, "mainGtU" );
- /* 1 */
- c1 = block[i1]; c2 = block[i2];
- if (c1 != c2) return (c1 > c2);
- i1++; i2++;
- /* 2 */
- c1 = block[i1]; c2 = block[i2];
- if (c1 != c2) return (c1 > c2);
- i1++; i2++;
- /* 3 */
- c1 = block[i1]; c2 = block[i2];
- if (c1 != c2) return (c1 > c2);
- i1++; i2++;
- /* 4 */
- c1 = block[i1]; c2 = block[i2];
- if (c1 != c2) return (c1 > c2);
- i1++; i2++;
- /* 5 */
- c1 = block[i1]; c2 = block[i2];
- if (c1 != c2) return (c1 > c2);
- i1++; i2++;
- /* 6 */
- c1 = block[i1]; c2 = block[i2];
- if (c1 != c2) return (c1 > c2);
- i1++; i2++;
- /* 7 */
- c1 = block[i1]; c2 = block[i2];
- if (c1 != c2) return (c1 > c2);
- i1++; i2++;
- /* 8 */
- c1 = block[i1]; c2 = block[i2];
- if (c1 != c2) return (c1 > c2);
- i1++; i2++;
- /* 9 */
- c1 = block[i1]; c2 = block[i2];
- if (c1 != c2) return (c1 > c2);
- i1++; i2++;
- /* 10 */
- c1 = block[i1]; c2 = block[i2];
- if (c1 != c2) return (c1 > c2);
- i1++; i2++;
- /* 11 */
- c1 = block[i1]; c2 = block[i2];
- if (c1 != c2) return (c1 > c2);
- i1++; i2++;
- /* 12 */
- c1 = block[i1]; c2 = block[i2];
- if (c1 != c2) return (c1 > c2);
- i1++; i2++;
- k = nblock + 8;
- do {
- /* 1 */
- c1 = block[i1]; c2 = block[i2];
- if (c1 != c2) return (c1 > c2);
- s1 = quadrant[i1]; s2 = quadrant[i2];
- if (s1 != s2) return (s1 > s2);
- i1++; i2++;
- /* 2 */
- c1 = block[i1]; c2 = block[i2];
- if (c1 != c2) return (c1 > c2);
- s1 = quadrant[i1]; s2 = quadrant[i2];
- if (s1 != s2) return (s1 > s2);
- i1++; i2++;
- /* 3 */
- c1 = block[i1]; c2 = block[i2];
- if (c1 != c2) return (c1 > c2);
- s1 = quadrant[i1]; s2 = quadrant[i2];
- if (s1 != s2) return (s1 > s2);
- i1++; i2++;
- /* 4 */
- c1 = block[i1]; c2 = block[i2];
- if (c1 != c2) return (c1 > c2);
- s1 = quadrant[i1]; s2 = quadrant[i2];
- if (s1 != s2) return (s1 > s2);
- i1++; i2++;
- /* 5 */
- c1 = block[i1]; c2 = block[i2];
- if (c1 != c2) return (c1 > c2);
- s1 = quadrant[i1]; s2 = quadrant[i2];
- if (s1 != s2) return (s1 > s2);
- i1++; i2++;
- /* 6 */
- c1 = block[i1]; c2 = block[i2];
- if (c1 != c2) return (c1 > c2);
- s1 = quadrant[i1]; s2 = quadrant[i2];
- if (s1 != s2) return (s1 > s2);
- i1++; i2++;
- /* 7 */
- c1 = block[i1]; c2 = block[i2];
- if (c1 != c2) return (c1 > c2);
- s1 = quadrant[i1]; s2 = quadrant[i2];
- if (s1 != s2) return (s1 > s2);
- i1++; i2++;
- /* 8 */
- c1 = block[i1]; c2 = block[i2];
- if (c1 != c2) return (c1 > c2);
- s1 = quadrant[i1]; s2 = quadrant[i2];
- if (s1 != s2) return (s1 > s2);
- i1++; i2++;
- if (i1 >= nblock) i1 -= nblock;
- if (i2 >= nblock) i2 -= nblock;
- k -= 8;
- (*budget)--;
- }
- while (k >= 0);
- return False;
- }
- /*---------------------------------------------*/
- /*--
- Knuth's increments seem to work better
- than Incerpi-Sedgewick here. Possibly
- because the number of elems to sort is
- usually small, typically <= 20.
- --*/
- static
- Int32 incs[14] = { 1, 4, 13, 40, 121, 364, 1093, 3280,
- 9841, 29524, 88573, 265720,
- 797161, 2391484 };
- static
- void mainSimpleSort ( UInt32* ptr,
- UChar* block,
- UInt16* quadrant,
- Int32 nblock,
- Int32 lo,
- Int32 hi,
- Int32 d,
- Int32* budget )
- {
- Int32 i, j, h, bigN, hp;
- UInt32 v;
- bigN = hi - lo + 1;
- if (bigN < 2) return;
- hp = 0;
- while (incs[hp] < bigN) hp++;
- hp--;
- for (; hp >= 0; hp--) {
- h = incs[hp];
- i = lo + h;
- while (True) {
- /*-- copy 1 --*/
- if (i > hi) break;
- v = ptr[i];
- j = i;
- while ( mainGtU (
- ptr[j-h]+d, v+d, block, quadrant, nblock, budget
- ) ) {
- ptr[j] = ptr[j-h];
- j = j - h;
- if (j <= (lo + h - 1)) break;
- }
- ptr[j] = v;
- i++;
- /*-- copy 2 --*/
- if (i > hi) break;
- v = ptr[i];
- j = i;
- while ( mainGtU (
- ptr[j-h]+d, v+d, block, quadrant, nblock, budget
- ) ) {
- ptr[j] = ptr[j-h];
- j = j - h;
- if (j <= (lo + h - 1)) break;
- }
- ptr[j] = v;
- i++;
- /*-- copy 3 --*/
- if (i > hi) break;
- v = ptr[i];
- j = i;
- while ( mainGtU (
- ptr[j-h]+d, v+d, block, quadrant, nblock, budget
- ) ) {
- ptr[j] = ptr[j-h];
- j = j - h;
- if (j <= (lo + h - 1)) break;
- }
- ptr[j] = v;
- i++;
- if (*budget < 0) return;
- }
- }
- }
- /*---------------------------------------------*/
- /*--
- The following is an implementation of
- an elegant 3-way quicksort for strings,
- described in a paper "Fast Algorithms for
- Sorting and Searching Strings", by Robert
- Sedgewick and Jon L. Bentley.
- --*/
- #define mswap(zz1, zz2) \
- { Int32 zztmp = zz1; zz1 = zz2; zz2 = zztmp; }
- #define mvswap(zzp1, zzp2, zzn) \
- { \
- Int32 yyp1 = (zzp1); \
- Int32 yyp2 = (zzp2); \
- Int32 yyn = (zzn); \
- while (yyn > 0) { \
- mswap(ptr[yyp1], ptr[yyp2]); \
- yyp1++; yyp2++; yyn--; \
- } \
- }
- static
- __inline__
- UChar mmed3 ( UChar a, UChar b, UChar c )
- {
- UChar t;
- if (a > b) { t = a; a = b; b = t; };
- if (b > c) {
- b = c;
- if (a > b) b = a;
- }
- return b;
- }
- #define mmin(a,b) ((a) < (b)) ? (a) : (b)
- #define mpush(lz,hz,dz) { stackLo[sp] = lz; \
- stackHi[sp] = hz; \
- stackD [sp] = dz; \
- sp++; }
- #define mpop(lz,hz,dz) { sp--; \
- lz = stackLo[sp]; \
- hz = stackHi[sp]; \
- dz = stackD [sp]; }
- #define mnextsize(az) (nextHi[az]-nextLo[az])
- #define mnextswap(az,bz) \
- { Int32 tz; \
- tz = nextLo[az]; nextLo[az] = nextLo[bz]; nextLo[bz] = tz; \
- tz = nextHi[az]; nextHi[az] = nextHi[bz]; nextHi[bz] = tz; \
- tz = nextD [az]; nextD [az] = nextD [bz]; nextD [bz] = tz; }
- #define MAIN_QSORT_SMALL_THRESH 20
- #define MAIN_QSORT_DEPTH_THRESH (BZ_N_RADIX + BZ_N_QSORT)
- #define MAIN_QSORT_STACK_SIZE 100
- static
- void mainQSort3 ( UInt32* ptr,
- UChar* block,
- UInt16* quadrant,
- Int32 nblock,
- Int32 loSt,
- Int32 hiSt,
- Int32 dSt,
- Int32* budget )
- {
- Int32 unLo, unHi, ltLo, gtHi, n, m, med;
- Int32 sp, lo, hi, d;
- Int32 stackLo[MAIN_QSORT_STACK_SIZE];
- Int32 stackHi[MAIN_QSORT_STACK_SIZE];
- Int32 stackD [MAIN_QSORT_STACK_SIZE];
- Int32 nextLo[3];
- Int32 nextHi[3];
- Int32 nextD [3];
- sp = 0;
- mpush ( loSt, hiSt, dSt );
- while (sp > 0) {
- AssertH ( sp < MAIN_QSORT_STACK_SIZE - 2, 1001 );
- mpop ( lo, hi, d );
- if (hi - lo < MAIN_QSORT_SMALL_THRESH ||
- d > MAIN_QSORT_DEPTH_THRESH) {
- mainSimpleSort ( ptr, block, quadrant, nblock, lo, hi, d, budget );
- if (*budget < 0) return;
- continue;
- }
- med = (Int32)
- mmed3 ( block[ptr[ lo ]+d],
- block[ptr[ hi ]+d],
- block[ptr[ (lo+hi)>>1 ]+d] );
- unLo = ltLo = lo;
- unHi = gtHi = hi;
- while (True) {
- while (True) {
- if (unLo > unHi) break;
- n = ((Int32)block[ptr[unLo]+d]) - med;
- if (n == 0) {
- mswap(ptr[unLo], ptr[ltLo]);
- ltLo++; unLo++; continue;
- };
- if (n > 0) break;
- unLo++;
- }
- while (True) {
- if (unLo > unHi) break;
- n = ((Int32)block[ptr[unHi]+d]) - med;
- if (n == 0) {
- mswap(ptr[unHi], ptr[gtHi]);
- gtHi--; unHi--; continue;
- };
- if (n < 0) break;
- unHi--;
- }
- if (unLo > unHi) break;
- mswap(ptr[unLo], ptr[unHi]); unLo++; unHi--;
- }
- AssertD ( unHi == unLo-1, "mainQSort3(2)" );
- if (gtHi < ltLo) {
- mpush(lo, hi, d+1 );
- continue;
- }
- n = mmin(ltLo-lo, unLo-ltLo); mvswap(lo, unLo-n, n);
- m = mmin(hi-gtHi, gtHi-unHi); mvswap(unLo, hi-m+1, m);
- n = lo + unLo - ltLo - 1;
- m = hi - (gtHi - unHi) + 1;
- nextLo[0] = lo; nextHi[0] = n; nextD[0] = d;
- nextLo[1] = m; nextHi[1] = hi; nextD[1] = d;
- nextLo[2] = n+1; nextHi[2] = m-1; nextD[2] = d+1;
- if (mnextsize(0) < mnextsize(1)) mnextswap(0,1);
- if (mnextsize(1) < mnextsize(2)) mnextswap(1,2);
- if (mnextsize(0) < mnextsize(1)) mnextswap(0,1);
- AssertD (mnextsize(0) >= mnextsize(1), "mainQSort3(8)" );
- AssertD (mnextsize(1) >= mnextsize(2), "mainQSort3(9)" );
- mpush (nextLo[0], nextHi[0], nextD[0]);
- mpush (nextLo[1], nextHi[1], nextD[1]);
- mpush (nextLo[2], nextHi[2], nextD[2]);
- }
- }
- #undef mswap
- #undef mvswap
- #undef mpush
- #undef mpop
- #undef mmin
- #undef mnextsize
- #undef mnextswap
- #undef MAIN_QSORT_SMALL_THRESH
- #undef MAIN_QSORT_DEPTH_THRESH
- #undef MAIN_QSORT_STACK_SIZE
- /*---------------------------------------------*/
- /* Pre:
- nblock > N_OVERSHOOT
- block32 exists for [0 .. nblock-1 +N_OVERSHOOT]
- ((UChar*)block32) [0 .. nblock-1] holds block
- ptr exists for [0 .. nblock-1]
- Post:
- ((UChar*)block32) [0 .. nblock-1] holds block
- All other areas of block32 destroyed
- ftab [0 .. 65536 ] destroyed
- ptr [0 .. nblock-1] holds sorted order
- if (*budget < 0), sorting was abandoned
- */
- #define BIGFREQ(b) (ftab[((b)+1) << 8] - ftab[(b) << 8])
- #define SETMASK (1 << 21)
- #define CLEARMASK (~(SETMASK))
- static
- void mainSort ( UInt32* ptr,
- UChar* block,
- UInt16* quadrant,
- UInt32* ftab,
- Int32 nblock,
- Int32 verb,
- Int32* budget )
- {
- Int32 i, j, k, ss, sb;
- Int32 runningOrder[256];
- Bool bigDone[256];
- Int32 copyStart[256];
- Int32 copyEnd [256];
- UChar c1;
- Int32 numQSorted;
- UInt16 s;
- if (verb >= 4) VPrintf0 ( " main sort initialise ...\n" );
- /*-- set up the 2-byte frequency table --*/
- for (i = 65536; i >= 0; i--) ftab[i] = 0;
- j = block[0] << 8;
- i = nblock-1;
- for (; i >= 3; i -= 4) {
- quadrant[i] = 0;
- j = (j >> 8) | ( ((UInt16)block[i]) << 8);
- ftab[j]++;
- quadrant[i-1] = 0;
- j = (j >> 8) | ( ((UInt16)block[i-1]) << 8);
- ftab[j]++;
- quadrant[i-2] = 0;
- j = (j >> 8) | ( ((UInt16)block[i-2]) << 8);
- ftab[j]++;
- quadrant[i-3] = 0;
- j = (j >> 8) | ( ((UInt16)block[i-3]) << 8);
- ftab[j]++;
- }
- for (; i >= 0; i--) {
- quadrant[i] = 0;
- j = (j >> 8) | ( ((UInt16)block[i]) << 8);
- ftab[j]++;
- }
- /*-- (emphasises close relationship of block & quadrant) --*/
- for (i = 0; i < BZ_N_OVERSHOOT; i++) {
- block [nblock+i] = block[i];
- quadrant[nblock+i] = 0;
- }
- if (verb >= 4) VPrintf0 ( " bucket sorting ...\n" );
- /*-- Complete the initial radix sort --*/
- for (i = 1; i <= 65536; i++) ftab[i] += ftab[i-1];
- s = block[0] << 8;
- i = nblock-1;
- for (; i >= 3; i -= 4) {
- s = (s >> 8) | (block[i] << 8);
- j = ftab[s] -1;
- ftab[s] = j;
- ptr[j] = i;
- s = (s >> 8) | (block[i-1] << 8);
- j = ftab[s] -1;
- ftab[s] = j;
- ptr[j] = i-1;
- s = (s >> 8) | (block[i-2] << 8);
- j = ftab[s] -1;
- ftab[s] = j;
- ptr[j] = i-2;
- s = (s >> 8) | (block[i-3] << 8);
- j = ftab[s] -1;
- ftab[s] = j;
- ptr[j] = i-3;
- }
- for (; i >= 0; i--) {
- s = (s >> 8) | (block[i] << 8);
- j = ftab[s] -1;
- ftab[s] = j;
- ptr[j] = i;
- }
- /*--
- Now ftab contains the first loc of every small bucket.
- Calculate the running order, from smallest to largest
- big bucket.
- --*/
- for (i = 0; i <= 255; i++) {
- bigDone [i] = False;
- runningOrder[i] = i;
- }
- {
- Int32 vv;
- Int32 h = 1;
- do h = 3 * h + 1; while (h <= 256);
- do {
- h = h / 3;
- for (i = h; i <= 255; i++) {
- vv = runningOrder[i];
- j = i;
- while ( BIGFREQ(runningOrder[j-h]) > BIGFREQ(vv) ) {
- runningOrder[j] = runningOrder[j-h];
- j = j - h;
- if (j <= (h - 1)) goto zero;
- }
- zero:
- runningOrder[j] = vv;
- }
- } while (h != 1);
- }
- /*--
- The main sorting loop.
- --*/
- numQSorted = 0;
- for (i = 0; i <= 255; i++) {
- /*--
- Process big buckets, starting with the least full.
- Basically this is a 3-step process in which we call
- mainQSort3 to sort the small buckets [ss, j], but
- also make a big effort to avoid the calls if we can.
- --*/
- ss = runningOrder[i];
- /*--
- Step 1:
- Complete the big bucket [ss] by quicksorting
- any unsorted small buckets [ss, j], for j != ss.
- Hopefully previous pointer-scanning phases have already
- completed many of the small buckets [ss, j], so
- we don't have to sort them at all.
- --*/
- for (j = 0; j <= 255; j++) {
- if (j != ss) {
- sb = (ss << 8) + j;
- if ( ! (ftab[sb] & SETMASK) ) {
- Int32 lo = ftab[sb] & CLEARMASK;
- Int32 hi = (ftab[sb+1] & CLEARMASK) - 1;
- if (hi > lo) {
- if (verb >= 4)
- VPrintf4 ( " qsort [0x%x, 0x%x] "
- "done %d this %d\n",
- ss, j, numQSorted, hi - lo + 1 );
- mainQSort3 (
- ptr, block, quadrant, nblock,
- lo, hi, BZ_N_RADIX, budget
- );
- numQSorted += (hi - lo + 1);
- if (*budget < 0) return;
- }
- }
- ftab[sb] |= SETMASK;
- }
- }
- AssertH ( !bigDone[ss], 1006 );
- /*--
- Step 2:
- Now scan this big bucket [ss] so as to synthesise the
- sorted order for small buckets [t, ss] for all t,
- including, magically, the bucket [ss,ss] too.
- This will avoid doing Real Work in subsequent Step 1's.
- --*/
- {
- for (j = 0; j <= 255; j++) {
- copyStart[j] = ftab[(j << 8) + ss] & CLEARMASK;
- copyEnd [j] = (ftab[(j << 8) + ss + 1] & CLEARMASK) - 1;
- }
- for (j = ftab[ss << 8] & CLEARMASK; j < copyStart[ss]; j++) {
- k = ptr[j]-1; if (k < 0) k += nblock;
- c1 = block[k];
- if (!bigDone[c1])
- ptr[ copyStart[c1]++ ] = k;
- }
- for (j = (ftab[(ss+1) << 8] & CLEARMASK) - 1; j > copyEnd[ss]; j--) {
- k = ptr[j]-1; if (k < 0) k += nblock;
- c1 = block[k];
- if (!bigDone[c1])
- ptr[ copyEnd[c1]-- ] = k;
- }
- }
- AssertH ( (copyStart[ss]-1 == copyEnd[ss])
- ||
- /* Extremely rare case missing in bzip2-1.0.0 and 1.0.1.
- Necessity for this case is demonstrated by compressing
- a sequence of approximately 48.5 million of character
- 251; 1.0.0/1.0.1 will then die here. */
- (copyStart[ss] == 0 && copyEnd[ss] == nblock-1),
- 1007 )
- for (j = 0; j <= 255; j++) ftab[(j << 8) + ss] |= SETMASK;
- /*--
- Step 3:
- The [ss] big bucket is now done. Record this fact,
- and update the quadrant descriptors. Remember to
- update quadrants in the overshoot area too, if
- necessary. The "if (i < 255)" test merely skips
- this updating for the last bucket processed, since
- updating for the last bucket is pointless.
- The quadrant array provides a way to incrementally
- cache sort orderings, as they appear, so as to
- make subsequent comparisons in fullGtU() complete
- faster. For repetitive blocks this makes a big
- difference (but not big enough to be able to avoid
- the fallback sorting mechanism, exponential radix sort).
- The precise meaning is: at all times:
- for 0 <= i < nblock and 0 <= j <= nblock
- if block[i] != block[j],
- then the relative values of quadrant[i] and
- quadrant[j] are meaningless.
- else {
- if quadrant[i] < quadrant[j]
- then the string starting at i lexicographically
- precedes the string starting at j
- else if quadrant[i] > quadrant[j]
- then the string starting at j lexicographically
- precedes the string starting at i
- else
- the relative ordering of the strings starting
- at i and j has not yet been determined.
- }
- --*/
- bigDone[ss] = True;
- if (i < 255) {
- Int32 bbStart = ftab[ss << 8] & CLEARMASK;
- Int32 bbSize = (ftab[(ss+1) << 8] & CLEARMASK) - bbStart;
- Int32 shifts = 0;
- while ((bbSize >> shifts) > 65534) shifts++;
- for (j = bbSize-1; j >= 0; j--) {
- Int32 a2update = ptr[bbStart + j];
- UInt16 qVal = (UInt16)(j >> shifts);
- quadrant[a2update] = qVal;
- if (a2update < BZ_N_OVERSHOOT)
- quadrant[a2update + nblock] = qVal;
- }
- AssertH ( ((bbSize-1) >> shifts) <= 65535, 1002 );
- }
- }
- if (verb >= 4)
- VPrintf3 ( " %d pointers, %d sorted, %d scanned\n",
- nblock, numQSorted, nblock - numQSorted );
- }
- #undef BIGFREQ
- #undef SETMASK
- #undef CLEARMASK
- /*---------------------------------------------*/
- /* Pre:
- nblock > 0
- arr2 exists for [0 .. nblock-1 +N_OVERSHOOT]
- ((UChar*)arr2) [0 .. nblock-1] holds block
- arr1 exists for [0 .. nblock-1]
- Post:
- ((UChar*)arr2) [0 .. nblock-1] holds block
- All other areas of block destroyed
- ftab [ 0 .. 65536 ] destroyed
- arr1 [0 .. nblock-1] holds sorted order
- */
- void BZ2_blockSort ( EState* s )
- {
- UInt32* ptr = s->ptr;
- UChar* block = s->block;
- UInt32* ftab = s->ftab;
- Int32 nblock = s->nblock;
- Int32 verb = s->verbosity;
- Int32 wfact = s->workFactor;
- UInt16* quadrant;
- Int32 budget;
- Int32 budgetInit;
- Int32 i;
- if (nblock < 10000) {
- fallbackSort ( s->arr1, s->arr2, ftab, nblock, verb );
- } else {
- /* Calculate the location for quadrant, remembering to get
- the alignment right. Assumes that &(block[0]) is at least
- 2-byte aligned -- this should be ok since block is really
- the first section of arr2.
- */
- i = nblock+BZ_N_OVERSHOOT;
- if (i & 1) i++;
- quadrant = (UInt16*)(&(block[i]));
- /* (wfact-1) / 3 puts the default-factor-30
- transition point at very roughly the same place as
- with v0.1 and v0.9.0.
- Not that it particularly matters any more, since the
- resulting compressed stream is now the same regardless
- of whether or not we use the main sort or fallback sort.
- */
- if (wfact < 1 ) wfact = 1;
- if (wfact > 100) wfact = 100;
- budgetInit = nblock * ((wfact-1) / 3);
- budget = budgetInit;
- mainSort ( ptr, block, quadrant, ftab, nblock, verb, &budget );
- if (verb >= 3)
- VPrintf3 ( " %d work, %d block, ratio %5.2f\n",
- budgetInit - budget,
- nblock,
- (float)(budgetInit - budget) /
- (float)(nblock==0 ? 1 : nblock) );
- if (budget < 0) {
- if (verb >= 2)
- VPrintf0 ( " too repetitive; using fallback"
- " sorting algorithm\n" );
- fallbackSort ( s->arr1, s->arr2, ftab, nblock, verb );
- }
- }
- s->origPtr = -1;
- for (i = 0; i < s->nblock; i++)
- if (ptr[i] == 0)
- { s->origPtr = i; break; };
- AssertH( s->origPtr != -1, 1003 );
- }
- /*-------------------------------------------------------------*/
- /*--- end blocksort.c ---*/
- /*-------------------------------------------------------------*/
|