divsufsort.c 53 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211121212131214121512161217121812191220122112221223122412251226122712281229123012311232123312341235123612371238123912401241124212431244124512461247124812491250125112521253125412551256125712581259126012611262126312641265126612671268126912701271127212731274127512761277127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373137413751376137713781379138013811382138313841385138613871388138913901391139213931394139513961397139813991400140114021403140414051406140714081409141014111412141314141415141614171418141914201421142214231424142514261427142814291430143114321433143414351436143714381439144014411442144314441445144614471448144914501451145214531454145514561457145814591460146114621463146414651466146714681469147014711472147314741475147614771478147914801481148214831484148514861487148814891490149114921493149414951496149714981499150015011502150315041505150615071508150915101511151215131514151515161517151815191520152115221523152415251526152715281529153015311532153315341535153615371538153915401541154215431544154515461547154815491550155115521553155415551556155715581559156015611562156315641565156615671568156915701571157215731574157515761577157815791580158115821583158415851586158715881589159015911592159315941595159615971598159916001601160216031604160516061607160816091610161116121613161416151616161716181619162016211622162316241625162616271628162916301631163216331634163516361637163816391640164116421643164416451646164716481649165016511652165316541655165616571658165916601661166216631664166516661667166816691670167116721673167416751676167716781679168016811682168316841685168616871688168916901691169216931694169516961697169816991700170117021703170417051706170717081709171017111712171317141715171617171718171917201721172217231724172517261727172817291730173117321733173417351736173717381739174017411742174317441745174617471748174917501751175217531754175517561757175817591760176117621763176417651766176717681769177017711772177317741775177617771778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913
  1. /*
  2. * divsufsort.c for libdivsufsort-lite
  3. * Copyright (c) 2003-2008 Yuta Mori All Rights Reserved.
  4. *
  5. * Permission is hereby granted, free of charge, to any person
  6. * obtaining a copy of this software and associated documentation
  7. * files (the "Software"), to deal in the Software without
  8. * restriction, including without limitation the rights to use,
  9. * copy, modify, merge, publish, distribute, sublicense, and/or sell
  10. * copies of the Software, and to permit persons to whom the
  11. * Software is furnished to do so, subject to the following
  12. * conditions:
  13. *
  14. * The above copyright notice and this permission notice shall be
  15. * included in all copies or substantial portions of the Software.
  16. *
  17. * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  18. * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
  19. * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  20. * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
  21. * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
  22. * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  23. * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  24. * OTHER DEALINGS IN THE SOFTWARE.
  25. */
  26. /*- Compiler specifics -*/
  27. #ifdef __clang__
  28. #pragma clang diagnostic ignored "-Wshorten-64-to-32"
  29. #endif
  30. #if defined(_MSC_VER)
  31. # pragma warning(disable : 4244)
  32. # pragma warning(disable : 4127) /* C4127 : Condition expression is constant */
  33. #endif
  34. /*- Dependencies -*/
  35. #include <assert.h>
  36. #include <stdio.h>
  37. #include <stdlib.h>
  38. #include "divsufsort.h"
  39. /*- Constants -*/
  40. #if defined(INLINE)
  41. # undef INLINE
  42. #endif
  43. #if !defined(INLINE)
  44. # define INLINE __inline
  45. #endif
  46. #if defined(ALPHABET_SIZE) && (ALPHABET_SIZE < 1)
  47. # undef ALPHABET_SIZE
  48. #endif
  49. #if !defined(ALPHABET_SIZE)
  50. # define ALPHABET_SIZE (256)
  51. #endif
  52. #define BUCKET_A_SIZE (ALPHABET_SIZE)
  53. #define BUCKET_B_SIZE (ALPHABET_SIZE * ALPHABET_SIZE)
  54. #if defined(SS_INSERTIONSORT_THRESHOLD)
  55. # if SS_INSERTIONSORT_THRESHOLD < 1
  56. # undef SS_INSERTIONSORT_THRESHOLD
  57. # define SS_INSERTIONSORT_THRESHOLD (1)
  58. # endif
  59. #else
  60. # define SS_INSERTIONSORT_THRESHOLD (8)
  61. #endif
  62. #if defined(SS_BLOCKSIZE)
  63. # if SS_BLOCKSIZE < 0
  64. # undef SS_BLOCKSIZE
  65. # define SS_BLOCKSIZE (0)
  66. # elif 32768 <= SS_BLOCKSIZE
  67. # undef SS_BLOCKSIZE
  68. # define SS_BLOCKSIZE (32767)
  69. # endif
  70. #else
  71. # define SS_BLOCKSIZE (1024)
  72. #endif
  73. /* minstacksize = log(SS_BLOCKSIZE) / log(3) * 2 */
  74. #if SS_BLOCKSIZE == 0
  75. # define SS_MISORT_STACKSIZE (96)
  76. #elif SS_BLOCKSIZE <= 4096
  77. # define SS_MISORT_STACKSIZE (16)
  78. #else
  79. # define SS_MISORT_STACKSIZE (24)
  80. #endif
  81. #define SS_SMERGE_STACKSIZE (32)
  82. #define TR_INSERTIONSORT_THRESHOLD (8)
  83. #define TR_STACKSIZE (64)
  84. /*- Macros -*/
  85. #ifndef SWAP
  86. # define SWAP(_a, _b) do { t = (_a); (_a) = (_b); (_b) = t; } while(0)
  87. #endif /* SWAP */
  88. #ifndef MIN
  89. # define MIN(_a, _b) (((_a) < (_b)) ? (_a) : (_b))
  90. #endif /* MIN */
  91. #ifndef MAX
  92. # define MAX(_a, _b) (((_a) > (_b)) ? (_a) : (_b))
  93. #endif /* MAX */
  94. #define STACK_PUSH(_a, _b, _c, _d)\
  95. do {\
  96. assert(ssize < STACK_SIZE);\
  97. stack[ssize].a = (_a), stack[ssize].b = (_b),\
  98. stack[ssize].c = (_c), stack[ssize++].d = (_d);\
  99. } while(0)
  100. #define STACK_PUSH5(_a, _b, _c, _d, _e)\
  101. do {\
  102. assert(ssize < STACK_SIZE);\
  103. stack[ssize].a = (_a), stack[ssize].b = (_b),\
  104. stack[ssize].c = (_c), stack[ssize].d = (_d), stack[ssize++].e = (_e);\
  105. } while(0)
  106. #define STACK_POP(_a, _b, _c, _d)\
  107. do {\
  108. assert(0 <= ssize);\
  109. if(ssize == 0) { return; }\
  110. (_a) = stack[--ssize].a, (_b) = stack[ssize].b,\
  111. (_c) = stack[ssize].c, (_d) = stack[ssize].d;\
  112. } while(0)
  113. #define STACK_POP5(_a, _b, _c, _d, _e)\
  114. do {\
  115. assert(0 <= ssize);\
  116. if(ssize == 0) { return; }\
  117. (_a) = stack[--ssize].a, (_b) = stack[ssize].b,\
  118. (_c) = stack[ssize].c, (_d) = stack[ssize].d, (_e) = stack[ssize].e;\
  119. } while(0)
  120. #define BUCKET_A(_c0) bucket_A[(_c0)]
  121. #if ALPHABET_SIZE == 256
  122. #define BUCKET_B(_c0, _c1) (bucket_B[((_c1) << 8) | (_c0)])
  123. #define BUCKET_BSTAR(_c0, _c1) (bucket_B[((_c0) << 8) | (_c1)])
  124. #else
  125. #define BUCKET_B(_c0, _c1) (bucket_B[(_c1) * ALPHABET_SIZE + (_c0)])
  126. #define BUCKET_BSTAR(_c0, _c1) (bucket_B[(_c0) * ALPHABET_SIZE + (_c1)])
  127. #endif
  128. /*- Private Functions -*/
  129. static const int lg_table[256]= {
  130. -1,0,1,1,2,2,2,2,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,
  131. 5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
  132. 6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
  133. 6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
  134. 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
  135. 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
  136. 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
  137. 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7
  138. };
  139. #if (SS_BLOCKSIZE == 0) || (SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE)
  140. static INLINE
  141. int
  142. ss_ilg(int n) {
  143. #if SS_BLOCKSIZE == 0
  144. return (n & 0xffff0000) ?
  145. ((n & 0xff000000) ?
  146. 24 + lg_table[(n >> 24) & 0xff] :
  147. 16 + lg_table[(n >> 16) & 0xff]) :
  148. ((n & 0x0000ff00) ?
  149. 8 + lg_table[(n >> 8) & 0xff] :
  150. 0 + lg_table[(n >> 0) & 0xff]);
  151. #elif SS_BLOCKSIZE < 256
  152. return lg_table[n];
  153. #else
  154. return (n & 0xff00) ?
  155. 8 + lg_table[(n >> 8) & 0xff] :
  156. 0 + lg_table[(n >> 0) & 0xff];
  157. #endif
  158. }
  159. #endif /* (SS_BLOCKSIZE == 0) || (SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE) */
  160. #if SS_BLOCKSIZE != 0
  161. static const int sqq_table[256] = {
  162. 0, 16, 22, 27, 32, 35, 39, 42, 45, 48, 50, 53, 55, 57, 59, 61,
  163. 64, 65, 67, 69, 71, 73, 75, 76, 78, 80, 81, 83, 84, 86, 87, 89,
  164. 90, 91, 93, 94, 96, 97, 98, 99, 101, 102, 103, 104, 106, 107, 108, 109,
  165. 110, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126,
  166. 128, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142,
  167. 143, 144, 144, 145, 146, 147, 148, 149, 150, 150, 151, 152, 153, 154, 155, 155,
  168. 156, 157, 158, 159, 160, 160, 161, 162, 163, 163, 164, 165, 166, 167, 167, 168,
  169. 169, 170, 170, 171, 172, 173, 173, 174, 175, 176, 176, 177, 178, 178, 179, 180,
  170. 181, 181, 182, 183, 183, 184, 185, 185, 186, 187, 187, 188, 189, 189, 190, 191,
  171. 192, 192, 193, 193, 194, 195, 195, 196, 197, 197, 198, 199, 199, 200, 201, 201,
  172. 202, 203, 203, 204, 204, 205, 206, 206, 207, 208, 208, 209, 209, 210, 211, 211,
  173. 212, 212, 213, 214, 214, 215, 215, 216, 217, 217, 218, 218, 219, 219, 220, 221,
  174. 221, 222, 222, 223, 224, 224, 225, 225, 226, 226, 227, 227, 228, 229, 229, 230,
  175. 230, 231, 231, 232, 232, 233, 234, 234, 235, 235, 236, 236, 237, 237, 238, 238,
  176. 239, 240, 240, 241, 241, 242, 242, 243, 243, 244, 244, 245, 245, 246, 246, 247,
  177. 247, 248, 248, 249, 249, 250, 250, 251, 251, 252, 252, 253, 253, 254, 254, 255
  178. };
  179. static INLINE
  180. int
  181. ss_isqrt(int x) {
  182. int y, e;
  183. if(x >= (SS_BLOCKSIZE * SS_BLOCKSIZE)) { return SS_BLOCKSIZE; }
  184. e = (x & 0xffff0000) ?
  185. ((x & 0xff000000) ?
  186. 24 + lg_table[(x >> 24) & 0xff] :
  187. 16 + lg_table[(x >> 16) & 0xff]) :
  188. ((x & 0x0000ff00) ?
  189. 8 + lg_table[(x >> 8) & 0xff] :
  190. 0 + lg_table[(x >> 0) & 0xff]);
  191. if(e >= 16) {
  192. y = sqq_table[x >> ((e - 6) - (e & 1))] << ((e >> 1) - 7);
  193. if(e >= 24) { y = (y + 1 + x / y) >> 1; }
  194. y = (y + 1 + x / y) >> 1;
  195. } else if(e >= 8) {
  196. y = (sqq_table[x >> ((e - 6) - (e & 1))] >> (7 - (e >> 1))) + 1;
  197. } else {
  198. return sqq_table[x] >> 4;
  199. }
  200. return (x < (y * y)) ? y - 1 : y;
  201. }
  202. #endif /* SS_BLOCKSIZE != 0 */
  203. /*---------------------------------------------------------------------------*/
  204. /* Compares two suffixes. */
  205. static INLINE
  206. int
  207. ss_compare(const unsigned char *T,
  208. const int *p1, const int *p2,
  209. int depth) {
  210. const unsigned char *U1, *U2, *U1n, *U2n;
  211. for(U1 = T + depth + *p1,
  212. U2 = T + depth + *p2,
  213. U1n = T + *(p1 + 1) + 2,
  214. U2n = T + *(p2 + 1) + 2;
  215. (U1 < U1n) && (U2 < U2n) && (*U1 == *U2);
  216. ++U1, ++U2) {
  217. }
  218. return U1 < U1n ?
  219. (U2 < U2n ? *U1 - *U2 : 1) :
  220. (U2 < U2n ? -1 : 0);
  221. }
  222. /*---------------------------------------------------------------------------*/
  223. #if (SS_BLOCKSIZE != 1) && (SS_INSERTIONSORT_THRESHOLD != 1)
  224. /* Insertionsort for small size groups */
  225. static
  226. void
  227. ss_insertionsort(const unsigned char *T, const int *PA,
  228. int *first, int *last, int depth) {
  229. int *i, *j;
  230. int t;
  231. int r;
  232. for(i = last - 2; first <= i; --i) {
  233. for(t = *i, j = i + 1; 0 < (r = ss_compare(T, PA + t, PA + *j, depth));) {
  234. do { *(j - 1) = *j; } while((++j < last) && (*j < 0));
  235. if(last <= j) { break; }
  236. }
  237. if(r == 0) { *j = ~*j; }
  238. *(j - 1) = t;
  239. }
  240. }
  241. #endif /* (SS_BLOCKSIZE != 1) && (SS_INSERTIONSORT_THRESHOLD != 1) */
  242. /*---------------------------------------------------------------------------*/
  243. #if (SS_BLOCKSIZE == 0) || (SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE)
  244. static INLINE
  245. void
  246. ss_fixdown(const unsigned char *Td, const int *PA,
  247. int *SA, int i, int size) {
  248. int j, k;
  249. int v;
  250. int c, d, e;
  251. for(v = SA[i], c = Td[PA[v]]; (j = 2 * i + 1) < size; SA[i] = SA[k], i = k) {
  252. d = Td[PA[SA[k = j++]]];
  253. if(d < (e = Td[PA[SA[j]]])) { k = j; d = e; }
  254. if(d <= c) { break; }
  255. }
  256. SA[i] = v;
  257. }
  258. /* Simple top-down heapsort. */
  259. static
  260. void
  261. ss_heapsort(const unsigned char *Td, const int *PA, int *SA, int size) {
  262. int i, m;
  263. int t;
  264. m = size;
  265. if((size % 2) == 0) {
  266. m--;
  267. if(Td[PA[SA[m / 2]]] < Td[PA[SA[m]]]) { SWAP(SA[m], SA[m / 2]); }
  268. }
  269. for(i = m / 2 - 1; 0 <= i; --i) { ss_fixdown(Td, PA, SA, i, m); }
  270. if((size % 2) == 0) { SWAP(SA[0], SA[m]); ss_fixdown(Td, PA, SA, 0, m); }
  271. for(i = m - 1; 0 < i; --i) {
  272. t = SA[0], SA[0] = SA[i];
  273. ss_fixdown(Td, PA, SA, 0, i);
  274. SA[i] = t;
  275. }
  276. }
  277. /*---------------------------------------------------------------------------*/
  278. /* Returns the median of three elements. */
  279. static INLINE
  280. int *
  281. ss_median3(const unsigned char *Td, const int *PA,
  282. int *v1, int *v2, int *v3) {
  283. int *t;
  284. if(Td[PA[*v1]] > Td[PA[*v2]]) { SWAP(v1, v2); }
  285. if(Td[PA[*v2]] > Td[PA[*v3]]) {
  286. if(Td[PA[*v1]] > Td[PA[*v3]]) { return v1; }
  287. else { return v3; }
  288. }
  289. return v2;
  290. }
  291. /* Returns the median of five elements. */
  292. static INLINE
  293. int *
  294. ss_median5(const unsigned char *Td, const int *PA,
  295. int *v1, int *v2, int *v3, int *v4, int *v5) {
  296. int *t;
  297. if(Td[PA[*v2]] > Td[PA[*v3]]) { SWAP(v2, v3); }
  298. if(Td[PA[*v4]] > Td[PA[*v5]]) { SWAP(v4, v5); }
  299. if(Td[PA[*v2]] > Td[PA[*v4]]) { SWAP(v2, v4); SWAP(v3, v5); }
  300. if(Td[PA[*v1]] > Td[PA[*v3]]) { SWAP(v1, v3); }
  301. if(Td[PA[*v1]] > Td[PA[*v4]]) { SWAP(v1, v4); SWAP(v3, v5); }
  302. if(Td[PA[*v3]] > Td[PA[*v4]]) { return v4; }
  303. return v3;
  304. }
  305. /* Returns the pivot element. */
  306. static INLINE
  307. int *
  308. ss_pivot(const unsigned char *Td, const int *PA, int *first, int *last) {
  309. int *middle;
  310. int t;
  311. t = last - first;
  312. middle = first + t / 2;
  313. if(t <= 512) {
  314. if(t <= 32) {
  315. return ss_median3(Td, PA, first, middle, last - 1);
  316. } else {
  317. t >>= 2;
  318. return ss_median5(Td, PA, first, first + t, middle, last - 1 - t, last - 1);
  319. }
  320. }
  321. t >>= 3;
  322. first = ss_median3(Td, PA, first, first + t, first + (t << 1));
  323. middle = ss_median3(Td, PA, middle - t, middle, middle + t);
  324. last = ss_median3(Td, PA, last - 1 - (t << 1), last - 1 - t, last - 1);
  325. return ss_median3(Td, PA, first, middle, last);
  326. }
  327. /*---------------------------------------------------------------------------*/
  328. /* Binary partition for substrings. */
  329. static INLINE
  330. int *
  331. ss_partition(const int *PA,
  332. int *first, int *last, int depth) {
  333. int *a, *b;
  334. int t;
  335. for(a = first - 1, b = last;;) {
  336. for(; (++a < b) && ((PA[*a] + depth) >= (PA[*a + 1] + 1));) { *a = ~*a; }
  337. for(; (a < --b) && ((PA[*b] + depth) < (PA[*b + 1] + 1));) { }
  338. if(b <= a) { break; }
  339. t = ~*b;
  340. *b = *a;
  341. *a = t;
  342. }
  343. if(first < a) { *first = ~*first; }
  344. return a;
  345. }
  346. /* Multikey introsort for medium size groups. */
  347. static
  348. void
  349. ss_mintrosort(const unsigned char *T, const int *PA,
  350. int *first, int *last,
  351. int depth) {
  352. #define STACK_SIZE SS_MISORT_STACKSIZE
  353. struct { int *a, *b, c; int d; } stack[STACK_SIZE];
  354. const unsigned char *Td;
  355. int *a, *b, *c, *d, *e, *f;
  356. int s, t;
  357. int ssize;
  358. int limit;
  359. int v, x = 0;
  360. for(ssize = 0, limit = ss_ilg(last - first);;) {
  361. if((last - first) <= SS_INSERTIONSORT_THRESHOLD) {
  362. #if 1 < SS_INSERTIONSORT_THRESHOLD
  363. if(1 < (last - first)) { ss_insertionsort(T, PA, first, last, depth); }
  364. #endif
  365. STACK_POP(first, last, depth, limit);
  366. continue;
  367. }
  368. Td = T + depth;
  369. if(limit-- == 0) { ss_heapsort(Td, PA, first, last - first); }
  370. if(limit < 0) {
  371. for(a = first + 1, v = Td[PA[*first]]; a < last; ++a) {
  372. if((x = Td[PA[*a]]) != v) {
  373. if(1 < (a - first)) { break; }
  374. v = x;
  375. first = a;
  376. }
  377. }
  378. if(Td[PA[*first] - 1] < v) {
  379. first = ss_partition(PA, first, a, depth);
  380. }
  381. if((a - first) <= (last - a)) {
  382. if(1 < (a - first)) {
  383. STACK_PUSH(a, last, depth, -1);
  384. last = a, depth += 1, limit = ss_ilg(a - first);
  385. } else {
  386. first = a, limit = -1;
  387. }
  388. } else {
  389. if(1 < (last - a)) {
  390. STACK_PUSH(first, a, depth + 1, ss_ilg(a - first));
  391. first = a, limit = -1;
  392. } else {
  393. last = a, depth += 1, limit = ss_ilg(a - first);
  394. }
  395. }
  396. continue;
  397. }
  398. /* choose pivot */
  399. a = ss_pivot(Td, PA, first, last);
  400. v = Td[PA[*a]];
  401. SWAP(*first, *a);
  402. /* partition */
  403. for(b = first; (++b < last) && ((x = Td[PA[*b]]) == v);) { }
  404. if(((a = b) < last) && (x < v)) {
  405. for(; (++b < last) && ((x = Td[PA[*b]]) <= v);) {
  406. if(x == v) { SWAP(*b, *a); ++a; }
  407. }
  408. }
  409. for(c = last; (b < --c) && ((x = Td[PA[*c]]) == v);) { }
  410. if((b < (d = c)) && (x > v)) {
  411. for(; (b < --c) && ((x = Td[PA[*c]]) >= v);) {
  412. if(x == v) { SWAP(*c, *d); --d; }
  413. }
  414. }
  415. for(; b < c;) {
  416. SWAP(*b, *c);
  417. for(; (++b < c) && ((x = Td[PA[*b]]) <= v);) {
  418. if(x == v) { SWAP(*b, *a); ++a; }
  419. }
  420. for(; (b < --c) && ((x = Td[PA[*c]]) >= v);) {
  421. if(x == v) { SWAP(*c, *d); --d; }
  422. }
  423. }
  424. if(a <= d) {
  425. c = b - 1;
  426. if((s = a - first) > (t = b - a)) { s = t; }
  427. for(e = first, f = b - s; 0 < s; --s, ++e, ++f) { SWAP(*e, *f); }
  428. if((s = d - c) > (t = last - d - 1)) { s = t; }
  429. for(e = b, f = last - s; 0 < s; --s, ++e, ++f) { SWAP(*e, *f); }
  430. a = first + (b - a), c = last - (d - c);
  431. b = (v <= Td[PA[*a] - 1]) ? a : ss_partition(PA, a, c, depth);
  432. if((a - first) <= (last - c)) {
  433. if((last - c) <= (c - b)) {
  434. STACK_PUSH(b, c, depth + 1, ss_ilg(c - b));
  435. STACK_PUSH(c, last, depth, limit);
  436. last = a;
  437. } else if((a - first) <= (c - b)) {
  438. STACK_PUSH(c, last, depth, limit);
  439. STACK_PUSH(b, c, depth + 1, ss_ilg(c - b));
  440. last = a;
  441. } else {
  442. STACK_PUSH(c, last, depth, limit);
  443. STACK_PUSH(first, a, depth, limit);
  444. first = b, last = c, depth += 1, limit = ss_ilg(c - b);
  445. }
  446. } else {
  447. if((a - first) <= (c - b)) {
  448. STACK_PUSH(b, c, depth + 1, ss_ilg(c - b));
  449. STACK_PUSH(first, a, depth, limit);
  450. first = c;
  451. } else if((last - c) <= (c - b)) {
  452. STACK_PUSH(first, a, depth, limit);
  453. STACK_PUSH(b, c, depth + 1, ss_ilg(c - b));
  454. first = c;
  455. } else {
  456. STACK_PUSH(first, a, depth, limit);
  457. STACK_PUSH(c, last, depth, limit);
  458. first = b, last = c, depth += 1, limit = ss_ilg(c - b);
  459. }
  460. }
  461. } else {
  462. limit += 1;
  463. if(Td[PA[*first] - 1] < v) {
  464. first = ss_partition(PA, first, last, depth);
  465. limit = ss_ilg(last - first);
  466. }
  467. depth += 1;
  468. }
  469. }
  470. #undef STACK_SIZE
  471. }
  472. #endif /* (SS_BLOCKSIZE == 0) || (SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE) */
  473. /*---------------------------------------------------------------------------*/
  474. #if SS_BLOCKSIZE != 0
  475. static INLINE
  476. void
  477. ss_blockswap(int *a, int *b, int n) {
  478. int t;
  479. for(; 0 < n; --n, ++a, ++b) {
  480. t = *a, *a = *b, *b = t;
  481. }
  482. }
  483. static INLINE
  484. void
  485. ss_rotate(int *first, int *middle, int *last) {
  486. int *a, *b, t;
  487. int l, r;
  488. l = middle - first, r = last - middle;
  489. for(; (0 < l) && (0 < r);) {
  490. if(l == r) { ss_blockswap(first, middle, l); break; }
  491. if(l < r) {
  492. a = last - 1, b = middle - 1;
  493. t = *a;
  494. do {
  495. *a-- = *b, *b-- = *a;
  496. if(b < first) {
  497. *a = t;
  498. last = a;
  499. if((r -= l + 1) <= l) { break; }
  500. a -= 1, b = middle - 1;
  501. t = *a;
  502. }
  503. } while(1);
  504. } else {
  505. a = first, b = middle;
  506. t = *a;
  507. do {
  508. *a++ = *b, *b++ = *a;
  509. if(last <= b) {
  510. *a = t;
  511. first = a + 1;
  512. if((l -= r + 1) <= r) { break; }
  513. a += 1, b = middle;
  514. t = *a;
  515. }
  516. } while(1);
  517. }
  518. }
  519. }
  520. /*---------------------------------------------------------------------------*/
  521. static
  522. void
  523. ss_inplacemerge(const unsigned char *T, const int *PA,
  524. int *first, int *middle, int *last,
  525. int depth) {
  526. const int *p;
  527. int *a, *b;
  528. int len, half;
  529. int q, r;
  530. int x;
  531. for(;;) {
  532. if(*(last - 1) < 0) { x = 1; p = PA + ~*(last - 1); }
  533. else { x = 0; p = PA + *(last - 1); }
  534. for(a = first, len = middle - first, half = len >> 1, r = -1;
  535. 0 < len;
  536. len = half, half >>= 1) {
  537. b = a + half;
  538. q = ss_compare(T, PA + ((0 <= *b) ? *b : ~*b), p, depth);
  539. if(q < 0) {
  540. a = b + 1;
  541. half -= (len & 1) ^ 1;
  542. } else {
  543. r = q;
  544. }
  545. }
  546. if(a < middle) {
  547. if(r == 0) { *a = ~*a; }
  548. ss_rotate(a, middle, last);
  549. last -= middle - a;
  550. middle = a;
  551. if(first == middle) { break; }
  552. }
  553. --last;
  554. if(x != 0) { while(*--last < 0) { } }
  555. if(middle == last) { break; }
  556. }
  557. }
  558. /*---------------------------------------------------------------------------*/
  559. /* Merge-forward with internal buffer. */
  560. static
  561. void
  562. ss_mergeforward(const unsigned char *T, const int *PA,
  563. int *first, int *middle, int *last,
  564. int *buf, int depth) {
  565. int *a, *b, *c, *bufend;
  566. int t;
  567. int r;
  568. bufend = buf + (middle - first) - 1;
  569. ss_blockswap(buf, first, middle - first);
  570. for(t = *(a = first), b = buf, c = middle;;) {
  571. r = ss_compare(T, PA + *b, PA + *c, depth);
  572. if(r < 0) {
  573. do {
  574. *a++ = *b;
  575. if(bufend <= b) { *bufend = t; return; }
  576. *b++ = *a;
  577. } while(*b < 0);
  578. } else if(r > 0) {
  579. do {
  580. *a++ = *c, *c++ = *a;
  581. if(last <= c) {
  582. while(b < bufend) { *a++ = *b, *b++ = *a; }
  583. *a = *b, *b = t;
  584. return;
  585. }
  586. } while(*c < 0);
  587. } else {
  588. *c = ~*c;
  589. do {
  590. *a++ = *b;
  591. if(bufend <= b) { *bufend = t; return; }
  592. *b++ = *a;
  593. } while(*b < 0);
  594. do {
  595. *a++ = *c, *c++ = *a;
  596. if(last <= c) {
  597. while(b < bufend) { *a++ = *b, *b++ = *a; }
  598. *a = *b, *b = t;
  599. return;
  600. }
  601. } while(*c < 0);
  602. }
  603. }
  604. }
  605. /* Merge-backward with internal buffer. */
  606. static
  607. void
  608. ss_mergebackward(const unsigned char *T, const int *PA,
  609. int *first, int *middle, int *last,
  610. int *buf, int depth) {
  611. const int *p1, *p2;
  612. int *a, *b, *c, *bufend;
  613. int t;
  614. int r;
  615. int x;
  616. bufend = buf + (last - middle) - 1;
  617. ss_blockswap(buf, middle, last - middle);
  618. x = 0;
  619. if(*bufend < 0) { p1 = PA + ~*bufend; x |= 1; }
  620. else { p1 = PA + *bufend; }
  621. if(*(middle - 1) < 0) { p2 = PA + ~*(middle - 1); x |= 2; }
  622. else { p2 = PA + *(middle - 1); }
  623. for(t = *(a = last - 1), b = bufend, c = middle - 1;;) {
  624. r = ss_compare(T, p1, p2, depth);
  625. if(0 < r) {
  626. if(x & 1) { do { *a-- = *b, *b-- = *a; } while(*b < 0); x ^= 1; }
  627. *a-- = *b;
  628. if(b <= buf) { *buf = t; break; }
  629. *b-- = *a;
  630. if(*b < 0) { p1 = PA + ~*b; x |= 1; }
  631. else { p1 = PA + *b; }
  632. } else if(r < 0) {
  633. if(x & 2) { do { *a-- = *c, *c-- = *a; } while(*c < 0); x ^= 2; }
  634. *a-- = *c, *c-- = *a;
  635. if(c < first) {
  636. while(buf < b) { *a-- = *b, *b-- = *a; }
  637. *a = *b, *b = t;
  638. break;
  639. }
  640. if(*c < 0) { p2 = PA + ~*c; x |= 2; }
  641. else { p2 = PA + *c; }
  642. } else {
  643. if(x & 1) { do { *a-- = *b, *b-- = *a; } while(*b < 0); x ^= 1; }
  644. *a-- = ~*b;
  645. if(b <= buf) { *buf = t; break; }
  646. *b-- = *a;
  647. if(x & 2) { do { *a-- = *c, *c-- = *a; } while(*c < 0); x ^= 2; }
  648. *a-- = *c, *c-- = *a;
  649. if(c < first) {
  650. while(buf < b) { *a-- = *b, *b-- = *a; }
  651. *a = *b, *b = t;
  652. break;
  653. }
  654. if(*b < 0) { p1 = PA + ~*b; x |= 1; }
  655. else { p1 = PA + *b; }
  656. if(*c < 0) { p2 = PA + ~*c; x |= 2; }
  657. else { p2 = PA + *c; }
  658. }
  659. }
  660. }
  661. /* D&C based merge. */
  662. static
  663. void
  664. ss_swapmerge(const unsigned char *T, const int *PA,
  665. int *first, int *middle, int *last,
  666. int *buf, int bufsize, int depth) {
  667. #define STACK_SIZE SS_SMERGE_STACKSIZE
  668. #define GETIDX(a) ((0 <= (a)) ? (a) : (~(a)))
  669. #define MERGE_CHECK(a, b, c)\
  670. do {\
  671. if(((c) & 1) ||\
  672. (((c) & 2) && (ss_compare(T, PA + GETIDX(*((a) - 1)), PA + *(a), depth) == 0))) {\
  673. *(a) = ~*(a);\
  674. }\
  675. if(((c) & 4) && ((ss_compare(T, PA + GETIDX(*((b) - 1)), PA + *(b), depth) == 0))) {\
  676. *(b) = ~*(b);\
  677. }\
  678. } while(0)
  679. struct { int *a, *b, *c; int d; } stack[STACK_SIZE];
  680. int *l, *r, *lm, *rm;
  681. int m, len, half;
  682. int ssize;
  683. int check, next;
  684. for(check = 0, ssize = 0;;) {
  685. if((last - middle) <= bufsize) {
  686. if((first < middle) && (middle < last)) {
  687. ss_mergebackward(T, PA, first, middle, last, buf, depth);
  688. }
  689. MERGE_CHECK(first, last, check);
  690. STACK_POP(first, middle, last, check);
  691. continue;
  692. }
  693. if((middle - first) <= bufsize) {
  694. if(first < middle) {
  695. ss_mergeforward(T, PA, first, middle, last, buf, depth);
  696. }
  697. MERGE_CHECK(first, last, check);
  698. STACK_POP(first, middle, last, check);
  699. continue;
  700. }
  701. for(m = 0, len = MIN(middle - first, last - middle), half = len >> 1;
  702. 0 < len;
  703. len = half, half >>= 1) {
  704. if(ss_compare(T, PA + GETIDX(*(middle + m + half)),
  705. PA + GETIDX(*(middle - m - half - 1)), depth) < 0) {
  706. m += half + 1;
  707. half -= (len & 1) ^ 1;
  708. }
  709. }
  710. if(0 < m) {
  711. lm = middle - m, rm = middle + m;
  712. ss_blockswap(lm, middle, m);
  713. l = r = middle, next = 0;
  714. if(rm < last) {
  715. if(*rm < 0) {
  716. *rm = ~*rm;
  717. if(first < lm) { for(; *--l < 0;) { } next |= 4; }
  718. next |= 1;
  719. } else if(first < lm) {
  720. for(; *r < 0; ++r) { }
  721. next |= 2;
  722. }
  723. }
  724. if((l - first) <= (last - r)) {
  725. STACK_PUSH(r, rm, last, (next & 3) | (check & 4));
  726. middle = lm, last = l, check = (check & 3) | (next & 4);
  727. } else {
  728. if((next & 2) && (r == middle)) { next ^= 6; }
  729. STACK_PUSH(first, lm, l, (check & 3) | (next & 4));
  730. first = r, middle = rm, check = (next & 3) | (check & 4);
  731. }
  732. } else {
  733. if(ss_compare(T, PA + GETIDX(*(middle - 1)), PA + *middle, depth) == 0) {
  734. *middle = ~*middle;
  735. }
  736. MERGE_CHECK(first, last, check);
  737. STACK_POP(first, middle, last, check);
  738. }
  739. }
  740. #undef STACK_SIZE
  741. }
  742. #endif /* SS_BLOCKSIZE != 0 */
  743. /*---------------------------------------------------------------------------*/
  744. /* Substring sort */
  745. static
  746. void
  747. sssort(const unsigned char *T, const int *PA,
  748. int *first, int *last,
  749. int *buf, int bufsize,
  750. int depth, int n, int lastsuffix) {
  751. int *a;
  752. #if SS_BLOCKSIZE != 0
  753. int *b, *middle, *curbuf;
  754. int j, k, curbufsize, limit;
  755. #endif
  756. int i;
  757. if(lastsuffix != 0) { ++first; }
  758. #if SS_BLOCKSIZE == 0
  759. ss_mintrosort(T, PA, first, last, depth);
  760. #else
  761. if((bufsize < SS_BLOCKSIZE) &&
  762. (bufsize < (last - first)) &&
  763. (bufsize < (limit = ss_isqrt(last - first)))) {
  764. if(SS_BLOCKSIZE < limit) { limit = SS_BLOCKSIZE; }
  765. buf = middle = last - limit, bufsize = limit;
  766. } else {
  767. middle = last, limit = 0;
  768. }
  769. for(a = first, i = 0; SS_BLOCKSIZE < (middle - a); a += SS_BLOCKSIZE, ++i) {
  770. #if SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE
  771. ss_mintrosort(T, PA, a, a + SS_BLOCKSIZE, depth);
  772. #elif 1 < SS_BLOCKSIZE
  773. ss_insertionsort(T, PA, a, a + SS_BLOCKSIZE, depth);
  774. #endif
  775. curbufsize = last - (a + SS_BLOCKSIZE);
  776. curbuf = a + SS_BLOCKSIZE;
  777. if(curbufsize <= bufsize) { curbufsize = bufsize, curbuf = buf; }
  778. for(b = a, k = SS_BLOCKSIZE, j = i; j & 1; b -= k, k <<= 1, j >>= 1) {
  779. ss_swapmerge(T, PA, b - k, b, b + k, curbuf, curbufsize, depth);
  780. }
  781. }
  782. #if SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE
  783. ss_mintrosort(T, PA, a, middle, depth);
  784. #elif 1 < SS_BLOCKSIZE
  785. ss_insertionsort(T, PA, a, middle, depth);
  786. #endif
  787. for(k = SS_BLOCKSIZE; i != 0; k <<= 1, i >>= 1) {
  788. if(i & 1) {
  789. ss_swapmerge(T, PA, a - k, a, middle, buf, bufsize, depth);
  790. a -= k;
  791. }
  792. }
  793. if(limit != 0) {
  794. #if SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE
  795. ss_mintrosort(T, PA, middle, last, depth);
  796. #elif 1 < SS_BLOCKSIZE
  797. ss_insertionsort(T, PA, middle, last, depth);
  798. #endif
  799. ss_inplacemerge(T, PA, first, middle, last, depth);
  800. }
  801. #endif
  802. if(lastsuffix != 0) {
  803. /* Insert last type B* suffix. */
  804. int PAi[2]; PAi[0] = PA[*(first - 1)], PAi[1] = n - 2;
  805. for(a = first, i = *(first - 1);
  806. (a < last) && ((*a < 0) || (0 < ss_compare(T, &(PAi[0]), PA + *a, depth)));
  807. ++a) {
  808. *(a - 1) = *a;
  809. }
  810. *(a - 1) = i;
  811. }
  812. }
  813. /*---------------------------------------------------------------------------*/
  814. static INLINE
  815. int
  816. tr_ilg(int n) {
  817. return (n & 0xffff0000) ?
  818. ((n & 0xff000000) ?
  819. 24 + lg_table[(n >> 24) & 0xff] :
  820. 16 + lg_table[(n >> 16) & 0xff]) :
  821. ((n & 0x0000ff00) ?
  822. 8 + lg_table[(n >> 8) & 0xff] :
  823. 0 + lg_table[(n >> 0) & 0xff]);
  824. }
  825. /*---------------------------------------------------------------------------*/
  826. /* Simple insertionsort for small size groups. */
  827. static
  828. void
  829. tr_insertionsort(const int *ISAd, int *first, int *last) {
  830. int *a, *b;
  831. int t, r;
  832. for(a = first + 1; a < last; ++a) {
  833. for(t = *a, b = a - 1; 0 > (r = ISAd[t] - ISAd[*b]);) {
  834. do { *(b + 1) = *b; } while((first <= --b) && (*b < 0));
  835. if(b < first) { break; }
  836. }
  837. if(r == 0) { *b = ~*b; }
  838. *(b + 1) = t;
  839. }
  840. }
  841. /*---------------------------------------------------------------------------*/
  842. static INLINE
  843. void
  844. tr_fixdown(const int *ISAd, int *SA, int i, int size) {
  845. int j, k;
  846. int v;
  847. int c, d, e;
  848. for(v = SA[i], c = ISAd[v]; (j = 2 * i + 1) < size; SA[i] = SA[k], i = k) {
  849. d = ISAd[SA[k = j++]];
  850. if(d < (e = ISAd[SA[j]])) { k = j; d = e; }
  851. if(d <= c) { break; }
  852. }
  853. SA[i] = v;
  854. }
  855. /* Simple top-down heapsort. */
  856. static
  857. void
  858. tr_heapsort(const int *ISAd, int *SA, int size) {
  859. int i, m;
  860. int t;
  861. m = size;
  862. if((size % 2) == 0) {
  863. m--;
  864. if(ISAd[SA[m / 2]] < ISAd[SA[m]]) { SWAP(SA[m], SA[m / 2]); }
  865. }
  866. for(i = m / 2 - 1; 0 <= i; --i) { tr_fixdown(ISAd, SA, i, m); }
  867. if((size % 2) == 0) { SWAP(SA[0], SA[m]); tr_fixdown(ISAd, SA, 0, m); }
  868. for(i = m - 1; 0 < i; --i) {
  869. t = SA[0], SA[0] = SA[i];
  870. tr_fixdown(ISAd, SA, 0, i);
  871. SA[i] = t;
  872. }
  873. }
  874. /*---------------------------------------------------------------------------*/
  875. /* Returns the median of three elements. */
  876. static INLINE
  877. int *
  878. tr_median3(const int *ISAd, int *v1, int *v2, int *v3) {
  879. int *t;
  880. if(ISAd[*v1] > ISAd[*v2]) { SWAP(v1, v2); }
  881. if(ISAd[*v2] > ISAd[*v3]) {
  882. if(ISAd[*v1] > ISAd[*v3]) { return v1; }
  883. else { return v3; }
  884. }
  885. return v2;
  886. }
  887. /* Returns the median of five elements. */
  888. static INLINE
  889. int *
  890. tr_median5(const int *ISAd,
  891. int *v1, int *v2, int *v3, int *v4, int *v5) {
  892. int *t;
  893. if(ISAd[*v2] > ISAd[*v3]) { SWAP(v2, v3); }
  894. if(ISAd[*v4] > ISAd[*v5]) { SWAP(v4, v5); }
  895. if(ISAd[*v2] > ISAd[*v4]) { SWAP(v2, v4); SWAP(v3, v5); }
  896. if(ISAd[*v1] > ISAd[*v3]) { SWAP(v1, v3); }
  897. if(ISAd[*v1] > ISAd[*v4]) { SWAP(v1, v4); SWAP(v3, v5); }
  898. if(ISAd[*v3] > ISAd[*v4]) { return v4; }
  899. return v3;
  900. }
  901. /* Returns the pivot element. */
  902. static INLINE
  903. int *
  904. tr_pivot(const int *ISAd, int *first, int *last) {
  905. int *middle;
  906. int t;
  907. t = last - first;
  908. middle = first + t / 2;
  909. if(t <= 512) {
  910. if(t <= 32) {
  911. return tr_median3(ISAd, first, middle, last - 1);
  912. } else {
  913. t >>= 2;
  914. return tr_median5(ISAd, first, first + t, middle, last - 1 - t, last - 1);
  915. }
  916. }
  917. t >>= 3;
  918. first = tr_median3(ISAd, first, first + t, first + (t << 1));
  919. middle = tr_median3(ISAd, middle - t, middle, middle + t);
  920. last = tr_median3(ISAd, last - 1 - (t << 1), last - 1 - t, last - 1);
  921. return tr_median3(ISAd, first, middle, last);
  922. }
  923. /*---------------------------------------------------------------------------*/
  924. typedef struct _trbudget_t trbudget_t;
  925. struct _trbudget_t {
  926. int chance;
  927. int remain;
  928. int incval;
  929. int count;
  930. };
  931. static INLINE
  932. void
  933. trbudget_init(trbudget_t *budget, int chance, int incval) {
  934. budget->chance = chance;
  935. budget->remain = budget->incval = incval;
  936. }
  937. static INLINE
  938. int
  939. trbudget_check(trbudget_t *budget, int size) {
  940. if(size <= budget->remain) { budget->remain -= size; return 1; }
  941. if(budget->chance == 0) { budget->count += size; return 0; }
  942. budget->remain += budget->incval - size;
  943. budget->chance -= 1;
  944. return 1;
  945. }
  946. /*---------------------------------------------------------------------------*/
  947. static INLINE
  948. void
  949. tr_partition(const int *ISAd,
  950. int *first, int *middle, int *last,
  951. int **pa, int **pb, int v) {
  952. int *a, *b, *c, *d, *e, *f;
  953. int t, s;
  954. int x = 0;
  955. for(b = middle - 1; (++b < last) && ((x = ISAd[*b]) == v);) { }
  956. if(((a = b) < last) && (x < v)) {
  957. for(; (++b < last) && ((x = ISAd[*b]) <= v);) {
  958. if(x == v) { SWAP(*b, *a); ++a; }
  959. }
  960. }
  961. for(c = last; (b < --c) && ((x = ISAd[*c]) == v);) { }
  962. if((b < (d = c)) && (x > v)) {
  963. for(; (b < --c) && ((x = ISAd[*c]) >= v);) {
  964. if(x == v) { SWAP(*c, *d); --d; }
  965. }
  966. }
  967. for(; b < c;) {
  968. SWAP(*b, *c);
  969. for(; (++b < c) && ((x = ISAd[*b]) <= v);) {
  970. if(x == v) { SWAP(*b, *a); ++a; }
  971. }
  972. for(; (b < --c) && ((x = ISAd[*c]) >= v);) {
  973. if(x == v) { SWAP(*c, *d); --d; }
  974. }
  975. }
  976. if(a <= d) {
  977. c = b - 1;
  978. if((s = a - first) > (t = b - a)) { s = t; }
  979. for(e = first, f = b - s; 0 < s; --s, ++e, ++f) { SWAP(*e, *f); }
  980. if((s = d - c) > (t = last - d - 1)) { s = t; }
  981. for(e = b, f = last - s; 0 < s; --s, ++e, ++f) { SWAP(*e, *f); }
  982. first += (b - a), last -= (d - c);
  983. }
  984. *pa = first, *pb = last;
  985. }
  986. static
  987. void
  988. tr_copy(int *ISA, const int *SA,
  989. int *first, int *a, int *b, int *last,
  990. int depth) {
  991. /* sort suffixes of middle partition
  992. by using sorted order of suffixes of left and right partition. */
  993. int *c, *d, *e;
  994. int s, v;
  995. v = b - SA - 1;
  996. for(c = first, d = a - 1; c <= d; ++c) {
  997. if((0 <= (s = *c - depth)) && (ISA[s] == v)) {
  998. *++d = s;
  999. ISA[s] = d - SA;
  1000. }
  1001. }
  1002. for(c = last - 1, e = d + 1, d = b; e < d; --c) {
  1003. if((0 <= (s = *c - depth)) && (ISA[s] == v)) {
  1004. *--d = s;
  1005. ISA[s] = d - SA;
  1006. }
  1007. }
  1008. }
  1009. static
  1010. void
  1011. tr_partialcopy(int *ISA, const int *SA,
  1012. int *first, int *a, int *b, int *last,
  1013. int depth) {
  1014. int *c, *d, *e;
  1015. int s, v;
  1016. int rank, lastrank, newrank = -1;
  1017. v = b - SA - 1;
  1018. lastrank = -1;
  1019. for(c = first, d = a - 1; c <= d; ++c) {
  1020. if((0 <= (s = *c - depth)) && (ISA[s] == v)) {
  1021. *++d = s;
  1022. rank = ISA[s + depth];
  1023. if(lastrank != rank) { lastrank = rank; newrank = d - SA; }
  1024. ISA[s] = newrank;
  1025. }
  1026. }
  1027. lastrank = -1;
  1028. for(e = d; first <= e; --e) {
  1029. rank = ISA[*e];
  1030. if(lastrank != rank) { lastrank = rank; newrank = e - SA; }
  1031. if(newrank != rank) { ISA[*e] = newrank; }
  1032. }
  1033. lastrank = -1;
  1034. for(c = last - 1, e = d + 1, d = b; e < d; --c) {
  1035. if((0 <= (s = *c - depth)) && (ISA[s] == v)) {
  1036. *--d = s;
  1037. rank = ISA[s + depth];
  1038. if(lastrank != rank) { lastrank = rank; newrank = d - SA; }
  1039. ISA[s] = newrank;
  1040. }
  1041. }
  1042. }
  1043. static
  1044. void
  1045. tr_introsort(int *ISA, const int *ISAd,
  1046. int *SA, int *first, int *last,
  1047. trbudget_t *budget) {
  1048. #define STACK_SIZE TR_STACKSIZE
  1049. struct { const int *a; int *b, *c; int d, e; }stack[STACK_SIZE];
  1050. int *a, *b, *c;
  1051. int t;
  1052. int v, x = 0;
  1053. int incr = ISAd - ISA;
  1054. int limit, next;
  1055. int ssize, trlink = -1;
  1056. for(ssize = 0, limit = tr_ilg(last - first);;) {
  1057. if(limit < 0) {
  1058. if(limit == -1) {
  1059. /* tandem repeat partition */
  1060. tr_partition(ISAd - incr, first, first, last, &a, &b, last - SA - 1);
  1061. /* update ranks */
  1062. if(a < last) {
  1063. for(c = first, v = a - SA - 1; c < a; ++c) { ISA[*c] = v; }
  1064. }
  1065. if(b < last) {
  1066. for(c = a, v = b - SA - 1; c < b; ++c) { ISA[*c] = v; }
  1067. }
  1068. /* push */
  1069. if(1 < (b - a)) {
  1070. STACK_PUSH5(NULL, a, b, 0, 0);
  1071. STACK_PUSH5(ISAd - incr, first, last, -2, trlink);
  1072. trlink = ssize - 2;
  1073. }
  1074. if((a - first) <= (last - b)) {
  1075. if(1 < (a - first)) {
  1076. STACK_PUSH5(ISAd, b, last, tr_ilg(last - b), trlink);
  1077. last = a, limit = tr_ilg(a - first);
  1078. } else if(1 < (last - b)) {
  1079. first = b, limit = tr_ilg(last - b);
  1080. } else {
  1081. STACK_POP5(ISAd, first, last, limit, trlink);
  1082. }
  1083. } else {
  1084. if(1 < (last - b)) {
  1085. STACK_PUSH5(ISAd, first, a, tr_ilg(a - first), trlink);
  1086. first = b, limit = tr_ilg(last - b);
  1087. } else if(1 < (a - first)) {
  1088. last = a, limit = tr_ilg(a - first);
  1089. } else {
  1090. STACK_POP5(ISAd, first, last, limit, trlink);
  1091. }
  1092. }
  1093. } else if(limit == -2) {
  1094. /* tandem repeat copy */
  1095. a = stack[--ssize].b, b = stack[ssize].c;
  1096. if(stack[ssize].d == 0) {
  1097. tr_copy(ISA, SA, first, a, b, last, ISAd - ISA);
  1098. } else {
  1099. if(0 <= trlink) { stack[trlink].d = -1; }
  1100. tr_partialcopy(ISA, SA, first, a, b, last, ISAd - ISA);
  1101. }
  1102. STACK_POP5(ISAd, first, last, limit, trlink);
  1103. } else {
  1104. /* sorted partition */
  1105. if(0 <= *first) {
  1106. a = first;
  1107. do { ISA[*a] = a - SA; } while((++a < last) && (0 <= *a));
  1108. first = a;
  1109. }
  1110. if(first < last) {
  1111. a = first; do { *a = ~*a; } while(*++a < 0);
  1112. next = (ISA[*a] != ISAd[*a]) ? tr_ilg(a - first + 1) : -1;
  1113. if(++a < last) { for(b = first, v = a - SA - 1; b < a; ++b) { ISA[*b] = v; } }
  1114. /* push */
  1115. if(trbudget_check(budget, a - first)) {
  1116. if((a - first) <= (last - a)) {
  1117. STACK_PUSH5(ISAd, a, last, -3, trlink);
  1118. ISAd += incr, last = a, limit = next;
  1119. } else {
  1120. if(1 < (last - a)) {
  1121. STACK_PUSH5(ISAd + incr, first, a, next, trlink);
  1122. first = a, limit = -3;
  1123. } else {
  1124. ISAd += incr, last = a, limit = next;
  1125. }
  1126. }
  1127. } else {
  1128. if(0 <= trlink) { stack[trlink].d = -1; }
  1129. if(1 < (last - a)) {
  1130. first = a, limit = -3;
  1131. } else {
  1132. STACK_POP5(ISAd, first, last, limit, trlink);
  1133. }
  1134. }
  1135. } else {
  1136. STACK_POP5(ISAd, first, last, limit, trlink);
  1137. }
  1138. }
  1139. continue;
  1140. }
  1141. if((last - first) <= TR_INSERTIONSORT_THRESHOLD) {
  1142. tr_insertionsort(ISAd, first, last);
  1143. limit = -3;
  1144. continue;
  1145. }
  1146. if(limit-- == 0) {
  1147. tr_heapsort(ISAd, first, last - first);
  1148. for(a = last - 1; first < a; a = b) {
  1149. for(x = ISAd[*a], b = a - 1; (first <= b) && (ISAd[*b] == x); --b) { *b = ~*b; }
  1150. }
  1151. limit = -3;
  1152. continue;
  1153. }
  1154. /* choose pivot */
  1155. a = tr_pivot(ISAd, first, last);
  1156. SWAP(*first, *a);
  1157. v = ISAd[*first];
  1158. /* partition */
  1159. tr_partition(ISAd, first, first + 1, last, &a, &b, v);
  1160. if((last - first) != (b - a)) {
  1161. next = (ISA[*a] != v) ? tr_ilg(b - a) : -1;
  1162. /* update ranks */
  1163. for(c = first, v = a - SA - 1; c < a; ++c) { ISA[*c] = v; }
  1164. if(b < last) { for(c = a, v = b - SA - 1; c < b; ++c) { ISA[*c] = v; } }
  1165. /* push */
  1166. if((1 < (b - a)) && (trbudget_check(budget, b - a))) {
  1167. if((a - first) <= (last - b)) {
  1168. if((last - b) <= (b - a)) {
  1169. if(1 < (a - first)) {
  1170. STACK_PUSH5(ISAd + incr, a, b, next, trlink);
  1171. STACK_PUSH5(ISAd, b, last, limit, trlink);
  1172. last = a;
  1173. } else if(1 < (last - b)) {
  1174. STACK_PUSH5(ISAd + incr, a, b, next, trlink);
  1175. first = b;
  1176. } else {
  1177. ISAd += incr, first = a, last = b, limit = next;
  1178. }
  1179. } else if((a - first) <= (b - a)) {
  1180. if(1 < (a - first)) {
  1181. STACK_PUSH5(ISAd, b, last, limit, trlink);
  1182. STACK_PUSH5(ISAd + incr, a, b, next, trlink);
  1183. last = a;
  1184. } else {
  1185. STACK_PUSH5(ISAd, b, last, limit, trlink);
  1186. ISAd += incr, first = a, last = b, limit = next;
  1187. }
  1188. } else {
  1189. STACK_PUSH5(ISAd, b, last, limit, trlink);
  1190. STACK_PUSH5(ISAd, first, a, limit, trlink);
  1191. ISAd += incr, first = a, last = b, limit = next;
  1192. }
  1193. } else {
  1194. if((a - first) <= (b - a)) {
  1195. if(1 < (last - b)) {
  1196. STACK_PUSH5(ISAd + incr, a, b, next, trlink);
  1197. STACK_PUSH5(ISAd, first, a, limit, trlink);
  1198. first = b;
  1199. } else if(1 < (a - first)) {
  1200. STACK_PUSH5(ISAd + incr, a, b, next, trlink);
  1201. last = a;
  1202. } else {
  1203. ISAd += incr, first = a, last = b, limit = next;
  1204. }
  1205. } else if((last - b) <= (b - a)) {
  1206. if(1 < (last - b)) {
  1207. STACK_PUSH5(ISAd, first, a, limit, trlink);
  1208. STACK_PUSH5(ISAd + incr, a, b, next, trlink);
  1209. first = b;
  1210. } else {
  1211. STACK_PUSH5(ISAd, first, a, limit, trlink);
  1212. ISAd += incr, first = a, last = b, limit = next;
  1213. }
  1214. } else {
  1215. STACK_PUSH5(ISAd, first, a, limit, trlink);
  1216. STACK_PUSH5(ISAd, b, last, limit, trlink);
  1217. ISAd += incr, first = a, last = b, limit = next;
  1218. }
  1219. }
  1220. } else {
  1221. if((1 < (b - a)) && (0 <= trlink)) { stack[trlink].d = -1; }
  1222. if((a - first) <= (last - b)) {
  1223. if(1 < (a - first)) {
  1224. STACK_PUSH5(ISAd, b, last, limit, trlink);
  1225. last = a;
  1226. } else if(1 < (last - b)) {
  1227. first = b;
  1228. } else {
  1229. STACK_POP5(ISAd, first, last, limit, trlink);
  1230. }
  1231. } else {
  1232. if(1 < (last - b)) {
  1233. STACK_PUSH5(ISAd, first, a, limit, trlink);
  1234. first = b;
  1235. } else if(1 < (a - first)) {
  1236. last = a;
  1237. } else {
  1238. STACK_POP5(ISAd, first, last, limit, trlink);
  1239. }
  1240. }
  1241. }
  1242. } else {
  1243. if(trbudget_check(budget, last - first)) {
  1244. limit = tr_ilg(last - first), ISAd += incr;
  1245. } else {
  1246. if(0 <= trlink) { stack[trlink].d = -1; }
  1247. STACK_POP5(ISAd, first, last, limit, trlink);
  1248. }
  1249. }
  1250. }
  1251. #undef STACK_SIZE
  1252. }
  1253. /*---------------------------------------------------------------------------*/
  1254. /* Tandem repeat sort */
  1255. static
  1256. void
  1257. trsort(int *ISA, int *SA, int n, int depth) {
  1258. int *ISAd;
  1259. int *first, *last;
  1260. trbudget_t budget;
  1261. int t, skip, unsorted;
  1262. trbudget_init(&budget, tr_ilg(n) * 2 / 3, n);
  1263. /* trbudget_init(&budget, tr_ilg(n) * 3 / 4, n); */
  1264. for(ISAd = ISA + depth; -n < *SA; ISAd += ISAd - ISA) {
  1265. first = SA;
  1266. skip = 0;
  1267. unsorted = 0;
  1268. do {
  1269. if((t = *first) < 0) { first -= t; skip += t; }
  1270. else {
  1271. if(skip != 0) { *(first + skip) = skip; skip = 0; }
  1272. last = SA + ISA[t] + 1;
  1273. if(1 < (last - first)) {
  1274. budget.count = 0;
  1275. tr_introsort(ISA, ISAd, SA, first, last, &budget);
  1276. if(budget.count != 0) { unsorted += budget.count; }
  1277. else { skip = first - last; }
  1278. } else if((last - first) == 1) {
  1279. skip = -1;
  1280. }
  1281. first = last;
  1282. }
  1283. } while(first < (SA + n));
  1284. if(skip != 0) { *(first + skip) = skip; }
  1285. if(unsorted == 0) { break; }
  1286. }
  1287. }
  1288. /*---------------------------------------------------------------------------*/
  1289. /* Sorts suffixes of type B*. */
  1290. static
  1291. int
  1292. sort_typeBstar(const unsigned char *T, int *SA,
  1293. int *bucket_A, int *bucket_B,
  1294. int n, int openMP) {
  1295. int *PAb, *ISAb, *buf;
  1296. #ifdef LIBBSC_OPENMP
  1297. int *curbuf;
  1298. int l;
  1299. #endif
  1300. int i, j, k, t, m, bufsize;
  1301. int c0, c1;
  1302. #ifdef LIBBSC_OPENMP
  1303. int d0, d1;
  1304. #endif
  1305. (void)openMP;
  1306. /* Initialize bucket arrays. */
  1307. for(i = 0; i < BUCKET_A_SIZE; ++i) { bucket_A[i] = 0; }
  1308. for(i = 0; i < BUCKET_B_SIZE; ++i) { bucket_B[i] = 0; }
  1309. /* Count the number of occurrences of the first one or two characters of each
  1310. type A, B and B* suffix. Moreover, store the beginning position of all
  1311. type B* suffixes into the array SA. */
  1312. for(i = n - 1, m = n, c0 = T[n - 1]; 0 <= i;) {
  1313. /* type A suffix. */
  1314. do { ++BUCKET_A(c1 = c0); } while((0 <= --i) && ((c0 = T[i]) >= c1));
  1315. if(0 <= i) {
  1316. /* type B* suffix. */
  1317. ++BUCKET_BSTAR(c0, c1);
  1318. SA[--m] = i;
  1319. /* type B suffix. */
  1320. for(--i, c1 = c0; (0 <= i) && ((c0 = T[i]) <= c1); --i, c1 = c0) {
  1321. ++BUCKET_B(c0, c1);
  1322. }
  1323. }
  1324. }
  1325. m = n - m;
  1326. /*
  1327. note:
  1328. A type B* suffix is lexicographically smaller than a type B suffix that
  1329. begins with the same first two characters.
  1330. */
  1331. /* Calculate the index of start/end point of each bucket. */
  1332. for(c0 = 0, i = 0, j = 0; c0 < ALPHABET_SIZE; ++c0) {
  1333. t = i + BUCKET_A(c0);
  1334. BUCKET_A(c0) = i + j; /* start point */
  1335. i = t + BUCKET_B(c0, c0);
  1336. for(c1 = c0 + 1; c1 < ALPHABET_SIZE; ++c1) {
  1337. j += BUCKET_BSTAR(c0, c1);
  1338. BUCKET_BSTAR(c0, c1) = j; /* end point */
  1339. i += BUCKET_B(c0, c1);
  1340. }
  1341. }
  1342. if(0 < m) {
  1343. /* Sort the type B* suffixes by their first two characters. */
  1344. PAb = SA + n - m; ISAb = SA + m;
  1345. for(i = m - 2; 0 <= i; --i) {
  1346. t = PAb[i], c0 = T[t], c1 = T[t + 1];
  1347. SA[--BUCKET_BSTAR(c0, c1)] = i;
  1348. }
  1349. t = PAb[m - 1], c0 = T[t], c1 = T[t + 1];
  1350. SA[--BUCKET_BSTAR(c0, c1)] = m - 1;
  1351. /* Sort the type B* substrings using sssort. */
  1352. #ifdef LIBBSC_OPENMP
  1353. if (openMP)
  1354. {
  1355. buf = SA + m;
  1356. c0 = ALPHABET_SIZE - 2, c1 = ALPHABET_SIZE - 1, j = m;
  1357. #pragma omp parallel default(shared) private(bufsize, curbuf, k, l, d0, d1)
  1358. {
  1359. bufsize = (n - (2 * m)) / omp_get_num_threads();
  1360. curbuf = buf + omp_get_thread_num() * bufsize;
  1361. k = 0;
  1362. for(;;) {
  1363. #pragma omp critical(sssort_lock)
  1364. {
  1365. if(0 < (l = j)) {
  1366. d0 = c0, d1 = c1;
  1367. do {
  1368. k = BUCKET_BSTAR(d0, d1);
  1369. if(--d1 <= d0) {
  1370. d1 = ALPHABET_SIZE - 1;
  1371. if(--d0 < 0) { break; }
  1372. }
  1373. } while(((l - k) <= 1) && (0 < (l = k)));
  1374. c0 = d0, c1 = d1, j = k;
  1375. }
  1376. }
  1377. if(l == 0) { break; }
  1378. sssort(T, PAb, SA + k, SA + l,
  1379. curbuf, bufsize, 2, n, *(SA + k) == (m - 1));
  1380. }
  1381. }
  1382. }
  1383. else
  1384. {
  1385. buf = SA + m, bufsize = n - (2 * m);
  1386. for(c0 = ALPHABET_SIZE - 2, j = m; 0 < j; --c0) {
  1387. for(c1 = ALPHABET_SIZE - 1; c0 < c1; j = i, --c1) {
  1388. i = BUCKET_BSTAR(c0, c1);
  1389. if(1 < (j - i)) {
  1390. sssort(T, PAb, SA + i, SA + j,
  1391. buf, bufsize, 2, n, *(SA + i) == (m - 1));
  1392. }
  1393. }
  1394. }
  1395. }
  1396. #else
  1397. buf = SA + m, bufsize = n - (2 * m);
  1398. for(c0 = ALPHABET_SIZE - 2, j = m; 0 < j; --c0) {
  1399. for(c1 = ALPHABET_SIZE - 1; c0 < c1; j = i, --c1) {
  1400. i = BUCKET_BSTAR(c0, c1);
  1401. if(1 < (j - i)) {
  1402. sssort(T, PAb, SA + i, SA + j,
  1403. buf, bufsize, 2, n, *(SA + i) == (m - 1));
  1404. }
  1405. }
  1406. }
  1407. #endif
  1408. /* Compute ranks of type B* substrings. */
  1409. for(i = m - 1; 0 <= i; --i) {
  1410. if(0 <= SA[i]) {
  1411. j = i;
  1412. do { ISAb[SA[i]] = i; } while((0 <= --i) && (0 <= SA[i]));
  1413. SA[i + 1] = i - j;
  1414. if(i <= 0) { break; }
  1415. }
  1416. j = i;
  1417. do { ISAb[SA[i] = ~SA[i]] = j; } while(SA[--i] < 0);
  1418. ISAb[SA[i]] = j;
  1419. }
  1420. /* Construct the inverse suffix array of type B* suffixes using trsort. */
  1421. trsort(ISAb, SA, m, 1);
  1422. /* Set the sorted order of tyoe B* suffixes. */
  1423. for(i = n - 1, j = m, c0 = T[n - 1]; 0 <= i;) {
  1424. for(--i, c1 = c0; (0 <= i) && ((c0 = T[i]) >= c1); --i, c1 = c0) { }
  1425. if(0 <= i) {
  1426. t = i;
  1427. for(--i, c1 = c0; (0 <= i) && ((c0 = T[i]) <= c1); --i, c1 = c0) { }
  1428. SA[ISAb[--j]] = ((t == 0) || (1 < (t - i))) ? t : ~t;
  1429. }
  1430. }
  1431. /* Calculate the index of start/end point of each bucket. */
  1432. BUCKET_B(ALPHABET_SIZE - 1, ALPHABET_SIZE - 1) = n; /* end point */
  1433. for(c0 = ALPHABET_SIZE - 2, k = m - 1; 0 <= c0; --c0) {
  1434. i = BUCKET_A(c0 + 1) - 1;
  1435. for(c1 = ALPHABET_SIZE - 1; c0 < c1; --c1) {
  1436. t = i - BUCKET_B(c0, c1);
  1437. BUCKET_B(c0, c1) = i; /* end point */
  1438. /* Move all type B* suffixes to the correct position. */
  1439. for(i = t, j = BUCKET_BSTAR(c0, c1);
  1440. j <= k;
  1441. --i, --k) { SA[i] = SA[k]; }
  1442. }
  1443. BUCKET_BSTAR(c0, c0 + 1) = i - BUCKET_B(c0, c0) + 1; /* start point */
  1444. BUCKET_B(c0, c0) = i; /* end point */
  1445. }
  1446. }
  1447. return m;
  1448. }
  1449. /* Constructs the suffix array by using the sorted order of type B* suffixes. */
  1450. static
  1451. void
  1452. construct_SA(const unsigned char *T, int *SA,
  1453. int *bucket_A, int *bucket_B,
  1454. int n, int m) {
  1455. int *i, *j, *k;
  1456. int s;
  1457. int c0, c1, c2;
  1458. if(0 < m) {
  1459. /* Construct the sorted order of type B suffixes by using
  1460. the sorted order of type B* suffixes. */
  1461. for(c1 = ALPHABET_SIZE - 2; 0 <= c1; --c1) {
  1462. /* Scan the suffix array from right to left. */
  1463. for(i = SA + BUCKET_BSTAR(c1, c1 + 1),
  1464. j = SA + BUCKET_A(c1 + 1) - 1, k = NULL, c2 = -1;
  1465. i <= j;
  1466. --j) {
  1467. if(0 < (s = *j)) {
  1468. assert(T[s] == c1);
  1469. assert(((s + 1) < n) && (T[s] <= T[s + 1]));
  1470. assert(T[s - 1] <= T[s]);
  1471. *j = ~s;
  1472. c0 = T[--s];
  1473. if((0 < s) && (T[s - 1] > c0)) { s = ~s; }
  1474. if(c0 != c2) {
  1475. if(0 <= c2) { BUCKET_B(c2, c1) = k - SA; }
  1476. k = SA + BUCKET_B(c2 = c0, c1);
  1477. }
  1478. assert(k < j);
  1479. *k-- = s;
  1480. } else {
  1481. assert(((s == 0) && (T[s] == c1)) || (s < 0));
  1482. *j = ~s;
  1483. }
  1484. }
  1485. }
  1486. }
  1487. /* Construct the suffix array by using
  1488. the sorted order of type B suffixes. */
  1489. k = SA + BUCKET_A(c2 = T[n - 1]);
  1490. *k++ = (T[n - 2] < c2) ? ~(n - 1) : (n - 1);
  1491. /* Scan the suffix array from left to right. */
  1492. for(i = SA, j = SA + n; i < j; ++i) {
  1493. if(0 < (s = *i)) {
  1494. assert(T[s - 1] >= T[s]);
  1495. c0 = T[--s];
  1496. if((s == 0) || (T[s - 1] < c0)) { s = ~s; }
  1497. if(c0 != c2) {
  1498. BUCKET_A(c2) = k - SA;
  1499. k = SA + BUCKET_A(c2 = c0);
  1500. }
  1501. assert(i < k);
  1502. *k++ = s;
  1503. } else {
  1504. assert(s < 0);
  1505. *i = ~s;
  1506. }
  1507. }
  1508. }
  1509. /* Constructs the burrows-wheeler transformed string directly
  1510. by using the sorted order of type B* suffixes. */
  1511. static
  1512. int
  1513. construct_BWT(const unsigned char *T, int *SA,
  1514. int *bucket_A, int *bucket_B,
  1515. int n, int m) {
  1516. int *i, *j, *k, *orig;
  1517. int s;
  1518. int c0, c1, c2;
  1519. if(0 < m) {
  1520. /* Construct the sorted order of type B suffixes by using
  1521. the sorted order of type B* suffixes. */
  1522. for(c1 = ALPHABET_SIZE - 2; 0 <= c1; --c1) {
  1523. /* Scan the suffix array from right to left. */
  1524. for(i = SA + BUCKET_BSTAR(c1, c1 + 1),
  1525. j = SA + BUCKET_A(c1 + 1) - 1, k = NULL, c2 = -1;
  1526. i <= j;
  1527. --j) {
  1528. if(0 < (s = *j)) {
  1529. assert(T[s] == c1);
  1530. assert(((s + 1) < n) && (T[s] <= T[s + 1]));
  1531. assert(T[s - 1] <= T[s]);
  1532. c0 = T[--s];
  1533. *j = ~((int)c0);
  1534. if((0 < s) && (T[s - 1] > c0)) { s = ~s; }
  1535. if(c0 != c2) {
  1536. if(0 <= c2) { BUCKET_B(c2, c1) = k - SA; }
  1537. k = SA + BUCKET_B(c2 = c0, c1);
  1538. }
  1539. assert(k < j);
  1540. *k-- = s;
  1541. } else if(s != 0) {
  1542. *j = ~s;
  1543. #ifndef NDEBUG
  1544. } else {
  1545. assert(T[s] == c1);
  1546. #endif
  1547. }
  1548. }
  1549. }
  1550. }
  1551. /* Construct the BWTed string by using
  1552. the sorted order of type B suffixes. */
  1553. k = SA + BUCKET_A(c2 = T[n - 1]);
  1554. *k++ = (T[n - 2] < c2) ? ~((int)T[n - 2]) : (n - 1);
  1555. /* Scan the suffix array from left to right. */
  1556. for(i = SA, j = SA + n, orig = SA; i < j; ++i) {
  1557. if(0 < (s = *i)) {
  1558. assert(T[s - 1] >= T[s]);
  1559. c0 = T[--s];
  1560. *i = c0;
  1561. if((0 < s) && (T[s - 1] < c0)) { s = ~((int)T[s - 1]); }
  1562. if(c0 != c2) {
  1563. BUCKET_A(c2) = k - SA;
  1564. k = SA + BUCKET_A(c2 = c0);
  1565. }
  1566. assert(i < k);
  1567. *k++ = s;
  1568. } else if(s != 0) {
  1569. *i = ~s;
  1570. } else {
  1571. orig = i;
  1572. }
  1573. }
  1574. return orig - SA;
  1575. }
  1576. /* Constructs the burrows-wheeler transformed string directly
  1577. by using the sorted order of type B* suffixes. */
  1578. static
  1579. int
  1580. construct_BWT_indexes(const unsigned char *T, int *SA,
  1581. int *bucket_A, int *bucket_B,
  1582. int n, int m,
  1583. unsigned char * num_indexes, int * indexes) {
  1584. int *i, *j, *k, *orig;
  1585. int s;
  1586. int c0, c1, c2;
  1587. int mod = n / 8;
  1588. {
  1589. mod |= mod >> 1; mod |= mod >> 2;
  1590. mod |= mod >> 4; mod |= mod >> 8;
  1591. mod |= mod >> 16; mod >>= 1;
  1592. *num_indexes = (unsigned char)((n - 1) / (mod + 1));
  1593. }
  1594. if(0 < m) {
  1595. /* Construct the sorted order of type B suffixes by using
  1596. the sorted order of type B* suffixes. */
  1597. for(c1 = ALPHABET_SIZE - 2; 0 <= c1; --c1) {
  1598. /* Scan the suffix array from right to left. */
  1599. for(i = SA + BUCKET_BSTAR(c1, c1 + 1),
  1600. j = SA + BUCKET_A(c1 + 1) - 1, k = NULL, c2 = -1;
  1601. i <= j;
  1602. --j) {
  1603. if(0 < (s = *j)) {
  1604. assert(T[s] == c1);
  1605. assert(((s + 1) < n) && (T[s] <= T[s + 1]));
  1606. assert(T[s - 1] <= T[s]);
  1607. if ((s & mod) == 0) indexes[s / (mod + 1) - 1] = j - SA;
  1608. c0 = T[--s];
  1609. *j = ~((int)c0);
  1610. if((0 < s) && (T[s - 1] > c0)) { s = ~s; }
  1611. if(c0 != c2) {
  1612. if(0 <= c2) { BUCKET_B(c2, c1) = k - SA; }
  1613. k = SA + BUCKET_B(c2 = c0, c1);
  1614. }
  1615. assert(k < j);
  1616. *k-- = s;
  1617. } else if(s != 0) {
  1618. *j = ~s;
  1619. #ifndef NDEBUG
  1620. } else {
  1621. assert(T[s] == c1);
  1622. #endif
  1623. }
  1624. }
  1625. }
  1626. }
  1627. /* Construct the BWTed string by using
  1628. the sorted order of type B suffixes. */
  1629. k = SA + BUCKET_A(c2 = T[n - 1]);
  1630. if (T[n - 2] < c2) {
  1631. if (((n - 1) & mod) == 0) indexes[(n - 1) / (mod + 1) - 1] = k - SA;
  1632. *k++ = ~((int)T[n - 2]);
  1633. }
  1634. else {
  1635. *k++ = n - 1;
  1636. }
  1637. /* Scan the suffix array from left to right. */
  1638. for(i = SA, j = SA + n, orig = SA; i < j; ++i) {
  1639. if(0 < (s = *i)) {
  1640. assert(T[s - 1] >= T[s]);
  1641. if ((s & mod) == 0) indexes[s / (mod + 1) - 1] = i - SA;
  1642. c0 = T[--s];
  1643. *i = c0;
  1644. if(c0 != c2) {
  1645. BUCKET_A(c2) = k - SA;
  1646. k = SA + BUCKET_A(c2 = c0);
  1647. }
  1648. assert(i < k);
  1649. if((0 < s) && (T[s - 1] < c0)) {
  1650. if ((s & mod) == 0) indexes[s / (mod + 1) - 1] = k - SA;
  1651. *k++ = ~((int)T[s - 1]);
  1652. } else
  1653. *k++ = s;
  1654. } else if(s != 0) {
  1655. *i = ~s;
  1656. } else {
  1657. orig = i;
  1658. }
  1659. }
  1660. return orig - SA;
  1661. }
  1662. /*---------------------------------------------------------------------------*/
  1663. /*- Function -*/
  1664. int
  1665. divsufsort(const unsigned char *T, int *SA, int n, int openMP) {
  1666. int *bucket_A, *bucket_B;
  1667. int m;
  1668. int err = 0;
  1669. /* Check arguments. */
  1670. if((T == NULL) || (SA == NULL) || (n < 0)) { return -1; }
  1671. else if(n == 0) { return 0; }
  1672. else if(n == 1) { SA[0] = 0; return 0; }
  1673. else if(n == 2) { m = (T[0] < T[1]); SA[m ^ 1] = 0, SA[m] = 1; return 0; }
  1674. bucket_A = (int *)malloc(BUCKET_A_SIZE * sizeof(int));
  1675. bucket_B = (int *)malloc(BUCKET_B_SIZE * sizeof(int));
  1676. /* Suffixsort. */
  1677. if((bucket_A != NULL) && (bucket_B != NULL)) {
  1678. m = sort_typeBstar(T, SA, bucket_A, bucket_B, n, openMP);
  1679. construct_SA(T, SA, bucket_A, bucket_B, n, m);
  1680. } else {
  1681. err = -2;
  1682. }
  1683. free(bucket_B);
  1684. free(bucket_A);
  1685. return err;
  1686. }
  1687. int
  1688. divbwt(const unsigned char *T, unsigned char *U, int *A, int n, unsigned char * num_indexes, int * indexes, int openMP) {
  1689. int *B;
  1690. int *bucket_A, *bucket_B;
  1691. int m, pidx, i;
  1692. /* Check arguments. */
  1693. if((T == NULL) || (U == NULL) || (n < 0)) { return -1; }
  1694. else if(n <= 1) { if(n == 1) { U[0] = T[0]; } return n; }
  1695. if((B = A) == NULL) { B = (int *)malloc((size_t)(n + 1) * sizeof(int)); }
  1696. bucket_A = (int *)malloc(BUCKET_A_SIZE * sizeof(int));
  1697. bucket_B = (int *)malloc(BUCKET_B_SIZE * sizeof(int));
  1698. /* Burrows-Wheeler Transform. */
  1699. if((B != NULL) && (bucket_A != NULL) && (bucket_B != NULL)) {
  1700. m = sort_typeBstar(T, B, bucket_A, bucket_B, n, openMP);
  1701. if (num_indexes == NULL || indexes == NULL) {
  1702. pidx = construct_BWT(T, B, bucket_A, bucket_B, n, m);
  1703. } else {
  1704. pidx = construct_BWT_indexes(T, B, bucket_A, bucket_B, n, m, num_indexes, indexes);
  1705. }
  1706. /* Copy to output string. */
  1707. U[0] = T[n - 1];
  1708. for(i = 0; i < pidx; ++i) { U[i + 1] = (unsigned char)B[i]; }
  1709. for(i += 1; i < n; ++i) { U[i] = (unsigned char)B[i]; }
  1710. pidx += 1;
  1711. } else {
  1712. pidx = -2;
  1713. }
  1714. free(bucket_B);
  1715. free(bucket_A);
  1716. if(A == NULL) { free(B); }
  1717. return pidx;
  1718. }