ubidiln.cpp 48 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215121612171218121912201221122212231224122512261227122812291230123112321233123412351236123712381239124012411242124312441245124612471248124912501251125212531254125512561257125812591260126112621263126412651266126712681269127012711272127312741275127612771278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347
  1. // © 2016 and later: Unicode, Inc. and others.
  2. // License & terms of use: http://www.unicode.org/copyright.html
  3. /*
  4. ******************************************************************************
  5. *
  6. * Copyright (C) 1999-2015, International Business Machines
  7. * Corporation and others. All Rights Reserved.
  8. *
  9. ******************************************************************************
  10. * file name: ubidiln.c
  11. * encoding: UTF-8
  12. * tab size: 8 (not used)
  13. * indentation:4
  14. *
  15. * created on: 1999aug06
  16. * created by: Markus W. Scherer, updated by Matitiahu Allouche
  17. */
  18. #include "cmemory.h"
  19. #include "unicode/utypes.h"
  20. #include "unicode/ustring.h"
  21. #include "unicode/uchar.h"
  22. #include "unicode/ubidi.h"
  23. #include "ubidiimp.h"
  24. #include "uassert.h"
  25. /*
  26. * General remarks about the functions in this file:
  27. *
  28. * These functions deal with the aspects of potentially mixed-directional
  29. * text in a single paragraph or in a line of a single paragraph
  30. * which has already been processed according to
  31. * the Unicode 6.3 BiDi algorithm as defined in
  32. * https://www.unicode.org/reports/tr9/ , version 28,
  33. * also described in The Unicode Standard, Version 6.3.0 .
  34. *
  35. * This means that there is a UBiDi object with a levels
  36. * and a dirProps array.
  37. * paraLevel and direction are also set.
  38. * Only if the length of the text is zero, then levels==dirProps==nullptr.
  39. *
  40. * The overall directionality of the paragraph
  41. * or line is used to bypass the reordering steps if possible.
  42. * Even purely RTL text does not need reordering there because
  43. * the ubidi_getLogical/VisualIndex() functions can compute the
  44. * index on the fly in such a case.
  45. *
  46. * The implementation of the access to same-level-runs and of the reordering
  47. * do attempt to provide better performance and less memory usage compared to
  48. * a direct implementation of especially rule (L2) with an array of
  49. * one (32-bit) integer per text character.
  50. *
  51. * Here, the levels array is scanned as soon as necessary, and a vector of
  52. * same-level-runs is created. Reordering then is done on this vector.
  53. * For each run of text positions that were resolved to the same level,
  54. * only 8 bytes are stored: the first text position of the run and the visual
  55. * position behind the run after reordering.
  56. * One sign bit is used to hold the directionality of the run.
  57. * This is inefficient if there are many very short runs. If the average run
  58. * length is <2, then this uses more memory.
  59. *
  60. * In a further attempt to save memory, the levels array is never changed
  61. * after all the resolution rules (Xn, Wn, Nn, In).
  62. * Many functions have to consider the field trailingWSStart:
  63. * if it is less than length, then there is an implicit trailing run
  64. * at the paraLevel,
  65. * which is not reflected in the levels array.
  66. * This allows a line UBiDi object to use the same levels array as
  67. * its paragraph parent object.
  68. *
  69. * When a UBiDi object is created for a line of a paragraph, then the
  70. * paragraph's levels and dirProps arrays are reused by way of setting
  71. * a pointer into them, not by copying. This again saves memory and forbids to
  72. * change the now shared levels for (L1).
  73. */
  74. /* handle trailing WS (L1) -------------------------------------------------- */
  75. /*
  76. * setTrailingWSStart() sets the start index for a trailing
  77. * run of WS in the line. This is necessary because we do not modify
  78. * the paragraph's levels array that we just point into.
  79. * Using trailingWSStart is another form of performing (L1).
  80. *
  81. * To make subsequent operations easier, we also include the run
  82. * before the WS if it is at the paraLevel - we merge the two here.
  83. *
  84. * This function is called only from ubidi_setLine(), so pBiDi->paraLevel is
  85. * set correctly for the line even when contextual multiple paragraphs.
  86. */
  87. static void
  88. setTrailingWSStart(UBiDi *pBiDi) {
  89. /* pBiDi->direction!=UBIDI_MIXED */
  90. const DirProp *dirProps=pBiDi->dirProps;
  91. UBiDiLevel *levels=pBiDi->levels;
  92. int32_t start=pBiDi->length;
  93. UBiDiLevel paraLevel=pBiDi->paraLevel;
  94. /* If the line is terminated by a block separator, all preceding WS etc...
  95. are already set to paragraph level.
  96. Setting trailingWSStart to pBidi->length will avoid changing the
  97. level of B chars from 0 to paraLevel in ubidi_getLevels when
  98. orderParagraphsLTR==true.
  99. */
  100. if(dirProps[start-1]==B) {
  101. pBiDi->trailingWSStart=start; /* currently == pBiDi->length */
  102. return;
  103. }
  104. /* go backwards across all WS, BN, explicit codes */
  105. while(start>0 && DIRPROP_FLAG(dirProps[start-1])&MASK_WS) {
  106. --start;
  107. }
  108. /* if the WS run can be merged with the previous run then do so here */
  109. while(start>0 && levels[start-1]==paraLevel) {
  110. --start;
  111. }
  112. pBiDi->trailingWSStart=start;
  113. }
  114. /* ubidi_setLine ------------------------------------------------------------ */
  115. U_CAPI void U_EXPORT2
  116. ubidi_setLine(const UBiDi *pParaBiDi,
  117. int32_t start, int32_t limit,
  118. UBiDi *pLineBiDi,
  119. UErrorCode *pErrorCode) {
  120. int32_t length;
  121. /* check the argument values */
  122. RETURN_VOID_IF_NULL_OR_FAILING_ERRCODE(pErrorCode);
  123. RETURN_VOID_IF_NOT_VALID_PARA(pParaBiDi, *pErrorCode);
  124. RETURN_VOID_IF_BAD_RANGE(start, 0, limit, *pErrorCode);
  125. RETURN_VOID_IF_BAD_RANGE(limit, 0, pParaBiDi->length+1, *pErrorCode);
  126. if(pLineBiDi==nullptr) {
  127. *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
  128. return;
  129. }
  130. if(ubidi_getParagraph(pParaBiDi, start, nullptr, nullptr, nullptr, pErrorCode) !=
  131. ubidi_getParagraph(pParaBiDi, limit-1, nullptr, nullptr, nullptr, pErrorCode)) {
  132. /* the line crosses a paragraph boundary */
  133. *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
  134. return;
  135. }
  136. /* set the values in pLineBiDi from its pParaBiDi parent */
  137. pLineBiDi->pParaBiDi=nullptr; /* mark unfinished setLine */
  138. pLineBiDi->text=pParaBiDi->text+start;
  139. length=pLineBiDi->length=limit-start;
  140. pLineBiDi->resultLength=pLineBiDi->originalLength=length;
  141. pLineBiDi->paraLevel=GET_PARALEVEL(pParaBiDi, start);
  142. pLineBiDi->paraCount=pParaBiDi->paraCount;
  143. pLineBiDi->runs=nullptr;
  144. pLineBiDi->flags=0;
  145. pLineBiDi->reorderingMode=pParaBiDi->reorderingMode;
  146. pLineBiDi->reorderingOptions=pParaBiDi->reorderingOptions;
  147. pLineBiDi->controlCount=0;
  148. if(pParaBiDi->controlCount>0) {
  149. int32_t j;
  150. for(j=start; j<limit; j++) {
  151. if(IS_BIDI_CONTROL_CHAR(pParaBiDi->text[j])) {
  152. pLineBiDi->controlCount++;
  153. }
  154. }
  155. pLineBiDi->resultLength-=pLineBiDi->controlCount;
  156. }
  157. pLineBiDi->dirProps=pParaBiDi->dirProps+start;
  158. pLineBiDi->levels=pParaBiDi->levels+start;
  159. pLineBiDi->runCount=-1;
  160. if(pParaBiDi->direction!=UBIDI_MIXED) {
  161. /* the parent is already trivial */
  162. pLineBiDi->direction=pParaBiDi->direction;
  163. /*
  164. * The parent's levels are all either
  165. * implicitly or explicitly ==paraLevel;
  166. * do the same here.
  167. */
  168. if(pParaBiDi->trailingWSStart<=start) {
  169. pLineBiDi->trailingWSStart=0;
  170. } else if(pParaBiDi->trailingWSStart<limit) {
  171. pLineBiDi->trailingWSStart=pParaBiDi->trailingWSStart-start;
  172. } else {
  173. pLineBiDi->trailingWSStart=length;
  174. }
  175. } else {
  176. const UBiDiLevel *levels=pLineBiDi->levels;
  177. int32_t i, trailingWSStart;
  178. UBiDiLevel level;
  179. setTrailingWSStart(pLineBiDi);
  180. trailingWSStart=pLineBiDi->trailingWSStart;
  181. /* recalculate pLineBiDi->direction */
  182. if(trailingWSStart==0) {
  183. /* all levels are at paraLevel */
  184. pLineBiDi->direction=(UBiDiDirection)(pLineBiDi->paraLevel&1);
  185. } else {
  186. /* get the level of the first character */
  187. level=(UBiDiLevel)(levels[0]&1);
  188. /* if there is anything of a different level, then the line is mixed */
  189. if(trailingWSStart<length && (pLineBiDi->paraLevel&1)!=level) {
  190. /* the trailing WS is at paraLevel, which differs from levels[0] */
  191. pLineBiDi->direction=UBIDI_MIXED;
  192. } else {
  193. /* see if levels[1..trailingWSStart-1] have the same direction as levels[0] and paraLevel */
  194. i=1;
  195. for(;;) {
  196. if(i==trailingWSStart) {
  197. /* the direction values match those in level */
  198. pLineBiDi->direction=(UBiDiDirection)level;
  199. break;
  200. } else if((levels[i]&1)!=level) {
  201. pLineBiDi->direction=UBIDI_MIXED;
  202. break;
  203. }
  204. ++i;
  205. }
  206. }
  207. }
  208. switch(pLineBiDi->direction) {
  209. case UBIDI_LTR:
  210. /* make sure paraLevel is even */
  211. pLineBiDi->paraLevel=(UBiDiLevel)((pLineBiDi->paraLevel+1)&~1);
  212. /* all levels are implicitly at paraLevel (important for ubidi_getLevels()) */
  213. pLineBiDi->trailingWSStart=0;
  214. break;
  215. case UBIDI_RTL:
  216. /* make sure paraLevel is odd */
  217. pLineBiDi->paraLevel|=1;
  218. /* all levels are implicitly at paraLevel (important for ubidi_getLevels()) */
  219. pLineBiDi->trailingWSStart=0;
  220. break;
  221. default:
  222. break;
  223. }
  224. }
  225. pLineBiDi->pParaBiDi=pParaBiDi; /* mark successful setLine */
  226. return;
  227. }
  228. U_CAPI UBiDiLevel U_EXPORT2
  229. ubidi_getLevelAt(const UBiDi *pBiDi, int32_t charIndex) {
  230. /* return paraLevel if in the trailing WS run, otherwise the real level */
  231. if(!IS_VALID_PARA_OR_LINE(pBiDi) || charIndex<0 || pBiDi->length<=charIndex) {
  232. return 0;
  233. } else if(pBiDi->direction!=UBIDI_MIXED || charIndex>=pBiDi->trailingWSStart) {
  234. return GET_PARALEVEL(pBiDi, charIndex);
  235. } else {
  236. return pBiDi->levels[charIndex];
  237. }
  238. }
  239. U_CAPI const UBiDiLevel * U_EXPORT2
  240. ubidi_getLevels(UBiDi *pBiDi, UErrorCode *pErrorCode) {
  241. int32_t start, length;
  242. RETURN_IF_NULL_OR_FAILING_ERRCODE(pErrorCode, nullptr);
  243. RETURN_IF_NOT_VALID_PARA_OR_LINE(pBiDi, *pErrorCode, nullptr);
  244. if((length=pBiDi->length)<=0) {
  245. *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
  246. return nullptr;
  247. }
  248. if((start=pBiDi->trailingWSStart)==length) {
  249. /* the current levels array reflects the WS run */
  250. return pBiDi->levels;
  251. }
  252. /*
  253. * After the previous if(), we know that the levels array
  254. * has an implicit trailing WS run and therefore does not fully
  255. * reflect itself all the levels.
  256. * This must be a UBiDi object for a line, and
  257. * we need to create a new levels array.
  258. */
  259. if(getLevelsMemory(pBiDi, length)) {
  260. UBiDiLevel *levels=pBiDi->levelsMemory;
  261. if(start>0 && levels!=pBiDi->levels) {
  262. uprv_memcpy(levels, pBiDi->levels, start);
  263. }
  264. /* pBiDi->paraLevel is ok even if contextual multiple paragraphs,
  265. since pBidi is a line object */
  266. uprv_memset(levels+start, pBiDi->paraLevel, length-start);
  267. /* this new levels array is set for the line and reflects the WS run */
  268. pBiDi->trailingWSStart=length;
  269. return pBiDi->levels=levels;
  270. } else {
  271. /* out of memory */
  272. *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
  273. return nullptr;
  274. }
  275. }
  276. U_CAPI void U_EXPORT2
  277. ubidi_getLogicalRun(const UBiDi *pBiDi, int32_t logicalPosition,
  278. int32_t *pLogicalLimit, UBiDiLevel *pLevel) {
  279. UErrorCode errorCode;
  280. int32_t runCount, visualStart, logicalLimit, logicalFirst, i;
  281. Run iRun;
  282. errorCode=U_ZERO_ERROR;
  283. RETURN_VOID_IF_BAD_RANGE(logicalPosition, 0, pBiDi->length, errorCode);
  284. /* ubidi_countRuns will check VALID_PARA_OR_LINE */
  285. runCount=ubidi_countRuns((UBiDi *)pBiDi, &errorCode);
  286. if(U_FAILURE(errorCode)) {
  287. return;
  288. }
  289. /* this is done based on runs rather than on levels since levels have
  290. a special interpretation when UBIDI_REORDER_RUNS_ONLY
  291. */
  292. visualStart=logicalLimit=0;
  293. iRun=pBiDi->runs[0];
  294. for(i=0; i<runCount; i++) {
  295. iRun = pBiDi->runs[i];
  296. logicalFirst=GET_INDEX(iRun.logicalStart);
  297. logicalLimit=logicalFirst+iRun.visualLimit-visualStart;
  298. if((logicalPosition>=logicalFirst) &&
  299. (logicalPosition<logicalLimit)) {
  300. break;
  301. }
  302. visualStart = iRun.visualLimit;
  303. }
  304. if(pLogicalLimit) {
  305. *pLogicalLimit=logicalLimit;
  306. }
  307. if(pLevel) {
  308. if(pBiDi->reorderingMode==UBIDI_REORDER_RUNS_ONLY) {
  309. *pLevel=(UBiDiLevel)GET_ODD_BIT(iRun.logicalStart);
  310. }
  311. else if(pBiDi->direction!=UBIDI_MIXED || logicalPosition>=pBiDi->trailingWSStart) {
  312. *pLevel=GET_PARALEVEL(pBiDi, logicalPosition);
  313. } else {
  314. *pLevel=pBiDi->levels[logicalPosition];
  315. }
  316. }
  317. }
  318. /* runs API functions ------------------------------------------------------- */
  319. U_CAPI int32_t U_EXPORT2
  320. ubidi_countRuns(UBiDi *pBiDi, UErrorCode *pErrorCode) {
  321. RETURN_IF_NULL_OR_FAILING_ERRCODE(pErrorCode, -1);
  322. RETURN_IF_NOT_VALID_PARA_OR_LINE(pBiDi, *pErrorCode, -1);
  323. ubidi_getRuns(pBiDi, pErrorCode);
  324. if(U_FAILURE(*pErrorCode)) {
  325. return -1;
  326. }
  327. return pBiDi->runCount;
  328. }
  329. U_CAPI UBiDiDirection U_EXPORT2
  330. ubidi_getVisualRun(UBiDi *pBiDi, int32_t runIndex,
  331. int32_t *pLogicalStart, int32_t *pLength)
  332. {
  333. int32_t start;
  334. UErrorCode errorCode = U_ZERO_ERROR;
  335. RETURN_IF_NOT_VALID_PARA_OR_LINE(pBiDi, errorCode, UBIDI_LTR);
  336. ubidi_getRuns(pBiDi, &errorCode);
  337. if(U_FAILURE(errorCode)) {
  338. return UBIDI_LTR;
  339. }
  340. RETURN_IF_BAD_RANGE(runIndex, 0, pBiDi->runCount, errorCode, UBIDI_LTR);
  341. start=pBiDi->runs[runIndex].logicalStart;
  342. if(pLogicalStart!=nullptr) {
  343. *pLogicalStart=GET_INDEX(start);
  344. }
  345. if(pLength!=nullptr) {
  346. if(runIndex>0) {
  347. *pLength=pBiDi->runs[runIndex].visualLimit-
  348. pBiDi->runs[runIndex-1].visualLimit;
  349. } else {
  350. *pLength=pBiDi->runs[0].visualLimit;
  351. }
  352. }
  353. return (UBiDiDirection)GET_ODD_BIT(start);
  354. }
  355. /* in trivial cases there is only one trivial run; called by ubidi_getRuns() */
  356. static void
  357. getSingleRun(UBiDi *pBiDi, UBiDiLevel level) {
  358. /* simple, single-run case */
  359. pBiDi->runs=pBiDi->simpleRuns;
  360. pBiDi->runCount=1;
  361. /* fill and reorder the single run */
  362. pBiDi->runs[0].logicalStart=MAKE_INDEX_ODD_PAIR(0, level);
  363. pBiDi->runs[0].visualLimit=pBiDi->length;
  364. pBiDi->runs[0].insertRemove=0;
  365. }
  366. /* reorder the runs array (L2) ---------------------------------------------- */
  367. /*
  368. * Reorder the same-level runs in the runs array.
  369. * Here, runCount>1 and maxLevel>=minLevel>=paraLevel.
  370. * All the visualStart fields=logical start before reordering.
  371. * The "odd" bits are not set yet.
  372. *
  373. * Reordering with this data structure lends itself to some handy shortcuts:
  374. *
  375. * Since each run is moved but not modified, and since at the initial maxLevel
  376. * each sequence of same-level runs consists of only one run each, we
  377. * don't need to do anything there and can predecrement maxLevel.
  378. * In many simple cases, the reordering is thus done entirely in the
  379. * index mapping.
  380. * Also, reordering occurs only down to the lowest odd level that occurs,
  381. * which is minLevel|1. However, if the lowest level itself is odd, then
  382. * in the last reordering the sequence of the runs at this level or higher
  383. * will be all runs, and we don't need the elaborate loop to search for them.
  384. * This is covered by ++minLevel instead of minLevel|=1 followed
  385. * by an extra reorder-all after the reorder-some loop.
  386. * About a trailing WS run:
  387. * Such a run would need special treatment because its level is not
  388. * reflected in levels[] if this is not a paragraph object.
  389. * Instead, all characters from trailingWSStart on are implicitly at
  390. * paraLevel.
  391. * However, for all maxLevel>paraLevel, this run will never be reordered
  392. * and does not need to be taken into account. maxLevel==paraLevel is only reordered
  393. * if minLevel==paraLevel is odd, which is done in the extra segment.
  394. * This means that for the main reordering loop we don't need to consider
  395. * this run and can --runCount. If it is later part of the all-runs
  396. * reordering, then runCount is adjusted accordingly.
  397. */
  398. static void
  399. reorderLine(UBiDi *pBiDi, UBiDiLevel minLevel, UBiDiLevel maxLevel) {
  400. Run *runs, tempRun;
  401. UBiDiLevel *levels;
  402. int32_t firstRun, endRun, limitRun, runCount;
  403. /* nothing to do? */
  404. if(maxLevel<=(minLevel|1)) {
  405. return;
  406. }
  407. /*
  408. * Reorder only down to the lowest odd level
  409. * and reorder at an odd minLevel in a separate, simpler loop.
  410. * See comments above for why minLevel is always incremented.
  411. */
  412. ++minLevel;
  413. runs=pBiDi->runs;
  414. levels=pBiDi->levels;
  415. runCount=pBiDi->runCount;
  416. /* do not include the WS run at paraLevel<=old minLevel except in the simple loop */
  417. if(pBiDi->trailingWSStart<pBiDi->length) {
  418. --runCount;
  419. }
  420. while(--maxLevel>=minLevel) {
  421. firstRun=0;
  422. /* loop for all sequences of runs */
  423. for(;;) {
  424. /* look for a sequence of runs that are all at >=maxLevel */
  425. /* look for the first run of such a sequence */
  426. while(firstRun<runCount && levels[runs[firstRun].logicalStart]<maxLevel) {
  427. ++firstRun;
  428. }
  429. if(firstRun>=runCount) {
  430. break; /* no more such runs */
  431. }
  432. /* look for the limit run of such a sequence (the run behind it) */
  433. for(limitRun=firstRun; ++limitRun<runCount && levels[runs[limitRun].logicalStart]>=maxLevel;) {}
  434. /* Swap the entire sequence of runs from firstRun to limitRun-1. */
  435. endRun=limitRun-1;
  436. while(firstRun<endRun) {
  437. tempRun = runs[firstRun];
  438. runs[firstRun]=runs[endRun];
  439. runs[endRun]=tempRun;
  440. ++firstRun;
  441. --endRun;
  442. }
  443. if(limitRun==runCount) {
  444. break; /* no more such runs */
  445. } else {
  446. firstRun=limitRun+1;
  447. }
  448. }
  449. }
  450. /* now do maxLevel==old minLevel (==odd!), see above */
  451. if(!(minLevel&1)) {
  452. firstRun=0;
  453. /* include the trailing WS run in this complete reordering */
  454. if(pBiDi->trailingWSStart==pBiDi->length) {
  455. --runCount;
  456. }
  457. /* Swap the entire sequence of all runs. (endRun==runCount) */
  458. while(firstRun<runCount) {
  459. tempRun=runs[firstRun];
  460. runs[firstRun]=runs[runCount];
  461. runs[runCount]=tempRun;
  462. ++firstRun;
  463. --runCount;
  464. }
  465. }
  466. }
  467. /* compute the runs array --------------------------------------------------- */
  468. static int32_t getRunFromLogicalIndex(UBiDi *pBiDi, int32_t logicalIndex) {
  469. Run *runs=pBiDi->runs;
  470. int32_t runCount=pBiDi->runCount, visualStart=0, i, length, logicalStart;
  471. for(i=0; i<runCount; i++) {
  472. length=runs[i].visualLimit-visualStart;
  473. logicalStart=GET_INDEX(runs[i].logicalStart);
  474. if((logicalIndex>=logicalStart) && (logicalIndex<(logicalStart+length))) {
  475. return i;
  476. }
  477. visualStart+=length;
  478. }
  479. /* we should never get here */
  480. UPRV_UNREACHABLE_EXIT;
  481. }
  482. /*
  483. * Compute the runs array from the levels array.
  484. * After ubidi_getRuns() returns true, runCount is guaranteed to be >0
  485. * and the runs are reordered.
  486. * Odd-level runs have visualStart on their visual right edge and
  487. * they progress visually to the left.
  488. * If option UBIDI_OPTION_INSERT_MARKS is set, insertRemove will contain the
  489. * sum of appropriate LRM/RLM_BEFORE/AFTER flags.
  490. * If option UBIDI_OPTION_REMOVE_CONTROLS is set, insertRemove will contain the
  491. * negative number of BiDi control characters within this run.
  492. */
  493. U_CFUNC UBool
  494. ubidi_getRuns(UBiDi *pBiDi, UErrorCode*) {
  495. /*
  496. * This method returns immediately if the runs are already set. This
  497. * includes the case of length==0 (handled in setPara)..
  498. */
  499. if (pBiDi->runCount>=0) {
  500. return true;
  501. }
  502. if(pBiDi->direction!=UBIDI_MIXED) {
  503. /* simple, single-run case - this covers length==0 */
  504. /* pBiDi->paraLevel is ok even for contextual multiple paragraphs */
  505. getSingleRun(pBiDi, pBiDi->paraLevel);
  506. } else /* UBIDI_MIXED, length>0 */ {
  507. /* mixed directionality */
  508. int32_t length=pBiDi->length, limit;
  509. UBiDiLevel *levels=pBiDi->levels;
  510. int32_t i, runCount;
  511. UBiDiLevel level=UBIDI_DEFAULT_LTR; /* initialize with no valid level */
  512. /*
  513. * If there are WS characters at the end of the line
  514. * and the run preceding them has a level different from
  515. * paraLevel, then they will form their own run at paraLevel (L1).
  516. * Count them separately.
  517. * We need some special treatment for this in order to not
  518. * modify the levels array which a line UBiDi object shares
  519. * with its paragraph parent and its other line siblings.
  520. * In other words, for the trailing WS, it may be
  521. * levels[]!=paraLevel but we have to treat it like it were so.
  522. */
  523. limit=pBiDi->trailingWSStart;
  524. /* count the runs, there is at least one non-WS run, and limit>0 */
  525. runCount=0;
  526. for(i=0; i<limit; ++i) {
  527. /* increment runCount at the start of each run */
  528. if(levels[i]!=level) {
  529. ++runCount;
  530. level=levels[i];
  531. }
  532. }
  533. /*
  534. * We don't need to see if the last run can be merged with a trailing
  535. * WS run because setTrailingWSStart() would have done that.
  536. */
  537. if(runCount==1 && limit==length) {
  538. /* There is only one non-WS run and no trailing WS-run. */
  539. getSingleRun(pBiDi, levels[0]);
  540. } else /* runCount>1 || limit<length */ {
  541. /* allocate and set the runs */
  542. Run *runs;
  543. int32_t runIndex, start;
  544. UBiDiLevel minLevel=UBIDI_MAX_EXPLICIT_LEVEL+1, maxLevel=0;
  545. /* now, count a (non-mergeable) WS run */
  546. if(limit<length) {
  547. ++runCount;
  548. }
  549. /* runCount>1 */
  550. if(getRunsMemory(pBiDi, runCount)) {
  551. runs=pBiDi->runsMemory;
  552. } else {
  553. return false;
  554. }
  555. /* set the runs */
  556. /* FOOD FOR THOUGHT: this could be optimized, e.g.:
  557. * 464->444, 484->444, 575->555, 595->555
  558. * However, that would take longer. Check also how it would
  559. * interact with BiDi control removal and inserting Marks.
  560. */
  561. runIndex=0;
  562. /* search for the run limits and initialize visualLimit values with the run lengths */
  563. i=0;
  564. do {
  565. /* prepare this run */
  566. start=i;
  567. level=levels[i];
  568. if(level<minLevel) {
  569. minLevel=level;
  570. }
  571. if(level>maxLevel) {
  572. maxLevel=level;
  573. }
  574. /* look for the run limit */
  575. while(++i<limit && levels[i]==level) {}
  576. /* i is another run limit */
  577. runs[runIndex].logicalStart=start;
  578. runs[runIndex].visualLimit=i-start;
  579. runs[runIndex].insertRemove=0;
  580. ++runIndex;
  581. } while(i<limit);
  582. if(limit<length) {
  583. /* there is a separate WS run */
  584. runs[runIndex].logicalStart=limit;
  585. runs[runIndex].visualLimit=length-limit;
  586. /* For the trailing WS run, pBiDi->paraLevel is ok even
  587. if contextual multiple paragraphs. */
  588. if(pBiDi->paraLevel<minLevel) {
  589. minLevel=pBiDi->paraLevel;
  590. }
  591. }
  592. /* set the object fields */
  593. pBiDi->runs=runs;
  594. pBiDi->runCount=runCount;
  595. reorderLine(pBiDi, minLevel, maxLevel);
  596. /* now add the direction flags and adjust the visualLimit's to be just that */
  597. /* this loop will also handle the trailing WS run */
  598. limit=0;
  599. for(i=0; i<runCount; ++i) {
  600. ADD_ODD_BIT_FROM_LEVEL(runs[i].logicalStart, levels[runs[i].logicalStart]);
  601. limit+=runs[i].visualLimit;
  602. runs[i].visualLimit=limit;
  603. }
  604. /* Set the "odd" bit for the trailing WS run. */
  605. /* For a RTL paragraph, it will be the *first* run in visual order. */
  606. /* For the trailing WS run, pBiDi->paraLevel is ok even if
  607. contextual multiple paragraphs. */
  608. if(runIndex<runCount) {
  609. int32_t trailingRun = ((pBiDi->paraLevel & 1) != 0)? 0 : runIndex;
  610. ADD_ODD_BIT_FROM_LEVEL(runs[trailingRun].logicalStart, pBiDi->paraLevel);
  611. }
  612. }
  613. }
  614. /* handle insert LRM/RLM BEFORE/AFTER run */
  615. if(pBiDi->insertPoints.size>0) {
  616. Point *point, *start=pBiDi->insertPoints.points,
  617. *limit=start+pBiDi->insertPoints.size;
  618. int32_t runIndex;
  619. for(point=start; point<limit; point++) {
  620. runIndex=getRunFromLogicalIndex(pBiDi, point->pos);
  621. pBiDi->runs[runIndex].insertRemove|=point->flag;
  622. }
  623. }
  624. /* handle remove BiDi control characters */
  625. if(pBiDi->controlCount>0) {
  626. int32_t runIndex;
  627. const char16_t *start=pBiDi->text, *limit=start+pBiDi->length, *pu;
  628. for(pu=start; pu<limit; pu++) {
  629. if(IS_BIDI_CONTROL_CHAR(*pu)) {
  630. runIndex=getRunFromLogicalIndex(pBiDi, (int32_t)(pu-start));
  631. pBiDi->runs[runIndex].insertRemove--;
  632. }
  633. }
  634. }
  635. return true;
  636. }
  637. static UBool
  638. prepareReorder(const UBiDiLevel *levels, int32_t length,
  639. int32_t *indexMap,
  640. UBiDiLevel *pMinLevel, UBiDiLevel *pMaxLevel) {
  641. int32_t start;
  642. UBiDiLevel level, minLevel, maxLevel;
  643. if(levels==nullptr || length<=0) {
  644. return false;
  645. }
  646. /* determine minLevel and maxLevel */
  647. minLevel=UBIDI_MAX_EXPLICIT_LEVEL+1;
  648. maxLevel=0;
  649. for(start=length; start>0;) {
  650. level=levels[--start];
  651. if(level>UBIDI_MAX_EXPLICIT_LEVEL+1) {
  652. return false;
  653. }
  654. if(level<minLevel) {
  655. minLevel=level;
  656. }
  657. if(level>maxLevel) {
  658. maxLevel=level;
  659. }
  660. }
  661. *pMinLevel=minLevel;
  662. *pMaxLevel=maxLevel;
  663. /* initialize the index map */
  664. for(start=length; start>0;) {
  665. --start;
  666. indexMap[start]=start;
  667. }
  668. return true;
  669. }
  670. /* reorder a line based on a levels array (L2) ------------------------------ */
  671. U_CAPI void U_EXPORT2
  672. ubidi_reorderLogical(const UBiDiLevel *levels, int32_t length, int32_t *indexMap) {
  673. int32_t start, limit, sumOfSosEos;
  674. UBiDiLevel minLevel = 0, maxLevel = 0;
  675. if(indexMap==nullptr || !prepareReorder(levels, length, indexMap, &minLevel, &maxLevel)) {
  676. return;
  677. }
  678. /* nothing to do? */
  679. if(minLevel==maxLevel && (minLevel&1)==0) {
  680. return;
  681. }
  682. /* reorder only down to the lowest odd level */
  683. minLevel|=1;
  684. /* loop maxLevel..minLevel */
  685. do {
  686. start=0;
  687. /* loop for all sequences of levels to reorder at the current maxLevel */
  688. for(;;) {
  689. /* look for a sequence of levels that are all at >=maxLevel */
  690. /* look for the first index of such a sequence */
  691. while(start<length && levels[start]<maxLevel) {
  692. ++start;
  693. }
  694. if(start>=length) {
  695. break; /* no more such sequences */
  696. }
  697. /* look for the limit of such a sequence (the index behind it) */
  698. for(limit=start; ++limit<length && levels[limit]>=maxLevel;) {}
  699. /*
  700. * sos=start of sequence, eos=end of sequence
  701. *
  702. * The closed (inclusive) interval from sos to eos includes all the logical
  703. * and visual indexes within this sequence. They are logically and
  704. * visually contiguous and in the same range.
  705. *
  706. * For each run, the new visual index=sos+eos-old visual index;
  707. * we pre-add sos+eos into sumOfSosEos ->
  708. * new visual index=sumOfSosEos-old visual index;
  709. */
  710. sumOfSosEos=start+limit-1;
  711. /* reorder each index in the sequence */
  712. do {
  713. indexMap[start]=sumOfSosEos-indexMap[start];
  714. } while(++start<limit);
  715. /* start==limit */
  716. if(limit==length) {
  717. break; /* no more such sequences */
  718. } else {
  719. start=limit+1;
  720. }
  721. }
  722. } while(--maxLevel>=minLevel);
  723. }
  724. U_CAPI void U_EXPORT2
  725. ubidi_reorderVisual(const UBiDiLevel *levels, int32_t length, int32_t *indexMap) {
  726. int32_t start, end, limit, temp;
  727. UBiDiLevel minLevel = 0, maxLevel = 0;
  728. if(indexMap==nullptr || !prepareReorder(levels, length, indexMap, &minLevel, &maxLevel)) {
  729. return;
  730. }
  731. /* nothing to do? */
  732. if(minLevel==maxLevel && (minLevel&1)==0) {
  733. return;
  734. }
  735. /* reorder only down to the lowest odd level */
  736. minLevel|=1;
  737. /* loop maxLevel..minLevel */
  738. do {
  739. start=0;
  740. /* loop for all sequences of levels to reorder at the current maxLevel */
  741. for(;;) {
  742. /* look for a sequence of levels that are all at >=maxLevel */
  743. /* look for the first index of such a sequence */
  744. while(start<length && levels[start]<maxLevel) {
  745. ++start;
  746. }
  747. if(start>=length) {
  748. break; /* no more such runs */
  749. }
  750. /* look for the limit of such a sequence (the index behind it) */
  751. for(limit=start; ++limit<length && levels[limit]>=maxLevel;) {}
  752. /*
  753. * Swap the entire interval of indexes from start to limit-1.
  754. * We don't need to swap the levels for the purpose of this
  755. * algorithm: the sequence of levels that we look at does not
  756. * move anyway.
  757. */
  758. end=limit-1;
  759. while(start<end) {
  760. temp=indexMap[start];
  761. indexMap[start]=indexMap[end];
  762. indexMap[end]=temp;
  763. ++start;
  764. --end;
  765. }
  766. if(limit==length) {
  767. break; /* no more such sequences */
  768. } else {
  769. start=limit+1;
  770. }
  771. }
  772. } while(--maxLevel>=minLevel);
  773. }
  774. /* API functions for logical<->visual mapping ------------------------------- */
  775. U_CAPI int32_t U_EXPORT2
  776. ubidi_getVisualIndex(UBiDi *pBiDi, int32_t logicalIndex, UErrorCode *pErrorCode) {
  777. int32_t visualIndex=UBIDI_MAP_NOWHERE;
  778. RETURN_IF_NULL_OR_FAILING_ERRCODE(pErrorCode, -1);
  779. RETURN_IF_NOT_VALID_PARA_OR_LINE(pBiDi, *pErrorCode, -1);
  780. RETURN_IF_BAD_RANGE(logicalIndex, 0, pBiDi->length, *pErrorCode, -1);
  781. /* we can do the trivial cases without the runs array */
  782. switch(pBiDi->direction) {
  783. case UBIDI_LTR:
  784. visualIndex=logicalIndex;
  785. break;
  786. case UBIDI_RTL:
  787. visualIndex=pBiDi->length-logicalIndex-1;
  788. break;
  789. default:
  790. if(!ubidi_getRuns(pBiDi, pErrorCode)) {
  791. *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
  792. return -1;
  793. } else {
  794. Run *runs=pBiDi->runs;
  795. int32_t i, visualStart=0, offset, length;
  796. /* linear search for the run, search on the visual runs */
  797. for(i=0; i<pBiDi->runCount; ++i) {
  798. length=runs[i].visualLimit-visualStart;
  799. offset=logicalIndex-GET_INDEX(runs[i].logicalStart);
  800. if(offset>=0 && offset<length) {
  801. if(IS_EVEN_RUN(runs[i].logicalStart)) {
  802. /* LTR */
  803. visualIndex=visualStart+offset;
  804. } else {
  805. /* RTL */
  806. visualIndex=visualStart+length-offset-1;
  807. }
  808. break; /* exit for loop */
  809. }
  810. visualStart+=length;
  811. }
  812. if(i>=pBiDi->runCount) {
  813. return UBIDI_MAP_NOWHERE;
  814. }
  815. }
  816. }
  817. if(pBiDi->insertPoints.size>0) {
  818. /* add the number of added marks until the calculated visual index */
  819. Run *runs=pBiDi->runs;
  820. int32_t i, length, insertRemove;
  821. int32_t visualStart=0, markFound=0;
  822. for(i=0; ; i++, visualStart+=length) {
  823. length=runs[i].visualLimit-visualStart;
  824. insertRemove=runs[i].insertRemove;
  825. if(insertRemove & (LRM_BEFORE|RLM_BEFORE)) {
  826. markFound++;
  827. }
  828. /* is it the run containing the visual index? */
  829. if(visualIndex<runs[i].visualLimit) {
  830. return visualIndex+markFound;
  831. }
  832. if(insertRemove & (LRM_AFTER|RLM_AFTER)) {
  833. markFound++;
  834. }
  835. }
  836. }
  837. else if(pBiDi->controlCount>0) {
  838. /* subtract the number of controls until the calculated visual index */
  839. Run *runs=pBiDi->runs;
  840. int32_t i, j, start, limit, length, insertRemove;
  841. int32_t visualStart=0, controlFound=0;
  842. char16_t uchar=pBiDi->text[logicalIndex];
  843. /* is the logical index pointing to a control ? */
  844. if(IS_BIDI_CONTROL_CHAR(uchar)) {
  845. return UBIDI_MAP_NOWHERE;
  846. }
  847. /* loop on runs */
  848. for(i=0; ; i++, visualStart+=length) {
  849. length=runs[i].visualLimit-visualStart;
  850. insertRemove=runs[i].insertRemove;
  851. /* calculated visual index is beyond this run? */
  852. if(visualIndex>=runs[i].visualLimit) {
  853. controlFound-=insertRemove;
  854. continue;
  855. }
  856. /* calculated visual index must be within current run */
  857. if(insertRemove==0) {
  858. return visualIndex-controlFound;
  859. }
  860. if(IS_EVEN_RUN(runs[i].logicalStart)) {
  861. /* LTR: check from run start to logical index */
  862. start=runs[i].logicalStart;
  863. limit=logicalIndex;
  864. } else {
  865. /* RTL: check from logical index to run end */
  866. start=logicalIndex+1;
  867. limit=GET_INDEX(runs[i].logicalStart)+length;
  868. }
  869. for(j=start; j<limit; j++) {
  870. uchar=pBiDi->text[j];
  871. if(IS_BIDI_CONTROL_CHAR(uchar)) {
  872. controlFound++;
  873. }
  874. }
  875. return visualIndex-controlFound;
  876. }
  877. }
  878. return visualIndex;
  879. }
  880. U_CAPI int32_t U_EXPORT2
  881. ubidi_getLogicalIndex(UBiDi *pBiDi, int32_t visualIndex, UErrorCode *pErrorCode) {
  882. Run *runs;
  883. int32_t i, runCount, start;
  884. RETURN_IF_NULL_OR_FAILING_ERRCODE(pErrorCode, -1);
  885. RETURN_IF_NOT_VALID_PARA_OR_LINE(pBiDi, *pErrorCode, -1);
  886. RETURN_IF_BAD_RANGE(visualIndex, 0, pBiDi->resultLength, *pErrorCode, -1);
  887. /* we can do the trivial cases without the runs array */
  888. if(pBiDi->insertPoints.size==0 && pBiDi->controlCount==0) {
  889. if(pBiDi->direction==UBIDI_LTR) {
  890. return visualIndex;
  891. }
  892. else if(pBiDi->direction==UBIDI_RTL) {
  893. return pBiDi->length-visualIndex-1;
  894. }
  895. }
  896. if(!ubidi_getRuns(pBiDi, pErrorCode)) {
  897. *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
  898. return -1;
  899. }
  900. runs=pBiDi->runs;
  901. runCount=pBiDi->runCount;
  902. if(pBiDi->insertPoints.size>0) {
  903. /* handle inserted LRM/RLM */
  904. int32_t markFound=0, insertRemove;
  905. int32_t visualStart=0, length;
  906. runs=pBiDi->runs;
  907. /* subtract number of marks until visual index */
  908. for(i=0; ; i++, visualStart+=length) {
  909. length=runs[i].visualLimit-visualStart;
  910. insertRemove=runs[i].insertRemove;
  911. if(insertRemove&(LRM_BEFORE|RLM_BEFORE)) {
  912. if(visualIndex<=(visualStart+markFound)) {
  913. return UBIDI_MAP_NOWHERE;
  914. }
  915. markFound++;
  916. }
  917. /* is adjusted visual index within this run? */
  918. if(visualIndex<(runs[i].visualLimit+markFound)) {
  919. visualIndex-=markFound;
  920. break;
  921. }
  922. if(insertRemove&(LRM_AFTER|RLM_AFTER)) {
  923. if(visualIndex==(visualStart+length+markFound)) {
  924. return UBIDI_MAP_NOWHERE;
  925. }
  926. markFound++;
  927. }
  928. }
  929. }
  930. else if(pBiDi->controlCount>0) {
  931. /* handle removed BiDi control characters */
  932. int32_t controlFound=0, insertRemove, length;
  933. int32_t logicalStart, logicalEnd, visualStart=0, j, k;
  934. char16_t uchar;
  935. UBool evenRun;
  936. /* add number of controls until visual index */
  937. for(i=0; ; i++, visualStart+=length) {
  938. length=runs[i].visualLimit-visualStart;
  939. insertRemove=runs[i].insertRemove;
  940. /* is adjusted visual index beyond current run? */
  941. if(visualIndex>=(runs[i].visualLimit-controlFound+insertRemove)) {
  942. controlFound-=insertRemove;
  943. continue;
  944. }
  945. /* adjusted visual index is within current run */
  946. if(insertRemove==0) {
  947. visualIndex+=controlFound;
  948. break;
  949. }
  950. /* count non-control chars until visualIndex */
  951. logicalStart=runs[i].logicalStart;
  952. evenRun=IS_EVEN_RUN(logicalStart);
  953. REMOVE_ODD_BIT(logicalStart);
  954. logicalEnd=logicalStart+length-1;
  955. for(j=0; j<length; j++) {
  956. k= evenRun ? logicalStart+j : logicalEnd-j;
  957. uchar=pBiDi->text[k];
  958. if(IS_BIDI_CONTROL_CHAR(uchar)) {
  959. controlFound++;
  960. }
  961. if((visualIndex+controlFound)==(visualStart+j)) {
  962. break;
  963. }
  964. }
  965. visualIndex+=controlFound;
  966. break;
  967. }
  968. }
  969. /* handle all cases */
  970. if(runCount<=10) {
  971. /* linear search for the run */
  972. for(i=0; visualIndex>=runs[i].visualLimit; ++i) {}
  973. } else {
  974. /* binary search for the run */
  975. int32_t begin=0, limit=runCount;
  976. /* the middle if() is guaranteed to find the run, we don't need a loop limit */
  977. for(;;) {
  978. i=(begin+limit)/2;
  979. if(visualIndex>=runs[i].visualLimit) {
  980. begin=i+1;
  981. } else if(i==0 || visualIndex>=runs[i-1].visualLimit) {
  982. break;
  983. } else {
  984. limit=i;
  985. }
  986. }
  987. }
  988. start=runs[i].logicalStart;
  989. if(IS_EVEN_RUN(start)) {
  990. /* LTR */
  991. /* the offset in runs[i] is visualIndex-runs[i-1].visualLimit */
  992. if(i>0) {
  993. visualIndex-=runs[i-1].visualLimit;
  994. }
  995. return start+visualIndex;
  996. } else {
  997. /* RTL */
  998. return GET_INDEX(start)+runs[i].visualLimit-visualIndex-1;
  999. }
  1000. }
  1001. U_CAPI void U_EXPORT2
  1002. ubidi_getLogicalMap(UBiDi *pBiDi, int32_t *indexMap, UErrorCode *pErrorCode) {
  1003. RETURN_VOID_IF_NULL_OR_FAILING_ERRCODE(pErrorCode);
  1004. /* ubidi_countRuns() checks for VALID_PARA_OR_LINE */
  1005. ubidi_countRuns(pBiDi, pErrorCode);
  1006. if(U_FAILURE(*pErrorCode)) {
  1007. /* no op */
  1008. } else if(indexMap==nullptr) {
  1009. *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
  1010. } else {
  1011. /* fill a logical-to-visual index map using the runs[] */
  1012. int32_t visualStart, visualLimit, i, j, k;
  1013. int32_t logicalStart, logicalLimit;
  1014. Run *runs=pBiDi->runs;
  1015. if (pBiDi->length<=0) {
  1016. return;
  1017. }
  1018. if (pBiDi->length>pBiDi->resultLength) {
  1019. uprv_memset(indexMap, 0xFF, pBiDi->length*sizeof(int32_t));
  1020. }
  1021. visualStart=0;
  1022. for(j=0; j<pBiDi->runCount; ++j) {
  1023. logicalStart=GET_INDEX(runs[j].logicalStart);
  1024. visualLimit=runs[j].visualLimit;
  1025. if(IS_EVEN_RUN(runs[j].logicalStart)) {
  1026. do { /* LTR */
  1027. indexMap[logicalStart++]=visualStart++;
  1028. } while(visualStart<visualLimit);
  1029. } else {
  1030. logicalStart+=visualLimit-visualStart; /* logicalLimit */
  1031. do { /* RTL */
  1032. indexMap[--logicalStart]=visualStart++;
  1033. } while(visualStart<visualLimit);
  1034. }
  1035. /* visualStart==visualLimit; */
  1036. }
  1037. if(pBiDi->insertPoints.size>0) {
  1038. int32_t markFound=0, runCount=pBiDi->runCount;
  1039. int32_t length, insertRemove;
  1040. visualStart=0;
  1041. /* add number of marks found until each index */
  1042. for(i=0; i<runCount; i++, visualStart+=length) {
  1043. length=runs[i].visualLimit-visualStart;
  1044. insertRemove=runs[i].insertRemove;
  1045. if(insertRemove&(LRM_BEFORE|RLM_BEFORE)) {
  1046. markFound++;
  1047. }
  1048. if(markFound>0) {
  1049. logicalStart=GET_INDEX(runs[i].logicalStart);
  1050. logicalLimit=logicalStart+length;
  1051. for(j=logicalStart; j<logicalLimit; j++) {
  1052. indexMap[j]+=markFound;
  1053. }
  1054. }
  1055. if(insertRemove&(LRM_AFTER|RLM_AFTER)) {
  1056. markFound++;
  1057. }
  1058. }
  1059. }
  1060. else if(pBiDi->controlCount>0) {
  1061. int32_t controlFound=0, runCount=pBiDi->runCount;
  1062. int32_t length, insertRemove;
  1063. UBool evenRun;
  1064. char16_t uchar;
  1065. visualStart=0;
  1066. /* subtract number of controls found until each index */
  1067. for(i=0; i<runCount; i++, visualStart+=length) {
  1068. length=runs[i].visualLimit-visualStart;
  1069. insertRemove=runs[i].insertRemove;
  1070. /* no control found within previous runs nor within this run */
  1071. if((controlFound-insertRemove)==0) {
  1072. continue;
  1073. }
  1074. logicalStart=runs[i].logicalStart;
  1075. evenRun=IS_EVEN_RUN(logicalStart);
  1076. REMOVE_ODD_BIT(logicalStart);
  1077. logicalLimit=logicalStart+length;
  1078. /* if no control within this run */
  1079. if(insertRemove==0) {
  1080. for(j=logicalStart; j<logicalLimit; j++) {
  1081. indexMap[j]-=controlFound;
  1082. }
  1083. continue;
  1084. }
  1085. for(j=0; j<length; j++) {
  1086. k= evenRun ? logicalStart+j : logicalLimit-j-1;
  1087. uchar=pBiDi->text[k];
  1088. if(IS_BIDI_CONTROL_CHAR(uchar)) {
  1089. controlFound++;
  1090. indexMap[k]=UBIDI_MAP_NOWHERE;
  1091. continue;
  1092. }
  1093. indexMap[k]-=controlFound;
  1094. }
  1095. }
  1096. }
  1097. }
  1098. }
  1099. U_CAPI void U_EXPORT2
  1100. ubidi_getVisualMap(UBiDi *pBiDi, int32_t *indexMap, UErrorCode *pErrorCode) {
  1101. RETURN_VOID_IF_NULL_OR_FAILING_ERRCODE(pErrorCode);
  1102. if(indexMap==nullptr) {
  1103. *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
  1104. return;
  1105. }
  1106. /* ubidi_countRuns() checks for VALID_PARA_OR_LINE */
  1107. ubidi_countRuns(pBiDi, pErrorCode);
  1108. if(U_SUCCESS(*pErrorCode)) {
  1109. /* fill a visual-to-logical index map using the runs[] */
  1110. Run *runs=pBiDi->runs, *runsLimit=runs+pBiDi->runCount;
  1111. int32_t logicalStart, visualStart, visualLimit, *pi=indexMap;
  1112. if (pBiDi->resultLength<=0) {
  1113. return;
  1114. }
  1115. visualStart=0;
  1116. for(; runs<runsLimit; ++runs) {
  1117. logicalStart=runs->logicalStart;
  1118. visualLimit=runs->visualLimit;
  1119. if(IS_EVEN_RUN(logicalStart)) {
  1120. do { /* LTR */
  1121. *pi++ = logicalStart++;
  1122. } while(++visualStart<visualLimit);
  1123. } else {
  1124. REMOVE_ODD_BIT(logicalStart);
  1125. logicalStart+=visualLimit-visualStart; /* logicalLimit */
  1126. do { /* RTL */
  1127. *pi++ = --logicalStart;
  1128. } while(++visualStart<visualLimit);
  1129. }
  1130. /* visualStart==visualLimit; */
  1131. }
  1132. if(pBiDi->insertPoints.size>0) {
  1133. int32_t markFound=0, runCount=pBiDi->runCount;
  1134. int32_t insertRemove, i, j, k;
  1135. runs=pBiDi->runs;
  1136. /* count all inserted marks */
  1137. for(i=0; i<runCount; i++) {
  1138. insertRemove=runs[i].insertRemove;
  1139. if(insertRemove&(LRM_BEFORE|RLM_BEFORE)) {
  1140. markFound++;
  1141. }
  1142. if(insertRemove&(LRM_AFTER|RLM_AFTER)) {
  1143. markFound++;
  1144. }
  1145. }
  1146. /* move back indexes by number of preceding marks */
  1147. k=pBiDi->resultLength;
  1148. for(i=runCount-1; i>=0 && markFound>0; i--) {
  1149. insertRemove=runs[i].insertRemove;
  1150. if(insertRemove&(LRM_AFTER|RLM_AFTER)) {
  1151. indexMap[--k]= UBIDI_MAP_NOWHERE;
  1152. markFound--;
  1153. }
  1154. visualStart= i>0 ? runs[i-1].visualLimit : 0;
  1155. for(j=runs[i].visualLimit-1; j>=visualStart && markFound>0; j--) {
  1156. indexMap[--k]=indexMap[j];
  1157. }
  1158. if(insertRemove&(LRM_BEFORE|RLM_BEFORE)) {
  1159. indexMap[--k]= UBIDI_MAP_NOWHERE;
  1160. markFound--;
  1161. }
  1162. }
  1163. }
  1164. else if(pBiDi->controlCount>0) {
  1165. int32_t runCount=pBiDi->runCount, logicalEnd;
  1166. int32_t insertRemove, length, i, j, k, m;
  1167. char16_t uchar;
  1168. UBool evenRun;
  1169. runs=pBiDi->runs;
  1170. visualStart=0;
  1171. /* move forward indexes by number of preceding controls */
  1172. k=0;
  1173. for(i=0; i<runCount; i++, visualStart+=length) {
  1174. length=runs[i].visualLimit-visualStart;
  1175. insertRemove=runs[i].insertRemove;
  1176. /* if no control found yet, nothing to do in this run */
  1177. if((insertRemove==0)&&(k==visualStart)) {
  1178. k+=length;
  1179. continue;
  1180. }
  1181. /* if no control in this run */
  1182. if(insertRemove==0) {
  1183. visualLimit=runs[i].visualLimit;
  1184. for(j=visualStart; j<visualLimit; j++) {
  1185. indexMap[k++]=indexMap[j];
  1186. }
  1187. continue;
  1188. }
  1189. logicalStart=runs[i].logicalStart;
  1190. evenRun=IS_EVEN_RUN(logicalStart);
  1191. REMOVE_ODD_BIT(logicalStart);
  1192. logicalEnd=logicalStart+length-1;
  1193. for(j=0; j<length; j++) {
  1194. m= evenRun ? logicalStart+j : logicalEnd-j;
  1195. uchar=pBiDi->text[m];
  1196. if(!IS_BIDI_CONTROL_CHAR(uchar)) {
  1197. indexMap[k++]=m;
  1198. }
  1199. }
  1200. }
  1201. }
  1202. }
  1203. }
  1204. U_CAPI void U_EXPORT2
  1205. ubidi_invertMap(const int32_t *srcMap, int32_t *destMap, int32_t length) {
  1206. if(srcMap!=nullptr && destMap!=nullptr && length>0) {
  1207. const int32_t *pi;
  1208. int32_t destLength=-1, count=0;
  1209. /* find highest value and count positive indexes in srcMap */
  1210. pi=srcMap+length;
  1211. while(pi>srcMap) {
  1212. if(*--pi>destLength) {
  1213. destLength=*pi;
  1214. }
  1215. if(*pi>=0) {
  1216. count++;
  1217. }
  1218. }
  1219. destLength++; /* add 1 for origin 0 */
  1220. if(count<destLength) {
  1221. /* we must fill unmatched destMap entries with -1 */
  1222. uprv_memset(destMap, 0xFF, destLength*sizeof(int32_t));
  1223. }
  1224. pi=srcMap+length;
  1225. while(length>0) {
  1226. if(*--pi>=0) {
  1227. destMap[*pi]=--length;
  1228. } else {
  1229. --length;
  1230. }
  1231. }
  1232. }
  1233. }