ubidiln.cpp 48 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211121212131214121512161217121812191220122112221223122412251226122712281229123012311232123312341235123612371238123912401241124212431244124512461247124812491250125112521253125412551256125712581259126012611262126312641265126612671268126912701271127212731274127512761277127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346
  1. // © 2016 and later: Unicode, Inc. and others.
  2. // License & terms of use: http://www.unicode.org/copyright.html
  3. /*
  4. ******************************************************************************
  5. *
  6. * Copyright (C) 1999-2015, International Business Machines
  7. * Corporation and others. All Rights Reserved.
  8. *
  9. ******************************************************************************
  10. * file name: ubidiln.c
  11. * encoding: UTF-8
  12. * tab size: 8 (not used)
  13. * indentation:4
  14. *
  15. * created on: 1999aug06
  16. * created by: Markus W. Scherer, updated by Matitiahu Allouche
  17. */
  18. #include "cmemory.h"
  19. #include "unicode/utypes.h"
  20. #include "unicode/ustring.h"
  21. #include "unicode/uchar.h"
  22. #include "unicode/ubidi.h"
  23. #include "ubidiimp.h"
  24. #include "uassert.h"
  25. /*
  26. * General remarks about the functions in this file:
  27. *
  28. * These functions deal with the aspects of potentially mixed-directional
  29. * text in a single paragraph or in a line of a single paragraph
  30. * which has already been processed according to
  31. * the Unicode 6.3 BiDi algorithm as defined in
  32. * https://www.unicode.org/reports/tr9/ , version 28,
  33. * also described in The Unicode Standard, Version 6.3.0 .
  34. *
  35. * This means that there is a UBiDi object with a levels
  36. * and a dirProps array.
  37. * paraLevel and direction are also set.
  38. * Only if the length of the text is zero, then levels==dirProps==nullptr.
  39. *
  40. * The overall directionality of the paragraph
  41. * or line is used to bypass the reordering steps if possible.
  42. * Even purely RTL text does not need reordering there because
  43. * the ubidi_getLogical/VisualIndex() functions can compute the
  44. * index on the fly in such a case.
  45. *
  46. * The implementation of the access to same-level-runs and of the reordering
  47. * do attempt to provide better performance and less memory usage compared to
  48. * a direct implementation of especially rule (L2) with an array of
  49. * one (32-bit) integer per text character.
  50. *
  51. * Here, the levels array is scanned as soon as necessary, and a vector of
  52. * same-level-runs is created. Reordering then is done on this vector.
  53. * For each run of text positions that were resolved to the same level,
  54. * only 8 bytes are stored: the first text position of the run and the visual
  55. * position behind the run after reordering.
  56. * One sign bit is used to hold the directionality of the run.
  57. * This is inefficient if there are many very short runs. If the average run
  58. * length is <2, then this uses more memory.
  59. *
  60. * In a further attempt to save memory, the levels array is never changed
  61. * after all the resolution rules (Xn, Wn, Nn, In).
  62. * Many functions have to consider the field trailingWSStart:
  63. * if it is less than length, then there is an implicit trailing run
  64. * at the paraLevel,
  65. * which is not reflected in the levels array.
  66. * This allows a line UBiDi object to use the same levels array as
  67. * its paragraph parent object.
  68. *
  69. * When a UBiDi object is created for a line of a paragraph, then the
  70. * paragraph's levels and dirProps arrays are reused by way of setting
  71. * a pointer into them, not by copying. This again saves memory and forbids to
  72. * change the now shared levels for (L1).
  73. */
  74. /* handle trailing WS (L1) -------------------------------------------------- */
  75. /*
  76. * setTrailingWSStart() sets the start index for a trailing
  77. * run of WS in the line. This is necessary because we do not modify
  78. * the paragraph's levels array that we just point into.
  79. * Using trailingWSStart is another form of performing (L1).
  80. *
  81. * To make subsequent operations easier, we also include the run
  82. * before the WS if it is at the paraLevel - we merge the two here.
  83. *
  84. * This function is called only from ubidi_setLine(), so pBiDi->paraLevel is
  85. * set correctly for the line even when contextual multiple paragraphs.
  86. */
  87. static void
  88. setTrailingWSStart(UBiDi *pBiDi) {
  89. /* pBiDi->direction!=UBIDI_MIXED */
  90. const DirProp *dirProps=pBiDi->dirProps;
  91. UBiDiLevel *levels=pBiDi->levels;
  92. int32_t start=pBiDi->length;
  93. UBiDiLevel paraLevel=pBiDi->paraLevel;
  94. /* If the line is terminated by a block separator, all preceding WS etc...
  95. are already set to paragraph level.
  96. Setting trailingWSStart to pBidi->length will avoid changing the
  97. level of B chars from 0 to paraLevel in ubidi_getLevels when
  98. orderParagraphsLTR==true.
  99. */
  100. if(dirProps[start-1]==B) {
  101. pBiDi->trailingWSStart=start; /* currently == pBiDi->length */
  102. return;
  103. }
  104. /* go backwards across all WS, BN, explicit codes */
  105. while(start>0 && DIRPROP_FLAG(dirProps[start-1])&MASK_WS) {
  106. --start;
  107. }
  108. /* if the WS run can be merged with the previous run then do so here */
  109. while(start>0 && levels[start-1]==paraLevel) {
  110. --start;
  111. }
  112. pBiDi->trailingWSStart=start;
  113. }
  114. /* ubidi_setLine ------------------------------------------------------------ */
  115. U_CAPI void U_EXPORT2
  116. ubidi_setLine(const UBiDi *pParaBiDi,
  117. int32_t start, int32_t limit,
  118. UBiDi *pLineBiDi,
  119. UErrorCode *pErrorCode) {
  120. int32_t length;
  121. /* check the argument values */
  122. RETURN_VOID_IF_NULL_OR_FAILING_ERRCODE(pErrorCode);
  123. RETURN_VOID_IF_NOT_VALID_PARA(pParaBiDi, *pErrorCode);
  124. RETURN_VOID_IF_BAD_RANGE(start, 0, limit, *pErrorCode);
  125. RETURN_VOID_IF_BAD_RANGE(limit, 0, pParaBiDi->length+1, *pErrorCode);
  126. if(pLineBiDi==nullptr) {
  127. *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
  128. return;
  129. }
  130. if(ubidi_getParagraph(pParaBiDi, start, nullptr, nullptr, nullptr, pErrorCode) !=
  131. ubidi_getParagraph(pParaBiDi, limit-1, nullptr, nullptr, nullptr, pErrorCode)) {
  132. /* the line crosses a paragraph boundary */
  133. *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
  134. return;
  135. }
  136. /* set the values in pLineBiDi from its pParaBiDi parent */
  137. pLineBiDi->pParaBiDi=nullptr; /* mark unfinished setLine */
  138. pLineBiDi->text=pParaBiDi->text+start;
  139. length=pLineBiDi->length=limit-start;
  140. pLineBiDi->resultLength=pLineBiDi->originalLength=length;
  141. pLineBiDi->paraLevel=GET_PARALEVEL(pParaBiDi, start);
  142. pLineBiDi->paraCount=pParaBiDi->paraCount;
  143. pLineBiDi->runs=nullptr;
  144. pLineBiDi->flags=0;
  145. pLineBiDi->reorderingMode=pParaBiDi->reorderingMode;
  146. pLineBiDi->reorderingOptions=pParaBiDi->reorderingOptions;
  147. pLineBiDi->controlCount=0;
  148. if(pParaBiDi->controlCount>0) {
  149. int32_t j;
  150. for(j=start; j<limit; j++) {
  151. if(IS_BIDI_CONTROL_CHAR(pParaBiDi->text[j])) {
  152. pLineBiDi->controlCount++;
  153. }
  154. }
  155. pLineBiDi->resultLength-=pLineBiDi->controlCount;
  156. }
  157. pLineBiDi->dirProps=pParaBiDi->dirProps+start;
  158. pLineBiDi->levels=pParaBiDi->levels+start;
  159. pLineBiDi->runCount=-1;
  160. if(pParaBiDi->direction!=UBIDI_MIXED) {
  161. /* the parent is already trivial */
  162. pLineBiDi->direction=pParaBiDi->direction;
  163. /*
  164. * The parent's levels are all either
  165. * implicitly or explicitly ==paraLevel;
  166. * do the same here.
  167. */
  168. if(pParaBiDi->trailingWSStart<=start) {
  169. pLineBiDi->trailingWSStart=0;
  170. } else if(pParaBiDi->trailingWSStart<limit) {
  171. pLineBiDi->trailingWSStart=pParaBiDi->trailingWSStart-start;
  172. } else {
  173. pLineBiDi->trailingWSStart=length;
  174. }
  175. } else {
  176. const UBiDiLevel *levels=pLineBiDi->levels;
  177. int32_t i, trailingWSStart;
  178. UBiDiLevel level;
  179. setTrailingWSStart(pLineBiDi);
  180. trailingWSStart=pLineBiDi->trailingWSStart;
  181. /* recalculate pLineBiDi->direction */
  182. if(trailingWSStart==0) {
  183. /* all levels are at paraLevel */
  184. pLineBiDi->direction=(UBiDiDirection)(pLineBiDi->paraLevel&1);
  185. } else {
  186. /* get the level of the first character */
  187. level=(UBiDiLevel)(levels[0]&1);
  188. /* if there is anything of a different level, then the line is mixed */
  189. if(trailingWSStart<length && (pLineBiDi->paraLevel&1)!=level) {
  190. /* the trailing WS is at paraLevel, which differs from levels[0] */
  191. pLineBiDi->direction=UBIDI_MIXED;
  192. } else {
  193. /* see if levels[1..trailingWSStart-1] have the same direction as levels[0] and paraLevel */
  194. i=1;
  195. for(;;) {
  196. if(i==trailingWSStart) {
  197. /* the direction values match those in level */
  198. pLineBiDi->direction=(UBiDiDirection)level;
  199. break;
  200. } else if((levels[i]&1)!=level) {
  201. pLineBiDi->direction=UBIDI_MIXED;
  202. break;
  203. }
  204. ++i;
  205. }
  206. }
  207. }
  208. switch(pLineBiDi->direction) {
  209. case UBIDI_LTR:
  210. /* make sure paraLevel is even */
  211. pLineBiDi->paraLevel=(UBiDiLevel)((pLineBiDi->paraLevel+1)&~1);
  212. /* all levels are implicitly at paraLevel (important for ubidi_getLevels()) */
  213. pLineBiDi->trailingWSStart=0;
  214. break;
  215. case UBIDI_RTL:
  216. /* make sure paraLevel is odd */
  217. pLineBiDi->paraLevel|=1;
  218. /* all levels are implicitly at paraLevel (important for ubidi_getLevels()) */
  219. pLineBiDi->trailingWSStart=0;
  220. break;
  221. default:
  222. break;
  223. }
  224. }
  225. pLineBiDi->pParaBiDi=pParaBiDi; /* mark successful setLine */
  226. }
  227. U_CAPI UBiDiLevel U_EXPORT2
  228. ubidi_getLevelAt(const UBiDi *pBiDi, int32_t charIndex) {
  229. /* return paraLevel if in the trailing WS run, otherwise the real level */
  230. if(!IS_VALID_PARA_OR_LINE(pBiDi) || charIndex<0 || pBiDi->length<=charIndex) {
  231. return 0;
  232. } else if(pBiDi->direction!=UBIDI_MIXED || charIndex>=pBiDi->trailingWSStart) {
  233. return GET_PARALEVEL(pBiDi, charIndex);
  234. } else {
  235. return pBiDi->levels[charIndex];
  236. }
  237. }
  238. U_CAPI const UBiDiLevel * U_EXPORT2
  239. ubidi_getLevels(UBiDi *pBiDi, UErrorCode *pErrorCode) {
  240. int32_t start, length;
  241. RETURN_IF_NULL_OR_FAILING_ERRCODE(pErrorCode, nullptr);
  242. RETURN_IF_NOT_VALID_PARA_OR_LINE(pBiDi, *pErrorCode, nullptr);
  243. if((length=pBiDi->length)<=0) {
  244. *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
  245. return nullptr;
  246. }
  247. if((start=pBiDi->trailingWSStart)==length) {
  248. /* the current levels array reflects the WS run */
  249. return pBiDi->levels;
  250. }
  251. /*
  252. * After the previous if(), we know that the levels array
  253. * has an implicit trailing WS run and therefore does not fully
  254. * reflect itself all the levels.
  255. * This must be a UBiDi object for a line, and
  256. * we need to create a new levels array.
  257. */
  258. if(getLevelsMemory(pBiDi, length)) {
  259. UBiDiLevel *levels=pBiDi->levelsMemory;
  260. if(start>0 && levels!=pBiDi->levels) {
  261. uprv_memcpy(levels, pBiDi->levels, start);
  262. }
  263. /* pBiDi->paraLevel is ok even if contextual multiple paragraphs,
  264. since pBidi is a line object */
  265. uprv_memset(levels+start, pBiDi->paraLevel, length-start);
  266. /* this new levels array is set for the line and reflects the WS run */
  267. pBiDi->trailingWSStart=length;
  268. return pBiDi->levels=levels;
  269. } else {
  270. /* out of memory */
  271. *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
  272. return nullptr;
  273. }
  274. }
  275. U_CAPI void U_EXPORT2
  276. ubidi_getLogicalRun(const UBiDi *pBiDi, int32_t logicalPosition,
  277. int32_t *pLogicalLimit, UBiDiLevel *pLevel) {
  278. UErrorCode errorCode;
  279. int32_t runCount, visualStart, logicalLimit, logicalFirst, i;
  280. Run iRun;
  281. errorCode=U_ZERO_ERROR;
  282. RETURN_VOID_IF_BAD_RANGE(logicalPosition, 0, pBiDi->length, errorCode);
  283. /* ubidi_countRuns will check VALID_PARA_OR_LINE */
  284. runCount=ubidi_countRuns((UBiDi *)pBiDi, &errorCode);
  285. if(U_FAILURE(errorCode)) {
  286. return;
  287. }
  288. /* this is done based on runs rather than on levels since levels have
  289. a special interpretation when UBIDI_REORDER_RUNS_ONLY
  290. */
  291. visualStart=logicalLimit=0;
  292. iRun=pBiDi->runs[0];
  293. for(i=0; i<runCount; i++) {
  294. iRun = pBiDi->runs[i];
  295. logicalFirst=GET_INDEX(iRun.logicalStart);
  296. logicalLimit=logicalFirst+iRun.visualLimit-visualStart;
  297. if((logicalPosition>=logicalFirst) &&
  298. (logicalPosition<logicalLimit)) {
  299. break;
  300. }
  301. visualStart = iRun.visualLimit;
  302. }
  303. if(pLogicalLimit) {
  304. *pLogicalLimit=logicalLimit;
  305. }
  306. if(pLevel) {
  307. if(pBiDi->reorderingMode==UBIDI_REORDER_RUNS_ONLY) {
  308. *pLevel=(UBiDiLevel)GET_ODD_BIT(iRun.logicalStart);
  309. }
  310. else if(pBiDi->direction!=UBIDI_MIXED || logicalPosition>=pBiDi->trailingWSStart) {
  311. *pLevel=GET_PARALEVEL(pBiDi, logicalPosition);
  312. } else {
  313. *pLevel=pBiDi->levels[logicalPosition];
  314. }
  315. }
  316. }
  317. /* runs API functions ------------------------------------------------------- */
  318. U_CAPI int32_t U_EXPORT2
  319. ubidi_countRuns(UBiDi *pBiDi, UErrorCode *pErrorCode) {
  320. RETURN_IF_NULL_OR_FAILING_ERRCODE(pErrorCode, -1);
  321. RETURN_IF_NOT_VALID_PARA_OR_LINE(pBiDi, *pErrorCode, -1);
  322. ubidi_getRuns(pBiDi, pErrorCode);
  323. if(U_FAILURE(*pErrorCode)) {
  324. return -1;
  325. }
  326. return pBiDi->runCount;
  327. }
  328. U_CAPI UBiDiDirection U_EXPORT2
  329. ubidi_getVisualRun(UBiDi *pBiDi, int32_t runIndex,
  330. int32_t *pLogicalStart, int32_t *pLength)
  331. {
  332. int32_t start;
  333. UErrorCode errorCode = U_ZERO_ERROR;
  334. RETURN_IF_NOT_VALID_PARA_OR_LINE(pBiDi, errorCode, UBIDI_LTR);
  335. ubidi_getRuns(pBiDi, &errorCode);
  336. if(U_FAILURE(errorCode)) {
  337. return UBIDI_LTR;
  338. }
  339. RETURN_IF_BAD_RANGE(runIndex, 0, pBiDi->runCount, errorCode, UBIDI_LTR);
  340. start=pBiDi->runs[runIndex].logicalStart;
  341. if(pLogicalStart!=nullptr) {
  342. *pLogicalStart=GET_INDEX(start);
  343. }
  344. if(pLength!=nullptr) {
  345. if(runIndex>0) {
  346. *pLength=pBiDi->runs[runIndex].visualLimit-
  347. pBiDi->runs[runIndex-1].visualLimit;
  348. } else {
  349. *pLength=pBiDi->runs[0].visualLimit;
  350. }
  351. }
  352. return (UBiDiDirection)GET_ODD_BIT(start);
  353. }
  354. /* in trivial cases there is only one trivial run; called by ubidi_getRuns() */
  355. static void
  356. getSingleRun(UBiDi *pBiDi, UBiDiLevel level) {
  357. /* simple, single-run case */
  358. pBiDi->runs=pBiDi->simpleRuns;
  359. pBiDi->runCount=1;
  360. /* fill and reorder the single run */
  361. pBiDi->runs[0].logicalStart=MAKE_INDEX_ODD_PAIR(0, level);
  362. pBiDi->runs[0].visualLimit=pBiDi->length;
  363. pBiDi->runs[0].insertRemove=0;
  364. }
  365. /* reorder the runs array (L2) ---------------------------------------------- */
  366. /*
  367. * Reorder the same-level runs in the runs array.
  368. * Here, runCount>1 and maxLevel>=minLevel>=paraLevel.
  369. * All the visualStart fields=logical start before reordering.
  370. * The "odd" bits are not set yet.
  371. *
  372. * Reordering with this data structure lends itself to some handy shortcuts:
  373. *
  374. * Since each run is moved but not modified, and since at the initial maxLevel
  375. * each sequence of same-level runs consists of only one run each, we
  376. * don't need to do anything there and can predecrement maxLevel.
  377. * In many simple cases, the reordering is thus done entirely in the
  378. * index mapping.
  379. * Also, reordering occurs only down to the lowest odd level that occurs,
  380. * which is minLevel|1. However, if the lowest level itself is odd, then
  381. * in the last reordering the sequence of the runs at this level or higher
  382. * will be all runs, and we don't need the elaborate loop to search for them.
  383. * This is covered by ++minLevel instead of minLevel|=1 followed
  384. * by an extra reorder-all after the reorder-some loop.
  385. * About a trailing WS run:
  386. * Such a run would need special treatment because its level is not
  387. * reflected in levels[] if this is not a paragraph object.
  388. * Instead, all characters from trailingWSStart on are implicitly at
  389. * paraLevel.
  390. * However, for all maxLevel>paraLevel, this run will never be reordered
  391. * and does not need to be taken into account. maxLevel==paraLevel is only reordered
  392. * if minLevel==paraLevel is odd, which is done in the extra segment.
  393. * This means that for the main reordering loop we don't need to consider
  394. * this run and can --runCount. If it is later part of the all-runs
  395. * reordering, then runCount is adjusted accordingly.
  396. */
  397. static void
  398. reorderLine(UBiDi *pBiDi, UBiDiLevel minLevel, UBiDiLevel maxLevel) {
  399. Run *runs, tempRun;
  400. UBiDiLevel *levels;
  401. int32_t firstRun, endRun, limitRun, runCount;
  402. /* nothing to do? */
  403. if(maxLevel<=(minLevel|1)) {
  404. return;
  405. }
  406. /*
  407. * Reorder only down to the lowest odd level
  408. * and reorder at an odd minLevel in a separate, simpler loop.
  409. * See comments above for why minLevel is always incremented.
  410. */
  411. ++minLevel;
  412. runs=pBiDi->runs;
  413. levels=pBiDi->levels;
  414. runCount=pBiDi->runCount;
  415. /* do not include the WS run at paraLevel<=old minLevel except in the simple loop */
  416. if(pBiDi->trailingWSStart<pBiDi->length) {
  417. --runCount;
  418. }
  419. while(--maxLevel>=minLevel) {
  420. firstRun=0;
  421. /* loop for all sequences of runs */
  422. for(;;) {
  423. /* look for a sequence of runs that are all at >=maxLevel */
  424. /* look for the first run of such a sequence */
  425. while(firstRun<runCount && levels[runs[firstRun].logicalStart]<maxLevel) {
  426. ++firstRun;
  427. }
  428. if(firstRun>=runCount) {
  429. break; /* no more such runs */
  430. }
  431. /* look for the limit run of such a sequence (the run behind it) */
  432. for(limitRun=firstRun; ++limitRun<runCount && levels[runs[limitRun].logicalStart]>=maxLevel;) {}
  433. /* Swap the entire sequence of runs from firstRun to limitRun-1. */
  434. endRun=limitRun-1;
  435. while(firstRun<endRun) {
  436. tempRun = runs[firstRun];
  437. runs[firstRun]=runs[endRun];
  438. runs[endRun]=tempRun;
  439. ++firstRun;
  440. --endRun;
  441. }
  442. if(limitRun==runCount) {
  443. break; /* no more such runs */
  444. } else {
  445. firstRun=limitRun+1;
  446. }
  447. }
  448. }
  449. /* now do maxLevel==old minLevel (==odd!), see above */
  450. if(!(minLevel&1)) {
  451. firstRun=0;
  452. /* include the trailing WS run in this complete reordering */
  453. if(pBiDi->trailingWSStart==pBiDi->length) {
  454. --runCount;
  455. }
  456. /* Swap the entire sequence of all runs. (endRun==runCount) */
  457. while(firstRun<runCount) {
  458. tempRun=runs[firstRun];
  459. runs[firstRun]=runs[runCount];
  460. runs[runCount]=tempRun;
  461. ++firstRun;
  462. --runCount;
  463. }
  464. }
  465. }
  466. /* compute the runs array --------------------------------------------------- */
  467. static int32_t getRunFromLogicalIndex(UBiDi *pBiDi, int32_t logicalIndex) {
  468. Run *runs=pBiDi->runs;
  469. int32_t runCount=pBiDi->runCount, visualStart=0, i, length, logicalStart;
  470. for(i=0; i<runCount; i++) {
  471. length=runs[i].visualLimit-visualStart;
  472. logicalStart=GET_INDEX(runs[i].logicalStart);
  473. if((logicalIndex>=logicalStart) && (logicalIndex<(logicalStart+length))) {
  474. return i;
  475. }
  476. visualStart+=length;
  477. }
  478. /* we should never get here */
  479. UPRV_UNREACHABLE_EXIT;
  480. }
  481. /*
  482. * Compute the runs array from the levels array.
  483. * After ubidi_getRuns() returns true, runCount is guaranteed to be >0
  484. * and the runs are reordered.
  485. * Odd-level runs have visualStart on their visual right edge and
  486. * they progress visually to the left.
  487. * If option UBIDI_OPTION_INSERT_MARKS is set, insertRemove will contain the
  488. * sum of appropriate LRM/RLM_BEFORE/AFTER flags.
  489. * If option UBIDI_OPTION_REMOVE_CONTROLS is set, insertRemove will contain the
  490. * negative number of BiDi control characters within this run.
  491. */
  492. U_CFUNC UBool
  493. ubidi_getRuns(UBiDi *pBiDi, UErrorCode*) {
  494. /*
  495. * This method returns immediately if the runs are already set. This
  496. * includes the case of length==0 (handled in setPara)..
  497. */
  498. if (pBiDi->runCount>=0) {
  499. return true;
  500. }
  501. if(pBiDi->direction!=UBIDI_MIXED) {
  502. /* simple, single-run case - this covers length==0 */
  503. /* pBiDi->paraLevel is ok even for contextual multiple paragraphs */
  504. getSingleRun(pBiDi, pBiDi->paraLevel);
  505. } else /* UBIDI_MIXED, length>0 */ {
  506. /* mixed directionality */
  507. int32_t length=pBiDi->length, limit;
  508. UBiDiLevel *levels=pBiDi->levels;
  509. int32_t i, runCount;
  510. UBiDiLevel level=UBIDI_DEFAULT_LTR; /* initialize with no valid level */
  511. /*
  512. * If there are WS characters at the end of the line
  513. * and the run preceding them has a level different from
  514. * paraLevel, then they will form their own run at paraLevel (L1).
  515. * Count them separately.
  516. * We need some special treatment for this in order to not
  517. * modify the levels array which a line UBiDi object shares
  518. * with its paragraph parent and its other line siblings.
  519. * In other words, for the trailing WS, it may be
  520. * levels[]!=paraLevel but we have to treat it like it were so.
  521. */
  522. limit=pBiDi->trailingWSStart;
  523. /* count the runs, there is at least one non-WS run, and limit>0 */
  524. runCount=0;
  525. for(i=0; i<limit; ++i) {
  526. /* increment runCount at the start of each run */
  527. if(levels[i]!=level) {
  528. ++runCount;
  529. level=levels[i];
  530. }
  531. }
  532. /*
  533. * We don't need to see if the last run can be merged with a trailing
  534. * WS run because setTrailingWSStart() would have done that.
  535. */
  536. if(runCount==1 && limit==length) {
  537. /* There is only one non-WS run and no trailing WS-run. */
  538. getSingleRun(pBiDi, levels[0]);
  539. } else /* runCount>1 || limit<length */ {
  540. /* allocate and set the runs */
  541. Run *runs;
  542. int32_t runIndex, start;
  543. UBiDiLevel minLevel=UBIDI_MAX_EXPLICIT_LEVEL+1, maxLevel=0;
  544. /* now, count a (non-mergeable) WS run */
  545. if(limit<length) {
  546. ++runCount;
  547. }
  548. /* runCount>1 */
  549. if(getRunsMemory(pBiDi, runCount)) {
  550. runs=pBiDi->runsMemory;
  551. } else {
  552. return false;
  553. }
  554. /* set the runs */
  555. /* FOOD FOR THOUGHT: this could be optimized, e.g.:
  556. * 464->444, 484->444, 575->555, 595->555
  557. * However, that would take longer. Check also how it would
  558. * interact with BiDi control removal and inserting Marks.
  559. */
  560. runIndex=0;
  561. /* search for the run limits and initialize visualLimit values with the run lengths */
  562. i=0;
  563. do {
  564. /* prepare this run */
  565. start=i;
  566. level=levels[i];
  567. if(level<minLevel) {
  568. minLevel=level;
  569. }
  570. if(level>maxLevel) {
  571. maxLevel=level;
  572. }
  573. /* look for the run limit */
  574. while(++i<limit && levels[i]==level) {}
  575. /* i is another run limit */
  576. runs[runIndex].logicalStart=start;
  577. runs[runIndex].visualLimit=i-start;
  578. runs[runIndex].insertRemove=0;
  579. ++runIndex;
  580. } while(i<limit);
  581. if(limit<length) {
  582. /* there is a separate WS run */
  583. runs[runIndex].logicalStart=limit;
  584. runs[runIndex].visualLimit=length-limit;
  585. /* For the trailing WS run, pBiDi->paraLevel is ok even
  586. if contextual multiple paragraphs. */
  587. if(pBiDi->paraLevel<minLevel) {
  588. minLevel=pBiDi->paraLevel;
  589. }
  590. }
  591. /* set the object fields */
  592. pBiDi->runs=runs;
  593. pBiDi->runCount=runCount;
  594. reorderLine(pBiDi, minLevel, maxLevel);
  595. /* now add the direction flags and adjust the visualLimit's to be just that */
  596. /* this loop will also handle the trailing WS run */
  597. limit=0;
  598. for(i=0; i<runCount; ++i) {
  599. ADD_ODD_BIT_FROM_LEVEL(runs[i].logicalStart, levels[runs[i].logicalStart]);
  600. limit+=runs[i].visualLimit;
  601. runs[i].visualLimit=limit;
  602. }
  603. /* Set the "odd" bit for the trailing WS run. */
  604. /* For a RTL paragraph, it will be the *first* run in visual order. */
  605. /* For the trailing WS run, pBiDi->paraLevel is ok even if
  606. contextual multiple paragraphs. */
  607. if(runIndex<runCount) {
  608. int32_t trailingRun = ((pBiDi->paraLevel & 1) != 0)? 0 : runIndex;
  609. ADD_ODD_BIT_FROM_LEVEL(runs[trailingRun].logicalStart, pBiDi->paraLevel);
  610. }
  611. }
  612. }
  613. /* handle insert LRM/RLM BEFORE/AFTER run */
  614. if(pBiDi->insertPoints.size>0) {
  615. Point *point, *start=pBiDi->insertPoints.points,
  616. *limit=start+pBiDi->insertPoints.size;
  617. int32_t runIndex;
  618. for(point=start; point<limit; point++) {
  619. runIndex=getRunFromLogicalIndex(pBiDi, point->pos);
  620. pBiDi->runs[runIndex].insertRemove|=point->flag;
  621. }
  622. }
  623. /* handle remove BiDi control characters */
  624. if(pBiDi->controlCount>0) {
  625. int32_t runIndex;
  626. const char16_t *start=pBiDi->text, *limit=start+pBiDi->length, *pu;
  627. for(pu=start; pu<limit; pu++) {
  628. if(IS_BIDI_CONTROL_CHAR(*pu)) {
  629. runIndex=getRunFromLogicalIndex(pBiDi, (int32_t)(pu-start));
  630. pBiDi->runs[runIndex].insertRemove--;
  631. }
  632. }
  633. }
  634. return true;
  635. }
  636. static UBool
  637. prepareReorder(const UBiDiLevel *levels, int32_t length,
  638. int32_t *indexMap,
  639. UBiDiLevel *pMinLevel, UBiDiLevel *pMaxLevel) {
  640. int32_t start;
  641. UBiDiLevel level, minLevel, maxLevel;
  642. if(levels==nullptr || length<=0) {
  643. return false;
  644. }
  645. /* determine minLevel and maxLevel */
  646. minLevel=UBIDI_MAX_EXPLICIT_LEVEL+1;
  647. maxLevel=0;
  648. for(start=length; start>0;) {
  649. level=levels[--start];
  650. if(level>UBIDI_MAX_EXPLICIT_LEVEL+1) {
  651. return false;
  652. }
  653. if(level<minLevel) {
  654. minLevel=level;
  655. }
  656. if(level>maxLevel) {
  657. maxLevel=level;
  658. }
  659. }
  660. *pMinLevel=minLevel;
  661. *pMaxLevel=maxLevel;
  662. /* initialize the index map */
  663. for(start=length; start>0;) {
  664. --start;
  665. indexMap[start]=start;
  666. }
  667. return true;
  668. }
  669. /* reorder a line based on a levels array (L2) ------------------------------ */
  670. U_CAPI void U_EXPORT2
  671. ubidi_reorderLogical(const UBiDiLevel *levels, int32_t length, int32_t *indexMap) {
  672. int32_t start, limit, sumOfSosEos;
  673. UBiDiLevel minLevel = 0, maxLevel = 0;
  674. if(indexMap==nullptr || !prepareReorder(levels, length, indexMap, &minLevel, &maxLevel)) {
  675. return;
  676. }
  677. /* nothing to do? */
  678. if(minLevel==maxLevel && (minLevel&1)==0) {
  679. return;
  680. }
  681. /* reorder only down to the lowest odd level */
  682. minLevel|=1;
  683. /* loop maxLevel..minLevel */
  684. do {
  685. start=0;
  686. /* loop for all sequences of levels to reorder at the current maxLevel */
  687. for(;;) {
  688. /* look for a sequence of levels that are all at >=maxLevel */
  689. /* look for the first index of such a sequence */
  690. while(start<length && levels[start]<maxLevel) {
  691. ++start;
  692. }
  693. if(start>=length) {
  694. break; /* no more such sequences */
  695. }
  696. /* look for the limit of such a sequence (the index behind it) */
  697. for(limit=start; ++limit<length && levels[limit]>=maxLevel;) {}
  698. /*
  699. * sos=start of sequence, eos=end of sequence
  700. *
  701. * The closed (inclusive) interval from sos to eos includes all the logical
  702. * and visual indexes within this sequence. They are logically and
  703. * visually contiguous and in the same range.
  704. *
  705. * For each run, the new visual index=sos+eos-old visual index;
  706. * we pre-add sos+eos into sumOfSosEos ->
  707. * new visual index=sumOfSosEos-old visual index;
  708. */
  709. sumOfSosEos=start+limit-1;
  710. /* reorder each index in the sequence */
  711. do {
  712. indexMap[start]=sumOfSosEos-indexMap[start];
  713. } while(++start<limit);
  714. /* start==limit */
  715. if(limit==length) {
  716. break; /* no more such sequences */
  717. } else {
  718. start=limit+1;
  719. }
  720. }
  721. } while(--maxLevel>=minLevel);
  722. }
  723. U_CAPI void U_EXPORT2
  724. ubidi_reorderVisual(const UBiDiLevel *levels, int32_t length, int32_t *indexMap) {
  725. int32_t start, end, limit, temp;
  726. UBiDiLevel minLevel = 0, maxLevel = 0;
  727. if(indexMap==nullptr || !prepareReorder(levels, length, indexMap, &minLevel, &maxLevel)) {
  728. return;
  729. }
  730. /* nothing to do? */
  731. if(minLevel==maxLevel && (minLevel&1)==0) {
  732. return;
  733. }
  734. /* reorder only down to the lowest odd level */
  735. minLevel|=1;
  736. /* loop maxLevel..minLevel */
  737. do {
  738. start=0;
  739. /* loop for all sequences of levels to reorder at the current maxLevel */
  740. for(;;) {
  741. /* look for a sequence of levels that are all at >=maxLevel */
  742. /* look for the first index of such a sequence */
  743. while(start<length && levels[start]<maxLevel) {
  744. ++start;
  745. }
  746. if(start>=length) {
  747. break; /* no more such runs */
  748. }
  749. /* look for the limit of such a sequence (the index behind it) */
  750. for(limit=start; ++limit<length && levels[limit]>=maxLevel;) {}
  751. /*
  752. * Swap the entire interval of indexes from start to limit-1.
  753. * We don't need to swap the levels for the purpose of this
  754. * algorithm: the sequence of levels that we look at does not
  755. * move anyway.
  756. */
  757. end=limit-1;
  758. while(start<end) {
  759. temp=indexMap[start];
  760. indexMap[start]=indexMap[end];
  761. indexMap[end]=temp;
  762. ++start;
  763. --end;
  764. }
  765. if(limit==length) {
  766. break; /* no more such sequences */
  767. } else {
  768. start=limit+1;
  769. }
  770. }
  771. } while(--maxLevel>=minLevel);
  772. }
  773. /* API functions for logical<->visual mapping ------------------------------- */
  774. U_CAPI int32_t U_EXPORT2
  775. ubidi_getVisualIndex(UBiDi *pBiDi, int32_t logicalIndex, UErrorCode *pErrorCode) {
  776. int32_t visualIndex=UBIDI_MAP_NOWHERE;
  777. RETURN_IF_NULL_OR_FAILING_ERRCODE(pErrorCode, -1);
  778. RETURN_IF_NOT_VALID_PARA_OR_LINE(pBiDi, *pErrorCode, -1);
  779. RETURN_IF_BAD_RANGE(logicalIndex, 0, pBiDi->length, *pErrorCode, -1);
  780. /* we can do the trivial cases without the runs array */
  781. switch(pBiDi->direction) {
  782. case UBIDI_LTR:
  783. visualIndex=logicalIndex;
  784. break;
  785. case UBIDI_RTL:
  786. visualIndex=pBiDi->length-logicalIndex-1;
  787. break;
  788. default:
  789. if(!ubidi_getRuns(pBiDi, pErrorCode)) {
  790. *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
  791. return -1;
  792. } else {
  793. Run *runs=pBiDi->runs;
  794. int32_t i, visualStart=0, offset, length;
  795. /* linear search for the run, search on the visual runs */
  796. for(i=0; i<pBiDi->runCount; ++i) {
  797. length=runs[i].visualLimit-visualStart;
  798. offset=logicalIndex-GET_INDEX(runs[i].logicalStart);
  799. if(offset>=0 && offset<length) {
  800. if(IS_EVEN_RUN(runs[i].logicalStart)) {
  801. /* LTR */
  802. visualIndex=visualStart+offset;
  803. } else {
  804. /* RTL */
  805. visualIndex=visualStart+length-offset-1;
  806. }
  807. break; /* exit for loop */
  808. }
  809. visualStart+=length;
  810. }
  811. if(i>=pBiDi->runCount) {
  812. return UBIDI_MAP_NOWHERE;
  813. }
  814. }
  815. }
  816. if(pBiDi->insertPoints.size>0) {
  817. /* add the number of added marks until the calculated visual index */
  818. Run *runs=pBiDi->runs;
  819. int32_t i, length, insertRemove;
  820. int32_t visualStart=0, markFound=0;
  821. for(i=0; ; i++, visualStart+=length) {
  822. length=runs[i].visualLimit-visualStart;
  823. insertRemove=runs[i].insertRemove;
  824. if(insertRemove & (LRM_BEFORE|RLM_BEFORE)) {
  825. markFound++;
  826. }
  827. /* is it the run containing the visual index? */
  828. if(visualIndex<runs[i].visualLimit) {
  829. return visualIndex+markFound;
  830. }
  831. if(insertRemove & (LRM_AFTER|RLM_AFTER)) {
  832. markFound++;
  833. }
  834. }
  835. }
  836. else if(pBiDi->controlCount>0) {
  837. /* subtract the number of controls until the calculated visual index */
  838. Run *runs=pBiDi->runs;
  839. int32_t i, j, start, limit, length, insertRemove;
  840. int32_t visualStart=0, controlFound=0;
  841. char16_t uchar=pBiDi->text[logicalIndex];
  842. /* is the logical index pointing to a control ? */
  843. if(IS_BIDI_CONTROL_CHAR(uchar)) {
  844. return UBIDI_MAP_NOWHERE;
  845. }
  846. /* loop on runs */
  847. for(i=0; ; i++, visualStart+=length) {
  848. length=runs[i].visualLimit-visualStart;
  849. insertRemove=runs[i].insertRemove;
  850. /* calculated visual index is beyond this run? */
  851. if(visualIndex>=runs[i].visualLimit) {
  852. controlFound-=insertRemove;
  853. continue;
  854. }
  855. /* calculated visual index must be within current run */
  856. if(insertRemove==0) {
  857. return visualIndex-controlFound;
  858. }
  859. if(IS_EVEN_RUN(runs[i].logicalStart)) {
  860. /* LTR: check from run start to logical index */
  861. start=runs[i].logicalStart;
  862. limit=logicalIndex;
  863. } else {
  864. /* RTL: check from logical index to run end */
  865. start=logicalIndex+1;
  866. limit=GET_INDEX(runs[i].logicalStart)+length;
  867. }
  868. for(j=start; j<limit; j++) {
  869. uchar=pBiDi->text[j];
  870. if(IS_BIDI_CONTROL_CHAR(uchar)) {
  871. controlFound++;
  872. }
  873. }
  874. return visualIndex-controlFound;
  875. }
  876. }
  877. return visualIndex;
  878. }
  879. U_CAPI int32_t U_EXPORT2
  880. ubidi_getLogicalIndex(UBiDi *pBiDi, int32_t visualIndex, UErrorCode *pErrorCode) {
  881. Run *runs;
  882. int32_t i, runCount, start;
  883. RETURN_IF_NULL_OR_FAILING_ERRCODE(pErrorCode, -1);
  884. RETURN_IF_NOT_VALID_PARA_OR_LINE(pBiDi, *pErrorCode, -1);
  885. RETURN_IF_BAD_RANGE(visualIndex, 0, pBiDi->resultLength, *pErrorCode, -1);
  886. /* we can do the trivial cases without the runs array */
  887. if(pBiDi->insertPoints.size==0 && pBiDi->controlCount==0) {
  888. if(pBiDi->direction==UBIDI_LTR) {
  889. return visualIndex;
  890. }
  891. else if(pBiDi->direction==UBIDI_RTL) {
  892. return pBiDi->length-visualIndex-1;
  893. }
  894. }
  895. if(!ubidi_getRuns(pBiDi, pErrorCode)) {
  896. *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
  897. return -1;
  898. }
  899. runs=pBiDi->runs;
  900. runCount=pBiDi->runCount;
  901. if(pBiDi->insertPoints.size>0) {
  902. /* handle inserted LRM/RLM */
  903. int32_t markFound=0, insertRemove;
  904. int32_t visualStart=0, length;
  905. runs=pBiDi->runs;
  906. /* subtract number of marks until visual index */
  907. for(i=0; ; i++, visualStart+=length) {
  908. length=runs[i].visualLimit-visualStart;
  909. insertRemove=runs[i].insertRemove;
  910. if(insertRemove&(LRM_BEFORE|RLM_BEFORE)) {
  911. if(visualIndex<=(visualStart+markFound)) {
  912. return UBIDI_MAP_NOWHERE;
  913. }
  914. markFound++;
  915. }
  916. /* is adjusted visual index within this run? */
  917. if(visualIndex<(runs[i].visualLimit+markFound)) {
  918. visualIndex-=markFound;
  919. break;
  920. }
  921. if(insertRemove&(LRM_AFTER|RLM_AFTER)) {
  922. if(visualIndex==(visualStart+length+markFound)) {
  923. return UBIDI_MAP_NOWHERE;
  924. }
  925. markFound++;
  926. }
  927. }
  928. }
  929. else if(pBiDi->controlCount>0) {
  930. /* handle removed BiDi control characters */
  931. int32_t controlFound=0, insertRemove, length;
  932. int32_t logicalStart, logicalEnd, visualStart=0, j, k;
  933. char16_t uchar;
  934. UBool evenRun;
  935. /* add number of controls until visual index */
  936. for(i=0; ; i++, visualStart+=length) {
  937. length=runs[i].visualLimit-visualStart;
  938. insertRemove=runs[i].insertRemove;
  939. /* is adjusted visual index beyond current run? */
  940. if(visualIndex>=(runs[i].visualLimit-controlFound+insertRemove)) {
  941. controlFound-=insertRemove;
  942. continue;
  943. }
  944. /* adjusted visual index is within current run */
  945. if(insertRemove==0) {
  946. visualIndex+=controlFound;
  947. break;
  948. }
  949. /* count non-control chars until visualIndex */
  950. logicalStart=runs[i].logicalStart;
  951. evenRun=IS_EVEN_RUN(logicalStart);
  952. REMOVE_ODD_BIT(logicalStart);
  953. logicalEnd=logicalStart+length-1;
  954. for(j=0; j<length; j++) {
  955. k= evenRun ? logicalStart+j : logicalEnd-j;
  956. uchar=pBiDi->text[k];
  957. if(IS_BIDI_CONTROL_CHAR(uchar)) {
  958. controlFound++;
  959. }
  960. if((visualIndex+controlFound)==(visualStart+j)) {
  961. break;
  962. }
  963. }
  964. visualIndex+=controlFound;
  965. break;
  966. }
  967. }
  968. /* handle all cases */
  969. if(runCount<=10) {
  970. /* linear search for the run */
  971. for(i=0; visualIndex>=runs[i].visualLimit; ++i) {}
  972. } else {
  973. /* binary search for the run */
  974. int32_t begin=0, limit=runCount;
  975. /* the middle if() is guaranteed to find the run, we don't need a loop limit */
  976. for(;;) {
  977. i=(begin+limit)/2;
  978. if(visualIndex>=runs[i].visualLimit) {
  979. begin=i+1;
  980. } else if(i==0 || visualIndex>=runs[i-1].visualLimit) {
  981. break;
  982. } else {
  983. limit=i;
  984. }
  985. }
  986. }
  987. start=runs[i].logicalStart;
  988. if(IS_EVEN_RUN(start)) {
  989. /* LTR */
  990. /* the offset in runs[i] is visualIndex-runs[i-1].visualLimit */
  991. if(i>0) {
  992. visualIndex-=runs[i-1].visualLimit;
  993. }
  994. return start+visualIndex;
  995. } else {
  996. /* RTL */
  997. return GET_INDEX(start)+runs[i].visualLimit-visualIndex-1;
  998. }
  999. }
  1000. U_CAPI void U_EXPORT2
  1001. ubidi_getLogicalMap(UBiDi *pBiDi, int32_t *indexMap, UErrorCode *pErrorCode) {
  1002. RETURN_VOID_IF_NULL_OR_FAILING_ERRCODE(pErrorCode);
  1003. /* ubidi_countRuns() checks for VALID_PARA_OR_LINE */
  1004. ubidi_countRuns(pBiDi, pErrorCode);
  1005. if(U_FAILURE(*pErrorCode)) {
  1006. /* no op */
  1007. } else if(indexMap==nullptr) {
  1008. *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
  1009. } else {
  1010. /* fill a logical-to-visual index map using the runs[] */
  1011. int32_t visualStart, visualLimit, i, j, k;
  1012. int32_t logicalStart, logicalLimit;
  1013. Run *runs=pBiDi->runs;
  1014. if (pBiDi->length<=0) {
  1015. return;
  1016. }
  1017. if (pBiDi->length>pBiDi->resultLength) {
  1018. uprv_memset(indexMap, 0xFF, pBiDi->length*sizeof(int32_t));
  1019. }
  1020. visualStart=0;
  1021. for(j=0; j<pBiDi->runCount; ++j) {
  1022. logicalStart=GET_INDEX(runs[j].logicalStart);
  1023. visualLimit=runs[j].visualLimit;
  1024. if(IS_EVEN_RUN(runs[j].logicalStart)) {
  1025. do { /* LTR */
  1026. indexMap[logicalStart++]=visualStart++;
  1027. } while(visualStart<visualLimit);
  1028. } else {
  1029. logicalStart+=visualLimit-visualStart; /* logicalLimit */
  1030. do { /* RTL */
  1031. indexMap[--logicalStart]=visualStart++;
  1032. } while(visualStart<visualLimit);
  1033. }
  1034. /* visualStart==visualLimit; */
  1035. }
  1036. if(pBiDi->insertPoints.size>0) {
  1037. int32_t markFound=0, runCount=pBiDi->runCount;
  1038. int32_t length, insertRemove;
  1039. visualStart=0;
  1040. /* add number of marks found until each index */
  1041. for(i=0; i<runCount; i++, visualStart+=length) {
  1042. length=runs[i].visualLimit-visualStart;
  1043. insertRemove=runs[i].insertRemove;
  1044. if(insertRemove&(LRM_BEFORE|RLM_BEFORE)) {
  1045. markFound++;
  1046. }
  1047. if(markFound>0) {
  1048. logicalStart=GET_INDEX(runs[i].logicalStart);
  1049. logicalLimit=logicalStart+length;
  1050. for(j=logicalStart; j<logicalLimit; j++) {
  1051. indexMap[j]+=markFound;
  1052. }
  1053. }
  1054. if(insertRemove&(LRM_AFTER|RLM_AFTER)) {
  1055. markFound++;
  1056. }
  1057. }
  1058. }
  1059. else if(pBiDi->controlCount>0) {
  1060. int32_t controlFound=0, runCount=pBiDi->runCount;
  1061. int32_t length, insertRemove;
  1062. UBool evenRun;
  1063. char16_t uchar;
  1064. visualStart=0;
  1065. /* subtract number of controls found until each index */
  1066. for(i=0; i<runCount; i++, visualStart+=length) {
  1067. length=runs[i].visualLimit-visualStart;
  1068. insertRemove=runs[i].insertRemove;
  1069. /* no control found within previous runs nor within this run */
  1070. if((controlFound-insertRemove)==0) {
  1071. continue;
  1072. }
  1073. logicalStart=runs[i].logicalStart;
  1074. evenRun=IS_EVEN_RUN(logicalStart);
  1075. REMOVE_ODD_BIT(logicalStart);
  1076. logicalLimit=logicalStart+length;
  1077. /* if no control within this run */
  1078. if(insertRemove==0) {
  1079. for(j=logicalStart; j<logicalLimit; j++) {
  1080. indexMap[j]-=controlFound;
  1081. }
  1082. continue;
  1083. }
  1084. for(j=0; j<length; j++) {
  1085. k= evenRun ? logicalStart+j : logicalLimit-j-1;
  1086. uchar=pBiDi->text[k];
  1087. if(IS_BIDI_CONTROL_CHAR(uchar)) {
  1088. controlFound++;
  1089. indexMap[k]=UBIDI_MAP_NOWHERE;
  1090. continue;
  1091. }
  1092. indexMap[k]-=controlFound;
  1093. }
  1094. }
  1095. }
  1096. }
  1097. }
  1098. U_CAPI void U_EXPORT2
  1099. ubidi_getVisualMap(UBiDi *pBiDi, int32_t *indexMap, UErrorCode *pErrorCode) {
  1100. RETURN_VOID_IF_NULL_OR_FAILING_ERRCODE(pErrorCode);
  1101. if(indexMap==nullptr) {
  1102. *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
  1103. return;
  1104. }
  1105. /* ubidi_countRuns() checks for VALID_PARA_OR_LINE */
  1106. ubidi_countRuns(pBiDi, pErrorCode);
  1107. if(U_SUCCESS(*pErrorCode)) {
  1108. /* fill a visual-to-logical index map using the runs[] */
  1109. Run *runs=pBiDi->runs, *runsLimit=runs+pBiDi->runCount;
  1110. int32_t logicalStart, visualStart, visualLimit, *pi=indexMap;
  1111. if (pBiDi->resultLength<=0) {
  1112. return;
  1113. }
  1114. visualStart=0;
  1115. for(; runs<runsLimit; ++runs) {
  1116. logicalStart=runs->logicalStart;
  1117. visualLimit=runs->visualLimit;
  1118. if(IS_EVEN_RUN(logicalStart)) {
  1119. do { /* LTR */
  1120. *pi++ = logicalStart++;
  1121. } while(++visualStart<visualLimit);
  1122. } else {
  1123. REMOVE_ODD_BIT(logicalStart);
  1124. logicalStart+=visualLimit-visualStart; /* logicalLimit */
  1125. do { /* RTL */
  1126. *pi++ = --logicalStart;
  1127. } while(++visualStart<visualLimit);
  1128. }
  1129. /* visualStart==visualLimit; */
  1130. }
  1131. if(pBiDi->insertPoints.size>0) {
  1132. int32_t markFound=0, runCount=pBiDi->runCount;
  1133. int32_t insertRemove, i, j, k;
  1134. runs=pBiDi->runs;
  1135. /* count all inserted marks */
  1136. for(i=0; i<runCount; i++) {
  1137. insertRemove=runs[i].insertRemove;
  1138. if(insertRemove&(LRM_BEFORE|RLM_BEFORE)) {
  1139. markFound++;
  1140. }
  1141. if(insertRemove&(LRM_AFTER|RLM_AFTER)) {
  1142. markFound++;
  1143. }
  1144. }
  1145. /* move back indexes by number of preceding marks */
  1146. k=pBiDi->resultLength;
  1147. for(i=runCount-1; i>=0 && markFound>0; i--) {
  1148. insertRemove=runs[i].insertRemove;
  1149. if(insertRemove&(LRM_AFTER|RLM_AFTER)) {
  1150. indexMap[--k]= UBIDI_MAP_NOWHERE;
  1151. markFound--;
  1152. }
  1153. visualStart= i>0 ? runs[i-1].visualLimit : 0;
  1154. for(j=runs[i].visualLimit-1; j>=visualStart && markFound>0; j--) {
  1155. indexMap[--k]=indexMap[j];
  1156. }
  1157. if(insertRemove&(LRM_BEFORE|RLM_BEFORE)) {
  1158. indexMap[--k]= UBIDI_MAP_NOWHERE;
  1159. markFound--;
  1160. }
  1161. }
  1162. }
  1163. else if(pBiDi->controlCount>0) {
  1164. int32_t runCount=pBiDi->runCount, logicalEnd;
  1165. int32_t insertRemove, length, i, j, k, m;
  1166. char16_t uchar;
  1167. UBool evenRun;
  1168. runs=pBiDi->runs;
  1169. visualStart=0;
  1170. /* move forward indexes by number of preceding controls */
  1171. k=0;
  1172. for(i=0; i<runCount; i++, visualStart+=length) {
  1173. length=runs[i].visualLimit-visualStart;
  1174. insertRemove=runs[i].insertRemove;
  1175. /* if no control found yet, nothing to do in this run */
  1176. if((insertRemove==0)&&(k==visualStart)) {
  1177. k+=length;
  1178. continue;
  1179. }
  1180. /* if no control in this run */
  1181. if(insertRemove==0) {
  1182. visualLimit=runs[i].visualLimit;
  1183. for(j=visualStart; j<visualLimit; j++) {
  1184. indexMap[k++]=indexMap[j];
  1185. }
  1186. continue;
  1187. }
  1188. logicalStart=runs[i].logicalStart;
  1189. evenRun=IS_EVEN_RUN(logicalStart);
  1190. REMOVE_ODD_BIT(logicalStart);
  1191. logicalEnd=logicalStart+length-1;
  1192. for(j=0; j<length; j++) {
  1193. m= evenRun ? logicalStart+j : logicalEnd-j;
  1194. uchar=pBiDi->text[m];
  1195. if(!IS_BIDI_CONTROL_CHAR(uchar)) {
  1196. indexMap[k++]=m;
  1197. }
  1198. }
  1199. }
  1200. }
  1201. }
  1202. }
  1203. U_CAPI void U_EXPORT2
  1204. ubidi_invertMap(const int32_t *srcMap, int32_t *destMap, int32_t length) {
  1205. if(srcMap!=nullptr && destMap!=nullptr && length>0) {
  1206. const int32_t *pi;
  1207. int32_t destLength=-1, count=0;
  1208. /* find highest value and count positive indexes in srcMap */
  1209. pi=srcMap+length;
  1210. while(pi>srcMap) {
  1211. if(*--pi>destLength) {
  1212. destLength=*pi;
  1213. }
  1214. if(*pi>=0) {
  1215. count++;
  1216. }
  1217. }
  1218. destLength++; /* add 1 for origin 0 */
  1219. if(count<destLength) {
  1220. /* we must fill unmatched destMap entries with -1 */
  1221. uprv_memset(destMap, 0xFF, destLength*sizeof(int32_t));
  1222. }
  1223. pi=srcMap+length;
  1224. while(length>0) {
  1225. if(*--pi>=0) {
  1226. destMap[*pi]=--length;
  1227. } else {
  1228. --length;
  1229. }
  1230. }
  1231. }
  1232. }