unormcmp.cpp 23 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640
  1. // © 2016 and later: Unicode, Inc. and others.
  2. // License & terms of use: http://www.unicode.org/copyright.html
  3. /*
  4. *******************************************************************************
  5. *
  6. * Copyright (C) 2001-2014, International Business Machines
  7. * Corporation and others. All Rights Reserved.
  8. *
  9. *******************************************************************************
  10. * file name: unormcmp.cpp
  11. * encoding: UTF-8
  12. * tab size: 8 (not used)
  13. * indentation:4
  14. *
  15. * created on: 2004sep13
  16. * created by: Markus W. Scherer
  17. *
  18. * unorm_compare() function moved here from unorm.cpp for better modularization.
  19. * Depends on both normalization and case folding.
  20. * Allows unorm.cpp to not depend on any character properties code.
  21. */
  22. #include "unicode/utypes.h"
  23. #if !UCONFIG_NO_NORMALIZATION
  24. #include "unicode/unorm.h"
  25. #include "unicode/ustring.h"
  26. #include "cmemory.h"
  27. #include "normalizer2impl.h"
  28. #include "ucase.h"
  29. #include "uprops.h"
  30. #include "ustr_imp.h"
  31. U_NAMESPACE_USE
  32. /* compare canonically equivalent ------------------------------------------- */
  33. /*
  34. * Compare two strings for canonical equivalence.
  35. * Further options include case-insensitive comparison and
  36. * code point order (as opposed to code unit order).
  37. *
  38. * In this function, canonical equivalence is optional as well.
  39. * If canonical equivalence is tested, then both strings must fulfill
  40. * the FCD check.
  41. *
  42. * Semantically, this is equivalent to
  43. * strcmp[CodePointOrder](NFD(foldCase(s1)), NFD(foldCase(s2)))
  44. * where code point order, NFD and foldCase are all optional.
  45. *
  46. * String comparisons almost always yield results before processing both strings
  47. * completely.
  48. * They are generally more efficient working incrementally instead of
  49. * performing the sub-processing (strlen, normalization, case-folding)
  50. * on the entire strings first.
  51. *
  52. * It is also unnecessary to normalize identical characters.
  53. *
  54. * This function works in principle as follows:
  55. *
  56. * loop {
  57. * get one code unit c1 from s1 (-1 if end of source)
  58. * get one code unit c2 from s2 (-1 if end of source)
  59. *
  60. * if(either string finished) {
  61. * return result;
  62. * }
  63. * if(c1==c2) {
  64. * continue;
  65. * }
  66. *
  67. * // c1!=c2
  68. * try to decompose/case-fold c1/c2, and continue if one does;
  69. *
  70. * // still c1!=c2 and neither decomposes/case-folds, return result
  71. * return c1-c2;
  72. * }
  73. *
  74. * When a character decomposes, then the pointer for that source changes to
  75. * the decomposition, pushing the previous pointer onto a stack.
  76. * When the end of the decomposition is reached, then the code unit reader
  77. * pops the previous source from the stack.
  78. * (Same for case-folding.)
  79. *
  80. * This is complicated further by operating on variable-width UTF-16.
  81. * The top part of the loop works on code units, while lookups for decomposition
  82. * and case-folding need code points.
  83. * Code points are assembled after the equality/end-of-source part.
  84. * The source pointer is only advanced beyond all code units when the code point
  85. * actually decomposes/case-folds.
  86. *
  87. * If we were on a trail surrogate unit when assembling a code point,
  88. * and the code point decomposes/case-folds, then the decomposition/folding
  89. * result must be compared with the part of the other string that corresponds to
  90. * this string's lead surrogate.
  91. * Since we only assemble a code point when hitting a trail unit when the
  92. * preceding lead units were identical, we back up the other string by one unit
  93. * in such a case.
  94. *
  95. * The optional code point order comparison at the end works with
  96. * the same fix-up as the other code point order comparison functions.
  97. * See ustring.c and the comment near the end of this function.
  98. *
  99. * Assumption: A decomposition or case-folding result string never contains
  100. * a single surrogate. This is a safe assumption in the Unicode Standard.
  101. * Therefore, we do not need to check for surrogate pairs across
  102. * decomposition/case-folding boundaries.
  103. *
  104. * Further assumptions (see the verifications in tstnorm.cpp):
  105. * The API function checks for FCD first, while the core function
  106. * first case-folds and then decomposes. This requires that case-folding does not
  107. * un-FCD any strings.
  108. *
  109. * The API function may also NFD the input and turn off decomposition.
  110. * This requires that case-folding does not un-NFD strings either.
  111. *
  112. * TODO If any of the above two assumptions is violated,
  113. * then this entire code must be re-thought.
  114. * If this happens, then a simple solution is to case-fold both strings up front
  115. * and to turn off UNORM_INPUT_IS_FCD.
  116. * We already do this when not both strings are in FCD because makeFCD
  117. * would be a partial NFD before the case folding, which does not work.
  118. * Note that all of this is only a problem when case-folding _and_
  119. * canonical equivalence come together.
  120. * (Comments in unorm_compare() are more up to date than this TODO.)
  121. */
  /*
   * One saved source position for unorm_cmpEquivFold().
   * When the comparison switches from a string to a replacement text
   * (a case folding or a decomposition), the pointers of the interrupted
   * source are stored in one of these so that reading can resume there later.
   */
  struct CmpEquivLevel {
      const char16_t *start;  /* first code unit of the interrupted source */
      const char16_t *s;      /* position at which reading resumes */
      const char16_t *limit;  /* end of the interrupted source, as tracked by the caller */
  };
  typedef struct CmpEquivLevel CmpEquivLevel;
  127. /**
  128. * Internal option bit for unorm_cmpEquivFold(): when set, the NFC data is loaded and code points that differ are looked up for a decomposition before a difference is reported.
  129. * If not set, no normalization data is used and only the optional case folding applies, i.e. just do strcasecmp(). unorm_compare() always sets this bit.
  130. */
  131. #define _COMPARE_EQUIV 0x80000
  132. /*
 * Internal core comparison (not an API, so no argument checking is done here).
 *
 * Compares s1 and s2 incrementally, one UTF-16 code unit at a time. When two
 * units differ, the code points they belong to are case-folded (only if
 * U_COMPARE_IGNORE_CASE is set) and/or decomposed (only if _COMPARE_EQUIV is
 * set); the replacement text is then read instead of the original code point
 * and the comparison continues.
 *
 * s1, length1 / s2, length2: the strings; a length of -1 means that the
 *     string ends at the first NUL (its limit pointer is kept as nullptr).
 *     If the _STRNCMP_STYLE option bit is set, a NUL also ends a string that
 *     has an explicit length.
 * options: bit set of _COMPARE_EQUIV, U_COMPARE_IGNORE_CASE,
 *     U_COMPARE_CODE_POINT_ORDER and _STRNCMP_STYLE; the whole value is also
 *     passed on to ucase_toFullFolding().
 * pErrorCode: in/out error code; the function returns 0 if it indicates a
 *     failure after the normalization data has (optionally) been fetched.
 *
 * Returns 0 if the strings compare equal, -1/+1 if string 1/string 2 ends
 * first, otherwise the difference c1-c2 of the first code units that differ
 * and cannot be folded/decomposed any further (adjusted for code point order
 * if U_COMPARE_CODE_POINT_ORDER is set).
 */
  133. static int32_t
  134. unorm_cmpEquivFold(const char16_t *s1, int32_t length1,
  135. const char16_t *s2, int32_t length2,
  136. uint32_t options,
  137. UErrorCode *pErrorCode) {
  138. const Normalizer2Impl *nfcImpl;
  139. /* current-level start/limit - s1/s2 as current */
  140. const char16_t *start1, *start2, *limit1, *limit2;
  141. /* decomposition and case folding variables */
  142. const char16_t *p;
  143. int32_t length;
  144. /* stacks of previous-level start/current/limit: [0] holds the input string position, [1] holds the case folding position (or an empty placeholder with start==nullptr) */
  145. CmpEquivLevel stack1[2], stack2[2];
  146. /* buffers for algorithmic decompositions, handed to getDecomposition() */
  147. char16_t decomp1[4], decomp2[4];
  148. /* case folding buffers, only use current-level start/limit */
  149. char16_t fold1[UCASE_MAX_STRING_LENGTH+1], fold2[UCASE_MAX_STRING_LENGTH+1];
  150. /* track which is the current level per string: 0=input string, 1=case folding result, 2=decomposition */
  151. int32_t level1, level2;
  152. /* current code units, and code points for lookups */
  153. UChar32 c1, c2, cp1, cp2;
  154. /* no argument error checking because this itself is not an API */
  155. /*
  156. * assume that at least one of the options _COMPARE_EQUIV and U_COMPARE_IGNORE_CASE is set
  157. * otherwise this function must behave exactly as uprv_strCompare()
  158. * not checking for that here makes testing this function easier
  159. */
  160. /* normalization data is only fetched when decomposition was requested; nfcImpl stays nullptr otherwise and is never used then */
  161. if((options&_COMPARE_EQUIV)!=0) {
  162. nfcImpl=Normalizer2Factory::getNFCImpl(*pErrorCode);
  163. } else {
  164. nfcImpl=nullptr;
  165. }
  166. if(U_FAILURE(*pErrorCode)) {
  167. return 0;
  168. }
  169. /* initialize: a nullptr limit marks a NUL-terminated string */
  170. start1=s1;
  171. if(length1==-1) {
  172. limit1=nullptr;
  173. } else {
  174. limit1=s1+length1;
  175. }
  176. start2=s2;
  177. if(length2==-1) {
  178. limit2=nullptr;
  179. } else {
  180. limit2=s2+length2;
  181. }
  182. level1=level2=0;
  183. c1=c2=-1;
  184. /* comparison loop */
  185. for(;;) {
  186. /*
  187. * here a code unit value of -1 means "get another code unit"
  188. * below it will mean "this source is finished"
  189. */
  190. if(c1<0) {
  191. /* get next code unit from string 1, post-increment; the source ends at its limit, or at a NUL if it is NUL-terminated or _STRNCMP_STYLE is set */
  192. for(;;) {
  193. if(s1==limit1 || ((c1=*s1)==0 && (limit1==nullptr || (options&_STRNCMP_STYLE)))) {
  194. if(level1==0) {
  195. c1=-1; /* end of the input string itself: string 1 is finished */
  196. break;
  197. }
  198. } else {
  199. ++s1;
  200. break;
  201. }
  202. /* reached end of level buffer, pop one level; placeholder levels (start==nullptr) are skipped */
  203. do {
  204. --level1;
  205. start1=stack1[level1].start; /*Not uninitialized*/
  206. } while(start1==nullptr);
  207. s1=stack1[level1].s; /*Not uninitialized*/
  208. limit1=stack1[level1].limit; /*Not uninitialized*/
  209. }
  210. }
  211. if(c2<0) {
  212. /* get next code unit from string 2, post-increment; same end conditions as for string 1 */
  213. for(;;) {
  214. if(s2==limit2 || ((c2=*s2)==0 && (limit2==nullptr || (options&_STRNCMP_STYLE)))) {
  215. if(level2==0) {
  216. c2=-1; /* end of the input string itself: string 2 is finished */
  217. break;
  218. }
  219. } else {
  220. ++s2;
  221. break;
  222. }
  223. /* reached end of level buffer, pop one level; placeholder levels (start==nullptr) are skipped */
  224. do {
  225. --level2;
  226. start2=stack2[level2].start; /*Not uninitialized*/
  227. } while(start2==nullptr);
  228. s2=stack2[level2].s; /*Not uninitialized*/
  229. limit2=stack2[level2].limit; /*Not uninitialized*/
  230. }
  231. }
  232. /*
  233. * compare c1 and c2
  234. * either variable c1, c2 is -1 only if the corresponding string is finished
  235. */
  236. if(c1==c2) {
  237. if(c1<0) {
  238. return 0; /* c1==c2==-1 indicating end of strings */
  239. }
  240. c1=c2=-1; /* make us fetch new code units */
  241. continue;
  242. } else if(c1<0) {
  243. return -1; /* string 1 ends before string 2 */
  244. } else if(c2<0) {
  245. return 1; /* string 2 ends before string 1 */
  246. }
  247. /* c1!=c2 && c1>=0 && c2>=0 */
  248. /* get complete code points for c1, c2 for lookups if either is a surrogate; s1/s2 already point past c1/c2 here */
  249. cp1=c1;
  250. if(U_IS_SURROGATE(c1)) {
  251. char16_t c;
  252. if(U_IS_SURROGATE_LEAD(c1)) {
  253. if(s1!=limit1 && U16_IS_TRAIL(c=*s1)) {
  254. /* advance ++s1; only below if cp1 decomposes/case-folds */
  255. cp1=U16_GET_SUPPLEMENTARY(c1, c);
  256. }
  257. } else /* isTrail(c1) */ {
  258. if(start1<=(s1-2) && U16_IS_LEAD(c=*(s1-2))) { /* look back at the unit before c1, if there is one in this source */
  259. cp1=U16_GET_SUPPLEMENTARY(c, c1);
  260. }
  261. }
  262. }
  263. cp2=c2;
  264. if(U_IS_SURROGATE(c2)) {
  265. char16_t c;
  266. if(U_IS_SURROGATE_LEAD(c2)) {
  267. if(s2!=limit2 && U16_IS_TRAIL(c=*s2)) {
  268. /* advance ++s2; only below if cp2 decomposes/case-folds */
  269. cp2=U16_GET_SUPPLEMENTARY(c2, c);
  270. }
  271. } else /* isTrail(c2) */ {
  272. if(start2<=(s2-2) && U16_IS_LEAD(c=*(s2-2))) { /* look back at the unit before c2, if there is one in this source */
  273. cp2=U16_GET_SUPPLEMENTARY(c, c2);
  274. }
  275. }
  276. }
  277. /*
  278. * go down one level for each string
  279. * continue with the main loop as soon as there is a real change
  280. */
  281. if( level1==0 && (options&U_COMPARE_IGNORE_CASE) &&
  282. (length = ucase_toFullFolding(cp1, &p, options)) >= 0
  283. ) {
  284. /* cp1 case-folds to the code point "length" or to p[length]; folding is only applied to the input string itself (level 0) */
  285. if(U_IS_SURROGATE(c1)) {
  286. if(U_IS_SURROGATE_LEAD(c1)) {
  287. /* advance beyond source surrogate pair if it case-folds */
  288. ++s1;
  289. } else /* isTrail(c1) */ {
  290. /*
  291. * we got a supplementary code point when hitting its trail surrogate,
  292. * therefore the lead surrogate must have been the same as in the other string;
  293. * compare this case folding with the lead surrogate in the other string
  294. * remember that this simulates bulk text replacement:
  295. * the case folding would replace the entire code point
  296. */
  297. --s2;
  298. c2=*(s2-1);
  299. }
  300. }
  301. /* push current level pointers */
  302. stack1[0].start=start1;
  303. stack1[0].s=s1;
  304. stack1[0].limit=limit1;
  305. ++level1;
  306. /* copy the folding result to fold1[]: a string p[length] if length is small enough, otherwise "length" is itself the folded code point and gets encoded as UTF-16 */
  307. if(length<=UCASE_MAX_STRING_LENGTH) {
  308. u_memcpy(fold1, p, length);
  309. } else {
  310. int32_t i=0;
  311. U16_APPEND_UNSAFE(fold1, i, length);
  312. length=i;
  313. }
  314. /* set next level pointers to case folding */
  315. start1=s1=fold1;
  316. limit1=fold1+length;
  317. /* get ready to read from the case folding, continue with loop */
  318. c1=-1;
  319. continue;
  320. }
  321. if( level2==0 && (options&U_COMPARE_IGNORE_CASE) &&
  322. (length = ucase_toFullFolding(cp2, &p, options)) >= 0
  323. ) {
  324. /* cp2 case-folds to the code point "length" or to p[length]; folding is only applied to the input string itself (level 0) */
  325. if(U_IS_SURROGATE(c2)) {
  326. if(U_IS_SURROGATE_LEAD(c2)) {
  327. /* advance beyond source surrogate pair if it case-folds */
  328. ++s2;
  329. } else /* isTrail(c2) */ {
  330. /*
  331. * we got a supplementary code point when hitting its trail surrogate,
  332. * therefore the lead surrogate must have been the same as in the other string;
  333. * compare this case folding with the lead surrogate in the other string
  334. * remember that this simulates bulk text replacement:
  335. * the case folding would replace the entire code point
  336. */
  337. --s1;
  338. c1=*(s1-1);
  339. }
  340. }
  341. /* push current level pointers */
  342. stack2[0].start=start2;
  343. stack2[0].s=s2;
  344. stack2[0].limit=limit2;
  345. ++level2;
  346. /* copy the folding result to fold2[]: a string p[length] if length is small enough, otherwise "length" is itself the folded code point and gets encoded as UTF-16 */
  347. if(length<=UCASE_MAX_STRING_LENGTH) {
  348. u_memcpy(fold2, p, length);
  349. } else {
  350. int32_t i=0;
  351. U16_APPEND_UNSAFE(fold2, i, length);
  352. length=i;
  353. }
  354. /* set next level pointers to case folding */
  355. start2=s2=fold2;
  356. limit2=fold2+length;
  357. /* get ready to read from the case folding, continue with loop */
  358. c2=-1;
  359. continue;
  360. }
  361. if( level1<2 && (options&_COMPARE_EQUIV) &&
  362. nullptr != (p = nfcImpl->getDecomposition(cp1, decomp1, length))
  363. ) {
  364. /* cp1 decomposes into p[length]; decomposition is tried on the input string and on a case folding result, but not on a decomposition (level 2) */
  365. if(U_IS_SURROGATE(c1)) {
  366. if(U_IS_SURROGATE_LEAD(c1)) {
  367. /* advance beyond source surrogate pair if it decomposes */
  368. ++s1;
  369. } else /* isTrail(c1) */ {
  370. /*
  371. * we got a supplementary code point when hitting its trail surrogate,
  372. * therefore the lead surrogate must have been the same as in the other string;
  373. * compare this decomposition with the lead surrogate in the other string
  374. * remember that this simulates bulk text replacement:
  375. * the decomposition would replace the entire code point
  376. */
  377. --s2;
  378. c2=*(s2-1);
  379. }
  380. }
  381. /* push current level pointers */
  382. stack1[level1].start=start1;
  383. stack1[level1].s=s1;
  384. stack1[level1].limit=limit1;
  385. ++level1;
  386. /* set empty intermediate level if skipped, so that a decomposition always ends up at level 2; the pop loop skips entries with start==nullptr */
  387. if(level1<2) {
  388. stack1[level1++].start=nullptr;
  389. }
  390. /* set next level pointers to decomposition */
  391. start1=s1=p;
  392. limit1=p+length;
  393. /* get ready to read from decomposition, continue with loop */
  394. c1=-1;
  395. continue;
  396. }
  397. if( level2<2 && (options&_COMPARE_EQUIV) &&
  398. nullptr != (p = nfcImpl->getDecomposition(cp2, decomp2, length))
  399. ) {
  400. /* cp2 decomposes into p[length]; decomposition is tried on the input string and on a case folding result, but not on a decomposition (level 2) */
  401. if(U_IS_SURROGATE(c2)) {
  402. if(U_IS_SURROGATE_LEAD(c2)) {
  403. /* advance beyond source surrogate pair if it decomposes */
  404. ++s2;
  405. } else /* isTrail(c2) */ {
  406. /*
  407. * we got a supplementary code point when hitting its trail surrogate,
  408. * therefore the lead surrogate must have been the same as in the other string;
  409. * compare this decomposition with the lead surrogate in the other string
  410. * remember that this simulates bulk text replacement:
  411. * the decomposition would replace the entire code point
  412. */
  413. --s1;
  414. c1=*(s1-1);
  415. }
  416. }
  417. /* push current level pointers */
  418. stack2[level2].start=start2;
  419. stack2[level2].s=s2;
  420. stack2[level2].limit=limit2;
  421. ++level2;
  422. /* set empty intermediate level if skipped, so that a decomposition always ends up at level 2; the pop loop skips entries with start==nullptr */
  423. if(level2<2) {
  424. stack2[level2++].start=nullptr;
  425. }
  426. /* set next level pointers to decomposition */
  427. start2=s2=p;
  428. limit2=p+length;
  429. /* get ready to read from decomposition, continue with loop */
  430. c2=-1;
  431. continue;
  432. }
  433. /*
  434. * no decomposition/case folding, max level for both sides:
  435. * return difference result
  436. *
  437. * code point order comparison must not just return cp1-cp2
  438. * because when single surrogates are present then the surrogate pairs
  439. * that formed cp1 and cp2 may be from different string indexes
  440. *
  441. * example: { d800 d800 dc01 } vs. { d800 dc00 }, compare at second code units
  442. * c1=d800 cp1=10001 c2=dc00 cp2=10000
  443. * cp1-cp2>0 but c1-c2<0 and in fact in UTF-32 it is { d800 10001 } < { 10000 }
  444. *
  445. * therefore, use same fix-up as in ustring.c/uprv_strCompare()
  446. * except: uprv_strCompare() fetches c=*s while this function fetches c=*s++
  447. * so we have slightly different pointer/start/limit comparisons here
  448. */
  449. if(c1>=0xd800 && c2>=0xd800 && (options&U_COMPARE_CODE_POINT_ORDER)) {
  450. /* subtract 0x2800 from BMP code points to make them smaller than supplementary ones; units below 0xd800 already sort first and need no fix-up */
  451. if(
  452. (c1<=0xdbff && s1!=limit1 && U16_IS_TRAIL(*s1)) ||
  453. (U16_IS_TRAIL(c1) && start1!=(s1-1) && U16_IS_LEAD(*(s1-2)))
  454. ) {
  455. /* part of a surrogate pair, leave >=d800 */
  456. } else {
  457. /* BMP code point - may be surrogate code point - make <d800 */
  458. c1-=0x2800;
  459. }
  460. if(
  461. (c2<=0xdbff && s2!=limit2 && U16_IS_TRAIL(*s2)) ||
  462. (U16_IS_TRAIL(c2) && start2!=(s2-1) && U16_IS_LEAD(*(s2-2)))
  463. ) {
  464. /* part of a surrogate pair, leave >=d800 */
  465. } else {
  466. /* BMP code point - may be surrogate code point - make <d800 */
  467. c2-=0x2800;
  468. }
  469. }
  470. return c1-c2;
  471. }
  472. }
  473. static
  474. UBool _normalize(const Normalizer2 *n2, const char16_t *s, int32_t length,
  475. UnicodeString &normalized, UErrorCode *pErrorCode) {
  476. UnicodeString str(length<0, s, length);
  477. // check if s fulfill the conditions
  478. int32_t spanQCYes=n2->spanQuickCheckYes(str, *pErrorCode);
  479. if (U_FAILURE(*pErrorCode)) {
  480. return false;
  481. }
  482. /*
  483. * ICU 2.4 had a further optimization:
  484. * If both strings were not in FCD, then they were both NFD'ed,
  485. * and the _COMPARE_EQUIV option was turned off.
  486. * It is not entirely clear that this is valid with the current
  487. * definition of the canonical caseless match.
  488. * Therefore, ICU 2.6 removes that optimization.
  489. */
  490. if(spanQCYes<str.length()) {
  491. UnicodeString unnormalized=str.tempSubString(spanQCYes);
  492. normalized.setTo(false, str.getBuffer(), spanQCYes);
  493. n2->normalizeSecondAndAppend(normalized, unnormalized, *pErrorCode);
  494. if (U_SUCCESS(*pErrorCode)) {
  495. return true;
  496. }
  497. }
  498. return false;
  499. }
  500. U_CAPI int32_t U_EXPORT2
  501. unorm_compare(const char16_t *s1, int32_t length1,
  502. const char16_t *s2, int32_t length2,
  503. uint32_t options,
  504. UErrorCode *pErrorCode) {
  505. /* argument checking */
  506. if(U_FAILURE(*pErrorCode)) {
  507. return 0;
  508. }
  509. if (s1 == nullptr || length1 < -1 || s2 == nullptr || length2 < -1) {
  510. *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
  511. return 0;
  512. }
  513. UnicodeString fcd1, fcd2;
  514. int32_t normOptions=(int32_t)(options>>UNORM_COMPARE_NORM_OPTIONS_SHIFT);
  515. options|=_COMPARE_EQUIV;
  516. /*
  517. * UAX #21 Case Mappings, as fixed for Unicode version 4
  518. * (see Jitterbug 2021), defines a canonical caseless match as
  519. *
  520. * A string X is a canonical caseless match
  521. * for a string Y if and only if
  522. * NFD(toCasefold(NFD(X))) = NFD(toCasefold(NFD(Y)))
  523. *
  524. * For better performance, we check for FCD (or let the caller tell us that
  525. * both strings are in FCD) for the inner normalization.
  526. * BasicNormalizerTest::FindFoldFCDExceptions() makes sure that
  527. * case-folding preserves the FCD-ness of a string.
  528. * The outer normalization is then only performed by unorm_cmpEquivFold()
  529. * when there is a difference.
  530. *
  531. * Exception: When using the Turkic case-folding option, we do perform
  532. * full NFD first. This is because in the Turkic case precomposed characters
  533. * with 0049 capital I or 0069 small i fold differently whether they
  534. * are first decomposed or not, so an FCD check - a check only for
  535. * canonical order - is not sufficient.
  536. */
  537. if(!(options&UNORM_INPUT_IS_FCD) || (options&U_FOLD_CASE_EXCLUDE_SPECIAL_I)) {
  538. const Normalizer2 *n2;
  539. if(options&U_FOLD_CASE_EXCLUDE_SPECIAL_I) {
  540. n2=Normalizer2::getNFDInstance(*pErrorCode);
  541. } else {
  542. n2=Normalizer2Factory::getFCDInstance(*pErrorCode);
  543. }
  544. if (U_FAILURE(*pErrorCode)) {
  545. return 0;
  546. }
  547. if(normOptions&UNORM_UNICODE_3_2) {
  548. const UnicodeSet *uni32=uniset_getUnicode32Instance(*pErrorCode);
  549. FilteredNormalizer2 fn2(*n2, *uni32);
  550. if(_normalize(&fn2, s1, length1, fcd1, pErrorCode)) {
  551. s1=fcd1.getBuffer();
  552. length1=fcd1.length();
  553. }
  554. if(_normalize(&fn2, s2, length2, fcd2, pErrorCode)) {
  555. s2=fcd2.getBuffer();
  556. length2=fcd2.length();
  557. }
  558. } else {
  559. if(_normalize(n2, s1, length1, fcd1, pErrorCode)) {
  560. s1=fcd1.getBuffer();
  561. length1=fcd1.length();
  562. }
  563. if(_normalize(n2, s2, length2, fcd2, pErrorCode)) {
  564. s2=fcd2.getBuffer();
  565. length2=fcd2.length();
  566. }
  567. }
  568. }
  569. if(U_SUCCESS(*pErrorCode)) {
  570. return unorm_cmpEquivFold(s1, length1, s2, length2, options, pErrorCode);
  571. } else {
  572. return 0;
  573. }
  574. }
  575. #endif /* #if !UCONFIG_NO_NORMALIZATION */