ucnvlat1.cpp 22 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756
  1. // © 2016 and later: Unicode, Inc. and others.
  2. // License & terms of use: http://www.unicode.org/copyright.html
  3. /*
  4. **********************************************************************
  5. * Copyright (C) 2000-2015, International Business Machines
  6. * Corporation and others. All Rights Reserved.
  7. **********************************************************************
  8. * file name: ucnvlat1.cpp
  9. * encoding: UTF-8
  10. * tab size: 8 (not used)
  11. * indentation:4
  12. *
  13. * created on: 2000feb07
  14. * created by: Markus W. Scherer
  15. */
  16. #include "unicode/utypes.h"
  17. #if !UCONFIG_NO_CONVERSION
  18. #include "unicode/ucnv.h"
  19. #include "unicode/uset.h"
  20. #include "unicode/utf8.h"
  21. #include "ucnv_bld.h"
  22. #include "ucnv_cnv.h"
  23. #include "ustr_imp.h"
  24. /* control optimizations according to the platform */
  25. #define LATIN1_UNROLL_FROM_UNICODE 1
  26. /* ISO 8859-1 --------------------------------------------------------------- */
  27. /* This is a table-less and callback-less version of ucnv_MBCSSingleToBMPWithOffsets(). */
  28. U_CDECL_BEGIN
  29. static void U_CALLCONV
  30. _Latin1ToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs,
  31. UErrorCode *pErrorCode) {
  32. const uint8_t *source;
  33. char16_t *target;
  34. int32_t targetCapacity, length;
  35. int32_t *offsets;
  36. int32_t sourceIndex;
  37. /* set up the local pointers */
  38. source=(const uint8_t *)pArgs->source;
  39. target=pArgs->target;
  40. targetCapacity=(int32_t)(pArgs->targetLimit-pArgs->target);
  41. offsets=pArgs->offsets;
  42. sourceIndex=0;
  43. /*
  44. * since the conversion here is 1:1 char16_t:uint8_t, we need only one counter
  45. * for the minimum of the sourceLength and targetCapacity
  46. */
  47. length=(int32_t)((const uint8_t *)pArgs->sourceLimit-source);
  48. if(length<=targetCapacity) {
  49. targetCapacity=length;
  50. } else {
  51. /* target will be full */
  52. *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
  53. length=targetCapacity;
  54. }
  55. if(targetCapacity>=8) {
  56. /* This loop is unrolled for speed and improved pipelining. */
  57. int32_t count, loops;
  58. loops=count=targetCapacity>>3;
  59. length=targetCapacity&=0x7;
  60. do {
  61. target[0]=source[0];
  62. target[1]=source[1];
  63. target[2]=source[2];
  64. target[3]=source[3];
  65. target[4]=source[4];
  66. target[5]=source[5];
  67. target[6]=source[6];
  68. target[7]=source[7];
  69. target+=8;
  70. source+=8;
  71. } while(--count>0);
  72. if(offsets!=nullptr) {
  73. do {
  74. offsets[0]=sourceIndex++;
  75. offsets[1]=sourceIndex++;
  76. offsets[2]=sourceIndex++;
  77. offsets[3]=sourceIndex++;
  78. offsets[4]=sourceIndex++;
  79. offsets[5]=sourceIndex++;
  80. offsets[6]=sourceIndex++;
  81. offsets[7]=sourceIndex++;
  82. offsets+=8;
  83. } while(--loops>0);
  84. }
  85. }
  86. /* conversion loop */
  87. while(targetCapacity>0) {
  88. *target++=*source++;
  89. --targetCapacity;
  90. }
  91. /* write back the updated pointers */
  92. pArgs->source=(const char *)source;
  93. pArgs->target=target;
  94. /* set offsets */
  95. if(offsets!=nullptr) {
  96. while(length>0) {
  97. *offsets++=sourceIndex++;
  98. --length;
  99. }
  100. pArgs->offsets=offsets;
  101. }
  102. }
  103. /* This is a table-less and callback-less version of ucnv_MBCSSingleGetNextUChar(). */
  104. static UChar32 U_CALLCONV
  105. _Latin1GetNextUChar(UConverterToUnicodeArgs *pArgs,
  106. UErrorCode *pErrorCode) {
  107. const uint8_t *source=(const uint8_t *)pArgs->source;
  108. if(source<(const uint8_t *)pArgs->sourceLimit) {
  109. pArgs->source=(const char *)(source+1);
  110. return *source;
  111. }
  112. /* no output because of empty input */
  113. *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
  114. return 0xffff;
  115. }
  116. /* This is a table-less version of ucnv_MBCSSingleFromBMPWithOffsets(). */
  /*
   * Converts UTF-16 code units from pArgs->source into single bytes at pArgs->target.
   *
   * The highest code unit that is copied through ("max") is 0xff when
   * cnv->sharedData is &_Latin1Data and 0x7f otherwise, so the same routine
   * serves both converters defined in this file.
   *
   * Outcomes, as implemented below:
   * - A code unit above max stops the conversion. The offending code point
   *   (combined from a surrogate pair when a trail unit follows a lead unit)
   *   is stored in cnv->fromUChar32 and *pErrorCode becomes
   *   U_ILLEGAL_CHAR_FOUND for an unpaired surrogate, U_INVALID_CHAR_FOUND
   *   for anything else.
   * - A lead surrogate at the very end of the input is saved in
   *   cnv->fromUChar32 without setting an error; a later call with a non-zero
   *   cnv->fromUChar32 and room in the target resumes at getTrail.
   * - If no error was set, input remains and the target is full,
   *   *pErrorCode becomes U_BUFFER_OVERFLOW_ERROR.
   *
   * pArgs->source, pArgs->target and pArgs->offsets are written back at the
   * end. When offsets are requested, one consecutive source index is written
   * per output byte.
   */
  117. static void U_CALLCONV
  118. _Latin1FromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs,
  119. UErrorCode *pErrorCode) {
  120. UConverter *cnv;
  121. const char16_t *source, *sourceLimit;
  122. uint8_t *target, *oldTarget;
  123. int32_t targetCapacity, length;
  124. int32_t *offsets;
  125. UChar32 cp;
  126. char16_t c, max;
  127. int32_t sourceIndex;
  128. /* set up the local pointers */
  129. cnv=pArgs->converter;
  130. source=pArgs->source;
  131. sourceLimit=pArgs->sourceLimit;
  /* oldTarget marks the first output byte that has not yet been given an offset */
  132. target=oldTarget=(uint8_t *)pArgs->target;
  133. targetCapacity=(int32_t)(pArgs->targetLimit-pArgs->target);
  134. offsets=pArgs->offsets;
  /* pick the largest code unit that maps straight to a byte */
  135. if(cnv->sharedData==&_Latin1Data) {
  136. max=0xff; /* Latin-1 */
  137. } else {
  138. max=0x7f; /* US-ASCII */
  139. }
  140. /* get the converter state from UConverter */
  141. cp=cnv->fromUChar32;
  142. /* sourceIndex=-1 if the current character began in the previous buffer */
  143. sourceIndex= cp==0 ? 0 : -1;
  144. /*
  145. * since the conversion here is 1:1 char16_t:uint8_t, we need only one counter
  146. * for the minimum of the sourceLength and targetCapacity
  147. */
  148. length=(int32_t)(sourceLimit-source);
  149. if(length<targetCapacity) {
  150. targetCapacity=length;
  151. }
  152. /* conversion loop */
  /*
   * A saved code point from the previous call jumps straight into the
   * surrogate handling below; this goto enters the middle of the if/else
   * chain, so the statement order there must not be rearranged.
   */
  153. if(cp!=0 && targetCapacity>0) {
  154. goto getTrail;
  155. }
  156. #if LATIN1_UNROLL_FROM_UNICODE
  157. /* unroll the loop with the most common case */
  /*
   * Each pass writes 16 bytes speculatively while OR-ing the 16 code units
   * together; a single comparison against max afterwards tells whether any
   * of them was out of range.
   */
  158. if(targetCapacity>=16) {
  159. int32_t count, loops;
  160. char16_t u, oredChars;
  161. loops=count=targetCapacity>>4;
  162. do {
  163. oredChars=u=*source++;
  164. *target++=(uint8_t)u;
  165. oredChars|=u=*source++;
  166. *target++=(uint8_t)u;
  167. oredChars|=u=*source++;
  168. *target++=(uint8_t)u;
  169. oredChars|=u=*source++;
  170. *target++=(uint8_t)u;
  171. oredChars|=u=*source++;
  172. *target++=(uint8_t)u;
  173. oredChars|=u=*source++;
  174. *target++=(uint8_t)u;
  175. oredChars|=u=*source++;
  176. *target++=(uint8_t)u;
  177. oredChars|=u=*source++;
  178. *target++=(uint8_t)u;
  179. oredChars|=u=*source++;
  180. *target++=(uint8_t)u;
  181. oredChars|=u=*source++;
  182. *target++=(uint8_t)u;
  183. oredChars|=u=*source++;
  184. *target++=(uint8_t)u;
  185. oredChars|=u=*source++;
  186. *target++=(uint8_t)u;
  187. oredChars|=u=*source++;
  188. *target++=(uint8_t)u;
  189. oredChars|=u=*source++;
  190. *target++=(uint8_t)u;
  191. oredChars|=u=*source++;
  192. *target++=(uint8_t)u;
  193. oredChars|=u=*source++;
  194. *target++=(uint8_t)u;
  195. /* were all 16 entries really valid? */
  196. if(oredChars>max) {
  197. /* no, return to the first of these 16 */
  /* the one-at-a-time loop below redoes this group and stops at the offending unit */
  198. source-=16;
  199. target-=16;
  200. break;
  201. }
  202. } while(--count>0);
  /* count becomes the number of 16-unit groups that were fully accepted */
  203. count=loops-count;
  204. targetCapacity-=16*count;
  205. if(offsets!=nullptr) {
  /* these groups get their offsets here, so exclude them from the final offsets pass */
  206. oldTarget+=16*count;
  207. while(count>0) {
  208. *offsets++=sourceIndex++;
  209. *offsets++=sourceIndex++;
  210. *offsets++=sourceIndex++;
  211. *offsets++=sourceIndex++;
  212. *offsets++=sourceIndex++;
  213. *offsets++=sourceIndex++;
  214. *offsets++=sourceIndex++;
  215. *offsets++=sourceIndex++;
  216. *offsets++=sourceIndex++;
  217. *offsets++=sourceIndex++;
  218. *offsets++=sourceIndex++;
  219. *offsets++=sourceIndex++;
  220. *offsets++=sourceIndex++;
  221. *offsets++=sourceIndex++;
  222. *offsets++=sourceIndex++;
  223. *offsets++=sourceIndex++;
  224. --count;
  225. }
  226. }
  227. }
  228. #endif
  229. /* conversion loop */
  /* c starts at 0 so that the c>max test below is false when this loop never runs */
  230. c=0;
  /* note: source is advanced past the code unit even when that unit exceeds max */
  231. while(targetCapacity>0 && (c=*source++)<=max) {
  232. /* convert the Unicode code point */
  233. *target++=(uint8_t)c;
  234. --targetCapacity;
  235. }
  236. if(c>max) {
  237. cp=c;
  238. if(!U_IS_SURROGATE(cp)) {
  239. /* callback(unassigned) */
  240. } else if(U_IS_SURROGATE_LEAD(cp)) {
  /* also entered via goto when a lead surrogate was saved by an earlier call */
  241. getTrail:
  242. if(source<sourceLimit) {
  243. /* test the following code unit */
  244. char16_t trail=*source;
  245. if(U16_IS_TRAIL(trail)) {
  246. ++source;
  247. cp=U16_GET_SUPPLEMENTARY(cp, trail);
  248. /* this codepage does not map supplementary code points */
  249. /* callback(unassigned) */
  250. } else {
  251. /* this is an unmatched lead code unit (1st surrogate) */
  252. /* callback(illegal) */
  253. }
  254. } else {
  255. /* no more input */
  /* remember the lead surrogate and leave without setting an error */
  256. cnv->fromUChar32=cp;
  257. goto noMoreInput;
  258. }
  259. } else {
  260. /* this is an unmatched trail code unit (2nd surrogate) */
  261. /* callback(illegal) */
  262. }
  /* cp is still a surrogate only when it was unpaired; a combined pair or other value above max reports "invalid" */
  263. *pErrorCode= U_IS_SURROGATE(cp) ? U_ILLEGAL_CHAR_FOUND : U_INVALID_CHAR_FOUND;
  264. cnv->fromUChar32=cp;
  265. }
  266. noMoreInput:
  267. /* set offsets since the start */
  /* covers only the bytes written after oldTarget, i.e. those not handled by the unrolled offsets loop */
  268. if(offsets!=nullptr) {
  269. size_t count=target-oldTarget;
  270. while(count>0) {
  271. *offsets++=sourceIndex++;
  272. --count;
  273. }
  274. }
  275. if(U_SUCCESS(*pErrorCode) && source<sourceLimit && target>=(uint8_t *)pArgs->targetLimit) {
  276. /* target is full */
  277. *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
  278. }
  279. /* write back the updated pointers */
  280. pArgs->source=source;
  281. pArgs->target=(char *)target;
  282. pArgs->offsets=offsets;
  283. }
  284. /* Convert UTF-8 to Latin-1. Adapted from ucnv_SBCSFromUTF8(). */
  285. static void U_CALLCONV
  286. ucnv_Latin1FromUTF8(UConverterFromUnicodeArgs *pFromUArgs,
  287. UConverterToUnicodeArgs *pToUArgs,
  288. UErrorCode *pErrorCode) {
  289. UConverter *utf8;
  290. const uint8_t *source, *sourceLimit;
  291. uint8_t *target;
  292. int32_t targetCapacity;
  293. UChar32 c;
  294. uint8_t b, t1;
  295. /* set up the local pointers */
  296. utf8=pToUArgs->converter;
  297. source=(uint8_t *)pToUArgs->source;
  298. sourceLimit=(uint8_t *)pToUArgs->sourceLimit;
  299. target=(uint8_t *)pFromUArgs->target;
  300. targetCapacity=(int32_t)(pFromUArgs->targetLimit-pFromUArgs->target);
  301. /* get the converter state from the UTF-8 UConverter */
  302. if (utf8->toULength > 0) {
  303. c=(UChar32)utf8->toUnicodeStatus;
  304. } else {
  305. c = 0;
  306. }
  307. if(c!=0 && source<sourceLimit) {
  308. if(targetCapacity==0) {
  309. *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
  310. return;
  311. } else if(c>=0xc2 && c<=0xc3 && (t1=(uint8_t)(*source-0x80)) <= 0x3f) {
  312. ++source;
  313. *target++=(uint8_t)(((c&3)<<6)|t1);
  314. --targetCapacity;
  315. utf8->toUnicodeStatus=0;
  316. utf8->toULength=0;
  317. } else {
  318. /* complicated, illegal or unmappable input: fall back to the pivoting implementation */
  319. *pErrorCode=U_USING_DEFAULT_WARNING;
  320. return;
  321. }
  322. }
  323. /*
  324. * Make sure that the last byte sequence before sourceLimit is complete
  325. * or runs into a lead byte.
  326. * In the conversion loop compare source with sourceLimit only once
  327. * per multi-byte character.
  328. * For Latin-1, adjust sourceLimit only for 1 trail byte because
  329. * the conversion loop handles at most 2-byte sequences.
  330. */
  331. if(source<sourceLimit && U8_IS_LEAD(*(sourceLimit-1))) {
  332. --sourceLimit;
  333. }
  334. /* conversion loop */
  335. while(source<sourceLimit) {
  336. if(targetCapacity>0) {
  337. b=*source++;
  338. if(U8_IS_SINGLE(b)) {
  339. /* convert ASCII */
  340. *target++ = b;
  341. --targetCapacity;
  342. } else if( /* handle U+0080..U+00FF inline */
  343. b>=0xc2 && b<=0xc3 &&
  344. (t1=(uint8_t)(*source-0x80)) <= 0x3f
  345. ) {
  346. ++source;
  347. *target++=(uint8_t)(((b&3)<<6)|t1);
  348. --targetCapacity;
  349. } else {
  350. /* complicated, illegal or unmappable input: fall back to the pivoting implementation */
  351. pToUArgs->source=(char *)(source-1);
  352. pFromUArgs->target=(char *)target;
  353. *pErrorCode=U_USING_DEFAULT_WARNING;
  354. return;
  355. }
  356. } else {
  357. /* target is full */
  358. *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
  359. break;
  360. }
  361. }
  362. /*
  363. * The sourceLimit may have been adjusted before the conversion loop
  364. * to stop before a truncated sequence.
  365. * If so, then collect the truncated sequence now.
  366. * For Latin-1, there is at most exactly one lead byte because of the
  367. * smaller sourceLimit adjustment logic.
  368. */
  369. if(U_SUCCESS(*pErrorCode) && source<(sourceLimit=(uint8_t *)pToUArgs->sourceLimit)) {
  370. utf8->toUnicodeStatus=utf8->toUBytes[0]=b=*source++;
  371. utf8->toULength=1;
  372. utf8->mode=U8_COUNT_BYTES(b);
  373. }
  374. /* write back the updated pointers */
  375. pToUArgs->source=(char *)source;
  376. pFromUArgs->target=(char *)target;
  377. }
  378. static void U_CALLCONV
  379. _Latin1GetUnicodeSet(const UConverter *cnv,
  380. const USetAdder *sa,
  381. UConverterUnicodeSet which,
  382. UErrorCode *pErrorCode) {
  383. (void)cnv;
  384. (void)which;
  385. (void)pErrorCode;
  386. sa->addRange(sa->set, 0, 0xff);
  387. }
  388. U_CDECL_END
  389. static const UConverterImpl _Latin1Impl={
  390. UCNV_LATIN_1,
  391. nullptr,
  392. nullptr,
  393. nullptr,
  394. nullptr,
  395. nullptr,
  396. _Latin1ToUnicodeWithOffsets,
  397. _Latin1ToUnicodeWithOffsets,
  398. _Latin1FromUnicodeWithOffsets,
  399. _Latin1FromUnicodeWithOffsets,
  400. _Latin1GetNextUChar,
  401. nullptr,
  402. nullptr,
  403. nullptr,
  404. nullptr,
  405. _Latin1GetUnicodeSet,
  406. nullptr,
  407. ucnv_Latin1FromUTF8
  408. };
  409. static const UConverterStaticData _Latin1StaticData={
  410. sizeof(UConverterStaticData),
  411. "ISO-8859-1",
  412. 819, UCNV_IBM, UCNV_LATIN_1, 1, 1,
  413. { 0x1a, 0, 0, 0 }, 1, false, false,
  414. 0,
  415. 0,
  416. { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */
  417. };
  418. const UConverterSharedData _Latin1Data=
  419. UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(&_Latin1StaticData, &_Latin1Impl);
  420. /* US-ASCII ----------------------------------------------------------------- */
  421. U_CDECL_BEGIN
  422. /* This is a table-less version of ucnv_MBCSSingleToBMPWithOffsets(). */
  423. static void U_CALLCONV
  424. _ASCIIToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs,
  425. UErrorCode *pErrorCode) {
  426. const uint8_t *source, *sourceLimit;
  427. char16_t *target, *oldTarget;
  428. int32_t targetCapacity, length;
  429. int32_t *offsets;
  430. int32_t sourceIndex;
  431. uint8_t c;
  432. /* set up the local pointers */
  433. source=(const uint8_t *)pArgs->source;
  434. sourceLimit=(const uint8_t *)pArgs->sourceLimit;
  435. target=oldTarget=pArgs->target;
  436. targetCapacity=(int32_t)(pArgs->targetLimit-pArgs->target);
  437. offsets=pArgs->offsets;
  438. /* sourceIndex=-1 if the current character began in the previous buffer */
  439. sourceIndex=0;
  440. /*
  441. * since the conversion here is 1:1 char16_t:uint8_t, we need only one counter
  442. * for the minimum of the sourceLength and targetCapacity
  443. */
  444. length=(int32_t)(sourceLimit-source);
  445. if(length<targetCapacity) {
  446. targetCapacity=length;
  447. }
  448. if(targetCapacity>=8) {
  449. /* This loop is unrolled for speed and improved pipelining. */
  450. int32_t count, loops;
  451. char16_t oredChars;
  452. loops=count=targetCapacity>>3;
  453. do {
  454. oredChars=target[0]=source[0];
  455. oredChars|=target[1]=source[1];
  456. oredChars|=target[2]=source[2];
  457. oredChars|=target[3]=source[3];
  458. oredChars|=target[4]=source[4];
  459. oredChars|=target[5]=source[5];
  460. oredChars|=target[6]=source[6];
  461. oredChars|=target[7]=source[7];
  462. /* were all 16 entries really valid? */
  463. if(oredChars>0x7f) {
  464. /* no, return to the first of these 16 */
  465. break;
  466. }
  467. source+=8;
  468. target+=8;
  469. } while(--count>0);
  470. count=loops-count;
  471. targetCapacity-=count*8;
  472. if(offsets!=nullptr) {
  473. oldTarget+=count*8;
  474. while(count>0) {
  475. offsets[0]=sourceIndex++;
  476. offsets[1]=sourceIndex++;
  477. offsets[2]=sourceIndex++;
  478. offsets[3]=sourceIndex++;
  479. offsets[4]=sourceIndex++;
  480. offsets[5]=sourceIndex++;
  481. offsets[6]=sourceIndex++;
  482. offsets[7]=sourceIndex++;
  483. offsets+=8;
  484. --count;
  485. }
  486. }
  487. }
  488. /* conversion loop */
  489. c=0;
  490. while(targetCapacity>0 && (c=*source++)<=0x7f) {
  491. *target++=c;
  492. --targetCapacity;
  493. }
  494. if(c>0x7f) {
  495. /* callback(illegal); copy the current bytes to toUBytes[] */
  496. UConverter *cnv=pArgs->converter;
  497. cnv->toUBytes[0]=c;
  498. cnv->toULength=1;
  499. *pErrorCode=U_ILLEGAL_CHAR_FOUND;
  500. } else if(source<sourceLimit && target>=pArgs->targetLimit) {
  501. /* target is full */
  502. *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
  503. }
  504. /* set offsets since the start */
  505. if(offsets!=nullptr) {
  506. size_t count=target-oldTarget;
  507. while(count>0) {
  508. *offsets++=sourceIndex++;
  509. --count;
  510. }
  511. }
  512. /* write back the updated pointers */
  513. pArgs->source=(const char *)source;
  514. pArgs->target=target;
  515. pArgs->offsets=offsets;
  516. }
  517. /* This is a table-less version of ucnv_MBCSSingleGetNextUChar(). */
  518. static UChar32 U_CALLCONV
  519. _ASCIIGetNextUChar(UConverterToUnicodeArgs *pArgs,
  520. UErrorCode *pErrorCode) {
  521. const uint8_t *source;
  522. uint8_t b;
  523. source=(const uint8_t *)pArgs->source;
  524. if(source<(const uint8_t *)pArgs->sourceLimit) {
  525. b=*source++;
  526. pArgs->source=(const char *)source;
  527. if(b<=0x7f) {
  528. return b;
  529. } else {
  530. UConverter *cnv=pArgs->converter;
  531. cnv->toUBytes[0]=b;
  532. cnv->toULength=1;
  533. *pErrorCode=U_ILLEGAL_CHAR_FOUND;
  534. return 0xffff;
  535. }
  536. }
  537. /* no output because of empty input */
  538. *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
  539. return 0xffff;
  540. }
  541. /* "Convert" UTF-8 to US-ASCII: Validate and copy. */
  542. static void U_CALLCONV
  543. ucnv_ASCIIFromUTF8(UConverterFromUnicodeArgs *pFromUArgs,
  544. UConverterToUnicodeArgs *pToUArgs,
  545. UErrorCode *pErrorCode) {
  546. const uint8_t *source, *sourceLimit;
  547. uint8_t *target;
  548. int32_t targetCapacity, length;
  549. uint8_t c;
  550. if(pToUArgs->converter->toULength > 0) {
  551. /* no handling of partial UTF-8 characters here, fall back to pivoting */
  552. *pErrorCode=U_USING_DEFAULT_WARNING;
  553. return;
  554. }
  555. /* set up the local pointers */
  556. source=(const uint8_t *)pToUArgs->source;
  557. sourceLimit=(const uint8_t *)pToUArgs->sourceLimit;
  558. target=(uint8_t *)pFromUArgs->target;
  559. targetCapacity=(int32_t)(pFromUArgs->targetLimit-pFromUArgs->target);
  560. /*
  561. * since the conversion here is 1:1 uint8_t:uint8_t, we need only one counter
  562. * for the minimum of the sourceLength and targetCapacity
  563. */
  564. length=(int32_t)(sourceLimit-source);
  565. if(length<targetCapacity) {
  566. targetCapacity=length;
  567. }
  568. /* unroll the loop with the most common case */
  569. if(targetCapacity>=16) {
  570. int32_t count, loops;
  571. uint8_t oredChars;
  572. loops=count=targetCapacity>>4;
  573. do {
  574. oredChars=*target++=*source++;
  575. oredChars|=*target++=*source++;
  576. oredChars|=*target++=*source++;
  577. oredChars|=*target++=*source++;
  578. oredChars|=*target++=*source++;
  579. oredChars|=*target++=*source++;
  580. oredChars|=*target++=*source++;
  581. oredChars|=*target++=*source++;
  582. oredChars|=*target++=*source++;
  583. oredChars|=*target++=*source++;
  584. oredChars|=*target++=*source++;
  585. oredChars|=*target++=*source++;
  586. oredChars|=*target++=*source++;
  587. oredChars|=*target++=*source++;
  588. oredChars|=*target++=*source++;
  589. oredChars|=*target++=*source++;
  590. /* were all 16 entries really valid? */
  591. if(oredChars>0x7f) {
  592. /* no, return to the first of these 16 */
  593. source-=16;
  594. target-=16;
  595. break;
  596. }
  597. } while(--count>0);
  598. count=loops-count;
  599. targetCapacity-=16*count;
  600. }
  601. /* conversion loop */
  602. c=0;
  603. while(targetCapacity>0 && (c=*source)<=0x7f) {
  604. ++source;
  605. *target++=c;
  606. --targetCapacity;
  607. }
  608. if(c>0x7f) {
  609. /* non-ASCII character, handle in standard converter */
  610. *pErrorCode=U_USING_DEFAULT_WARNING;
  611. } else if(source<sourceLimit && target>=(const uint8_t *)pFromUArgs->targetLimit) {
  612. /* target is full */
  613. *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
  614. }
  615. /* write back the updated pointers */
  616. pToUArgs->source=(const char *)source;
  617. pFromUArgs->target=(char *)target;
  618. }
  619. static void U_CALLCONV
  620. _ASCIIGetUnicodeSet(const UConverter *cnv,
  621. const USetAdder *sa,
  622. UConverterUnicodeSet which,
  623. UErrorCode *pErrorCode) {
  624. (void)cnv;
  625. (void)which;
  626. (void)pErrorCode;
  627. sa->addRange(sa->set, 0, 0x7f);
  628. }
  629. U_CDECL_END
  630. static const UConverterImpl _ASCIIImpl={
  631. UCNV_US_ASCII,
  632. nullptr,
  633. nullptr,
  634. nullptr,
  635. nullptr,
  636. nullptr,
  637. _ASCIIToUnicodeWithOffsets,
  638. _ASCIIToUnicodeWithOffsets,
  639. _Latin1FromUnicodeWithOffsets,
  640. _Latin1FromUnicodeWithOffsets,
  641. _ASCIIGetNextUChar,
  642. nullptr,
  643. nullptr,
  644. nullptr,
  645. nullptr,
  646. _ASCIIGetUnicodeSet,
  647. nullptr,
  648. ucnv_ASCIIFromUTF8
  649. };
  650. static const UConverterStaticData _ASCIIStaticData={
  651. sizeof(UConverterStaticData),
  652. "US-ASCII",
  653. 367, UCNV_IBM, UCNV_US_ASCII, 1, 1,
  654. { 0x1a, 0, 0, 0 }, 1, false, false,
  655. 0,
  656. 0,
  657. { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */
  658. };
  659. const UConverterSharedData _ASCIIData=
  660. UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(&_ASCIIStaticData, &_ASCIIImpl);
  661. #endif