ustr_wcs.cpp 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535
  1. // © 2016 and later: Unicode, Inc. and others.
  2. // License & terms of use: http://www.unicode.org/copyright.html
  3. /*
  4. *******************************************************************************
  5. *
  6. * Copyright (C) 2001-2012, International Business Machines
  7. * Corporation and others. All Rights Reserved.
  8. *
  9. *******************************************************************************
  10. * file name: ustr_wcs.cpp
  11. * encoding: UTF-8
  12. * tab size: 8 (not used)
  13. * indentation:4
  14. *
  15. * created on: 2004sep07
  16. * created by: Markus W. Scherer
  17. *
  18. * u_strToWCS() and u_strFromWCS() functions
  19. * moved here from ustrtrns.c for better modularization.
  20. */
  21. #include "unicode/utypes.h"
  22. #include "unicode/ustring.h"
  23. #include "cstring.h"
  24. #include "cwchar.h"
  25. #include "cmemory.h"
  26. #include "ustr_imp.h"
  27. #include "ustr_cnv.h"
  28. #if defined(U_WCHAR_IS_UTF16) || defined(U_WCHAR_IS_UTF32) || !UCONFIG_NO_CONVERSION
  29. #define _STACK_BUFFER_CAPACITY 1000
  30. #define _BUFFER_CAPACITY_MULTIPLIER 2
  31. #if !defined(U_WCHAR_IS_UTF16) && !defined(U_WCHAR_IS_UTF32)
  32. // TODO: We should use CharString for char buffers and UnicodeString for char16_t buffers.
  33. // Then we could change this to work only with wchar_t buffers.
  34. static inline UBool
  35. u_growAnyBufferFromStatic(void *context,
  36. void **pBuffer, int32_t *pCapacity, int32_t reqCapacity,
  37. int32_t length, int32_t size) {
  38. // Use char* not void* to avoid the compiler's strict-aliasing assumptions
  39. // and related warnings.
  40. char *newBuffer=(char *)uprv_malloc(reqCapacity*size);
  41. if(newBuffer!=nullptr) {
  42. if(length>0) {
  43. uprv_memcpy(newBuffer, *pBuffer, (size_t)length*size);
  44. }
  45. *pCapacity=reqCapacity;
  46. } else {
  47. *pCapacity=0;
  48. }
  49. /* release the old pBuffer if it was not statically allocated */
  50. if(*pBuffer!=(char *)context) {
  51. uprv_free(*pBuffer);
  52. }
  53. *pBuffer=newBuffer;
  54. return newBuffer!=nullptr;
  55. }
  56. /* helper function */
  57. static wchar_t*
  58. _strToWCS(wchar_t *dest,
  59. int32_t destCapacity,
  60. int32_t *pDestLength,
  61. const char16_t *src,
  62. int32_t srcLength,
  63. UErrorCode *pErrorCode){
  64. char stackBuffer [_STACK_BUFFER_CAPACITY];
  65. char* tempBuf = stackBuffer;
  66. int32_t tempBufCapacity = _STACK_BUFFER_CAPACITY;
  67. char* tempBufLimit = stackBuffer + tempBufCapacity;
  68. UConverter* conv = nullptr;
  69. char* saveBuf = tempBuf;
  70. wchar_t* intTarget=nullptr;
  71. int32_t intTargetCapacity=0;
  72. int count=0,retVal=0;
  73. const char16_t *pSrcLimit =nullptr;
  74. const char16_t *pSrc = src;
  75. conv = u_getDefaultConverter(pErrorCode);
  76. if(U_FAILURE(*pErrorCode)){
  77. return nullptr;
  78. }
  79. if(srcLength == -1){
  80. srcLength = u_strlen(pSrc);
  81. }
  82. pSrcLimit = pSrc + srcLength;
  83. for(;;) {
  84. /* reset the error state */
  85. *pErrorCode = U_ZERO_ERROR;
  86. /* convert to chars using default converter */
  87. ucnv_fromUnicode(conv,&tempBuf,tempBufLimit,&pSrc,pSrcLimit,nullptr,(UBool)(pSrc==pSrcLimit),pErrorCode);
  88. count =(tempBuf - saveBuf);
  89. /* This should rarely occur */
  90. if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR){
  91. tempBuf = saveBuf;
  92. /* we don't have enough room on the stack grow the buffer */
  93. int32_t newCapacity = 2 * srcLength;
  94. if(newCapacity <= tempBufCapacity) {
  95. newCapacity = _BUFFER_CAPACITY_MULTIPLIER * tempBufCapacity;
  96. }
  97. if(!u_growAnyBufferFromStatic(stackBuffer,(void**) &tempBuf, &tempBufCapacity,
  98. newCapacity, count, 1)) {
  99. goto cleanup;
  100. }
  101. saveBuf = tempBuf;
  102. tempBufLimit = tempBuf + tempBufCapacity;
  103. tempBuf = tempBuf + count;
  104. } else {
  105. break;
  106. }
  107. }
  108. if(U_FAILURE(*pErrorCode)){
  109. goto cleanup;
  110. }
  111. /* done with conversion null terminate the char buffer */
  112. if(count>=tempBufCapacity){
  113. tempBuf = saveBuf;
  114. /* we don't have enough room on the stack grow the buffer */
  115. if(!u_growAnyBufferFromStatic(stackBuffer,(void**) &tempBuf, &tempBufCapacity,
  116. count+1, count, 1)) {
  117. goto cleanup;
  118. }
  119. saveBuf = tempBuf;
  120. }
  121. saveBuf[count]=0;
  122. /* allocate more space than required
  123. * here we assume that every char requires
  124. * no more than 2 wchar_ts
  125. */
  126. intTargetCapacity = (count * _BUFFER_CAPACITY_MULTIPLIER + 1) /*for null termination */;
  127. intTarget = (wchar_t*)uprv_malloc( intTargetCapacity * sizeof(wchar_t) );
  128. if(intTarget){
  129. int32_t nulLen = 0;
  130. int32_t remaining = intTargetCapacity;
  131. wchar_t* pIntTarget=intTarget;
  132. tempBuf = saveBuf;
  133. /* now convert the mbs to wcs */
  134. for(;;){
  135. /* we can call the system API since we are sure that
  136. * there is atleast 1 null in the input
  137. */
  138. retVal = uprv_mbstowcs(pIntTarget,(tempBuf+nulLen),remaining);
  139. if(retVal==-1){
  140. *pErrorCode = U_INVALID_CHAR_FOUND;
  141. break;
  142. }else if(retVal== remaining){/* should never occur */
  143. int numWritten = (pIntTarget-intTarget);
  144. u_growAnyBufferFromStatic(nullptr,(void**) &intTarget,
  145. &intTargetCapacity,
  146. intTargetCapacity * _BUFFER_CAPACITY_MULTIPLIER,
  147. numWritten,
  148. sizeof(wchar_t));
  149. pIntTarget = intTarget;
  150. remaining=intTargetCapacity;
  151. if(nulLen!=count){ /*there are embedded nulls*/
  152. pIntTarget+=numWritten;
  153. remaining-=numWritten;
  154. }
  155. }else{
  156. int32_t nulVal;
  157. /*scan for nulls */
  158. /* we donot check for limit since tempBuf is null terminated */
  159. while(tempBuf[nulLen++] != 0){
  160. }
  161. nulVal = (nulLen < srcLength) ? 1 : 0;
  162. pIntTarget = pIntTarget + retVal+nulVal;
  163. remaining -=(retVal+nulVal);
  164. /* check if we have reached the source limit*/
  165. if(nulLen>=(count)){
  166. break;
  167. }
  168. }
  169. }
  170. count = (int32_t)(pIntTarget-intTarget);
  171. if(0 < count && count <= destCapacity){
  172. uprv_memcpy(dest, intTarget, (size_t)count*sizeof(wchar_t));
  173. }
  174. if(pDestLength){
  175. *pDestLength = count;
  176. }
  177. /* free the allocated memory */
  178. uprv_free(intTarget);
  179. }else{
  180. *pErrorCode = U_MEMORY_ALLOCATION_ERROR;
  181. }
  182. cleanup:
  183. /* are we still using stack buffer */
  184. if(stackBuffer != saveBuf){
  185. uprv_free(saveBuf);
  186. }
  187. u_terminateWChars(dest,destCapacity,count,pErrorCode);
  188. u_releaseDefaultConverter(conv);
  189. return dest;
  190. }
  191. #endif
  192. U_CAPI wchar_t* U_EXPORT2
  193. u_strToWCS(wchar_t *dest,
  194. int32_t destCapacity,
  195. int32_t *pDestLength,
  196. const char16_t *src,
  197. int32_t srcLength,
  198. UErrorCode *pErrorCode){
  199. /* args check */
  200. if(pErrorCode==nullptr || U_FAILURE(*pErrorCode)){
  201. return nullptr;
  202. }
  203. if( (src==nullptr && srcLength!=0) || srcLength < -1 ||
  204. (destCapacity<0) || (dest == nullptr && destCapacity > 0)
  205. ) {
  206. *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
  207. return nullptr;
  208. }
  209. #ifdef U_WCHAR_IS_UTF16
  210. /* wchar_t is UTF-16 just do a memcpy */
  211. if(srcLength == -1){
  212. srcLength = u_strlen(src);
  213. }
  214. if(0 < srcLength && srcLength <= destCapacity){
  215. u_memcpy((char16_t *)dest, src, srcLength);
  216. }
  217. if(pDestLength){
  218. *pDestLength = srcLength;
  219. }
  220. u_terminateUChars((char16_t *)dest,destCapacity,srcLength,pErrorCode);
  221. return dest;
  222. #elif defined U_WCHAR_IS_UTF32
  223. return (wchar_t*)u_strToUTF32((UChar32*)dest, destCapacity, pDestLength,
  224. src, srcLength, pErrorCode);
  225. #else
  226. return _strToWCS(dest,destCapacity,pDestLength,src,srcLength, pErrorCode);
  227. #endif
  228. }
  229. #if !defined(U_WCHAR_IS_UTF16) && !defined(U_WCHAR_IS_UTF32)
  230. /* helper function */
  231. static char16_t*
  232. _strFromWCS( char16_t *dest,
  233. int32_t destCapacity,
  234. int32_t *pDestLength,
  235. const wchar_t *src,
  236. int32_t srcLength,
  237. UErrorCode *pErrorCode)
  238. {
  239. int32_t retVal =0, count =0 ;
  240. UConverter* conv = nullptr;
  241. char16_t* pTarget = nullptr;
  242. char16_t* pTargetLimit = nullptr;
  243. char16_t* target = nullptr;
  244. char16_t uStack [_STACK_BUFFER_CAPACITY];
  245. wchar_t wStack[_STACK_BUFFER_CAPACITY];
  246. wchar_t* pWStack = wStack;
  247. char cStack[_STACK_BUFFER_CAPACITY];
  248. int32_t cStackCap = _STACK_BUFFER_CAPACITY;
  249. char* pCSrc=cStack;
  250. char* pCSave=pCSrc;
  251. char* pCSrcLimit=nullptr;
  252. const wchar_t* pSrc = src;
  253. const wchar_t* pSrcLimit = nullptr;
  254. if(srcLength ==-1){
  255. /* if the wchar_t source is null terminated we can safely
  256. * assume that there are no embedded nulls, this is a fast
  257. * path for null terminated strings.
  258. */
  259. for(;;){
  260. /* convert wchars to chars */
  261. retVal = uprv_wcstombs(pCSrc,src, cStackCap);
  262. if(retVal == -1){
  263. *pErrorCode = U_ILLEGAL_CHAR_FOUND;
  264. goto cleanup;
  265. }else if(retVal >= (cStackCap-1)){
  266. /* Should rarely occur */
  267. u_growAnyBufferFromStatic(cStack,(void**)&pCSrc,&cStackCap,
  268. cStackCap * _BUFFER_CAPACITY_MULTIPLIER, 0, sizeof(char));
  269. pCSave = pCSrc;
  270. }else{
  271. /* converted every thing */
  272. pCSrc = pCSrc+retVal;
  273. break;
  274. }
  275. }
  276. }else{
  277. /* here the source is not null terminated
  278. * so it may have nulls embedded and we need to
  279. * do some extra processing
  280. */
  281. int32_t remaining =cStackCap;
  282. pSrcLimit = src + srcLength;
  283. for(;;){
  284. int32_t nulLen = 0;
  285. /* find nulls in the string */
  286. while(nulLen<srcLength && pSrc[nulLen++]!=0){
  287. }
  288. if((pSrc+nulLen) < pSrcLimit){
  289. /* check if we have enough room in pCSrc */
  290. if(remaining < (nulLen * MB_CUR_MAX)){
  291. /* should rarely occur */
  292. int32_t len = (pCSrc-pCSave);
  293. pCSrc = pCSave;
  294. /* we do not have enough room so grow the buffer*/
  295. u_growAnyBufferFromStatic(cStack,(void**)&pCSrc,&cStackCap,
  296. _BUFFER_CAPACITY_MULTIPLIER*cStackCap+(nulLen*MB_CUR_MAX),len,sizeof(char));
  297. pCSave = pCSrc;
  298. pCSrc = pCSave+len;
  299. remaining = cStackCap-(pCSrc - pCSave);
  300. }
  301. /* we have found a null so convert the
  302. * chunk from beginning of non-null char to null
  303. */
  304. retVal = uprv_wcstombs(pCSrc,pSrc,remaining);
  305. if(retVal==-1){
  306. /* an error occurred bail out */
  307. *pErrorCode = U_ILLEGAL_CHAR_FOUND;
  308. goto cleanup;
  309. }
  310. pCSrc += retVal+1 /* already null terminated */;
  311. pSrc += nulLen; /* skip past the null */
  312. srcLength-=nulLen; /* decrement the srcLength */
  313. remaining -= (pCSrc-pCSave);
  314. }else{
  315. /* the source is not null terminated and we are
  316. * end of source so we copy the source to a temp buffer
  317. * null terminate it and convert wchar_ts to chars
  318. */
  319. if(nulLen >= _STACK_BUFFER_CAPACITY){
  320. /* Should rarely occur */
  321. /* allocate new buffer buffer */
  322. pWStack =(wchar_t*) uprv_malloc(sizeof(wchar_t) * (nulLen + 1));
  323. if(pWStack==nullptr){
  324. *pErrorCode = U_MEMORY_ALLOCATION_ERROR;
  325. goto cleanup;
  326. }
  327. }
  328. if(nulLen>0){
  329. /* copy the contents to tempStack */
  330. uprv_memcpy(pWStack, pSrc, (size_t)nulLen*sizeof(wchar_t));
  331. }
  332. /* null terminate the tempBuffer */
  333. pWStack[nulLen] =0 ;
  334. if(remaining < (nulLen * MB_CUR_MAX)){
  335. /* Should rarely occur */
  336. int32_t len = (pCSrc-pCSave);
  337. pCSrc = pCSave;
  338. /* we do not have enough room so grow the buffer*/
  339. u_growAnyBufferFromStatic(cStack,(void**)&pCSrc,&cStackCap,
  340. cStackCap+(nulLen*MB_CUR_MAX),len,sizeof(char));
  341. pCSave = pCSrc;
  342. pCSrc = pCSave+len;
  343. remaining = cStackCap-(pCSrc - pCSave);
  344. }
  345. /* convert to chars */
  346. retVal = uprv_wcstombs(pCSrc,pWStack,remaining);
  347. pCSrc += retVal;
  348. pSrc += nulLen;
  349. srcLength-=nulLen; /* decrement the srcLength */
  350. break;
  351. }
  352. }
  353. }
  354. /* OK..now we have converted from wchar_ts to chars now
  355. * convert chars to UChars
  356. */
  357. pCSrcLimit = pCSrc;
  358. pCSrc = pCSave;
  359. pTarget = target= dest;
  360. pTargetLimit = dest + destCapacity;
  361. conv= u_getDefaultConverter(pErrorCode);
  362. if(U_FAILURE(*pErrorCode)|| conv==nullptr){
  363. goto cleanup;
  364. }
  365. for(;;) {
  366. *pErrorCode = U_ZERO_ERROR;
  367. /* convert to stack buffer*/
  368. ucnv_toUnicode(conv,&pTarget,pTargetLimit,(const char**)&pCSrc,pCSrcLimit,nullptr,(UBool)(pCSrc==pCSrcLimit),pErrorCode);
  369. /* increment count to number written to stack */
  370. count+= pTarget - target;
  371. if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR){
  372. target = uStack;
  373. pTarget = uStack;
  374. pTargetLimit = uStack + _STACK_BUFFER_CAPACITY;
  375. } else {
  376. break;
  377. }
  378. }
  379. if(pDestLength){
  380. *pDestLength =count;
  381. }
  382. u_terminateUChars(dest,destCapacity,count,pErrorCode);
  383. cleanup:
  384. if(cStack != pCSave){
  385. uprv_free(pCSave);
  386. }
  387. if(wStack != pWStack){
  388. uprv_free(pWStack);
  389. }
  390. u_releaseDefaultConverter(conv);
  391. return dest;
  392. }
  393. #endif
  394. U_CAPI char16_t* U_EXPORT2
  395. u_strFromWCS(char16_t *dest,
  396. int32_t destCapacity,
  397. int32_t *pDestLength,
  398. const wchar_t *src,
  399. int32_t srcLength,
  400. UErrorCode *pErrorCode)
  401. {
  402. /* args check */
  403. if(pErrorCode==nullptr || U_FAILURE(*pErrorCode)){
  404. return nullptr;
  405. }
  406. if( (src==nullptr && srcLength!=0) || srcLength < -1 ||
  407. (destCapacity<0) || (dest == nullptr && destCapacity > 0)
  408. ) {
  409. *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
  410. return nullptr;
  411. }
  412. #ifdef U_WCHAR_IS_UTF16
  413. /* wchar_t is UTF-16 just do a memcpy */
  414. if(srcLength == -1){
  415. srcLength = u_strlen((const char16_t *)src);
  416. }
  417. if(0 < srcLength && srcLength <= destCapacity){
  418. u_memcpy(dest, (const char16_t *)src, srcLength);
  419. }
  420. if(pDestLength){
  421. *pDestLength = srcLength;
  422. }
  423. u_terminateUChars(dest,destCapacity,srcLength,pErrorCode);
  424. return dest;
  425. #elif defined U_WCHAR_IS_UTF32
  426. return u_strFromUTF32(dest, destCapacity, pDestLength,
  427. (UChar32*)src, srcLength, pErrorCode);
  428. #else
  429. return _strFromWCS(dest,destCapacity,pDestLength,src,srcLength,pErrorCode);
  430. #endif
  431. }
  432. #endif /* #if defined(U_WCHAR_IS_UTF16) || defined(U_WCHAR_IS_UTF32) || !UCONFIG_NO_CONVERSION */