123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535 |
- // © 2016 and later: Unicode, Inc. and others.
- // License & terms of use: http://www.unicode.org/copyright.html
- /*
- *******************************************************************************
- *
- * Copyright (C) 2001-2012, International Business Machines
- * Corporation and others. All Rights Reserved.
- *
- *******************************************************************************
- * file name: ustr_wcs.cpp
- * encoding: UTF-8
- * tab size: 8 (not used)
- * indentation:4
- *
- * created on: 2004sep07
- * created by: Markus W. Scherer
- *
- * u_strToWCS() and u_strFromWCS() functions
- * moved here from ustrtrns.c for better modularization.
- */
- #include "unicode/utypes.h"
- #include "unicode/ustring.h"
- #include "cstring.h"
- #include "cwchar.h"
- #include "cmemory.h"
- #include "ustr_imp.h"
- #include "ustr_cnv.h"
- #if defined(U_WCHAR_IS_UTF16) || defined(U_WCHAR_IS_UTF32) || !UCONFIG_NO_CONVERSION
- #define _STACK_BUFFER_CAPACITY 1000
- #define _BUFFER_CAPACITY_MULTIPLIER 2
- #if !defined(U_WCHAR_IS_UTF16) && !defined(U_WCHAR_IS_UTF32)
- // TODO: We should use CharString for char buffers and UnicodeString for char16_t buffers.
- // Then we could change this to work only with wchar_t buffers.
- static inline UBool
- u_growAnyBufferFromStatic(void *context,
- void **pBuffer, int32_t *pCapacity, int32_t reqCapacity,
- int32_t length, int32_t size) {
- // Use char* not void* to avoid the compiler's strict-aliasing assumptions
- // and related warnings.
- char *newBuffer=(char *)uprv_malloc(reqCapacity*size);
- if(newBuffer!=nullptr) {
- if(length>0) {
- uprv_memcpy(newBuffer, *pBuffer, (size_t)length*size);
- }
- *pCapacity=reqCapacity;
- } else {
- *pCapacity=0;
- }
- /* release the old pBuffer if it was not statically allocated */
- if(*pBuffer!=(char *)context) {
- uprv_free(*pBuffer);
- }
- *pBuffer=newBuffer;
- return newBuffer!=nullptr;
- }
- /* helper function */
- static wchar_t*
- _strToWCS(wchar_t *dest,
- int32_t destCapacity,
- int32_t *pDestLength,
- const char16_t *src,
- int32_t srcLength,
- UErrorCode *pErrorCode){
- char stackBuffer [_STACK_BUFFER_CAPACITY];
- char* tempBuf = stackBuffer;
- int32_t tempBufCapacity = _STACK_BUFFER_CAPACITY;
- char* tempBufLimit = stackBuffer + tempBufCapacity;
- UConverter* conv = nullptr;
- char* saveBuf = tempBuf;
- wchar_t* intTarget=nullptr;
- int32_t intTargetCapacity=0;
- int count=0,retVal=0;
-
- const char16_t *pSrcLimit =nullptr;
- const char16_t *pSrc = src;
- conv = u_getDefaultConverter(pErrorCode);
-
- if(U_FAILURE(*pErrorCode)){
- return nullptr;
- }
-
- if(srcLength == -1){
- srcLength = u_strlen(pSrc);
- }
-
- pSrcLimit = pSrc + srcLength;
- for(;;) {
- /* reset the error state */
- *pErrorCode = U_ZERO_ERROR;
- /* convert to chars using default converter */
- ucnv_fromUnicode(conv,&tempBuf,tempBufLimit,&pSrc,pSrcLimit,nullptr,(UBool)(pSrc==pSrcLimit),pErrorCode);
- count =(tempBuf - saveBuf);
-
- /* This should rarely occur */
- if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR){
- tempBuf = saveBuf;
-
- /* we don't have enough room on the stack grow the buffer */
- int32_t newCapacity = 2 * srcLength;
- if(newCapacity <= tempBufCapacity) {
- newCapacity = _BUFFER_CAPACITY_MULTIPLIER * tempBufCapacity;
- }
- if(!u_growAnyBufferFromStatic(stackBuffer,(void**) &tempBuf, &tempBufCapacity,
- newCapacity, count, 1)) {
- goto cleanup;
- }
-
- saveBuf = tempBuf;
- tempBufLimit = tempBuf + tempBufCapacity;
- tempBuf = tempBuf + count;
- } else {
- break;
- }
- }
- if(U_FAILURE(*pErrorCode)){
- goto cleanup;
- }
- /* done with conversion null terminate the char buffer */
- if(count>=tempBufCapacity){
- tempBuf = saveBuf;
- /* we don't have enough room on the stack grow the buffer */
- if(!u_growAnyBufferFromStatic(stackBuffer,(void**) &tempBuf, &tempBufCapacity,
- count+1, count, 1)) {
- goto cleanup;
- }
- saveBuf = tempBuf;
- }
-
- saveBuf[count]=0;
-
- /* allocate more space than required
- * here we assume that every char requires
- * no more than 2 wchar_ts
- */
- intTargetCapacity = (count * _BUFFER_CAPACITY_MULTIPLIER + 1) /*for null termination */;
- intTarget = (wchar_t*)uprv_malloc( intTargetCapacity * sizeof(wchar_t) );
- if(intTarget){
- int32_t nulLen = 0;
- int32_t remaining = intTargetCapacity;
- wchar_t* pIntTarget=intTarget;
- tempBuf = saveBuf;
-
- /* now convert the mbs to wcs */
- for(;;){
-
- /* we can call the system API since we are sure that
- * there is atleast 1 null in the input
- */
- retVal = uprv_mbstowcs(pIntTarget,(tempBuf+nulLen),remaining);
-
- if(retVal==-1){
- *pErrorCode = U_INVALID_CHAR_FOUND;
- break;
- }else if(retVal== remaining){/* should never occur */
- int numWritten = (pIntTarget-intTarget);
- u_growAnyBufferFromStatic(nullptr,(void**) &intTarget,
- &intTargetCapacity,
- intTargetCapacity * _BUFFER_CAPACITY_MULTIPLIER,
- numWritten,
- sizeof(wchar_t));
- pIntTarget = intTarget;
- remaining=intTargetCapacity;
- if(nulLen!=count){ /*there are embedded nulls*/
- pIntTarget+=numWritten;
- remaining-=numWritten;
- }
- }else{
- int32_t nulVal;
- /*scan for nulls */
- /* we donot check for limit since tempBuf is null terminated */
- while(tempBuf[nulLen++] != 0){
- }
- nulVal = (nulLen < srcLength) ? 1 : 0;
- pIntTarget = pIntTarget + retVal+nulVal;
- remaining -=(retVal+nulVal);
-
- /* check if we have reached the source limit*/
- if(nulLen>=(count)){
- break;
- }
- }
- }
- count = (int32_t)(pIntTarget-intTarget);
-
- if(0 < count && count <= destCapacity){
- uprv_memcpy(dest, intTarget, (size_t)count*sizeof(wchar_t));
- }
- if(pDestLength){
- *pDestLength = count;
- }
- /* free the allocated memory */
- uprv_free(intTarget);
- }else{
- *pErrorCode = U_MEMORY_ALLOCATION_ERROR;
- }
- cleanup:
- /* are we still using stack buffer */
- if(stackBuffer != saveBuf){
- uprv_free(saveBuf);
- }
- u_terminateWChars(dest,destCapacity,count,pErrorCode);
- u_releaseDefaultConverter(conv);
- return dest;
- }
- #endif
- U_CAPI wchar_t* U_EXPORT2
- u_strToWCS(wchar_t *dest,
- int32_t destCapacity,
- int32_t *pDestLength,
- const char16_t *src,
- int32_t srcLength,
- UErrorCode *pErrorCode){
- /* args check */
- if(pErrorCode==nullptr || U_FAILURE(*pErrorCode)){
- return nullptr;
- }
-
- if( (src==nullptr && srcLength!=0) || srcLength < -1 ||
- (destCapacity<0) || (dest == nullptr && destCapacity > 0)
- ) {
- *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
- return nullptr;
- }
-
- #ifdef U_WCHAR_IS_UTF16
- /* wchar_t is UTF-16 just do a memcpy */
- if(srcLength == -1){
- srcLength = u_strlen(src);
- }
- if(0 < srcLength && srcLength <= destCapacity){
- u_memcpy((char16_t *)dest, src, srcLength);
- }
- if(pDestLength){
- *pDestLength = srcLength;
- }
- u_terminateUChars((char16_t *)dest,destCapacity,srcLength,pErrorCode);
- return dest;
- #elif defined U_WCHAR_IS_UTF32
-
- return (wchar_t*)u_strToUTF32((UChar32*)dest, destCapacity, pDestLength,
- src, srcLength, pErrorCode);
- #else
-
- return _strToWCS(dest,destCapacity,pDestLength,src,srcLength, pErrorCode);
-
- #endif
- }
- #if !defined(U_WCHAR_IS_UTF16) && !defined(U_WCHAR_IS_UTF32)
- /* helper function */
- static char16_t*
- _strFromWCS( char16_t *dest,
- int32_t destCapacity,
- int32_t *pDestLength,
- const wchar_t *src,
- int32_t srcLength,
- UErrorCode *pErrorCode)
- {
- int32_t retVal =0, count =0 ;
- UConverter* conv = nullptr;
- char16_t* pTarget = nullptr;
- char16_t* pTargetLimit = nullptr;
- char16_t* target = nullptr;
-
- char16_t uStack [_STACK_BUFFER_CAPACITY];
- wchar_t wStack[_STACK_BUFFER_CAPACITY];
- wchar_t* pWStack = wStack;
- char cStack[_STACK_BUFFER_CAPACITY];
- int32_t cStackCap = _STACK_BUFFER_CAPACITY;
- char* pCSrc=cStack;
- char* pCSave=pCSrc;
- char* pCSrcLimit=nullptr;
- const wchar_t* pSrc = src;
- const wchar_t* pSrcLimit = nullptr;
- if(srcLength ==-1){
- /* if the wchar_t source is null terminated we can safely
- * assume that there are no embedded nulls, this is a fast
- * path for null terminated strings.
- */
- for(;;){
- /* convert wchars to chars */
- retVal = uprv_wcstombs(pCSrc,src, cStackCap);
-
- if(retVal == -1){
- *pErrorCode = U_ILLEGAL_CHAR_FOUND;
- goto cleanup;
- }else if(retVal >= (cStackCap-1)){
- /* Should rarely occur */
- u_growAnyBufferFromStatic(cStack,(void**)&pCSrc,&cStackCap,
- cStackCap * _BUFFER_CAPACITY_MULTIPLIER, 0, sizeof(char));
- pCSave = pCSrc;
- }else{
- /* converted every thing */
- pCSrc = pCSrc+retVal;
- break;
- }
- }
-
- }else{
- /* here the source is not null terminated
- * so it may have nulls embedded and we need to
- * do some extra processing
- */
- int32_t remaining =cStackCap;
-
- pSrcLimit = src + srcLength;
- for(;;){
- int32_t nulLen = 0;
- /* find nulls in the string */
- while(nulLen<srcLength && pSrc[nulLen++]!=0){
- }
- if((pSrc+nulLen) < pSrcLimit){
- /* check if we have enough room in pCSrc */
- if(remaining < (nulLen * MB_CUR_MAX)){
- /* should rarely occur */
- int32_t len = (pCSrc-pCSave);
- pCSrc = pCSave;
- /* we do not have enough room so grow the buffer*/
- u_growAnyBufferFromStatic(cStack,(void**)&pCSrc,&cStackCap,
- _BUFFER_CAPACITY_MULTIPLIER*cStackCap+(nulLen*MB_CUR_MAX),len,sizeof(char));
- pCSave = pCSrc;
- pCSrc = pCSave+len;
- remaining = cStackCap-(pCSrc - pCSave);
- }
- /* we have found a null so convert the
- * chunk from beginning of non-null char to null
- */
- retVal = uprv_wcstombs(pCSrc,pSrc,remaining);
- if(retVal==-1){
- /* an error occurred bail out */
- *pErrorCode = U_ILLEGAL_CHAR_FOUND;
- goto cleanup;
- }
- pCSrc += retVal+1 /* already null terminated */;
- pSrc += nulLen; /* skip past the null */
- srcLength-=nulLen; /* decrement the srcLength */
- remaining -= (pCSrc-pCSave);
- }else{
- /* the source is not null terminated and we are
- * end of source so we copy the source to a temp buffer
- * null terminate it and convert wchar_ts to chars
- */
- if(nulLen >= _STACK_BUFFER_CAPACITY){
- /* Should rarely occur */
- /* allocate new buffer buffer */
- pWStack =(wchar_t*) uprv_malloc(sizeof(wchar_t) * (nulLen + 1));
- if(pWStack==nullptr){
- *pErrorCode = U_MEMORY_ALLOCATION_ERROR;
- goto cleanup;
- }
- }
- if(nulLen>0){
- /* copy the contents to tempStack */
- uprv_memcpy(pWStack, pSrc, (size_t)nulLen*sizeof(wchar_t));
- }
-
- /* null terminate the tempBuffer */
- pWStack[nulLen] =0 ;
-
- if(remaining < (nulLen * MB_CUR_MAX)){
- /* Should rarely occur */
- int32_t len = (pCSrc-pCSave);
- pCSrc = pCSave;
- /* we do not have enough room so grow the buffer*/
- u_growAnyBufferFromStatic(cStack,(void**)&pCSrc,&cStackCap,
- cStackCap+(nulLen*MB_CUR_MAX),len,sizeof(char));
- pCSave = pCSrc;
- pCSrc = pCSave+len;
- remaining = cStackCap-(pCSrc - pCSave);
- }
- /* convert to chars */
- retVal = uprv_wcstombs(pCSrc,pWStack,remaining);
-
- pCSrc += retVal;
- pSrc += nulLen;
- srcLength-=nulLen; /* decrement the srcLength */
- break;
- }
- }
- }
- /* OK..now we have converted from wchar_ts to chars now
- * convert chars to UChars
- */
- pCSrcLimit = pCSrc;
- pCSrc = pCSave;
- pTarget = target= dest;
- pTargetLimit = dest + destCapacity;
-
- conv= u_getDefaultConverter(pErrorCode);
-
- if(U_FAILURE(*pErrorCode)|| conv==nullptr){
- goto cleanup;
- }
-
- for(;;) {
-
- *pErrorCode = U_ZERO_ERROR;
-
- /* convert to stack buffer*/
- ucnv_toUnicode(conv,&pTarget,pTargetLimit,(const char**)&pCSrc,pCSrcLimit,nullptr,(UBool)(pCSrc==pCSrcLimit),pErrorCode);
-
- /* increment count to number written to stack */
- count+= pTarget - target;
-
- if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR){
- target = uStack;
- pTarget = uStack;
- pTargetLimit = uStack + _STACK_BUFFER_CAPACITY;
- } else {
- break;
- }
-
- }
-
- if(pDestLength){
- *pDestLength =count;
- }
- u_terminateUChars(dest,destCapacity,count,pErrorCode);
-
- cleanup:
-
- if(cStack != pCSave){
- uprv_free(pCSave);
- }
- if(wStack != pWStack){
- uprv_free(pWStack);
- }
-
- u_releaseDefaultConverter(conv);
- return dest;
- }
- #endif
- U_CAPI char16_t* U_EXPORT2
- u_strFromWCS(char16_t *dest,
- int32_t destCapacity,
- int32_t *pDestLength,
- const wchar_t *src,
- int32_t srcLength,
- UErrorCode *pErrorCode)
- {
- /* args check */
- if(pErrorCode==nullptr || U_FAILURE(*pErrorCode)){
- return nullptr;
- }
- if( (src==nullptr && srcLength!=0) || srcLength < -1 ||
- (destCapacity<0) || (dest == nullptr && destCapacity > 0)
- ) {
- *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
- return nullptr;
- }
- #ifdef U_WCHAR_IS_UTF16
- /* wchar_t is UTF-16 just do a memcpy */
- if(srcLength == -1){
- srcLength = u_strlen((const char16_t *)src);
- }
- if(0 < srcLength && srcLength <= destCapacity){
- u_memcpy(dest, (const char16_t *)src, srcLength);
- }
- if(pDestLength){
- *pDestLength = srcLength;
- }
- u_terminateUChars(dest,destCapacity,srcLength,pErrorCode);
- return dest;
- #elif defined U_WCHAR_IS_UTF32
-
- return u_strFromUTF32(dest, destCapacity, pDestLength,
- (UChar32*)src, srcLength, pErrorCode);
- #else
- return _strFromWCS(dest,destCapacity,pDestLength,src,srcLength,pErrorCode);
- #endif
- }
- #endif /* #if defined(U_WCHAR_IS_UTF16) || defined(U_WCHAR_IS_UTF32) || !UCONFIG_NO_CONVERSION */
|