123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348 |
- // © 2018 and later: Unicode, Inc. and others.
- // License & terms of use: http://www.unicode.org/copyright.html
- // utrie_swap.cpp
- // created: 2018aug08 Markus W. Scherer
- #include "unicode/utypes.h"
- #include "cmemory.h"
- #include "ucptrie_impl.h"
- #include "udataswp.h"
- #include "utrie.h"
- #include "utrie2_impl.h"
- // These functions for swapping different generations of ICU code point tries are here
- // so that their implementation files need not depend on swapper code,
- // need not depend on each other, and so that other swapper code
- // need not depend on other trie code.
- namespace {
- constexpr int32_t ASCII_LIMIT = 0x80;
- } // namespace
- U_CAPI int32_t U_EXPORT2
- utrie_swap(const UDataSwapper *ds,
- const void *inData, int32_t length, void *outData,
- UErrorCode *pErrorCode) {
- const UTrieHeader *inTrie;
- UTrieHeader trie;
- int32_t size;
- UBool dataIs32;
- if(pErrorCode==nullptr || U_FAILURE(*pErrorCode)) {
- return 0;
- }
- if(ds==nullptr || inData==nullptr || (length>=0 && outData==nullptr)) {
- *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
- return 0;
- }
- /* setup and swapping */
- if(length>=0 && (uint32_t)length<sizeof(UTrieHeader)) {
- *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
- return 0;
- }
- inTrie=(const UTrieHeader *)inData;
- trie.signature=ds->readUInt32(inTrie->signature);
- trie.options=ds->readUInt32(inTrie->options);
- trie.indexLength=udata_readInt32(ds, inTrie->indexLength);
- trie.dataLength=udata_readInt32(ds, inTrie->dataLength);
- if( trie.signature!=0x54726965 ||
- (trie.options&UTRIE_OPTIONS_SHIFT_MASK)!=UTRIE_SHIFT ||
- ((trie.options>>UTRIE_OPTIONS_INDEX_SHIFT)&UTRIE_OPTIONS_SHIFT_MASK)!=UTRIE_INDEX_SHIFT ||
- trie.indexLength<UTRIE_BMP_INDEX_LENGTH ||
- (trie.indexLength&(UTRIE_SURROGATE_BLOCK_COUNT-1))!=0 ||
- trie.dataLength<UTRIE_DATA_BLOCK_LENGTH ||
- (trie.dataLength&(UTRIE_DATA_GRANULARITY-1))!=0 ||
- ((trie.options&UTRIE_OPTIONS_LATIN1_IS_LINEAR)!=0 && trie.dataLength<(UTRIE_DATA_BLOCK_LENGTH+0x100))
- ) {
- *pErrorCode=U_INVALID_FORMAT_ERROR; /* not a UTrie */
- return 0;
- }
- dataIs32 = (trie.options & UTRIE_OPTIONS_DATA_IS_32_BIT) != 0;
- size=sizeof(UTrieHeader)+trie.indexLength*2+trie.dataLength*(dataIs32?4:2);
- if(length>=0) {
- UTrieHeader *outTrie;
- if(length<size) {
- *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
- return 0;
- }
- outTrie=(UTrieHeader *)outData;
- /* swap the header */
- ds->swapArray32(ds, inTrie, sizeof(UTrieHeader), outTrie, pErrorCode);
- /* swap the index and the data */
- if(dataIs32) {
- ds->swapArray16(ds, inTrie+1, trie.indexLength*2, outTrie+1, pErrorCode);
- ds->swapArray32(ds, (const uint16_t *)(inTrie+1)+trie.indexLength, trie.dataLength*4,
- (uint16_t *)(outTrie+1)+trie.indexLength, pErrorCode);
- } else {
- ds->swapArray16(ds, inTrie+1, (trie.indexLength+trie.dataLength)*2, outTrie+1, pErrorCode);
- }
- }
- return size;
- }
- U_CAPI int32_t U_EXPORT2
- utrie2_swap(const UDataSwapper *ds,
- const void *inData, int32_t length, void *outData,
- UErrorCode *pErrorCode) {
- const UTrie2Header *inTrie;
- UTrie2Header trie;
- int32_t dataLength, size;
- UTrie2ValueBits valueBits;
- if(U_FAILURE(*pErrorCode)) {
- return 0;
- }
- if(ds==nullptr || inData==nullptr || (length>=0 && outData==nullptr)) {
- *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
- return 0;
- }
- /* setup and swapping */
- if(length>=0 && length<(int32_t)sizeof(UTrie2Header)) {
- *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
- return 0;
- }
- inTrie=(const UTrie2Header *)inData;
- trie.signature=ds->readUInt32(inTrie->signature);
- trie.options=ds->readUInt16(inTrie->options);
- trie.indexLength=ds->readUInt16(inTrie->indexLength);
- trie.shiftedDataLength=ds->readUInt16(inTrie->shiftedDataLength);
- valueBits=(UTrie2ValueBits)(trie.options&UTRIE2_OPTIONS_VALUE_BITS_MASK);
- dataLength=(int32_t)trie.shiftedDataLength<<UTRIE2_INDEX_SHIFT;
- if( trie.signature!=UTRIE2_SIG ||
- valueBits<0 || UTRIE2_COUNT_VALUE_BITS<=valueBits ||
- trie.indexLength<UTRIE2_INDEX_1_OFFSET ||
- dataLength<UTRIE2_DATA_START_OFFSET
- ) {
- *pErrorCode=U_INVALID_FORMAT_ERROR; /* not a UTrie */
- return 0;
- }
- size=sizeof(UTrie2Header)+trie.indexLength*2;
- switch(valueBits) {
- case UTRIE2_16_VALUE_BITS:
- size+=dataLength*2;
- break;
- case UTRIE2_32_VALUE_BITS:
- size+=dataLength*4;
- break;
- default:
- *pErrorCode=U_INVALID_FORMAT_ERROR;
- return 0;
- }
- if(length>=0) {
- UTrie2Header *outTrie;
- if(length<size) {
- *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
- return 0;
- }
- outTrie=(UTrie2Header *)outData;
- /* swap the header */
- ds->swapArray32(ds, &inTrie->signature, 4, &outTrie->signature, pErrorCode);
- ds->swapArray16(ds, &inTrie->options, 12, &outTrie->options, pErrorCode);
- /* swap the index and the data */
- switch(valueBits) {
- case UTRIE2_16_VALUE_BITS:
- ds->swapArray16(ds, inTrie+1, (trie.indexLength+dataLength)*2, outTrie+1, pErrorCode);
- break;
- case UTRIE2_32_VALUE_BITS:
- ds->swapArray16(ds, inTrie+1, trie.indexLength*2, outTrie+1, pErrorCode);
- ds->swapArray32(ds, (const uint16_t *)(inTrie+1)+trie.indexLength, dataLength*4,
- (uint16_t *)(outTrie+1)+trie.indexLength, pErrorCode);
- break;
- default:
- *pErrorCode=U_INVALID_FORMAT_ERROR;
- return 0;
- }
- }
- return size;
- }
- U_CAPI int32_t U_EXPORT2
- ucptrie_swap(const UDataSwapper *ds,
- const void *inData, int32_t length, void *outData,
- UErrorCode *pErrorCode) {
- const UCPTrieHeader *inTrie;
- UCPTrieHeader trie;
- int32_t dataLength, size;
- UCPTrieValueWidth valueWidth;
- if(U_FAILURE(*pErrorCode)) {
- return 0;
- }
- if(ds==nullptr || inData==nullptr || (length>=0 && outData==nullptr)) {
- *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
- return 0;
- }
- /* setup and swapping */
- if(length>=0 && length<(int32_t)sizeof(UCPTrieHeader)) {
- *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
- return 0;
- }
- inTrie=(const UCPTrieHeader *)inData;
- trie.signature=ds->readUInt32(inTrie->signature);
- trie.options=ds->readUInt16(inTrie->options);
- trie.indexLength=ds->readUInt16(inTrie->indexLength);
- trie.dataLength = ds->readUInt16(inTrie->dataLength);
- UCPTrieType type = (UCPTrieType)((trie.options >> 6) & 3);
- valueWidth = (UCPTrieValueWidth)(trie.options & UCPTRIE_OPTIONS_VALUE_BITS_MASK);
- dataLength = ((int32_t)(trie.options & UCPTRIE_OPTIONS_DATA_LENGTH_MASK) << 4) | trie.dataLength;
- int32_t minIndexLength = type == UCPTRIE_TYPE_FAST ?
- UCPTRIE_BMP_INDEX_LENGTH : UCPTRIE_SMALL_INDEX_LENGTH;
- if( trie.signature!=UCPTRIE_SIG ||
- type > UCPTRIE_TYPE_SMALL ||
- (trie.options & UCPTRIE_OPTIONS_RESERVED_MASK) != 0 ||
- valueWidth > UCPTRIE_VALUE_BITS_8 ||
- trie.indexLength < minIndexLength ||
- dataLength < ASCII_LIMIT
- ) {
- *pErrorCode=U_INVALID_FORMAT_ERROR; /* not a UCPTrie */
- return 0;
- }
- size=sizeof(UCPTrieHeader)+trie.indexLength*2;
- switch(valueWidth) {
- case UCPTRIE_VALUE_BITS_16:
- size+=dataLength*2;
- break;
- case UCPTRIE_VALUE_BITS_32:
- size+=dataLength*4;
- break;
- case UCPTRIE_VALUE_BITS_8:
- size+=dataLength;
- break;
- default:
- *pErrorCode=U_INVALID_FORMAT_ERROR;
- return 0;
- }
- if(length>=0) {
- UCPTrieHeader *outTrie;
- if(length<size) {
- *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
- return 0;
- }
- outTrie=(UCPTrieHeader *)outData;
- /* swap the header */
- ds->swapArray32(ds, &inTrie->signature, 4, &outTrie->signature, pErrorCode);
- ds->swapArray16(ds, &inTrie->options, 12, &outTrie->options, pErrorCode);
- /* swap the index */
- const uint16_t *inIndex=reinterpret_cast<const uint16_t *>(inTrie+1);
- uint16_t *outIndex=reinterpret_cast<uint16_t *>(outTrie+1);
- ds->swapArray16(ds, inIndex, trie.indexLength*2, outIndex, pErrorCode);
- /* swap the data */
- const uint16_t *inData=inIndex+trie.indexLength;
- uint16_t *outData=outIndex+trie.indexLength;
- switch(valueWidth) {
- case UCPTRIE_VALUE_BITS_16:
- ds->swapArray16(ds, inData, dataLength*2, outData, pErrorCode);
- break;
- case UCPTRIE_VALUE_BITS_32:
- ds->swapArray32(ds, inData, dataLength*4, outData, pErrorCode);
- break;
- case UCPTRIE_VALUE_BITS_8:
- if(inTrie!=outTrie) {
- uprv_memmove(outData, inData, dataLength);
- }
- break;
- default:
- *pErrorCode=U_INVALID_FORMAT_ERROR;
- return 0;
- }
- }
- return size;
- }
- namespace {
- /**
- * Gets the trie version from 32-bit-aligned memory containing the serialized form
- * of a UTrie (version 1), a UTrie2 (version 2), or a UCPTrie (version 3).
- *
- * @param data a pointer to 32-bit-aligned memory containing the serialized form of a trie
- * @param length the number of bytes available at data;
- * can be more than necessary (see return value)
- * @param anyEndianOk If false, only platform-endian serialized forms are recognized.
- * If true, opposite-endian serialized forms are recognized as well.
- * @return the trie version of the serialized form, or 0 if it is not
- * recognized as a serialized trie
- */
- int32_t
- getVersion(const void *data, int32_t length, UBool anyEndianOk) {
- uint32_t signature;
- if(length<16 || data==nullptr || (U_POINTER_MASK_LSB(data, 3)!=0)) {
- return 0;
- }
- signature = *static_cast<const uint32_t*>(data);
- if(signature==UCPTRIE_SIG) {
- return 3;
- }
- if(anyEndianOk && signature==UCPTRIE_OE_SIG) {
- return 3;
- }
- if(signature==UTRIE2_SIG) {
- return 2;
- }
- if(anyEndianOk && signature==UTRIE2_OE_SIG) {
- return 2;
- }
- if(signature==UTRIE_SIG) {
- return 1;
- }
- if(anyEndianOk && signature==UTRIE_OE_SIG) {
- return 1;
- }
- return 0;
- }
- } // namespace
- U_CAPI int32_t U_EXPORT2
- utrie_swapAnyVersion(const UDataSwapper *ds,
- const void *inData, int32_t length, void *outData,
- UErrorCode *pErrorCode) {
- if(U_FAILURE(*pErrorCode)) { return 0; }
- switch(getVersion(inData, length, true)) {
- case 1:
- return utrie_swap(ds, inData, length, outData, pErrorCode);
- case 2:
- return utrie2_swap(ds, inData, length, outData, pErrorCode);
- case 3:
- return ucptrie_swap(ds, inData, length, outData, pErrorCode);
- default:
- *pErrorCode=U_INVALID_FORMAT_ERROR;
- return 0;
- }
- }
|