12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211121212131214121512161217121812191220122112221223122412251226122712281229123012311232123312341235123612371238123912401241124212431244124512461247124812491250125112521253125412551256125712581259126012611262126312641265126612671268126912701271127212731274127512761277127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373137413751376137713781379138013811382138313841385138613871388138913901391139213931394139513961397139813991400140114021403140414051406140714081409141014111412141314141415141614171418141914201421142214231424142514261427142814291430143114321433143414351436143714381439144014411442144314441445144614471448144914501451145214531454145514561457145814591460146114621463146414651466146714681469147014711472147314741475147614771478147914801481 |
- // © 2016 and later: Unicode, Inc. and others.
- // License & terms of use: http://www.unicode.org/copyright.html
- /*
- ******************************************************************************
- *
- * Copyright (C) 2001-2014, International Business Machines
- * Corporation and others. All Rights Reserved.
- *
- ******************************************************************************
- * file name: utrie2_builder.cpp
- * encoding: UTF-8
- * tab size: 8 (not used)
- * indentation:4
- *
- * created on: 2008sep26 (split off from utrie2.c)
- * created by: Markus W. Scherer
- *
- * This is a common implementation of a Unicode trie.
- * It is a kind of compressed, serializable table of 16- or 32-bit values associated with
- * Unicode code points (0..0x10ffff).
- * This is the second common version of a Unicode trie (hence the name UTrie2).
- * See utrie2.h for a comparison.
- *
- * This file contains only the builder code.
- * See utrie2.c for the runtime and enumeration code.
- */
- // #define UTRIE2_DEBUG
- #ifdef UTRIE2_DEBUG
- # include <stdio.h>
- #endif
- // #define UCPTRIE_DEBUG
- #include "unicode/utypes.h"
- #ifdef UCPTRIE_DEBUG
- #include "unicode/ucptrie.h"
- #include "unicode/umutablecptrie.h"
- #include "ucptrie_impl.h"
- #endif
- #include "cmemory.h"
- #include "utrie2.h"
- #include "utrie2_impl.h"
- #include "utrie.h" // for utrie2_fromUTrie()
- /* Implementation notes ----------------------------------------------------- */
- /*
- * The UTRIE2_SHIFT_1, UTRIE2_SHIFT_2, UTRIE2_INDEX_SHIFT and other values
- * have been chosen to minimize trie sizes overall.
- * Most of the code is flexible enough to work with a range of values,
- * within certain limits.
- *
- * Exception: Support for separate values for lead surrogate code _units_
- * vs. code _points_ was added after the constants were fixed,
- * and has not been tested nor particularly designed for different constant values.
- * (Especially the utrie2_enum() code that jumps to the special LSCP index-2
- * part and back.)
- *
- * Requires UTRIE2_SHIFT_2<=6. Otherwise 0xc0 which is the top of the ASCII-linear data
- * including the bad-UTF-8-data block is not a multiple of UTRIE2_DATA_BLOCK_LENGTH
- * and map[block>>UTRIE2_SHIFT_2] (used in reference counting and compaction
- * remapping) stops working.
- *
- * Requires UTRIE2_SHIFT_1>=10 because utrie2_enumForLeadSurrogate()
- * assumes that a single index-2 block is used for 0x400 code points
- * corresponding to one lead surrogate.
- *
- * Requires UTRIE2_SHIFT_1<=16. Otherwise one single index-2 block contains
- * more than one Unicode plane, and the split of the index-2 table into a BMP
- * part and a supplementary part, with a gap in between, would not work.
- *
- * Requires UTRIE2_INDEX_SHIFT>=1 not because of the code but because
- * there is data with more than 64k distinct values,
- * for example for Unihan collation with a separate collation weight per
- * Han character.
- */
- /* Building a trie ----------------------------------------------------------*/
- enum {
- /** The null index-2 block, following the gap in the index-2 table. */
- UNEWTRIE2_INDEX_2_NULL_OFFSET=UNEWTRIE2_INDEX_GAP_OFFSET+UNEWTRIE2_INDEX_GAP_LENGTH,
- /** The start of allocated index-2 blocks. */
- UNEWTRIE2_INDEX_2_START_OFFSET=UNEWTRIE2_INDEX_2_NULL_OFFSET+UTRIE2_INDEX_2_BLOCK_LENGTH,
- /**
- * The null data block.
- * Length 64=0x40 even if UTRIE2_DATA_BLOCK_LENGTH is smaller,
- * to work with 6-bit trail bytes from 2-byte UTF-8.
- */
- UNEWTRIE2_DATA_NULL_OFFSET=UTRIE2_DATA_START_OFFSET,
- /** The start of allocated data blocks. */
- UNEWTRIE2_DATA_START_OFFSET=UNEWTRIE2_DATA_NULL_OFFSET+0x40,
- /**
- * The start of data blocks for U+0800 and above.
- * Below, compaction uses a block length of 64 for 2-byte UTF-8.
- * From here on, compaction uses UTRIE2_DATA_BLOCK_LENGTH.
- * Data values for 0x780 code points beyond ASCII.
- */
- UNEWTRIE2_DATA_0800_OFFSET=UNEWTRIE2_DATA_START_OFFSET+0x780
- };
- /* Start with allocation of 16k data entries. */
- #define UNEWTRIE2_INITIAL_DATA_LENGTH ((int32_t)1<<14)
- /* Grow about 8x each time. */
- #define UNEWTRIE2_MEDIUM_DATA_LENGTH ((int32_t)1<<17)
- static int32_t
- allocIndex2Block(UNewTrie2 *trie);
- U_CAPI UTrie2 * U_EXPORT2
- utrie2_open(uint32_t initialValue, uint32_t errorValue, UErrorCode *pErrorCode) {
- UTrie2 *trie;
- UNewTrie2 *newTrie;
- uint32_t *data;
- int32_t i, j;
- if(U_FAILURE(*pErrorCode)) {
- return nullptr;
- }
- trie=(UTrie2 *)uprv_malloc(sizeof(UTrie2));
- newTrie=(UNewTrie2 *)uprv_malloc(sizeof(UNewTrie2));
- data=(uint32_t *)uprv_malloc(UNEWTRIE2_INITIAL_DATA_LENGTH*4);
- if(trie==nullptr || newTrie==nullptr || data==nullptr) {
- uprv_free(trie);
- uprv_free(newTrie);
- uprv_free(data);
- *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
- return nullptr;
- }
- uprv_memset(trie, 0, sizeof(UTrie2));
- trie->initialValue=initialValue;
- trie->errorValue=errorValue;
- trie->highStart=0x110000;
- trie->newTrie=newTrie;
- #ifdef UTRIE2_DEBUG
- trie->name="open";
- #endif
- newTrie->data=data;
- #ifdef UCPTRIE_DEBUG
- newTrie->t3=umutablecptrie_open(initialValue, errorValue, pErrorCode);
- #endif
- newTrie->dataCapacity=UNEWTRIE2_INITIAL_DATA_LENGTH;
- newTrie->initialValue=initialValue;
- newTrie->errorValue=errorValue;
- newTrie->highStart=0x110000;
- newTrie->firstFreeBlock=0; /* no free block in the list */
- newTrie->isCompacted=false;
- /*
- * preallocate and reset
- * - ASCII
- * - the bad-UTF-8-data block
- * - the null data block
- */
- for(i=0; i<0x80; ++i) {
- newTrie->data[i]=initialValue;
- }
- for(; i<0xc0; ++i) {
- newTrie->data[i]=errorValue;
- }
- for(i=UNEWTRIE2_DATA_NULL_OFFSET; i<UNEWTRIE2_DATA_START_OFFSET; ++i) {
- newTrie->data[i]=initialValue;
- }
- newTrie->dataNullOffset=UNEWTRIE2_DATA_NULL_OFFSET;
- newTrie->dataLength=UNEWTRIE2_DATA_START_OFFSET;
- /* set the index-2 indexes for the 2=0x80>>UTRIE2_SHIFT_2 ASCII data blocks */
- for(i=0, j=0; j<0x80; ++i, j+=UTRIE2_DATA_BLOCK_LENGTH) {
- newTrie->index2[i]=j;
- newTrie->map[i]=1;
- }
- /* reference counts for the bad-UTF-8-data block */
- for(; j<0xc0; ++i, j+=UTRIE2_DATA_BLOCK_LENGTH) {
- newTrie->map[i]=0;
- }
- /*
- * Reference counts for the null data block: all blocks except for the ASCII blocks.
- * Plus 1 so that we don't drop this block during compaction.
- * Plus as many as needed for lead surrogate code points.
- */
- /* i==newTrie->dataNullOffset */
- newTrie->map[i++]=
- (0x110000>>UTRIE2_SHIFT_2)-
- (0x80>>UTRIE2_SHIFT_2)+
- 1+
- UTRIE2_LSCP_INDEX_2_LENGTH;
- j+=UTRIE2_DATA_BLOCK_LENGTH;
- for(; j<UNEWTRIE2_DATA_START_OFFSET; ++i, j+=UTRIE2_DATA_BLOCK_LENGTH) {
- newTrie->map[i]=0;
- }
- /*
- * set the remaining indexes in the BMP index-2 block
- * to the null data block
- */
- for(i=0x80>>UTRIE2_SHIFT_2; i<UTRIE2_INDEX_2_BMP_LENGTH; ++i) {
- newTrie->index2[i]=UNEWTRIE2_DATA_NULL_OFFSET;
- }
- /*
- * Fill the index gap with impossible values so that compaction
- * does not overlap other index-2 blocks with the gap.
- */
- for(i=0; i<UNEWTRIE2_INDEX_GAP_LENGTH; ++i) {
- newTrie->index2[UNEWTRIE2_INDEX_GAP_OFFSET+i]=-1;
- }
- /* set the indexes in the null index-2 block */
- for(i=0; i<UTRIE2_INDEX_2_BLOCK_LENGTH; ++i) {
- newTrie->index2[UNEWTRIE2_INDEX_2_NULL_OFFSET+i]=UNEWTRIE2_DATA_NULL_OFFSET;
- }
- newTrie->index2NullOffset=UNEWTRIE2_INDEX_2_NULL_OFFSET;
- newTrie->index2Length=UNEWTRIE2_INDEX_2_START_OFFSET;
- /* set the index-1 indexes for the linear index-2 block */
- for(i=0, j=0;
- i<UTRIE2_OMITTED_BMP_INDEX_1_LENGTH;
- ++i, j+=UTRIE2_INDEX_2_BLOCK_LENGTH
- ) {
- newTrie->index1[i]=j;
- }
- /* set the remaining index-1 indexes to the null index-2 block */
- for(; i<UNEWTRIE2_INDEX_1_LENGTH; ++i) {
- newTrie->index1[i]=UNEWTRIE2_INDEX_2_NULL_OFFSET;
- }
- /*
- * Preallocate and reset data for U+0080..U+07ff,
- * for 2-byte UTF-8 which will be compacted in 64-blocks
- * even if UTRIE2_DATA_BLOCK_LENGTH is smaller.
- */
- for(i=0x80; i<0x800; i+=UTRIE2_DATA_BLOCK_LENGTH) {
- utrie2_set32(trie, i, initialValue, pErrorCode);
- }
- return trie;
- }
- static UNewTrie2 *
- cloneBuilder(const UNewTrie2 *other) {
- UNewTrie2 *trie;
- trie = static_cast<UNewTrie2*>(uprv_malloc(sizeof(UNewTrie2)));
- if(trie==nullptr) {
- return nullptr;
- }
- trie->data = static_cast<uint32_t*>(uprv_malloc(other->dataCapacity * 4));
- if(trie->data==nullptr) {
- uprv_free(trie);
- return nullptr;
- }
- #ifdef UCPTRIE_DEBUG
- if(other->t3==nullptr) {
- trie->t3=nullptr;
- } else {
- UErrorCode errorCode=U_ZERO_ERROR;
- trie->t3=umutablecptrie_clone(other->t3, &errorCode);
- }
- #endif
- trie->dataCapacity=other->dataCapacity;
- /* clone data */
- uprv_memcpy(trie->index1, other->index1, sizeof(trie->index1));
- uprv_memcpy(trie->index2, other->index2, (size_t)other->index2Length*4);
- trie->index2NullOffset=other->index2NullOffset;
- trie->index2Length=other->index2Length;
- uprv_memcpy(trie->data, other->data, (size_t)other->dataLength*4);
- trie->dataNullOffset=other->dataNullOffset;
- trie->dataLength=other->dataLength;
- /* reference counters */
- if(other->isCompacted) {
- trie->firstFreeBlock=0;
- } else {
- uprv_memcpy(trie->map, other->map, ((size_t)other->dataLength>>UTRIE2_SHIFT_2)*4);
- trie->firstFreeBlock=other->firstFreeBlock;
- }
- trie->initialValue=other->initialValue;
- trie->errorValue=other->errorValue;
- trie->highStart=other->highStart;
- trie->isCompacted=other->isCompacted;
- return trie;
- }
- U_CAPI UTrie2 * U_EXPORT2
- utrie2_clone(const UTrie2 *other, UErrorCode *pErrorCode) {
- UTrie2 *trie;
- if(U_FAILURE(*pErrorCode)) {
- return nullptr;
- }
- if(other==nullptr || (other->memory==nullptr && other->newTrie==nullptr)) {
- *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
- return nullptr;
- }
- trie=(UTrie2 *)uprv_malloc(sizeof(UTrie2));
- if(trie==nullptr) {
- *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
- return nullptr;
- }
- uprv_memcpy(trie, other, sizeof(UTrie2));
- if(other->memory!=nullptr) {
- trie->memory=uprv_malloc(other->length);
- if(trie->memory!=nullptr) {
- trie->isMemoryOwned=true;
- uprv_memcpy(trie->memory, other->memory, other->length);
- /* make the clone's pointers point to its own memory */
- trie->index=(uint16_t *)trie->memory+(other->index-(uint16_t *)other->memory);
- if(other->data16!=nullptr) {
- trie->data16=(uint16_t *)trie->memory+(other->data16-(uint16_t *)other->memory);
- }
- if(other->data32!=nullptr) {
- trie->data32=(uint32_t *)trie->memory+(other->data32-(uint32_t *)other->memory);
- }
- }
- } else /* other->newTrie!=nullptr */ {
- trie->newTrie=cloneBuilder(other->newTrie);
- }
- if(trie->memory==nullptr && trie->newTrie==nullptr) {
- *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
- uprv_free(trie);
- trie=nullptr;
- }
- return trie;
- }
- typedef struct NewTrieAndStatus {
- UTrie2 *trie;
- UErrorCode errorCode;
- UBool exclusiveLimit; /* rather than inclusive range end */
- } NewTrieAndStatus;
- static UBool U_CALLCONV
- copyEnumRange(const void *context, UChar32 start, UChar32 end, uint32_t value) {
- NewTrieAndStatus *nt=(NewTrieAndStatus *)context;
- if(value!=nt->trie->initialValue) {
- if(nt->exclusiveLimit) {
- --end;
- }
- if(start==end) {
- utrie2_set32(nt->trie, start, value, &nt->errorCode);
- } else {
- utrie2_setRange32(nt->trie, start, end, value, true, &nt->errorCode);
- }
- return U_SUCCESS(nt->errorCode);
- } else {
- return true;
- }
- }
- #ifdef UTRIE2_DEBUG
- static long countInitial(const UTrie2 *trie) {
- uint32_t initialValue=trie->initialValue;
- int32_t length=trie->dataLength;
- long count=0;
- if(trie->data16!=nullptr) {
- for(int32_t i=0; i<length; ++i) {
- if(trie->data16[i]==initialValue) { ++count; }
- }
- } else {
- for(int32_t i=0; i<length; ++i) {
- if(trie->data32[i]==initialValue) { ++count; }
- }
- }
- return count;
- }
- static void
- utrie_printLengths(const UTrie *trie) {
- long indexLength=trie->indexLength;
- long dataLength=(long)trie->dataLength;
- long totalLength=(long)sizeof(UTrieHeader)+indexLength*2+dataLength*(trie->data32!=nullptr ? 4 : 2);
- printf("**UTrieLengths** index:%6ld data:%6ld serialized:%6ld\n",
- indexLength, dataLength, totalLength);
- }
- static void
- utrie2_printLengths(const UTrie2 *trie, const char *which) {
- long indexLength=trie->indexLength;
- long dataLength=(long)trie->dataLength;
- long totalLength=(long)sizeof(UTrie2Header)+indexLength*2+dataLength*(trie->data32!=nullptr ? 4 : 2);
- printf("**UTrie2Lengths(%s %s)** index:%6ld data:%6ld countInitial:%6ld serialized:%6ld\n",
- which, trie->name, indexLength, dataLength, countInitial(trie), totalLength);
- }
- #endif
- U_CAPI UTrie2 * U_EXPORT2
- utrie2_cloneAsThawed(const UTrie2 *other, UErrorCode *pErrorCode) {
- NewTrieAndStatus context;
- char16_t lead;
- if(U_FAILURE(*pErrorCode)) {
- return nullptr;
- }
- if(other==nullptr || (other->memory==nullptr && other->newTrie==nullptr)) {
- *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
- return nullptr;
- }
- if(other->newTrie!=nullptr && !other->newTrie->isCompacted) {
- return utrie2_clone(other, pErrorCode); /* clone an unfrozen trie */
- }
- /* Clone the frozen trie by enumerating it and building a new one. */
- context.trie=utrie2_open(other->initialValue, other->errorValue, pErrorCode);
- if(U_FAILURE(*pErrorCode)) {
- return nullptr;
- }
- context.exclusiveLimit=false;
- context.errorCode=*pErrorCode;
- utrie2_enum(other, nullptr, copyEnumRange, &context);
- *pErrorCode=context.errorCode;
- for(lead=0xd800; lead<0xdc00; ++lead) {
- uint32_t value;
- if(other->data32==nullptr) {
- value=UTRIE2_GET16_FROM_U16_SINGLE_LEAD(other, lead);
- } else {
- value=UTRIE2_GET32_FROM_U16_SINGLE_LEAD(other, lead);
- }
- if(value!=other->initialValue) {
- utrie2_set32ForLeadSurrogateCodeUnit(context.trie, lead, value, pErrorCode);
- }
- }
- if(U_FAILURE(*pErrorCode)) {
- utrie2_close(context.trie);
- context.trie=nullptr;
- }
- return context.trie;
- }
- /* Almost the same as utrie2_cloneAsThawed() but copies a UTrie and freezes the clone. */
- U_CAPI UTrie2 * U_EXPORT2
- utrie2_fromUTrie(const UTrie *trie1, uint32_t errorValue, UErrorCode *pErrorCode) {
- NewTrieAndStatus context;
- char16_t lead;
- if(U_FAILURE(*pErrorCode)) {
- return nullptr;
- }
- if(trie1==nullptr) {
- *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
- return nullptr;
- }
- context.trie=utrie2_open(trie1->initialValue, errorValue, pErrorCode);
- if(U_FAILURE(*pErrorCode)) {
- return nullptr;
- }
- context.exclusiveLimit=true;
- context.errorCode=*pErrorCode;
- utrie_enum(trie1, nullptr, copyEnumRange, &context);
- *pErrorCode=context.errorCode;
- for(lead=0xd800; lead<0xdc00; ++lead) {
- uint32_t value;
- if(trie1->data32==nullptr) {
- value=UTRIE_GET16_FROM_LEAD(trie1, lead);
- } else {
- value=UTRIE_GET32_FROM_LEAD(trie1, lead);
- }
- if(value!=trie1->initialValue) {
- utrie2_set32ForLeadSurrogateCodeUnit(context.trie, lead, value, pErrorCode);
- }
- }
- if(U_SUCCESS(*pErrorCode)) {
- utrie2_freeze(context.trie,
- trie1->data32!=nullptr ? UTRIE2_32_VALUE_BITS : UTRIE2_16_VALUE_BITS,
- pErrorCode);
- }
- #ifdef UTRIE2_DEBUG
- if(U_SUCCESS(*pErrorCode)) {
- utrie_printLengths(trie1);
- utrie2_printLengths(context.trie, "fromUTrie");
- }
- #endif
- if(U_FAILURE(*pErrorCode)) {
- utrie2_close(context.trie);
- context.trie=nullptr;
- }
- return context.trie;
- }
- static inline UBool
- isInNullBlock(UNewTrie2 *trie, UChar32 c, UBool forLSCP) {
- int32_t i2, block;
- if(U_IS_LEAD(c) && forLSCP) {
- i2=(UTRIE2_LSCP_INDEX_2_OFFSET-(0xd800>>UTRIE2_SHIFT_2))+
- (c>>UTRIE2_SHIFT_2);
- } else {
- i2=trie->index1[c>>UTRIE2_SHIFT_1]+
- ((c>>UTRIE2_SHIFT_2)&UTRIE2_INDEX_2_MASK);
- }
- block=trie->index2[i2];
- return block == trie->dataNullOffset;
- }
- static int32_t
- allocIndex2Block(UNewTrie2 *trie) {
- int32_t newBlock, newTop;
- newBlock=trie->index2Length;
- newTop=newBlock+UTRIE2_INDEX_2_BLOCK_LENGTH;
- if(newTop>UPRV_LENGTHOF(trie->index2)) {
- /*
- * Should never occur.
- * Either UTRIE2_MAX_BUILD_TIME_INDEX_LENGTH is incorrect,
- * or the code writes more values than should be possible.
- */
- return -1;
- }
- trie->index2Length=newTop;
- uprv_memcpy(trie->index2+newBlock, trie->index2+trie->index2NullOffset, UTRIE2_INDEX_2_BLOCK_LENGTH*4);
- return newBlock;
- }
- static int32_t
- getIndex2Block(UNewTrie2 *trie, UChar32 c, UBool forLSCP) {
- int32_t i1, i2;
- if(U_IS_LEAD(c) && forLSCP) {
- return UTRIE2_LSCP_INDEX_2_OFFSET;
- }
- i1=c>>UTRIE2_SHIFT_1;
- i2=trie->index1[i1];
- if(i2==trie->index2NullOffset) {
- i2=allocIndex2Block(trie);
- if(i2<0) {
- return -1; /* program error */
- }
- trie->index1[i1]=i2;
- }
- return i2;
- }
- static int32_t
- allocDataBlock(UNewTrie2 *trie, int32_t copyBlock) {
- int32_t newBlock, newTop;
- if(trie->firstFreeBlock!=0) {
- /* get the first free block */
- newBlock=trie->firstFreeBlock;
- trie->firstFreeBlock=-trie->map[newBlock>>UTRIE2_SHIFT_2];
- } else {
- /* get a new block from the high end */
- newBlock=trie->dataLength;
- newTop=newBlock+UTRIE2_DATA_BLOCK_LENGTH;
- if(newTop>trie->dataCapacity) {
- /* out of memory in the data array */
- int32_t capacity;
- uint32_t *data;
- if(trie->dataCapacity<UNEWTRIE2_MEDIUM_DATA_LENGTH) {
- capacity=UNEWTRIE2_MEDIUM_DATA_LENGTH;
- } else if(trie->dataCapacity<UNEWTRIE2_MAX_DATA_LENGTH) {
- capacity=UNEWTRIE2_MAX_DATA_LENGTH;
- } else {
- /*
- * Should never occur.
- * Either UNEWTRIE2_MAX_DATA_LENGTH is incorrect,
- * or the code writes more values than should be possible.
- */
- return -1;
- }
- data = static_cast<uint32_t*>(uprv_malloc(capacity * 4));
- if(data==nullptr) {
- return -1;
- }
- uprv_memcpy(data, trie->data, (size_t)trie->dataLength*4);
- uprv_free(trie->data);
- trie->data=data;
- trie->dataCapacity=capacity;
- }
- trie->dataLength=newTop;
- }
- uprv_memcpy(trie->data+newBlock, trie->data+copyBlock, UTRIE2_DATA_BLOCK_LENGTH*4);
- trie->map[newBlock>>UTRIE2_SHIFT_2]=0;
- return newBlock;
- }
- /* call when the block's reference counter reaches 0 */
- static void
- releaseDataBlock(UNewTrie2 *trie, int32_t block) {
- /* put this block at the front of the free-block chain */
- trie->map[block>>UTRIE2_SHIFT_2]=-trie->firstFreeBlock;
- trie->firstFreeBlock=block;
- }
- static inline UBool
- isWritableBlock(UNewTrie2 *trie, int32_t block) {
- return block != trie->dataNullOffset && 1 == trie->map[block >> UTRIE2_SHIFT_2];
- }
- static inline void
- setIndex2Entry(UNewTrie2 *trie, int32_t i2, int32_t block) {
- int32_t oldBlock;
- ++trie->map[block>>UTRIE2_SHIFT_2]; /* increment first, in case block==oldBlock! */
- oldBlock=trie->index2[i2];
- if(0 == --trie->map[oldBlock>>UTRIE2_SHIFT_2]) {
- releaseDataBlock(trie, oldBlock);
- }
- trie->index2[i2]=block;
- }
- /**
- * No error checking for illegal arguments.
- *
- * @return -1 if no new data block available (out of memory in data array)
- * @internal
- */
- static int32_t
- getDataBlock(UNewTrie2 *trie, UChar32 c, UBool forLSCP) {
- int32_t i2, oldBlock, newBlock;
- i2=getIndex2Block(trie, c, forLSCP);
- if(i2<0) {
- return -1; /* program error */
- }
- i2+=(c>>UTRIE2_SHIFT_2)&UTRIE2_INDEX_2_MASK;
- oldBlock=trie->index2[i2];
- if(isWritableBlock(trie, oldBlock)) {
- return oldBlock;
- }
- /* allocate a new data block */
- newBlock=allocDataBlock(trie, oldBlock);
- if(newBlock<0) {
- /* out of memory in the data array */
- return -1;
- }
- setIndex2Entry(trie, i2, newBlock);
- return newBlock;
- }
- /**
- * @return true if the value was successfully set
- */
- static void
- set32(UNewTrie2 *trie,
- UChar32 c, UBool forLSCP, uint32_t value,
- UErrorCode *pErrorCode) {
- int32_t block;
- if(trie==nullptr || trie->isCompacted) {
- *pErrorCode=U_NO_WRITE_PERMISSION;
- return;
- }
- #ifdef UCPTRIE_DEBUG
- umutablecptrie_set(trie->t3, c, value, pErrorCode);
- #endif
- block=getDataBlock(trie, c, forLSCP);
- if(block<0) {
- *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
- return;
- }
- trie->data[block+(c&UTRIE2_DATA_MASK)]=value;
- }
- U_CAPI void U_EXPORT2
- utrie2_set32(UTrie2 *trie, UChar32 c, uint32_t value, UErrorCode *pErrorCode) {
- if(U_FAILURE(*pErrorCode)) {
- return;
- }
- if((uint32_t)c>0x10ffff) {
- *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
- return;
- }
- set32(trie->newTrie, c, true, value, pErrorCode);
- }
- U_CAPI void U_EXPORT2
- utrie2_set32ForLeadSurrogateCodeUnit(UTrie2 *trie,
- UChar32 c, uint32_t value,
- UErrorCode *pErrorCode) {
- if(U_FAILURE(*pErrorCode)) {
- return;
- }
- if(!U_IS_LEAD(c)) {
- *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
- return;
- }
- set32(trie->newTrie, c, false, value, pErrorCode);
- }
- static void
- writeBlock(uint32_t *block, uint32_t value) {
- uint32_t *limit=block+UTRIE2_DATA_BLOCK_LENGTH;
- while(block<limit) {
- *block++=value;
- }
- }
- /**
- * initialValue is ignored if overwrite=true
- * @internal
- */
- static void
- fillBlock(uint32_t *block, UChar32 start, UChar32 limit,
- uint32_t value, uint32_t initialValue, UBool overwrite) {
- uint32_t *pLimit;
- pLimit=block+limit;
- block+=start;
- if(overwrite) {
- while(block<pLimit) {
- *block++=value;
- }
- } else {
- while(block<pLimit) {
- if(*block==initialValue) {
- *block=value;
- }
- ++block;
- }
- }
- }
- U_CAPI void U_EXPORT2
- utrie2_setRange32(UTrie2 *trie,
- UChar32 start, UChar32 end,
- uint32_t value, UBool overwrite,
- UErrorCode *pErrorCode) {
- /*
- * repeat value in [start..end]
- * mark index values for repeat-data blocks by setting bit 31 of the index values
- * fill around existing values if any, if(overwrite)
- */
- UNewTrie2 *newTrie;
- int32_t block, rest, repeatBlock;
- UChar32 limit;
- if(U_FAILURE(*pErrorCode)) {
- return;
- }
- if((uint32_t)start>0x10ffff || (uint32_t)end>0x10ffff || start>end) {
- *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
- return;
- }
- newTrie=trie->newTrie;
- if(newTrie==nullptr || newTrie->isCompacted) {
- *pErrorCode=U_NO_WRITE_PERMISSION;
- return;
- }
- #ifdef UCPTRIE_DEBUG
- umutablecptrie_setRange(newTrie->t3, start, end, value, pErrorCode);
- #endif
- if(!overwrite && value==newTrie->initialValue) {
- return; /* nothing to do */
- }
- limit=end+1;
- if(start&UTRIE2_DATA_MASK) {
- UChar32 nextStart;
- /* set partial block at [start..following block boundary[ */
- block=getDataBlock(newTrie, start, true);
- if(block<0) {
- *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
- return;
- }
- nextStart=(start+UTRIE2_DATA_MASK)&~UTRIE2_DATA_MASK;
- if(nextStart<=limit) {
- fillBlock(newTrie->data+block, start&UTRIE2_DATA_MASK, UTRIE2_DATA_BLOCK_LENGTH,
- value, newTrie->initialValue, overwrite);
- start=nextStart;
- } else {
- fillBlock(newTrie->data+block, start&UTRIE2_DATA_MASK, limit&UTRIE2_DATA_MASK,
- value, newTrie->initialValue, overwrite);
- return;
- }
- }
- /* number of positions in the last, partial block */
- rest=limit&UTRIE2_DATA_MASK;
- /* round down limit to a block boundary */
- limit&=~UTRIE2_DATA_MASK;
- /* iterate over all-value blocks */
- if(value==newTrie->initialValue) {
- repeatBlock=newTrie->dataNullOffset;
- } else {
- repeatBlock=-1;
- }
- while(start<limit) {
- int32_t i2;
- UBool setRepeatBlock=false;
- if(value==newTrie->initialValue && isInNullBlock(newTrie, start, true)) {
- start+=UTRIE2_DATA_BLOCK_LENGTH; /* nothing to do */
- continue;
- }
- /* get index value */
- i2=getIndex2Block(newTrie, start, true);
- if(i2<0) {
- *pErrorCode=U_INTERNAL_PROGRAM_ERROR;
- return;
- }
- i2+=(start>>UTRIE2_SHIFT_2)&UTRIE2_INDEX_2_MASK;
- block=newTrie->index2[i2];
- if(isWritableBlock(newTrie, block)) {
- /* already allocated */
- if(overwrite && block>=UNEWTRIE2_DATA_0800_OFFSET) {
- /*
- * We overwrite all values, and it's not a
- * protected (ASCII-linear or 2-byte UTF-8) block:
- * replace with the repeatBlock.
- */
- setRepeatBlock=true;
- } else {
- /* !overwrite, or protected block: just write the values into this block */
- fillBlock(newTrie->data+block,
- 0, UTRIE2_DATA_BLOCK_LENGTH,
- value, newTrie->initialValue, overwrite);
- }
- } else if(newTrie->data[block]!=value && (overwrite || block==newTrie->dataNullOffset)) {
- /*
- * Set the repeatBlock instead of the null block or previous repeat block:
- *
- * If !isWritableBlock() then all entries in the block have the same value
- * because it's the null block or a range block (the repeatBlock from a previous
- * call to utrie2_setRange32()).
- * No other blocks are used multiple times before compacting.
- *
- * The null block is the only non-writable block with the initialValue because
- * of the repeatBlock initialization above. (If value==initialValue, then
- * the repeatBlock will be the null data block.)
- *
- * We set our repeatBlock if the desired value differs from the block's value,
- * and if we overwrite any data or if the data is all initial values
- * (which is the same as the block being the null block, see above).
- */
- setRepeatBlock=true;
- }
- if(setRepeatBlock) {
- if(repeatBlock>=0) {
- setIndex2Entry(newTrie, i2, repeatBlock);
- } else {
- /* create and set and fill the repeatBlock */
- repeatBlock=getDataBlock(newTrie, start, true);
- if(repeatBlock<0) {
- *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
- return;
- }
- writeBlock(newTrie->data+repeatBlock, value);
- }
- }
- start+=UTRIE2_DATA_BLOCK_LENGTH;
- }
- if(rest>0) {
- /* set partial block at [last block boundary..limit[ */
- block=getDataBlock(newTrie, start, true);
- if(block<0) {
- *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
- return;
- }
- fillBlock(newTrie->data+block, 0, rest, value, newTrie->initialValue, overwrite);
- }
- }
- /* compaction --------------------------------------------------------------- */
- static inline UBool
- equal_int32(const int32_t *s, const int32_t *t, int32_t length) {
- while(length>0 && *s==*t) {
- ++s;
- ++t;
- --length;
- }
- return length == 0;
- }
- static inline UBool
- equal_uint32(const uint32_t *s, const uint32_t *t, int32_t length) {
- while(length>0 && *s==*t) {
- ++s;
- ++t;
- --length;
- }
- return length == 0;
- }
- static int32_t
- findSameIndex2Block(const int32_t *idx, int32_t index2Length, int32_t otherBlock) {
- int32_t block;
- /* ensure that we do not even partially get past index2Length */
- index2Length-=UTRIE2_INDEX_2_BLOCK_LENGTH;
- for(block=0; block<=index2Length; ++block) {
- if(equal_int32(idx+block, idx+otherBlock, UTRIE2_INDEX_2_BLOCK_LENGTH)) {
- return block;
- }
- }
- return -1;
- }
- static int32_t
- findSameDataBlock(const uint32_t *data, int32_t dataLength, int32_t otherBlock, int32_t blockLength) {
- int32_t block;
- /* ensure that we do not even partially get past dataLength */
- dataLength-=blockLength;
- for(block=0; block<=dataLength; block+=UTRIE2_DATA_GRANULARITY) {
- if(equal_uint32(data+block, data+otherBlock, blockLength)) {
- return block;
- }
- }
- return -1;
- }
- /*
- * Find the start of the last range in the trie by enumerating backward.
- * Indexes for supplementary code points higher than this will be omitted.
- */
- static UChar32
- findHighStart(UNewTrie2 *trie, uint32_t highValue) {
- const uint32_t *data32;
- uint32_t value, initialValue;
- UChar32 c, prev;
- int32_t i1, i2, j, i2Block, prevI2Block, index2NullOffset, block, prevBlock, nullBlock;
- data32=trie->data;
- initialValue=trie->initialValue;
- index2NullOffset=trie->index2NullOffset;
- nullBlock=trie->dataNullOffset;
- /* set variables for previous range */
- if(highValue==initialValue) {
- prevI2Block=index2NullOffset;
- prevBlock=nullBlock;
- } else {
- prevI2Block=-1;
- prevBlock=-1;
- }
- prev=0x110000;
- /* enumerate index-2 blocks */
- i1=UNEWTRIE2_INDEX_1_LENGTH;
- c=prev;
- while(c>0) {
- i2Block=trie->index1[--i1];
- if(i2Block==prevI2Block) {
- /* the index-2 block is the same as the previous one, and filled with highValue */
- c-=UTRIE2_CP_PER_INDEX_1_ENTRY;
- continue;
- }
- prevI2Block=i2Block;
- if(i2Block==index2NullOffset) {
- /* this is the null index-2 block */
- if(highValue!=initialValue) {
- return c;
- }
- c-=UTRIE2_CP_PER_INDEX_1_ENTRY;
- } else {
- /* enumerate data blocks for one index-2 block */
- for(i2=UTRIE2_INDEX_2_BLOCK_LENGTH; i2>0;) {
- block=trie->index2[i2Block+ --i2];
- if(block==prevBlock) {
- /* the block is the same as the previous one, and filled with highValue */
- c-=UTRIE2_DATA_BLOCK_LENGTH;
- continue;
- }
- prevBlock=block;
- if(block==nullBlock) {
- /* this is the null data block */
- if(highValue!=initialValue) {
- return c;
- }
- c-=UTRIE2_DATA_BLOCK_LENGTH;
- } else {
- for(j=UTRIE2_DATA_BLOCK_LENGTH; j>0;) {
- value=data32[block+ --j];
- if(value!=highValue) {
- return c;
- }
- --c;
- }
- }
- }
- }
- }
- /* deliver last range */
- return 0;
- }
- /*
- * Compact a build-time trie.
- *
- * The compaction
- * - removes blocks that are identical with earlier ones
- * - overlaps adjacent blocks as much as possible (if overlap==true)
- * - moves blocks in steps of the data granularity
- * - moves and overlaps blocks that overlap with multiple values in the overlap region
- *
- * It does not
- * - try to move and overlap blocks that are not already adjacent
- */
- static void
- compactData(UNewTrie2 *trie) {
- #ifdef UTRIE2_DEBUG
- int32_t countSame=0, sumOverlaps=0;
- #endif
- int32_t start, newStart, movedStart;
- int32_t blockLength, overlap;
- int32_t i, mapIndex, blockCount;
- /* do not compact linear-ASCII data */
- newStart=UTRIE2_DATA_START_OFFSET;
- for(start=0, i=0; start<newStart; start+=UTRIE2_DATA_BLOCK_LENGTH, ++i) {
- trie->map[i]=start;
- }
- /*
- * Start with a block length of 64 for 2-byte UTF-8,
- * then switch to UTRIE2_DATA_BLOCK_LENGTH.
- */
- blockLength=64;
- blockCount=blockLength>>UTRIE2_SHIFT_2;
- for(start=newStart; start<trie->dataLength;) {
- /*
- * start: index of first entry of current block
- * newStart: index where the current block is to be moved
- * (right after current end of already-compacted data)
- */
- if(start==UNEWTRIE2_DATA_0800_OFFSET) {
- blockLength=UTRIE2_DATA_BLOCK_LENGTH;
- blockCount=1;
- }
- /* skip blocks that are not used */
- if(trie->map[start>>UTRIE2_SHIFT_2]<=0) {
- /* advance start to the next block */
- start+=blockLength;
- /* leave newStart with the previous block! */
- continue;
- }
- /* search for an identical block */
- if( (movedStart=findSameDataBlock(trie->data, newStart, start, blockLength))
- >=0
- ) {
- #ifdef UTRIE2_DEBUG
- ++countSame;
- #endif
- /* found an identical block, set the other block's index value for the current block */
- for(i=blockCount, mapIndex=start>>UTRIE2_SHIFT_2; i>0; --i) {
- trie->map[mapIndex++]=movedStart;
- movedStart+=UTRIE2_DATA_BLOCK_LENGTH;
- }
- /* advance start to the next block */
- start+=blockLength;
- /* leave newStart with the previous block! */
- continue;
- }
- /* see if the beginning of this block can be overlapped with the end of the previous block */
- /* look for maximum overlap (modulo granularity) with the previous, adjacent block */
- for(overlap=blockLength-UTRIE2_DATA_GRANULARITY;
- overlap>0 && !equal_uint32(trie->data+(newStart-overlap), trie->data+start, overlap);
- overlap-=UTRIE2_DATA_GRANULARITY) {}
- #ifdef UTRIE2_DEBUG
- sumOverlaps+=overlap;
- #endif
- if(overlap>0 || newStart<start) {
- /* some overlap, or just move the whole block */
- movedStart=newStart-overlap;
- for(i=blockCount, mapIndex=start>>UTRIE2_SHIFT_2; i>0; --i) {
- trie->map[mapIndex++]=movedStart;
- movedStart+=UTRIE2_DATA_BLOCK_LENGTH;
- }
- /* move the non-overlapping indexes to their new positions */
- start+=overlap;
- for(i=blockLength-overlap; i>0; --i) {
- trie->data[newStart++]=trie->data[start++];
- }
- } else /* no overlap && newStart==start */ {
- for(i=blockCount, mapIndex=start>>UTRIE2_SHIFT_2; i>0; --i) {
- trie->map[mapIndex++]=start;
- start+=UTRIE2_DATA_BLOCK_LENGTH;
- }
- newStart=start;
- }
- }
- /* now adjust the index-2 table */
- for(i=0; i<trie->index2Length; ++i) {
- if(i==UNEWTRIE2_INDEX_GAP_OFFSET) {
- /* Gap indexes are invalid (-1). Skip over the gap. */
- i+=UNEWTRIE2_INDEX_GAP_LENGTH;
- }
- trie->index2[i]=trie->map[trie->index2[i]>>UTRIE2_SHIFT_2];
- }
- trie->dataNullOffset=trie->map[trie->dataNullOffset>>UTRIE2_SHIFT_2];
- /* ensure dataLength alignment */
- while((newStart&(UTRIE2_DATA_GRANULARITY-1))!=0) {
- trie->data[newStart++]=trie->initialValue;
- }
- #ifdef UTRIE2_DEBUG
- /* we saved some space */
- printf("compacting UTrie2: count of 32-bit data words %lu->%lu countSame=%ld sumOverlaps=%ld\n",
- (long)trie->dataLength, (long)newStart, (long)countSame, (long)sumOverlaps);
- #endif
- trie->dataLength=newStart;
- }
- static void
- compactIndex2(UNewTrie2 *trie) {
- int32_t i, start, newStart, movedStart, overlap;
- /* do not compact linear-BMP index-2 blocks */
- newStart=UTRIE2_INDEX_2_BMP_LENGTH;
- for(start=0, i=0; start<newStart; start+=UTRIE2_INDEX_2_BLOCK_LENGTH, ++i) {
- trie->map[i]=start;
- }
- /* Reduce the index table gap to what will be needed at runtime. */
- newStart+=UTRIE2_UTF8_2B_INDEX_2_LENGTH+((trie->highStart-0x10000)>>UTRIE2_SHIFT_1);
- for(start=UNEWTRIE2_INDEX_2_NULL_OFFSET; start<trie->index2Length;) {
- /*
- * start: index of first entry of current block
- * newStart: index where the current block is to be moved
- * (right after current end of already-compacted data)
- */
- /* search for an identical block */
- if( (movedStart=findSameIndex2Block(trie->index2, newStart, start))
- >=0
- ) {
- /* found an identical block, set the other block's index value for the current block */
- trie->map[start>>UTRIE2_SHIFT_1_2]=movedStart;
- /* advance start to the next block */
- start+=UTRIE2_INDEX_2_BLOCK_LENGTH;
- /* leave newStart with the previous block! */
- continue;
- }
- /* see if the beginning of this block can be overlapped with the end of the previous block */
- /* look for maximum overlap with the previous, adjacent block */
- for(overlap=UTRIE2_INDEX_2_BLOCK_LENGTH-1;
- overlap>0 && !equal_int32(trie->index2+(newStart-overlap), trie->index2+start, overlap);
- --overlap) {}
- if(overlap>0 || newStart<start) {
- /* some overlap, or just move the whole block */
- trie->map[start>>UTRIE2_SHIFT_1_2]=newStart-overlap;
- /* move the non-overlapping indexes to their new positions */
- start+=overlap;
- for(i=UTRIE2_INDEX_2_BLOCK_LENGTH-overlap; i>0; --i) {
- trie->index2[newStart++]=trie->index2[start++];
- }
- } else /* no overlap && newStart==start */ {
- trie->map[start>>UTRIE2_SHIFT_1_2]=start;
- start+=UTRIE2_INDEX_2_BLOCK_LENGTH;
- newStart=start;
- }
- }
- /* now adjust the index-1 table */
- for(i=0; i<UNEWTRIE2_INDEX_1_LENGTH; ++i) {
- trie->index1[i]=trie->map[trie->index1[i]>>UTRIE2_SHIFT_1_2];
- }
- trie->index2NullOffset=trie->map[trie->index2NullOffset>>UTRIE2_SHIFT_1_2];
- /*
- * Ensure data table alignment:
- * Needs to be granularity-aligned for 16-bit trie
- * (so that dataMove will be down-shiftable),
- * and 2-aligned for uint32_t data.
- */
- while((newStart&((UTRIE2_DATA_GRANULARITY-1)|1))!=0) {
- /* Arbitrary value: 0x3fffc not possible for real data. */
- trie->index2[newStart++] = static_cast<int32_t>(0xffff) << UTRIE2_INDEX_SHIFT;
- }
- #ifdef UTRIE2_DEBUG
- /* we saved some space */
- printf("compacting UTrie2: count of 16-bit index words %lu->%lu\n",
- (long)trie->index2Length, (long)newStart);
- #endif
- trie->index2Length=newStart;
- }
- static void
- compactTrie(UTrie2 *trie, UErrorCode *pErrorCode) {
- UNewTrie2 *newTrie;
- UChar32 highStart, suppHighStart;
- uint32_t highValue;
- newTrie=trie->newTrie;
- /* find highStart and round it up */
- highValue=utrie2_get32(trie, 0x10ffff);
- highStart=findHighStart(newTrie, highValue);
- highStart=(highStart+(UTRIE2_CP_PER_INDEX_1_ENTRY-1))&~(UTRIE2_CP_PER_INDEX_1_ENTRY-1);
- if(highStart==0x110000) {
- highValue=trie->errorValue;
- }
- /*
- * Set trie->highStart only after utrie2_get32(trie, highStart).
- * Otherwise utrie2_get32(trie, highStart) would try to read the highValue.
- */
- trie->highStart=newTrie->highStart=highStart;
- #ifdef UTRIE2_DEBUG
- printf("UTrie2: highStart U+%06lx highValue 0x%lx initialValue 0x%lx\n",
- (long)highStart, (long)highValue, (long)trie->initialValue);
- #endif
- if(highStart<0x110000) {
- /* Blank out [highStart..10ffff] to release associated data blocks. */
- suppHighStart= highStart<=0x10000 ? 0x10000 : highStart;
- utrie2_setRange32(trie, suppHighStart, 0x10ffff, trie->initialValue, true, pErrorCode);
- if(U_FAILURE(*pErrorCode)) {
- return;
- }
- }
- compactData(newTrie);
- if(highStart>0x10000) {
- compactIndex2(newTrie);
- #ifdef UTRIE2_DEBUG
- } else {
- printf("UTrie2: highStart U+%04lx count of 16-bit index words %lu->%lu\n",
- (long)highStart, (long)trie->newTrie->index2Length, (long)UTRIE2_INDEX_1_OFFSET);
- #endif
- }
- /*
- * Store the highValue in the data array and round up the dataLength.
- * Must be done after compactData() because that assumes that dataLength
- * is a multiple of UTRIE2_DATA_BLOCK_LENGTH.
- */
- newTrie->data[newTrie->dataLength++]=highValue;
- while((newTrie->dataLength&(UTRIE2_DATA_GRANULARITY-1))!=0) {
- newTrie->data[newTrie->dataLength++]=trie->initialValue;
- }
- newTrie->isCompacted=true;
- }
- /* serialization ------------------------------------------------------------ */
- /**
- * Maximum length of the runtime index array.
- * Limited by its own 16-bit index values, and by uint16_t UTrie2Header.indexLength.
- * (The actual maximum length is lower,
- * (0x110000>>UTRIE2_SHIFT_2)+UTRIE2_UTF8_2B_INDEX_2_LENGTH+UTRIE2_MAX_INDEX_1_LENGTH.)
- */
- #define UTRIE2_MAX_INDEX_LENGTH 0xffff
- /**
- * Maximum length of the runtime data array.
- * Limited by 16-bit index values that are left-shifted by UTRIE2_INDEX_SHIFT,
- * and by uint16_t UTrie2Header.shiftedDataLength.
- */
- #define UTRIE2_MAX_DATA_LENGTH (0xffff<<UTRIE2_INDEX_SHIFT)
- /* Compact and internally serialize the trie. */
- U_CAPI void U_EXPORT2
- utrie2_freeze(UTrie2 *trie, UTrie2ValueBits valueBits, UErrorCode *pErrorCode) {
- UNewTrie2 *newTrie;
- UTrie2Header *header;
- uint32_t *p;
- uint16_t *dest16;
- int32_t i, length;
- int32_t allIndexesLength;
- int32_t dataMove; /* >0 if the data is moved to the end of the index array */
- UChar32 highStart;
- /* argument check */
- if(U_FAILURE(*pErrorCode)) {
- return;
- }
- if( trie==nullptr ||
- valueBits<0 || UTRIE2_COUNT_VALUE_BITS<=valueBits
- ) {
- *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
- return;
- }
- newTrie=trie->newTrie;
- if(newTrie==nullptr) {
- /* already frozen */
- UTrie2ValueBits frozenValueBits=
- trie->data16!=nullptr ? UTRIE2_16_VALUE_BITS : UTRIE2_32_VALUE_BITS;
- if(valueBits!=frozenValueBits) {
- *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
- }
- return;
- }
- /* compact if necessary */
- if(!newTrie->isCompacted) {
- compactTrie(trie, pErrorCode);
- if(U_FAILURE(*pErrorCode)) {
- return;
- }
- }
- highStart=trie->highStart;
- if(highStart<=0x10000) {
- allIndexesLength=UTRIE2_INDEX_1_OFFSET;
- } else {
- allIndexesLength=newTrie->index2Length;
- }
- if(valueBits==UTRIE2_16_VALUE_BITS) {
- dataMove=allIndexesLength;
- } else {
- dataMove=0;
- }
- /* are indexLength and dataLength within limits? */
- if( /* for unshifted indexLength */
- allIndexesLength>UTRIE2_MAX_INDEX_LENGTH ||
- /* for unshifted dataNullOffset */
- (dataMove+newTrie->dataNullOffset)>0xffff ||
- /* for unshifted 2-byte UTF-8 index-2 values */
- (dataMove+UNEWTRIE2_DATA_0800_OFFSET)>0xffff ||
- /* for shiftedDataLength */
- (dataMove+newTrie->dataLength)>UTRIE2_MAX_DATA_LENGTH
- ) {
- *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
- return;
- }
- /* calculate the total serialized length */
- length=sizeof(UTrie2Header)+allIndexesLength*2;
- if(valueBits==UTRIE2_16_VALUE_BITS) {
- length+=newTrie->dataLength*2;
- } else {
- length+=newTrie->dataLength*4;
- }
- trie->memory=uprv_malloc(length);
- if(trie->memory==nullptr) {
- *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
- return;
- }
- trie->length=length;
- trie->isMemoryOwned=true;
- trie->indexLength=allIndexesLength;
- trie->dataLength=newTrie->dataLength;
- if(highStart<=0x10000) {
- trie->index2NullOffset=0xffff;
- } else {
- trie->index2NullOffset=static_cast<uint16_t>(UTRIE2_INDEX_2_OFFSET+newTrie->index2NullOffset);
- }
- trie->dataNullOffset=(uint16_t)(dataMove+newTrie->dataNullOffset);
- trie->highValueIndex=dataMove+trie->dataLength-UTRIE2_DATA_GRANULARITY;
- /* set the header fields */
- header=(UTrie2Header *)trie->memory;
- header->signature=UTRIE2_SIG; /* "Tri2" */
- header->options=(uint16_t)valueBits;
- header->indexLength=(uint16_t)trie->indexLength;
- header->shiftedDataLength=(uint16_t)(trie->dataLength>>UTRIE2_INDEX_SHIFT);
- header->index2NullOffset=trie->index2NullOffset;
- header->dataNullOffset=trie->dataNullOffset;
- header->shiftedHighStart=(uint16_t)(highStart>>UTRIE2_SHIFT_1);
- /* fill the index and data arrays */
- dest16=(uint16_t *)(header+1);
- trie->index=dest16;
- /* write the index-2 array values shifted right by UTRIE2_INDEX_SHIFT, after adding dataMove */
- p=(uint32_t *)newTrie->index2;
- for(i=UTRIE2_INDEX_2_BMP_LENGTH; i>0; --i) {
- *dest16++=(uint16_t)((dataMove + *p++)>>UTRIE2_INDEX_SHIFT);
- }
- /* write UTF-8 2-byte index-2 values, not right-shifted */
- for(i=0; i<(0xc2-0xc0); ++i) { /* C0..C1 */
- *dest16++=(uint16_t)(dataMove+UTRIE2_BAD_UTF8_DATA_OFFSET);
- }
- for(; i<(0xe0-0xc0); ++i) { /* C2..DF */
- *dest16++=(uint16_t)(dataMove+newTrie->index2[i<<(6-UTRIE2_SHIFT_2)]);
- }
- if(highStart>0x10000) {
- int32_t index1Length=(highStart-0x10000)>>UTRIE2_SHIFT_1;
- int32_t index2Offset=UTRIE2_INDEX_2_BMP_LENGTH+UTRIE2_UTF8_2B_INDEX_2_LENGTH+index1Length;
- /* write 16-bit index-1 values for supplementary code points */
- p=(uint32_t *)newTrie->index1+UTRIE2_OMITTED_BMP_INDEX_1_LENGTH;
- for(i=index1Length; i>0; --i) {
- *dest16++=(uint16_t)(UTRIE2_INDEX_2_OFFSET + *p++);
- }
- /*
- * write the index-2 array values for supplementary code points,
- * shifted right by UTRIE2_INDEX_SHIFT, after adding dataMove
- */
- p=(uint32_t *)newTrie->index2+index2Offset;
- for(i=newTrie->index2Length-index2Offset; i>0; --i) {
- *dest16++=(uint16_t)((dataMove + *p++)>>UTRIE2_INDEX_SHIFT);
- }
- }
- /* write the 16/32-bit data array */
- switch(valueBits) {
- case UTRIE2_16_VALUE_BITS:
- /* write 16-bit data values */
- trie->data16=dest16;
- trie->data32=nullptr;
- p=newTrie->data;
- for(i=newTrie->dataLength; i>0; --i) {
- *dest16++=(uint16_t)*p++;
- }
- break;
- case UTRIE2_32_VALUE_BITS:
- /* write 32-bit data values */
- trie->data16=nullptr;
- trie->data32=(uint32_t *)dest16;
- uprv_memcpy(dest16, newTrie->data, (size_t)newTrie->dataLength*4);
- break;
- default:
- *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
- return;
- }
- #ifdef UTRIE2_DEBUG
- utrie2_printLengths(trie, "");
- #endif
- #ifdef UCPTRIE_DEBUG
- umutablecptrie_setName(newTrie->t3, trie->name);
- ucptrie_close(
- umutablecptrie_buildImmutable(
- newTrie->t3, UCPTRIE_TYPE_FAST, (UCPTrieValueWidth)valueBits, pErrorCode));
- #endif
- /* Delete the UNewTrie2. */
- uprv_free(newTrie->data);
- uprv_free(newTrie);
- trie->newTrie=nullptr;
- }
|