// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
******************************************************************************
*
*   Copyright (C) 2000-2016, International Business Machines
*   Corporation and others.  All Rights Reserved.
*
******************************************************************************
*   file name:  ucnvscsu.c
*   encoding:   UTF-8
*   tab size:   8 (not used)
*   indentation:4
*
*   created on: 2000nov18
*   created by: Markus W. Scherer
*
*   This is an implementation of the Standard Compression Scheme for Unicode
*   as defined in https://www.unicode.org/reports/tr6/ .
*   Reserved commands and window settings are treated as illegal sequences and
*   will result in callback calls.
*/
- #include "unicode/utypes.h"
- #if !UCONFIG_NO_CONVERSION && !UCONFIG_ONLY_HTML_CONVERSION
- #include "unicode/ucnv.h"
- #include "unicode/ucnv_cb.h"
- #include "unicode/utf16.h"
- #include "ucnv_bld.h"
- #include "ucnv_cnv.h"
- #include "cmemory.h"
/* SCSU definitions --------------------------------------------------------- */

/*
 * SCSU command (tag) byte values.
 * The S* tags are the ones the decoder recognizes in single-byte mode,
 * the U* tags the ones it recognizes in Unicode mode.
 * For the ranged commands only the first (…0) and last (…7) value is named.
 */
enum {
    /* single-byte mode */
    SQ0 = 0x01,     /* first "quote from window pair" tag (pair 0) */
    SQ7 = 0x08,     /* last "quote from window pair" tag (pair 7) */
    SDX = 0x0B,     /* define an extended window */
    Srs = 0x0C,     /* reserved */
    SQU = 0x0E,     /* quote one Unicode character */
    SCU = 0x0F,     /* switch to Unicode mode */
    SC0 = 0x10,     /* first "select window" tag (window 0) */
    SC7 = 0x17,     /* last "select window" tag (window 7) */
    SD0 = 0x18,     /* first "define and select window" tag (window 0) */
    SD7 = 0x1F,     /* last "define and select window" tag (window 7) */

    /* Unicode mode */
    UC0 = 0xE0,     /* first "select window" tag (window 0) */
    UC7 = 0xE7,     /* last "select window" tag (window 7) */
    UD0 = 0xE8,     /* first "define and select window" tag (window 0) */
    UD7 = 0xEF,     /* last "define and select window" tag (window 7) */
    UQU = 0xF0,     /* quote one Unicode character */
    UDX = 0xF1,     /* define an extended window */
    Urs = 0xF2      /* reserved */
};
/* Boundaries used when interpreting the parameter byte of a window definition. */
enum {
    /*
     * Dynamic windows cannot address the code points from U+3400 up to
     * U+E000 because no short-run alphabets live there.
     * Parameter values starting at gapThreshold therefore have gapOffset
     * added to the resulting window offset.
     */
    gapThreshold   = 0x68,
    gapOffset      = 0xAC00,

    /* parameter values from reservedStart up to (not including) fixedThreshold are reserved */
    reservedStart  = 0xA8,

    /* parameter values from fixedThreshold on index the table of predefined fixed offsets */
    fixedThreshold = 0xF9
};
/* Start code points of the 8 static windows; these never change. */
static const uint32_t staticOffsets[8] = {
    0x0000,     /* [0] ASCII, for quoted tags */
    0x0080,     /* [1] Latin-1 Supplement (gives access to punctuation) */
    0x0100,     /* [2] Latin Extended-A */
    0x0300,     /* [3] Combining Diacritical Marks */
    0x2000,     /* [4] General Punctuation */
    0x2080,     /* [5] Currency Symbols */
    0x2100,     /* [6] Letterlike Symbols and Number Forms */
    0x3000      /* [7] CJK Symbols and Punctuation */
};
/* Start code points the 8 dynamic (sliding) windows have after a reset. */
static const uint32_t initialDynamicOffsets[8] = {
    0x0080,     /* [0] Latin-1 */
    0x00C0,     /* [1] Latin Extended A */
    0x0400,     /* [2] Cyrillic */
    0x0600,     /* [3] Arabic */
    0x0900,     /* [4] Devanagari */
    0x3040,     /* [5] Hiragana */
    0x30A0,     /* [6] Katakana */
    0xFF00      /* [7] Fullwidth ASCII */
};
/*
 * Predefined window offsets, indexed by (parameter byte - fixedThreshold).
 * The comment in front of each entry is the parameter byte that selects it.
 */
static const uint32_t fixedOffsets[] = {
    /* 0xF9 */ 0x00C0,  /* Latin-1 letters plus half of Latin Extended A */
    /* 0xFA */ 0x0250,  /* IPA Extensions */
    /* 0xFB */ 0x0370,  /* Greek */
    /* 0xFC */ 0x0530,  /* Armenian */
    /* 0xFD */ 0x3040,  /* Hiragana */
    /* 0xFE */ 0x30A0,  /* Katakana */
    /* 0xFF */ 0xFF60   /* Halfwidth Katakana */
};
/* States of the toUnicode state machine (stored in SCSUData.toUState). */
enum {
    readCommand,    /* at a character boundary: next byte is a command or a character byte */
    quotePairOne,   /* after SQU/UQU: next byte is the first byte of the quoted UTF-16 unit */
    quotePairTwo,   /* next byte is the second byte of a two-byte UTF-16 unit */
    quoteOne,       /* after SQ0..SQ7: next byte is the single quoted byte */
    definePairOne,  /* after SDX/UDX: next byte is the first parameter byte */
    definePairTwo,  /* after SDX/UDX: next byte is the second parameter byte */
    defineOne       /* after SD0..SD7/UD0..UD7: next byte is the window offset parameter */
};
- typedef struct SCSUData {
- /* dynamic window offsets, initialize to default values from initialDynamicOffsets */
- uint32_t toUDynamicOffsets[8];
- uint32_t fromUDynamicOffsets[8];
- /* state machine state - toUnicode */
- UBool toUIsSingleByteMode;
- uint8_t toUState;
- int8_t toUQuoteWindow, toUDynamicWindow;
- uint8_t toUByteOne;
- uint8_t toUPadding[3];
- /* state machine state - fromUnicode */
- UBool fromUIsSingleByteMode;
- int8_t fromUDynamicWindow;
- /*
- * windowUse[] keeps track of the use of the dynamic windows:
- * At nextWindowUseIndex there is the least recently used window,
- * and the following windows (in a wrapping manner) are more and more
- * recently used.
- * At nextWindowUseIndex-1 there is the most recently used window.
- */
- uint8_t locale;
- int8_t nextWindowUseIndex;
- int8_t windowUse[8];
- } SCSUData;
/*
 * Initial contents of SCSUData.windowUse[]: the default order, and the
 * order used when the converter was opened for the Japanese locale.
 */
static const int8_t initialWindowUse[8] = {
    7, 0, 3, 2, 4, 5, 6, 1
};
static const int8_t initialWindowUse_ja[8] = {
    3, 2, 4, 1, 0, 7, 5, 6
};
/* Values for SCSUData.locale. */
enum {
    lGeneric,   /* any locale other than Japanese */
    l_ja        /* Japanese ("ja" or "ja_...") */
};
- /* SCSU setup functions ----------------------------------------------------- */
- U_CDECL_BEGIN
- static void U_CALLCONV
- _SCSUReset(UConverter *cnv, UConverterResetChoice choice) {
- SCSUData *scsu=(SCSUData *)cnv->extraInfo;
- if(choice<=UCNV_RESET_TO_UNICODE) {
- /* reset toUnicode */
- uprv_memcpy(scsu->toUDynamicOffsets, initialDynamicOffsets, 32);
- scsu->toUIsSingleByteMode=true;
- scsu->toUState=readCommand;
- scsu->toUQuoteWindow=scsu->toUDynamicWindow=0;
- scsu->toUByteOne=0;
- cnv->toULength=0;
- }
- if(choice!=UCNV_RESET_TO_UNICODE) {
- /* reset fromUnicode */
- uprv_memcpy(scsu->fromUDynamicOffsets, initialDynamicOffsets, 32);
- scsu->fromUIsSingleByteMode=true;
- scsu->fromUDynamicWindow=0;
- scsu->nextWindowUseIndex=0;
- switch(scsu->locale) {
- case l_ja:
- uprv_memcpy(scsu->windowUse, initialWindowUse_ja, 8);
- break;
- default:
- uprv_memcpy(scsu->windowUse, initialWindowUse, 8);
- break;
- }
- cnv->fromUChar32=0;
- }
- }
- static void U_CALLCONV
- _SCSUOpen(UConverter *cnv,
- UConverterLoadArgs *pArgs,
- UErrorCode *pErrorCode) {
- const char *locale=pArgs->locale;
- if(pArgs->onlyTestIsLoadable) {
- return;
- }
- cnv->extraInfo=uprv_malloc(sizeof(SCSUData));
- if(cnv->extraInfo!=nullptr) {
- if(locale!=nullptr && locale[0]=='j' && locale[1]=='a' && (locale[2]==0 || locale[2]=='_')) {
- ((SCSUData *)cnv->extraInfo)->locale=l_ja;
- } else {
- ((SCSUData *)cnv->extraInfo)->locale=lGeneric;
- }
- _SCSUReset(cnv, UCNV_RESET_BOTH);
- } else {
- *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
- }
- /* Set the substitution character U+fffd as a Unicode string. */
- cnv->subUChars[0]=0xfffd;
- cnv->subCharLen=-1;
- }
- static void U_CALLCONV
- _SCSUClose(UConverter *cnv) {
- if(cnv->extraInfo!=nullptr) {
- if(!cnv->isExtraLocal) {
- uprv_free(cnv->extraInfo);
- }
- cnv->extraInfo=nullptr;
- }
- }
- /* SCSU-to-Unicode conversion functions ------------------------------------- */
- static void U_CALLCONV
- _SCSUToUnicodeWithOffsets(UConverterToUnicodeArgs *pArgs,
- UErrorCode *pErrorCode) {
- UConverter *cnv;
- SCSUData *scsu;
- const uint8_t *source, *sourceLimit;
- char16_t *target;
- const char16_t *targetLimit;
- int32_t *offsets;
- UBool isSingleByteMode;
- uint8_t state, byteOne;
- int8_t quoteWindow, dynamicWindow;
- int32_t sourceIndex, nextSourceIndex;
- uint8_t b;
- /* set up the local pointers */
- cnv=pArgs->converter;
- scsu=(SCSUData *)cnv->extraInfo;
- source=(const uint8_t *)pArgs->source;
- sourceLimit=(const uint8_t *)pArgs->sourceLimit;
- target=pArgs->target;
- targetLimit=pArgs->targetLimit;
- offsets=pArgs->offsets;
- /* get the state machine state */
- isSingleByteMode=scsu->toUIsSingleByteMode;
- state=scsu->toUState;
- quoteWindow=scsu->toUQuoteWindow;
- dynamicWindow=scsu->toUDynamicWindow;
- byteOne=scsu->toUByteOne;
- /* sourceIndex=-1 if the current character began in the previous buffer */
- sourceIndex=state==readCommand ? 0 : -1;
- nextSourceIndex=0;
- /*
- * conversion "loop"
- *
- * For performance, this is not a normal C loop.
- * Instead, there are two code blocks for the two SCSU modes.
- * The function branches to either one, and a change of the mode is done with a goto to
- * the other branch.
- *
- * Each branch has two conventional loops:
- * - a fast-path loop for the most common codes in the mode
- * - a loop for all other codes in the mode
- * When the fast-path runs into a code that it cannot handle, its loop ends and it
- * runs into the following loop to handle the other codes.
- * The end of the input or output buffer is also handled by the slower loop.
- * The slow loop jumps (goto) to the fast-path loop again as soon as possible.
- *
- * The callback handling is done by returning with an error code.
- * The conversion framework actually calls the callback function.
- */
- if(isSingleByteMode) {
- /* fast path for single-byte mode */
- if(state==readCommand) {
- fastSingle:
- while(source<sourceLimit && target<targetLimit && (b=*source)>=0x20) {
- ++source;
- ++nextSourceIndex;
- if(b<=0x7f) {
- /* write US-ASCII graphic character or DEL */
- *target++=(char16_t)b;
- if(offsets!=nullptr) {
- *offsets++=sourceIndex;
- }
- } else {
- /* write from dynamic window */
- uint32_t c=scsu->toUDynamicOffsets[dynamicWindow]+(b&0x7f);
- if(c<=0xffff) {
- *target++=(char16_t)c;
- if(offsets!=nullptr) {
- *offsets++=sourceIndex;
- }
- } else {
- /* output surrogate pair */
- *target++=(char16_t)(0xd7c0+(c>>10));
- if(target<targetLimit) {
- *target++=(char16_t)(0xdc00|(c&0x3ff));
- if(offsets!=nullptr) {
- *offsets++=sourceIndex;
- *offsets++=sourceIndex;
- }
- } else {
- /* target overflow */
- if(offsets!=nullptr) {
- *offsets++=sourceIndex;
- }
- cnv->UCharErrorBuffer[0]=(char16_t)(0xdc00|(c&0x3ff));
- cnv->UCharErrorBufferLength=1;
- *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
- goto endloop;
- }
- }
- }
- sourceIndex=nextSourceIndex;
- }
- }
- /* normal state machine for single-byte mode, minus handling for what fastSingle covers */
- singleByteMode:
- while(source<sourceLimit) {
- if(target>=targetLimit) {
- /* target is full */
- *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
- break;
- }
- b=*source++;
- ++nextSourceIndex;
- switch(state) {
- case readCommand:
- /* redundant conditions are commented out */
- /* here: b<0x20 because otherwise we would be in fastSingle */
- if((1UL<<b)&0x2601 /* binary 0010 0110 0000 0001, check for b==0xd || b==0xa || b==9 || b==0 */) {
- /* CR/LF/TAB/NUL */
- *target++=(char16_t)b;
- if(offsets!=nullptr) {
- *offsets++=sourceIndex;
- }
- sourceIndex=nextSourceIndex;
- goto fastSingle;
- } else if(SC0<=b) {
- if(b<=SC7) {
- dynamicWindow=(int8_t)(b-SC0);
- sourceIndex=nextSourceIndex;
- goto fastSingle;
- } else /* if(SD0<=b && b<=SD7) */ {
- dynamicWindow=(int8_t)(b-SD0);
- state=defineOne;
- }
- } else if(/* SQ0<=b && */ b<=SQ7) {
- quoteWindow=(int8_t)(b-SQ0);
- state=quoteOne;
- } else if(b==SDX) {
- state=definePairOne;
- } else if(b==SQU) {
- state=quotePairOne;
- } else if(b==SCU) {
- sourceIndex=nextSourceIndex;
- isSingleByteMode=false;
- goto fastUnicode;
- } else /* Srs */ {
- /* callback(illegal) */
- *pErrorCode=U_ILLEGAL_CHAR_FOUND;
- cnv->toUBytes[0]=b;
- cnv->toULength=1;
- goto endloop;
- }
- /* store the first byte of a multibyte sequence in toUBytes[] */
- cnv->toUBytes[0]=b;
- cnv->toULength=1;
- break;
- case quotePairOne:
- byteOne=b;
- cnv->toUBytes[1]=b;
- cnv->toULength=2;
- state=quotePairTwo;
- break;
- case quotePairTwo:
- *target++=(char16_t)((byteOne<<8)|b);
- if(offsets!=nullptr) {
- *offsets++=sourceIndex;
- }
- sourceIndex=nextSourceIndex;
- state=readCommand;
- goto fastSingle;
- case quoteOne:
- if(b<0x80) {
- /* all static offsets are in the BMP */
- *target++=(char16_t)(staticOffsets[quoteWindow]+b);
- if(offsets!=nullptr) {
- *offsets++=sourceIndex;
- }
- } else {
- /* write from dynamic window */
- uint32_t c=scsu->toUDynamicOffsets[quoteWindow]+(b&0x7f);
- if(c<=0xffff) {
- *target++=(char16_t)c;
- if(offsets!=nullptr) {
- *offsets++=sourceIndex;
- }
- } else {
- /* output surrogate pair */
- *target++=(char16_t)(0xd7c0+(c>>10));
- if(target<targetLimit) {
- *target++=(char16_t)(0xdc00|(c&0x3ff));
- if(offsets!=nullptr) {
- *offsets++=sourceIndex;
- *offsets++=sourceIndex;
- }
- } else {
- /* target overflow */
- if(offsets!=nullptr) {
- *offsets++=sourceIndex;
- }
- cnv->UCharErrorBuffer[0]=(char16_t)(0xdc00|(c&0x3ff));
- cnv->UCharErrorBufferLength=1;
- *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
- goto endloop;
- }
- }
- }
- sourceIndex=nextSourceIndex;
- state=readCommand;
- goto fastSingle;
- case definePairOne:
- dynamicWindow=(int8_t)((b>>5)&7);
- byteOne=(uint8_t)(b&0x1f);
- cnv->toUBytes[1]=b;
- cnv->toULength=2;
- state=definePairTwo;
- break;
- case definePairTwo:
- scsu->toUDynamicOffsets[dynamicWindow]=0x10000+(byteOne<<15UL | b<<7UL);
- sourceIndex=nextSourceIndex;
- state=readCommand;
- goto fastSingle;
- case defineOne:
- if(b==0) {
- /* callback(illegal): Reserved window offset value 0 */
- cnv->toUBytes[1]=b;
- cnv->toULength=2;
- goto endloop;
- } else if(b<gapThreshold) {
- scsu->toUDynamicOffsets[dynamicWindow]=b<<7UL;
- } else if((uint8_t)(b-gapThreshold)<(reservedStart-gapThreshold)) {
- scsu->toUDynamicOffsets[dynamicWindow]=(b<<7UL)+gapOffset;
- } else if(b>=fixedThreshold) {
- scsu->toUDynamicOffsets[dynamicWindow]=fixedOffsets[b-fixedThreshold];
- } else {
- /* callback(illegal): Reserved window offset value 0xa8..0xf8 */
- cnv->toUBytes[1]=b;
- cnv->toULength=2;
- goto endloop;
- }
- sourceIndex=nextSourceIndex;
- state=readCommand;
- goto fastSingle;
- }
- }
- } else {
- /* fast path for Unicode mode */
- if(state==readCommand) {
- fastUnicode:
- while(source+1<sourceLimit && target<targetLimit && (uint8_t)((b=*source)-UC0)>(Urs-UC0)) {
- *target++=(char16_t)((b<<8)|source[1]);
- if(offsets!=nullptr) {
- *offsets++=sourceIndex;
- }
- sourceIndex=nextSourceIndex;
- nextSourceIndex+=2;
- source+=2;
- }
- }
- /* normal state machine for Unicode mode */
- /* unicodeByteMode: */
- while(source<sourceLimit) {
- if(target>=targetLimit) {
- /* target is full */
- *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
- break;
- }
- b=*source++;
- ++nextSourceIndex;
- switch(state) {
- case readCommand:
- if((uint8_t)(b-UC0)>(Urs-UC0)) {
- byteOne=b;
- cnv->toUBytes[0]=b;
- cnv->toULength=1;
- state=quotePairTwo;
- } else if(/* UC0<=b && */ b<=UC7) {
- dynamicWindow=(int8_t)(b-UC0);
- sourceIndex=nextSourceIndex;
- isSingleByteMode=true;
- goto fastSingle;
- } else if(/* UD0<=b && */ b<=UD7) {
- dynamicWindow=(int8_t)(b-UD0);
- isSingleByteMode=true;
- cnv->toUBytes[0]=b;
- cnv->toULength=1;
- state=defineOne;
- goto singleByteMode;
- } else if(b==UDX) {
- isSingleByteMode=true;
- cnv->toUBytes[0]=b;
- cnv->toULength=1;
- state=definePairOne;
- goto singleByteMode;
- } else if(b==UQU) {
- cnv->toUBytes[0]=b;
- cnv->toULength=1;
- state=quotePairOne;
- } else /* Urs */ {
- /* callback(illegal) */
- *pErrorCode=U_ILLEGAL_CHAR_FOUND;
- cnv->toUBytes[0]=b;
- cnv->toULength=1;
- goto endloop;
- }
- break;
- case quotePairOne:
- byteOne=b;
- cnv->toUBytes[1]=b;
- cnv->toULength=2;
- state=quotePairTwo;
- break;
- case quotePairTwo:
- *target++=(char16_t)((byteOne<<8)|b);
- if(offsets!=nullptr) {
- *offsets++=sourceIndex;
- }
- sourceIndex=nextSourceIndex;
- state=readCommand;
- goto fastUnicode;
- }
- }
- }
- endloop:
- /* set the converter state back into UConverter */
- if(U_FAILURE(*pErrorCode) && *pErrorCode!=U_BUFFER_OVERFLOW_ERROR) {
- /* reset to deal with the next character */
- state=readCommand;
- } else if(state==readCommand) {
- /* not in a multi-byte sequence, reset toULength */
- cnv->toULength=0;
- }
- scsu->toUIsSingleByteMode=isSingleByteMode;
- scsu->toUState=state;
- scsu->toUQuoteWindow=quoteWindow;
- scsu->toUDynamicWindow=dynamicWindow;
- scsu->toUByteOne=byteOne;
- /* write back the updated pointers */
- pArgs->source=(const char *)source;
- pArgs->target=target;
- pArgs->offsets=offsets;
- return;
- }
- /*
- * Identical to _SCSUToUnicodeWithOffsets but without offset handling.
- * If a change is made in the original function, then either
- * change this function the same way or
- * re-copy the original function and remove the variables
- * offsets, sourceIndex, and nextSourceIndex.
- */
- static void U_CALLCONV
- _SCSUToUnicode(UConverterToUnicodeArgs *pArgs,
- UErrorCode *pErrorCode) {
- UConverter *cnv;
- SCSUData *scsu;
- const uint8_t *source, *sourceLimit;
- char16_t *target;
- const char16_t *targetLimit;
- UBool isSingleByteMode;
- uint8_t state, byteOne;
- int8_t quoteWindow, dynamicWindow;
- uint8_t b;
- /* set up the local pointers */
- cnv=pArgs->converter;
- scsu=(SCSUData *)cnv->extraInfo;
- source=(const uint8_t *)pArgs->source;
- sourceLimit=(const uint8_t *)pArgs->sourceLimit;
- target=pArgs->target;
- targetLimit=pArgs->targetLimit;
- /* get the state machine state */
- isSingleByteMode=scsu->toUIsSingleByteMode;
- state=scsu->toUState;
- quoteWindow=scsu->toUQuoteWindow;
- dynamicWindow=scsu->toUDynamicWindow;
- byteOne=scsu->toUByteOne;
- /*
- * conversion "loop"
- *
- * For performance, this is not a normal C loop.
- * Instead, there are two code blocks for the two SCSU modes.
- * The function branches to either one, and a change of the mode is done with a goto to
- * the other branch.
- *
- * Each branch has two conventional loops:
- * - a fast-path loop for the most common codes in the mode
- * - a loop for all other codes in the mode
- * When the fast-path runs into a code that it cannot handle, its loop ends and it
- * runs into the following loop to handle the other codes.
- * The end of the input or output buffer is also handled by the slower loop.
- * The slow loop jumps (goto) to the fast-path loop again as soon as possible.
- *
- * The callback handling is done by returning with an error code.
- * The conversion framework actually calls the callback function.
- */
- if(isSingleByteMode) {
- /* fast path for single-byte mode */
- if(state==readCommand) {
- fastSingle:
- while(source<sourceLimit && target<targetLimit && (b=*source)>=0x20) {
- ++source;
- if(b<=0x7f) {
- /* write US-ASCII graphic character or DEL */
- *target++=(char16_t)b;
- } else {
- /* write from dynamic window */
- uint32_t c=scsu->toUDynamicOffsets[dynamicWindow]+(b&0x7f);
- if(c<=0xffff) {
- *target++=(char16_t)c;
- } else {
- /* output surrogate pair */
- *target++=(char16_t)(0xd7c0+(c>>10));
- if(target<targetLimit) {
- *target++=(char16_t)(0xdc00|(c&0x3ff));
- } else {
- /* target overflow */
- cnv->UCharErrorBuffer[0]=(char16_t)(0xdc00|(c&0x3ff));
- cnv->UCharErrorBufferLength=1;
- *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
- goto endloop;
- }
- }
- }
- }
- }
- /* normal state machine for single-byte mode, minus handling for what fastSingle covers */
- singleByteMode:
- while(source<sourceLimit) {
- if(target>=targetLimit) {
- /* target is full */
- *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
- break;
- }
- b=*source++;
- switch(state) {
- case readCommand:
- /* redundant conditions are commented out */
- /* here: b<0x20 because otherwise we would be in fastSingle */
- if((1UL<<b)&0x2601 /* binary 0010 0110 0000 0001, check for b==0xd || b==0xa || b==9 || b==0 */) {
- /* CR/LF/TAB/NUL */
- *target++=(char16_t)b;
- goto fastSingle;
- } else if(SC0<=b) {
- if(b<=SC7) {
- dynamicWindow=(int8_t)(b-SC0);
- goto fastSingle;
- } else /* if(SD0<=b && b<=SD7) */ {
- dynamicWindow=(int8_t)(b-SD0);
- state=defineOne;
- }
- } else if(/* SQ0<=b && */ b<=SQ7) {
- quoteWindow=(int8_t)(b-SQ0);
- state=quoteOne;
- } else if(b==SDX) {
- state=definePairOne;
- } else if(b==SQU) {
- state=quotePairOne;
- } else if(b==SCU) {
- isSingleByteMode=false;
- goto fastUnicode;
- } else /* Srs */ {
- /* callback(illegal) */
- *pErrorCode=U_ILLEGAL_CHAR_FOUND;
- cnv->toUBytes[0]=b;
- cnv->toULength=1;
- goto endloop;
- }
- /* store the first byte of a multibyte sequence in toUBytes[] */
- cnv->toUBytes[0]=b;
- cnv->toULength=1;
- break;
- case quotePairOne:
- byteOne=b;
- cnv->toUBytes[1]=b;
- cnv->toULength=2;
- state=quotePairTwo;
- break;
- case quotePairTwo:
- *target++=(char16_t)((byteOne<<8)|b);
- state=readCommand;
- goto fastSingle;
- case quoteOne:
- if(b<0x80) {
- /* all static offsets are in the BMP */
- *target++=(char16_t)(staticOffsets[quoteWindow]+b);
- } else {
- /* write from dynamic window */
- uint32_t c=scsu->toUDynamicOffsets[quoteWindow]+(b&0x7f);
- if(c<=0xffff) {
- *target++=(char16_t)c;
- } else {
- /* output surrogate pair */
- *target++=(char16_t)(0xd7c0+(c>>10));
- if(target<targetLimit) {
- *target++=(char16_t)(0xdc00|(c&0x3ff));
- } else {
- /* target overflow */
- cnv->UCharErrorBuffer[0]=(char16_t)(0xdc00|(c&0x3ff));
- cnv->UCharErrorBufferLength=1;
- *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
- goto endloop;
- }
- }
- }
- state=readCommand;
- goto fastSingle;
- case definePairOne:
- dynamicWindow=(int8_t)((b>>5)&7);
- byteOne=(uint8_t)(b&0x1f);
- cnv->toUBytes[1]=b;
- cnv->toULength=2;
- state=definePairTwo;
- break;
- case definePairTwo:
- scsu->toUDynamicOffsets[dynamicWindow]=0x10000+(byteOne<<15UL | b<<7UL);
- state=readCommand;
- goto fastSingle;
- case defineOne:
- if(b==0) {
- /* callback(illegal): Reserved window offset value 0 */
- cnv->toUBytes[1]=b;
- cnv->toULength=2;
- goto endloop;
- } else if(b<gapThreshold) {
- scsu->toUDynamicOffsets[dynamicWindow]=b<<7UL;
- } else if((uint8_t)(b-gapThreshold)<(reservedStart-gapThreshold)) {
- scsu->toUDynamicOffsets[dynamicWindow]=(b<<7UL)+gapOffset;
- } else if(b>=fixedThreshold) {
- scsu->toUDynamicOffsets[dynamicWindow]=fixedOffsets[b-fixedThreshold];
- } else {
- /* callback(illegal): Reserved window offset value 0xa8..0xf8 */
- cnv->toUBytes[1]=b;
- cnv->toULength=2;
- goto endloop;
- }
- state=readCommand;
- goto fastSingle;
- }
- }
- } else {
- /* fast path for Unicode mode */
- if(state==readCommand) {
- fastUnicode:
- while(source+1<sourceLimit && target<targetLimit && (uint8_t)((b=*source)-UC0)>(Urs-UC0)) {
- *target++=(char16_t)((b<<8)|source[1]);
- source+=2;
- }
- }
- /* normal state machine for Unicode mode */
- /* unicodeByteMode: */
- while(source<sourceLimit) {
- if(target>=targetLimit) {
- /* target is full */
- *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
- break;
- }
- b=*source++;
- switch(state) {
- case readCommand:
- if((uint8_t)(b-UC0)>(Urs-UC0)) {
- byteOne=b;
- cnv->toUBytes[0]=b;
- cnv->toULength=1;
- state=quotePairTwo;
- } else if(/* UC0<=b && */ b<=UC7) {
- dynamicWindow=(int8_t)(b-UC0);
- isSingleByteMode=true;
- goto fastSingle;
- } else if(/* UD0<=b && */ b<=UD7) {
- dynamicWindow=(int8_t)(b-UD0);
- isSingleByteMode=true;
- cnv->toUBytes[0]=b;
- cnv->toULength=1;
- state=defineOne;
- goto singleByteMode;
- } else if(b==UDX) {
- isSingleByteMode=true;
- cnv->toUBytes[0]=b;
- cnv->toULength=1;
- state=definePairOne;
- goto singleByteMode;
- } else if(b==UQU) {
- cnv->toUBytes[0]=b;
- cnv->toULength=1;
- state=quotePairOne;
- } else /* Urs */ {
- /* callback(illegal) */
- *pErrorCode=U_ILLEGAL_CHAR_FOUND;
- cnv->toUBytes[0]=b;
- cnv->toULength=1;
- goto endloop;
- }
- break;
- case quotePairOne:
- byteOne=b;
- cnv->toUBytes[1]=b;
- cnv->toULength=2;
- state=quotePairTwo;
- break;
- case quotePairTwo:
- *target++=(char16_t)((byteOne<<8)|b);
- state=readCommand;
- goto fastUnicode;
- }
- }
- }
- endloop:
- /* set the converter state back into UConverter */
- if(U_FAILURE(*pErrorCode) && *pErrorCode!=U_BUFFER_OVERFLOW_ERROR) {
- /* reset to deal with the next character */
- state=readCommand;
- } else if(state==readCommand) {
- /* not in a multi-byte sequence, reset toULength */
- cnv->toULength=0;
- }
- scsu->toUIsSingleByteMode=isSingleByteMode;
- scsu->toUState=state;
- scsu->toUQuoteWindow=quoteWindow;
- scsu->toUDynamicWindow=dynamicWindow;
- scsu->toUByteOne=byteOne;
- /* write back the updated pointers */
- pArgs->source=(const char *)source;
- pArgs->target=target;
- return;
- }
- U_CDECL_END
- /* SCSU-from-Unicode conversion functions ----------------------------------- */
- /*
- * This SCSU Encoder is fairly simple but uses all SCSU commands to achieve
- * reasonable results. The lookahead is minimal.
- * Many cases are simple:
- * A character fits directly into the current mode, a dynamic or static window,
- * or is not compressible. These cases are tested first.
- * Real compression heuristics are applied to the rest, in code branches for
- * single/Unicode mode and BMP/supplementary code points.
- * The heuristics used here are extremely simple.
- */
- /* get the number of the window that this character is in, or -1 */
- static int8_t
- getWindow(const uint32_t offsets[8], uint32_t c) {
- int i;
- for(i=0; i<8; ++i) {
- if((uint32_t)(c-offsets[i])<=0x7f) {
- return (int8_t)(i);
- }
- }
- return -1;
- }
- /* is the character in the dynamic window starting at the offset, or in the direct-encoded range? */
- static UBool
- isInOffsetWindowOrDirect(uint32_t offset, uint32_t c) {
- return (UBool)(c<=offset+0x7f &&
- (c>=offset || (c<=0x7f &&
- (c>=0x20 || (1UL<<c)&0x2601))));
- /* binary 0010 0110 0000 0001,
- check for b==0xd || b==0xa || b==9 || b==0 */
- }
- /*
- * getNextDynamicWindow returns the next dynamic window to be redefined
- */
- static int8_t
- getNextDynamicWindow(SCSUData *scsu) {
- int8_t window=scsu->windowUse[scsu->nextWindowUseIndex];
- if(++scsu->nextWindowUseIndex==8) {
- scsu->nextWindowUseIndex=0;
- }
- return window;
- }
- /*
- * useDynamicWindow() adjusts
- * windowUse[] and nextWindowUseIndex for the algorithm to choose
- * the next dynamic window to be defined;
- * a subclass may override it and provide its own algorithm.
- */
- static void
- useDynamicWindow(SCSUData *scsu, int8_t window) {
- /*
- * move the existing window, which just became the most recently used one,
- * up in windowUse[] to nextWindowUseIndex-1
- */
- /* first, find the index of the window - backwards to favor the more recently used windows */
- int i, j;
- i=scsu->nextWindowUseIndex;
- do {
- if(--i<0) {
- i=7;
- }
- } while(scsu->windowUse[i]!=window);
- /* now copy each windowUse[i+1] to [i] */
- j=i+1;
- if(j==8) {
- j=0;
- }
- while(j!=scsu->nextWindowUseIndex) {
- scsu->windowUse[i]=scsu->windowUse[j];
- i=j;
- if(++j==8) { j=0; }
- }
- /* finally, set the window into the most recently used index */
- scsu->windowUse[i]=window;
- }
- /*
- * calculate the offset and the code for a dynamic window that contains the character
- * takes fixed offsets into account
- * the offset of the window is stored in the offset variable,
- * the code is returned
- *
- * return offset code: -1 none <=0xff code for SDn/UDn else code for SDX/UDX, subtract 0x200 to get the true code
- */
- static int
- getDynamicOffset(uint32_t c, uint32_t *pOffset) {
- int i;
- for(i=0; i<7; ++i) {
- if((uint32_t)(c-fixedOffsets[i])<=0x7f) {
- *pOffset=fixedOffsets[i];
- return 0xf9+i;
- }
- }
- if(c<0x80) {
- /* No dynamic window for US-ASCII. */
- return -1;
- } else if(c<0x3400 ||
- (uint32_t)(c-0x10000)<(0x14000-0x10000) ||
- (uint32_t)(c-0x1d000)<=(0x1ffff-0x1d000)
- ) {
- /* This character is in a code range for a "small", i.e., reasonably windowable, script. */
- *pOffset=c&0x7fffff80;
- return (int)(c>>7);
- } else if(0xe000<=c && c!=0xfeff && c<0xfff0) {
- /* For these characters we need to take the gapOffset into account. */
- *pOffset=c&0x7fffff80;
- return (int)((c-gapOffset)>>7);
- } else {
- return -1;
- }
- }
- U_CDECL_BEGIN
- /*
- * Idea for compression:
- * - save SCSUData and other state before really starting work
- * - at endloop, see if compression could be better with just unicode mode
- * - don't do this if a callback has been called
- * - if unicode mode would be smaller, then override the results with it - may need SCU at the beginning
- * - different buffer handling!
- *
- * Drawback or need for corrective handling:
- * it is desirable to encode U+feff as SQU fe ff for the SCSU signature, and
- * it is desirable to start a document in US-ASCII/Latin-1 for as long as possible
- * not only for compression but also for HTML/XML documents with following charset/encoding announcers.
- *
- * How to achieve both?
- * - Only replace the result after an SDX or SCU?
- */
- static void U_CALLCONV
- _SCSUFromUnicodeWithOffsets(UConverterFromUnicodeArgs *pArgs,
- UErrorCode *pErrorCode) {
- UConverter *cnv;
- SCSUData *scsu;
- const char16_t *source, *sourceLimit;
- uint8_t *target;
- int32_t targetCapacity;
- int32_t *offsets;
- UBool isSingleByteMode;
- uint8_t dynamicWindow;
- uint32_t currentOffset;
- uint32_t c, delta;
- int32_t sourceIndex, nextSourceIndex;
- int32_t length;
- /* variables for compression heuristics */
- uint32_t offset;
- char16_t lead, trail;
- int code;
- int8_t window;
- /* set up the local pointers */
- cnv=pArgs->converter;
- scsu=(SCSUData *)cnv->extraInfo;
- /* set up the local pointers */
- source=pArgs->source;
- sourceLimit=pArgs->sourceLimit;
- target=(uint8_t *)pArgs->target;
- targetCapacity=(int32_t)(pArgs->targetLimit-pArgs->target);
- offsets=pArgs->offsets;
- /* get the state machine state */
- isSingleByteMode=scsu->fromUIsSingleByteMode;
- dynamicWindow=scsu->fromUDynamicWindow;
- currentOffset=scsu->fromUDynamicOffsets[dynamicWindow];
- c=cnv->fromUChar32;
- /* sourceIndex=-1 if the current character began in the previous buffer */
- sourceIndex= c==0 ? 0 : -1;
- nextSourceIndex=0;
- /* similar conversion "loop" as in toUnicode */
- loop:
- if(isSingleByteMode) {
- if(c!=0 && targetCapacity>0) {
- goto getTrailSingle;
- }
- /* state machine for single-byte mode */
- /* singleByteMode: */
- while(source<sourceLimit) {
- if(targetCapacity<=0) {
- /* target is full */
- *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
- break;
- }
- c=*source++;
- ++nextSourceIndex;
- if((c-0x20)<=0x5f) {
- /* pass US-ASCII graphic character through */
- *target++=(uint8_t)c;
- if(offsets!=nullptr) {
- *offsets++=sourceIndex;
- }
- --targetCapacity;
- } else if(c<0x20) {
- if((1UL<<c)&0x2601 /* binary 0010 0110 0000 0001, check for b==0xd || b==0xa || b==9 || b==0 */) {
- /* CR/LF/TAB/NUL */
- *target++=(uint8_t)c;
- if(offsets!=nullptr) {
- *offsets++=sourceIndex;
- }
- --targetCapacity;
- } else {
- /* quote C0 control character */
- c|=SQ0<<8;
- length=2;
- goto outputBytes;
- }
- } else if((delta=c-currentOffset)<=0x7f) {
- /* use the current dynamic window */
- *target++=(uint8_t)(delta|0x80);
- if(offsets!=nullptr) {
- *offsets++=sourceIndex;
- }
- --targetCapacity;
- } else if(U16_IS_SURROGATE(c)) {
- if(U16_IS_SURROGATE_LEAD(c)) {
- getTrailSingle:
- lead=(char16_t)c;
- if(source<sourceLimit) {
- /* test the following code unit */
- trail=*source;
- if(U16_IS_TRAIL(trail)) {
- ++source;
- ++nextSourceIndex;
- c=U16_GET_SUPPLEMENTARY(c, trail);
- /* convert this surrogate code point */
- /* exit this condition tree */
- } else {
- /* this is an unmatched lead code unit (1st surrogate) */
- /* callback(illegal) */
- *pErrorCode=U_ILLEGAL_CHAR_FOUND;
- goto endloop;
- }
- } else {
- /* no more input */
- break;
- }
- } else {
- /* this is an unmatched trail code unit (2nd surrogate) */
- /* callback(illegal) */
- *pErrorCode=U_ILLEGAL_CHAR_FOUND;
- goto endloop;
- }
- /* compress supplementary character U+10000..U+10ffff */
- if((delta=c-currentOffset)<=0x7f) {
- /* use the current dynamic window */
- *target++=(uint8_t)(delta|0x80);
- if(offsets!=nullptr) {
- *offsets++=sourceIndex;
- }
- --targetCapacity;
- } else if((window=getWindow(scsu->fromUDynamicOffsets, c))>=0) {
- /* there is a dynamic window that contains this character, change to it */
- dynamicWindow=window;
- currentOffset=scsu->fromUDynamicOffsets[dynamicWindow];
- useDynamicWindow(scsu, dynamicWindow);
- c=((uint32_t)(SC0+dynamicWindow)<<8)|(c-currentOffset)|0x80;
- length=2;
- goto outputBytes;
- } else if((code=getDynamicOffset(c, &offset))>=0) {
- /* might check if there are more characters in this window to come */
- /* define an extended window with this character */
- code-=0x200;
- dynamicWindow=getNextDynamicWindow(scsu);
- currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]=offset;
- useDynamicWindow(scsu, dynamicWindow);
- c=((uint32_t)SDX<<24)|((uint32_t)dynamicWindow<<21)|((uint32_t)code<<8)|(c-currentOffset)|0x80;
- length=4;
- goto outputBytes;
- } else {
- /* change to Unicode mode and output this (lead, trail) pair */
- isSingleByteMode=false;
- *target++=(uint8_t)SCU;
- if(offsets!=nullptr) {
- *offsets++=sourceIndex;
- }
- --targetCapacity;
- c=((uint32_t)lead<<16)|trail;
- length=4;
- goto outputBytes;
- }
- } else if(c<0xa0) {
- /* quote C1 control character */
- c=(c&0x7f)|(SQ0+1)<<8; /* SQ0+1==SQ1 */
- length=2;
- goto outputBytes;
- } else if(c==0xfeff || c>=0xfff0) {
- /* quote signature character=byte order mark and specials */
- c|=SQU<<16;
- length=3;
- goto outputBytes;
- } else {
- /* compress all other BMP characters */
- if((window=getWindow(scsu->fromUDynamicOffsets, c))>=0) {
- /* there is a window defined that contains this character - switch to it or quote from it? */
- if(source>=sourceLimit || isInOffsetWindowOrDirect(scsu->fromUDynamicOffsets[window], *source)) {
- /* change to dynamic window */
- dynamicWindow=window;
- currentOffset=scsu->fromUDynamicOffsets[dynamicWindow];
- useDynamicWindow(scsu, dynamicWindow);
- c=((uint32_t)(SC0+dynamicWindow)<<8)|(c-currentOffset)|0x80;
- length=2;
- goto outputBytes;
- } else {
- /* quote from dynamic window */
- c=((uint32_t)(SQ0+window)<<8)|(c-scsu->fromUDynamicOffsets[window])|0x80;
- length=2;
- goto outputBytes;
- }
- } else if((window=getWindow(staticOffsets, c))>=0) {
- /* quote from static window */
- c=((uint32_t)(SQ0+window)<<8)|(c-staticOffsets[window]);
- length=2;
- goto outputBytes;
- } else if((code=getDynamicOffset(c, &offset))>=0) {
- /* define a dynamic window with this character */
- dynamicWindow=getNextDynamicWindow(scsu);
- currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]=offset;
- useDynamicWindow(scsu, dynamicWindow);
- c=((uint32_t)(SD0+dynamicWindow)<<16)|((uint32_t)code<<8)|(c-currentOffset)|0x80;
- length=3;
- goto outputBytes;
- } else if((uint32_t)(c-0x3400)<(0xd800-0x3400) &&
- (source>=sourceLimit || (uint32_t)(*source-0x3400)<(0xd800-0x3400))
- ) {
- /*
- * this character is not compressible (a BMP ideograph or similar);
- * switch to Unicode mode if this is the last character in the block
- * or there is at least one more ideograph following immediately
- */
- isSingleByteMode=false;
- c|=SCU<<16;
- length=3;
- goto outputBytes;
- } else {
- /* quote Unicode */
- c|=SQU<<16;
- length=3;
- goto outputBytes;
- }
- }
- /* normal end of conversion: prepare for a new character */
- c=0;
- sourceIndex=nextSourceIndex;
- }
- } else {
- if(c!=0 && targetCapacity>0) {
- goto getTrailUnicode;
- }
- /* state machine for Unicode mode */
- /* unicodeByteMode: */
- while(source<sourceLimit) {
- if(targetCapacity<=0) {
- /* target is full */
- *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
- break;
- }
- c=*source++;
- ++nextSourceIndex;
- if((uint32_t)(c-0x3400)<(0xd800-0x3400)) {
- /* not compressible, write character directly */
- if(targetCapacity>=2) {
- *target++=(uint8_t)(c>>8);
- *target++=(uint8_t)c;
- if(offsets!=nullptr) {
- *offsets++=sourceIndex;
- *offsets++=sourceIndex;
- }
- targetCapacity-=2;
- } else {
- length=2;
- goto outputBytes;
- }
- } else if((uint32_t)(c-0x3400)>=(0xf300-0x3400) /* c<0x3400 || c>=0xf300 */) {
- /* compress BMP character if the following one is not an uncompressible ideograph */
- if(!(source<sourceLimit && (uint32_t)(*source-0x3400)<(0xd800-0x3400))) {
- if(((uint32_t)(c-0x30)<10 || (uint32_t)(c-0x61)<26 || (uint32_t)(c-0x41)<26)) {
- /* ASCII digit or letter */
- isSingleByteMode=true;
- c|=((uint32_t)(UC0+dynamicWindow)<<8)|c;
- length=2;
- goto outputBytes;
- } else if((window=getWindow(scsu->fromUDynamicOffsets, c))>=0) {
- /* there is a dynamic window that contains this character, change to it */
- isSingleByteMode=true;
- dynamicWindow=window;
- currentOffset=scsu->fromUDynamicOffsets[dynamicWindow];
- useDynamicWindow(scsu, dynamicWindow);
- c=((uint32_t)(UC0+dynamicWindow)<<8)|(c-currentOffset)|0x80;
- length=2;
- goto outputBytes;
- } else if((code=getDynamicOffset(c, &offset))>=0) {
- /* define a dynamic window with this character */
- isSingleByteMode=true;
- dynamicWindow=getNextDynamicWindow(scsu);
- currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]=offset;
- useDynamicWindow(scsu, dynamicWindow);
- c=((uint32_t)(UD0+dynamicWindow)<<16)|((uint32_t)code<<8)|(c-currentOffset)|0x80;
- length=3;
- goto outputBytes;
- }
- }
- /* don't know how to compress this character, just write it directly */
- length=2;
- goto outputBytes;
- } else if(c<0xe000) {
- /* c is a surrogate */
- if(U16_IS_SURROGATE_LEAD(c)) {
- getTrailUnicode:
- lead=(char16_t)c;
- if(source<sourceLimit) {
- /* test the following code unit */
- trail=*source;
- if(U16_IS_TRAIL(trail)) {
- ++source;
- ++nextSourceIndex;
- c=U16_GET_SUPPLEMENTARY(c, trail);
- /* convert this surrogate code point */
- /* exit this condition tree */
- } else {
- /* this is an unmatched lead code unit (1st surrogate) */
- /* callback(illegal) */
- *pErrorCode=U_ILLEGAL_CHAR_FOUND;
- goto endloop;
- }
- } else {
- /* no more input */
- break;
- }
- } else {
- /* this is an unmatched trail code unit (2nd surrogate) */
- /* callback(illegal) */
- *pErrorCode=U_ILLEGAL_CHAR_FOUND;
- goto endloop;
- }
- /* compress supplementary character */
- if( (window=getWindow(scsu->fromUDynamicOffsets, c))>=0 &&
- !(source<sourceLimit && (uint32_t)(*source-0x3400)<(0xd800-0x3400))
- ) {
- /*
- * there is a dynamic window that contains this character and
- * the following character is not uncompressible,
- * change to the window
- */
- isSingleByteMode=true;
- dynamicWindow=window;
- currentOffset=scsu->fromUDynamicOffsets[dynamicWindow];
- useDynamicWindow(scsu, dynamicWindow);
- c=((uint32_t)(UC0+dynamicWindow)<<8)|(c-currentOffset)|0x80;
- length=2;
- goto outputBytes;
- } else if(source<sourceLimit && lead==*source && /* too lazy to check trail in same window as source[1] */
- (code=getDynamicOffset(c, &offset))>=0
- ) {
- /* two supplementary characters in (probably) the same window - define an extended one */
- isSingleByteMode=true;
- code-=0x200;
- dynamicWindow=getNextDynamicWindow(scsu);
- currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]=offset;
- useDynamicWindow(scsu, dynamicWindow);
- c=((uint32_t)UDX<<24)|((uint32_t)dynamicWindow<<21)|((uint32_t)code<<8)|(c-currentOffset)|0x80;
- length=4;
- goto outputBytes;
- } else {
- /* don't know how to compress this character, just write it directly */
- c=((uint32_t)lead<<16)|trail;
- length=4;
- goto outputBytes;
- }
- } else /* 0xe000<=c<0xf300 */ {
- /* quote to avoid SCSU tags */
- c|=UQU<<16;
- length=3;
- goto outputBytes;
- }
- /* normal end of conversion: prepare for a new character */
- c=0;
- sourceIndex=nextSourceIndex;
- }
- }
- endloop:
- /* set the converter state back into UConverter */
- scsu->fromUIsSingleByteMode=isSingleByteMode;
- scsu->fromUDynamicWindow=dynamicWindow;
- cnv->fromUChar32=c;
- /* write back the updated pointers */
- pArgs->source=source;
- pArgs->target=(char *)target;
- pArgs->offsets=offsets;
- return;
- outputBytes:
- /* write the output character bytes from c and length [code copied from ucnvmbcs.c] */
- /* from the first if in the loop we know that targetCapacity>0 */
- if(length<=targetCapacity) {
- if(offsets==nullptr) {
- switch(length) {
- /* each branch falls through to the next one */
- case 4:
- *target++=(uint8_t)(c>>24);
- U_FALLTHROUGH;
- case 3:
- *target++=(uint8_t)(c>>16);
- U_FALLTHROUGH;
- case 2:
- *target++=(uint8_t)(c>>8);
- U_FALLTHROUGH;
- case 1:
- *target++=(uint8_t)c;
- U_FALLTHROUGH;
- default:
- /* will never occur */
- break;
- }
- } else {
- switch(length) {
- /* each branch falls through to the next one */
- case 4:
- *target++=(uint8_t)(c>>24);
- *offsets++=sourceIndex;
- U_FALLTHROUGH;
- case 3:
- *target++=(uint8_t)(c>>16);
- *offsets++=sourceIndex;
- U_FALLTHROUGH;
- case 2:
- *target++=(uint8_t)(c>>8);
- *offsets++=sourceIndex;
- U_FALLTHROUGH;
- case 1:
- *target++=(uint8_t)c;
- *offsets++=sourceIndex;
- U_FALLTHROUGH;
- default:
- /* will never occur */
- break;
- }
- }
- targetCapacity-=length;
- /* normal end of conversion: prepare for a new character */
- c=0;
- sourceIndex=nextSourceIndex;
- goto loop;
- } else {
- uint8_t *p;
- /*
- * We actually do this backwards here:
- * In order to save an intermediate variable, we output
- * first to the overflow buffer what does not fit into the
- * regular target.
- */
- /* we know that 0<=targetCapacity<length<=4 */
- /* targetCapacity==0 when SCU+supplementary where SCU used up targetCapacity==1 */
- length-=targetCapacity;
- p=(uint8_t *)cnv->charErrorBuffer;
- switch(length) {
- /* each branch falls through to the next one */
- case 4:
- *p++=(uint8_t)(c>>24);
- U_FALLTHROUGH;
- case 3:
- *p++=(uint8_t)(c>>16);
- U_FALLTHROUGH;
- case 2:
- *p++=(uint8_t)(c>>8);
- U_FALLTHROUGH;
- case 1:
- *p=(uint8_t)c;
- U_FALLTHROUGH;
- default:
- /* will never occur */
- break;
- }
- cnv->charErrorBufferLength=(int8_t)length;
- /* now output what fits into the regular target */
- c>>=8*length; /* length was reduced by targetCapacity */
- switch(targetCapacity) {
- /* each branch falls through to the next one */
- case 3:
- *target++=(uint8_t)(c>>16);
- if(offsets!=nullptr) {
- *offsets++=sourceIndex;
- }
- U_FALLTHROUGH;
- case 2:
- *target++=(uint8_t)(c>>8);
- if(offsets!=nullptr) {
- *offsets++=sourceIndex;
- }
- U_FALLTHROUGH;
- case 1:
- *target++=(uint8_t)c;
- if(offsets!=nullptr) {
- *offsets++=sourceIndex;
- }
- U_FALLTHROUGH;
- default:
- break;
- }
- /* target overflow */
- targetCapacity=0;
- *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
- c=0;
- goto endloop;
- }
- }
- /*
- * Identical to _SCSUFromUnicodeWithOffsets but without offset handling.
- * If a change is made in the original function, then either
- * change this function the same way or
- * re-copy the original function and remove the variables
- * offsets, sourceIndex, and nextSourceIndex.
- */
- static void U_CALLCONV
- _SCSUFromUnicode(UConverterFromUnicodeArgs *pArgs,
- UErrorCode *pErrorCode) {
- UConverter *cnv;
- SCSUData *scsu;
- const char16_t *source, *sourceLimit;
- uint8_t *target;
- int32_t targetCapacity;
- UBool isSingleByteMode;
- uint8_t dynamicWindow;
- uint32_t currentOffset;
- uint32_t c, delta;
- int32_t length;
- /* variables for compression heuristics */
- uint32_t offset;
- char16_t lead, trail;
- int code;
- int8_t window;
- /* set up the local pointers */
- cnv=pArgs->converter;
- scsu=(SCSUData *)cnv->extraInfo;
- /* set up the local pointers */
- source=pArgs->source;
- sourceLimit=pArgs->sourceLimit;
- target=(uint8_t *)pArgs->target;
- targetCapacity=(int32_t)(pArgs->targetLimit-pArgs->target);
- /* get the state machine state */
- isSingleByteMode=scsu->fromUIsSingleByteMode;
- dynamicWindow=scsu->fromUDynamicWindow;
- currentOffset=scsu->fromUDynamicOffsets[dynamicWindow];
- c=cnv->fromUChar32;
- /* similar conversion "loop" as in toUnicode */
- loop:
- if(isSingleByteMode) {
- if(c!=0 && targetCapacity>0) {
- goto getTrailSingle;
- }
- /* state machine for single-byte mode */
- /* singleByteMode: */
- while(source<sourceLimit) {
- if(targetCapacity<=0) {
- /* target is full */
- *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
- break;
- }
- c=*source++;
- if((c-0x20)<=0x5f) {
- /* pass US-ASCII graphic character through */
- *target++=(uint8_t)c;
- --targetCapacity;
- } else if(c<0x20) {
- if((1UL<<c)&0x2601 /* binary 0010 0110 0000 0001, check for b==0xd || b==0xa || b==9 || b==0 */) {
- /* CR/LF/TAB/NUL */
- *target++=(uint8_t)c;
- --targetCapacity;
- } else {
- /* quote C0 control character */
- c|=SQ0<<8;
- length=2;
- goto outputBytes;
- }
- } else if((delta=c-currentOffset)<=0x7f) {
- /* use the current dynamic window */
- *target++=(uint8_t)(delta|0x80);
- --targetCapacity;
- } else if(U16_IS_SURROGATE(c)) {
- if(U16_IS_SURROGATE_LEAD(c)) {
- getTrailSingle:
- lead=(char16_t)c;
- if(source<sourceLimit) {
- /* test the following code unit */
- trail=*source;
- if(U16_IS_TRAIL(trail)) {
- ++source;
- c=U16_GET_SUPPLEMENTARY(c, trail);
- /* convert this surrogate code point */
- /* exit this condition tree */
- } else {
- /* this is an unmatched lead code unit (1st surrogate) */
- /* callback(illegal) */
- *pErrorCode=U_ILLEGAL_CHAR_FOUND;
- goto endloop;
- }
- } else {
- /* no more input */
- break;
- }
- } else {
- /* this is an unmatched trail code unit (2nd surrogate) */
- /* callback(illegal) */
- *pErrorCode=U_ILLEGAL_CHAR_FOUND;
- goto endloop;
- }
- /* compress supplementary character U+10000..U+10ffff */
- if((delta=c-currentOffset)<=0x7f) {
- /* use the current dynamic window */
- *target++=(uint8_t)(delta|0x80);
- --targetCapacity;
- } else if((window=getWindow(scsu->fromUDynamicOffsets, c))>=0) {
- /* there is a dynamic window that contains this character, change to it */
- dynamicWindow=window;
- currentOffset=scsu->fromUDynamicOffsets[dynamicWindow];
- useDynamicWindow(scsu, dynamicWindow);
- c=((uint32_t)(SC0+dynamicWindow)<<8)|(c-currentOffset)|0x80;
- length=2;
- goto outputBytes;
- } else if((code=getDynamicOffset(c, &offset))>=0) {
- /* might check if there are more characters in this window to come */
- /* define an extended window with this character */
- code-=0x200;
- dynamicWindow=getNextDynamicWindow(scsu);
- currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]=offset;
- useDynamicWindow(scsu, dynamicWindow);
- c=((uint32_t)SDX<<24)|((uint32_t)dynamicWindow<<21)|((uint32_t)code<<8)|(c-currentOffset)|0x80;
- length=4;
- goto outputBytes;
- } else {
- /* change to Unicode mode and output this (lead, trail) pair */
- isSingleByteMode=false;
- *target++=(uint8_t)SCU;
- --targetCapacity;
- c=((uint32_t)lead<<16)|trail;
- length=4;
- goto outputBytes;
- }
- } else if(c<0xa0) {
- /* quote C1 control character */
- c=(c&0x7f)|(SQ0+1)<<8; /* SQ0+1==SQ1 */
- length=2;
- goto outputBytes;
- } else if(c==0xfeff || c>=0xfff0) {
- /* quote signature character=byte order mark and specials */
- c|=SQU<<16;
- length=3;
- goto outputBytes;
- } else {
- /* compress all other BMP characters */
- if((window=getWindow(scsu->fromUDynamicOffsets, c))>=0) {
- /* there is a window defined that contains this character - switch to it or quote from it? */
- if(source>=sourceLimit || isInOffsetWindowOrDirect(scsu->fromUDynamicOffsets[window], *source)) {
- /* change to dynamic window */
- dynamicWindow=window;
- currentOffset=scsu->fromUDynamicOffsets[dynamicWindow];
- useDynamicWindow(scsu, dynamicWindow);
- c=((uint32_t)(SC0+dynamicWindow)<<8)|(c-currentOffset)|0x80;
- length=2;
- goto outputBytes;
- } else {
- /* quote from dynamic window */
- c=((uint32_t)(SQ0+window)<<8)|(c-scsu->fromUDynamicOffsets[window])|0x80;
- length=2;
- goto outputBytes;
- }
- } else if((window=getWindow(staticOffsets, c))>=0) {
- /* quote from static window */
- c=((uint32_t)(SQ0+window)<<8)|(c-staticOffsets[window]);
- length=2;
- goto outputBytes;
- } else if((code=getDynamicOffset(c, &offset))>=0) {
- /* define a dynamic window with this character */
- dynamicWindow=getNextDynamicWindow(scsu);
- currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]=offset;
- useDynamicWindow(scsu, dynamicWindow);
- c=((uint32_t)(SD0+dynamicWindow)<<16)|((uint32_t)code<<8)|(c-currentOffset)|0x80;
- length=3;
- goto outputBytes;
- } else if((uint32_t)(c-0x3400)<(0xd800-0x3400) &&
- (source>=sourceLimit || (uint32_t)(*source-0x3400)<(0xd800-0x3400))
- ) {
- /*
- * this character is not compressible (a BMP ideograph or similar);
- * switch to Unicode mode if this is the last character in the block
- * or there is at least one more ideograph following immediately
- */
- isSingleByteMode=false;
- c|=SCU<<16;
- length=3;
- goto outputBytes;
- } else {
- /* quote Unicode */
- c|=SQU<<16;
- length=3;
- goto outputBytes;
- }
- }
- /* normal end of conversion: prepare for a new character */
- c=0;
- }
- } else {
- if(c!=0 && targetCapacity>0) {
- goto getTrailUnicode;
- }
- /* state machine for Unicode mode */
- /* unicodeByteMode: */
- while(source<sourceLimit) {
- if(targetCapacity<=0) {
- /* target is full */
- *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
- break;
- }
- c=*source++;
- if((uint32_t)(c-0x3400)<(0xd800-0x3400)) {
- /* not compressible, write character directly */
- if(targetCapacity>=2) {
- *target++=(uint8_t)(c>>8);
- *target++=(uint8_t)c;
- targetCapacity-=2;
- } else {
- length=2;
- goto outputBytes;
- }
- } else if((uint32_t)(c-0x3400)>=(0xf300-0x3400) /* c<0x3400 || c>=0xf300 */) {
- /* compress BMP character if the following one is not an uncompressible ideograph */
- if(!(source<sourceLimit && (uint32_t)(*source-0x3400)<(0xd800-0x3400))) {
- if(((uint32_t)(c-0x30)<10 || (uint32_t)(c-0x61)<26 || (uint32_t)(c-0x41)<26)) {
- /* ASCII digit or letter */
- isSingleByteMode=true;
- c|=((uint32_t)(UC0+dynamicWindow)<<8)|c;
- length=2;
- goto outputBytes;
- } else if((window=getWindow(scsu->fromUDynamicOffsets, c))>=0) {
- /* there is a dynamic window that contains this character, change to it */
- isSingleByteMode=true;
- dynamicWindow=window;
- currentOffset=scsu->fromUDynamicOffsets[dynamicWindow];
- useDynamicWindow(scsu, dynamicWindow);
- c=((uint32_t)(UC0+dynamicWindow)<<8)|(c-currentOffset)|0x80;
- length=2;
- goto outputBytes;
- } else if((code=getDynamicOffset(c, &offset))>=0) {
- /* define a dynamic window with this character */
- isSingleByteMode=true;
- dynamicWindow=getNextDynamicWindow(scsu);
- currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]=offset;
- useDynamicWindow(scsu, dynamicWindow);
- c=((uint32_t)(UD0+dynamicWindow)<<16)|((uint32_t)code<<8)|(c-currentOffset)|0x80;
- length=3;
- goto outputBytes;
- }
- }
- /* don't know how to compress this character, just write it directly */
- length=2;
- goto outputBytes;
- } else if(c<0xe000) {
- /* c is a surrogate */
- if(U16_IS_SURROGATE_LEAD(c)) {
- getTrailUnicode:
- lead=(char16_t)c;
- if(source<sourceLimit) {
- /* test the following code unit */
- trail=*source;
- if(U16_IS_TRAIL(trail)) {
- ++source;
- c=U16_GET_SUPPLEMENTARY(c, trail);
- /* convert this surrogate code point */
- /* exit this condition tree */
- } else {
- /* this is an unmatched lead code unit (1st surrogate) */
- /* callback(illegal) */
- *pErrorCode=U_ILLEGAL_CHAR_FOUND;
- goto endloop;
- }
- } else {
- /* no more input */
- break;
- }
- } else {
- /* this is an unmatched trail code unit (2nd surrogate) */
- /* callback(illegal) */
- *pErrorCode=U_ILLEGAL_CHAR_FOUND;
- goto endloop;
- }
- /* compress supplementary character */
- if( (window=getWindow(scsu->fromUDynamicOffsets, c))>=0 &&
- !(source<sourceLimit && (uint32_t)(*source-0x3400)<(0xd800-0x3400))
- ) {
- /*
- * there is a dynamic window that contains this character and
- * the following character is not uncompressible,
- * change to the window
- */
- isSingleByteMode=true;
- dynamicWindow=window;
- currentOffset=scsu->fromUDynamicOffsets[dynamicWindow];
- useDynamicWindow(scsu, dynamicWindow);
- c=((uint32_t)(UC0+dynamicWindow)<<8)|(c-currentOffset)|0x80;
- length=2;
- goto outputBytes;
- } else if(source<sourceLimit && lead==*source && /* too lazy to check trail in same window as source[1] */
- (code=getDynamicOffset(c, &offset))>=0
- ) {
- /* two supplementary characters in (probably) the same window - define an extended one */
- isSingleByteMode=true;
- code-=0x200;
- dynamicWindow=getNextDynamicWindow(scsu);
- currentOffset=scsu->fromUDynamicOffsets[dynamicWindow]=offset;
- useDynamicWindow(scsu, dynamicWindow);
- c=((uint32_t)UDX<<24)|((uint32_t)dynamicWindow<<21)|((uint32_t)code<<8)|(c-currentOffset)|0x80;
- length=4;
- goto outputBytes;
- } else {
- /* don't know how to compress this character, just write it directly */
- c=((uint32_t)lead<<16)|trail;
- length=4;
- goto outputBytes;
- }
- } else /* 0xe000<=c<0xf300 */ {
- /* quote to avoid SCSU tags */
- c|=UQU<<16;
- length=3;
- goto outputBytes;
- }
- /* normal end of conversion: prepare for a new character */
- c=0;
- }
- }
- endloop:
- /* set the converter state back into UConverter */
- scsu->fromUIsSingleByteMode=isSingleByteMode;
- scsu->fromUDynamicWindow=dynamicWindow;
- cnv->fromUChar32=c;
- /* write back the updated pointers */
- pArgs->source=source;
- pArgs->target=(char *)target;
- return;
- outputBytes:
- /* write the output character bytes from c and length [code copied from ucnvmbcs.c] */
- /* from the first if in the loop we know that targetCapacity>0 */
- if(length<=targetCapacity) {
- switch(length) {
- /* each branch falls through to the next one */
- case 4:
- *target++=(uint8_t)(c>>24);
- U_FALLTHROUGH;
- case 3:
- *target++=(uint8_t)(c>>16);
- U_FALLTHROUGH;
- case 2:
- *target++=(uint8_t)(c>>8);
- U_FALLTHROUGH;
- case 1:
- *target++=(uint8_t)c;
- U_FALLTHROUGH;
- default:
- /* will never occur */
- break;
- }
- targetCapacity-=length;
- /* normal end of conversion: prepare for a new character */
- c=0;
- goto loop;
- } else {
- uint8_t *p;
- /*
- * We actually do this backwards here:
- * In order to save an intermediate variable, we output
- * first to the overflow buffer what does not fit into the
- * regular target.
- */
- /* we know that 0<=targetCapacity<length<=4 */
- /* targetCapacity==0 when SCU+supplementary where SCU used up targetCapacity==1 */
- length-=targetCapacity;
- p=(uint8_t *)cnv->charErrorBuffer;
- switch(length) {
- /* each branch falls through to the next one */
- case 4:
- *p++=(uint8_t)(c>>24);
- U_FALLTHROUGH;
- case 3:
- *p++=(uint8_t)(c>>16);
- U_FALLTHROUGH;
- case 2:
- *p++=(uint8_t)(c>>8);
- U_FALLTHROUGH;
- case 1:
- *p=(uint8_t)c;
- U_FALLTHROUGH;
- default:
- /* will never occur */
- break;
- }
- cnv->charErrorBufferLength=(int8_t)length;
- /* now output what fits into the regular target */
- c = (length == 4) ? 0 : c >> 8*length; /* length was reduced by targetCapacity */
- switch(targetCapacity) {
- /* each branch falls through to the next one */
- case 3:
- *target++=(uint8_t)(c>>16);
- U_FALLTHROUGH;
- case 2:
- *target++=(uint8_t)(c>>8);
- U_FALLTHROUGH;
- case 1:
- *target++=(uint8_t)c;
- U_FALLTHROUGH;
- default:
- break;
- }
- /* target overflow */
- targetCapacity=0;
- *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
- c=0;
- goto endloop;
- }
- }
- /* miscellaneous ------------------------------------------------------------ */
- static const char * U_CALLCONV
- _SCSUGetName(const UConverter *cnv) {
- SCSUData *scsu=(SCSUData *)cnv->extraInfo;
- switch(scsu->locale) {
- case l_ja:
- return "SCSU,locale=ja";
- default:
- return "SCSU";
- }
- }
- /* structure for SafeClone calculations */
- struct cloneSCSUStruct
- {
- UConverter cnv;
- SCSUData mydata;
- };
- static UConverter * U_CALLCONV
- _SCSUSafeClone(const UConverter *cnv,
- void *stackBuffer,
- int32_t *pBufferSize,
- UErrorCode *status)
- {
- struct cloneSCSUStruct * localClone;
- int32_t bufferSizeNeeded = sizeof(struct cloneSCSUStruct);
- if (U_FAILURE(*status)){
- return 0;
- }
- if (*pBufferSize == 0){ /* 'preflighting' request - set needed size into *pBufferSize */
- *pBufferSize = bufferSizeNeeded;
- return 0;
- }
- localClone = (struct cloneSCSUStruct *)stackBuffer;
- /* ucnv.c/ucnv_safeClone() copied the main UConverter already */
- uprv_memcpy(&localClone->mydata, cnv->extraInfo, sizeof(SCSUData));
- localClone->cnv.extraInfo = &localClone->mydata;
- localClone->cnv.isExtraLocal = true;
- return &localClone->cnv;
- }
- U_CDECL_END
- static const UConverterImpl _SCSUImpl={
- UCNV_SCSU,
- nullptr,
- nullptr,
- _SCSUOpen,
- _SCSUClose,
- _SCSUReset,
- _SCSUToUnicode,
- _SCSUToUnicodeWithOffsets,
- _SCSUFromUnicode,
- _SCSUFromUnicodeWithOffsets,
- nullptr,
- nullptr,
- _SCSUGetName,
- nullptr,
- _SCSUSafeClone,
- ucnv_getCompleteUnicodeSet,
- nullptr,
- nullptr
- };
- static const UConverterStaticData _SCSUStaticData={
- sizeof(UConverterStaticData),
- "SCSU",
- 1212, /* CCSID for SCSU */
- UCNV_IBM, UCNV_SCSU,
- 1, 3, /* one char16_t generates at least 1 byte and at most 3 bytes */
- /*
- * The subchar here is ignored because _SCSUOpen() sets U+fffd as a Unicode
- * substitution string.
- */
- { 0x0e, 0xff, 0xfd, 0 }, 3,
- false, false,
- 0,
- 0,
- { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 } /* reserved */
- };
- const UConverterSharedData _SCSUData=
- UCNV_IMMUTABLE_SHARED_DATA_INITIALIZER(&_SCSUStaticData, &_SCSUImpl);
- #endif
|