12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432143314341435143614371438143914401441144214431444144514461447144814491450145114521453145414551456145714581459146014611462 |
- // © 2016 and later: Unicode, Inc. and others.
- // License & terms of use: http://www.unicode.org/copyright.html
- /*
- *******************************************************************************
- *
- * Copyright (C) 1998-2016, International Business Machines
- * Corporation and others. All Rights Reserved.
- *
- *******************************************************************************
- *
- * File uscnnf_p.c
- *
- * Modification History:
- *
- * Date Name Description
- * 12/02/98 stephen Creation.
- * 03/13/99 stephen Modified for new C API.
- *******************************************************************************
- */
- #include "unicode/utypes.h"
- #if !UCONFIG_NO_FORMATTING && !UCONFIG_NO_CONVERSION
- #include "unicode/uchar.h"
- #include "unicode/ustring.h"
- #include "unicode/unum.h"
- #include "unicode/udat.h"
- #include "unicode/uset.h"
- #include "uscanf.h"
- #include "ufmt_cmn.h"
- #include "ufile.h"
- #include "locbund.h"
- #include "cmemory.h"
- #include "ustr_cnv.h"
/* flag characters for u_scanf */
#define FLAG_ASTERISK 0x002A
#define FLAG_PAREN 0x0028
/*
 * Evaluates to non-zero when s is one of the u_scanf flag characters
 * ('*' or '(').  The whole expansion is parenthesized so the macro can be
 * combined safely with surrounding operators such as ! and &&.
 * Note: s is evaluated more than once; do not pass an expression with
 * side effects.
 */
#define ISFLAG(s) ((s) == FLAG_ASTERISK || \
                   (s) == FLAG_PAREN)
/* special characters for u_scanf */
#define SPEC_DOLLARSIGN 0x0024
/* unicode digits */
#define DIGIT_ZERO 0x0030
#define DIGIT_ONE 0x0031
#define DIGIT_TWO 0x0032
#define DIGIT_THREE 0x0033
#define DIGIT_FOUR 0x0034
#define DIGIT_FIVE 0x0035
#define DIGIT_SIX 0x0036
#define DIGIT_SEVEN 0x0037
#define DIGIT_EIGHT 0x0038
#define DIGIT_NINE 0x0039
/*
 * Evaluates to non-zero when s is one of the ASCII digits U+0030..U+0039.
 * The digit constants are contiguous, so a range test is equivalent to
 * comparing against each one.  The expansion is fully parenthesized so it
 * composes safely with surrounding operators such as ! and &&.
 * Note: s is evaluated twice; do not pass an expression with side effects.
 */
#define ISDIGIT(s) ((s) >= DIGIT_ZERO && (s) <= DIGIT_NINE)
/* u_scanf modifiers */
#define MOD_H 0x0068
#define MOD_LOWERL 0x006C
#define MOD_L 0x004C
/*
 * Evaluates to non-zero when s is one of the u_scanf size modifiers
 * ('h', 'l' or 'L').  The whole expansion is parenthesized so the macro can
 * be combined safely with surrounding operators such as ! and &&.
 * Note: s is evaluated more than once; do not pass an expression with
 * side effects.
 */
#define ISMOD(s) ((s) == MOD_H || \
                  (s) == MOD_LOWERL || \
                  (s) == MOD_L)
- /**
- * Struct encapsulating a single uscanf format specification.
- */
- typedef struct u_scanf_spec_info {
- int32_t fWidth; /* Width */
- char16_t fSpec; /* Format specification */
- char16_t fPadChar; /* Padding character */
- UBool fSkipArg; /* true if arg should be skipped */
- UBool fIsLongDouble; /* L flag */
- UBool fIsShort; /* h flag */
- UBool fIsLong; /* l flag */
- UBool fIsLongLong; /* ll flag */
- UBool fIsString; /* true if this is a NUL-terminated string. */
- } u_scanf_spec_info;
- /**
- * Struct encapsulating a single u_scanf format specification.
- */
- typedef struct u_scanf_spec {
- u_scanf_spec_info fInfo; /* Information on this spec */
- int32_t fArgPos; /* Position of data in arg list */
- } u_scanf_spec;
- /**
- * Parse a single u_scanf format specifier in Unicode.
- * @param fmt A pointer to a '%' character in a u_scanf format specification.
- * @param spec A pointer to a <TT>u_scanf_spec</TT> to receive the parsed
- * format specifier.
- * @return The number of characters contained in this specifier.
- */
- static int32_t
- u_scanf_parse_spec (const char16_t *fmt,
- u_scanf_spec *spec)
- {
- const char16_t *s = fmt;
- const char16_t *backup;
- u_scanf_spec_info *info = &(spec->fInfo);
- /* initialize spec to default values */
- spec->fArgPos = -1;
- info->fWidth = -1;
- info->fSpec = 0x0000;
- info->fPadChar = 0x0020;
- info->fSkipArg = false;
- info->fIsLongDouble = false;
- info->fIsShort = false;
- info->fIsLong = false;
- info->fIsLongLong = false;
- info->fIsString = true;
- /* skip over the initial '%' */
- s++;
- /* Check for positional argument */
- if(ISDIGIT(*s)) {
- /* Save the current position */
- backup = s;
- /* handle positional parameters */
- if(ISDIGIT(*s)) {
- spec->fArgPos = *s++ - DIGIT_ZERO;
- while(ISDIGIT(*s)) {
- spec->fArgPos *= 10;
- spec->fArgPos += *s++ - DIGIT_ZERO;
- }
- }
- /* if there is no '$', don't read anything */
- if(*s != SPEC_DOLLARSIGN) {
- spec->fArgPos = -1;
- s = backup;
- }
- /* munge the '$' */
- else
- s++;
- }
- /* Get any format flags */
- while(ISFLAG(*s)) {
- switch(*s++) {
- /* skip argument */
- case FLAG_ASTERISK:
- info->fSkipArg = true;
- break;
- /* pad character specified */
- case FLAG_PAREN:
- /* first four characters are hex values for pad char */
- info->fPadChar = static_cast<char16_t>(ufmt_digitvalue(*s++));
- info->fPadChar = static_cast<char16_t>((info->fPadChar * 16) + ufmt_digitvalue(*s++));
- info->fPadChar = static_cast<char16_t>((info->fPadChar * 16) + ufmt_digitvalue(*s++));
- info->fPadChar = static_cast<char16_t>((info->fPadChar * 16) + ufmt_digitvalue(*s++));
- /* final character is ignored */
- s++;
- break;
- }
- }
- /* Get the width */
- if(ISDIGIT(*s)){
- info->fWidth = *s++ - DIGIT_ZERO;
- while(ISDIGIT(*s)) {
- info->fWidth *= 10;
- info->fWidth += *s++ - DIGIT_ZERO;
- }
- }
- /* Get any modifiers */
- if(ISMOD(*s)) {
- switch(*s++) {
- /* short */
- case MOD_H:
- info->fIsShort = true;
- break;
- /* long or long long */
- case MOD_LOWERL:
- if(*s == MOD_LOWERL) {
- info->fIsLongLong = true;
- /* skip over the next 'l' */
- s++;
- }
- else
- info->fIsLong = true;
- break;
- /* long double */
- case MOD_L:
- info->fIsLongDouble = true;
- break;
- }
- }
- /* finally, get the specifier letter */
- info->fSpec = *s++;
- /* return # of characters in this specifier */
- return static_cast<int32_t>(s - fmt);
- }
#define UP_PERCENT 0x0025

/*
 * Initializers pairing an argument type with the handler function for a
 * conversion.  Only US-ASCII characters are used as conversion letters.
 */

/* ---- ANSI style formatting ---- */
#define UFMT_SIMPLE_PERCENT {ufmt_simple_percent, u_scanf_simple_percent_handler} /* %    */
#define UFMT_STRING         {ufmt_string, u_scanf_string_handler}                 /* s    */
#define UFMT_CHAR           {ufmt_string, u_scanf_char_handler}                   /* c    */
#define UFMT_INT            {ufmt_int, u_scanf_integer_handler}                   /* d, i */
#define UFMT_UINT           {ufmt_int, u_scanf_uinteger_handler}                  /* u    */
#define UFMT_OCTAL          {ufmt_int, u_scanf_octal_handler}                     /* o    */
#define UFMT_HEX            {ufmt_int, u_scanf_hex_handler}                       /* x, X */
#define UFMT_DOUBLE         {ufmt_double, u_scanf_double_handler}                 /* f    */
#define UFMT_SCIENTIFIC     {ufmt_double, u_scanf_scientific_handler}             /* e, E */
#define UFMT_SCIDBL         {ufmt_double, u_scanf_scidbl_handler}                 /* g, G */
#define UFMT_COUNT          {ufmt_count, u_scanf_count_handler}                   /* n    */
#define UFMT_SCANSET        {ufmt_string, u_scanf_scanset_handler}                /* [    */

/* ---- non-ANSI extensions ---- */
#define UFMT_POINTER        {ufmt_pointer, u_scanf_pointer_handler}               /* p    */
#define UFMT_SPELLOUT       {ufmt_double, u_scanf_spellout_handler}               /* V    */
#define UFMT_PERCENT        {ufmt_double, u_scanf_percent_handler}                /* P    */
#define UFMT_UCHAR          {ufmt_uchar, u_scanf_uchar_handler}                   /* C (K is old format) */
#define UFMT_USTRING        {ufmt_ustring, u_scanf_ustring_handler}               /* S (U is old format) */

/* placeholder entry with no handler */
#define UFMT_EMPTY          {ufmt_empty, nullptr}
- /**
- * A u_scanf handler function.
- * A u_scanf handler is responsible for handling a single u_scanf
- * format specification, for example 'd' or 's'.
- * @param stream The UFILE to which to write output.
- * @param info A pointer to a <TT>u_scanf_spec_info</TT> struct containing
- * information on the format specification.
- * @param args A pointer to the argument data
- * @param fmt A pointer to the first character in the format string
- * following the spec.
- * @param fmtConsumed On output, set to the number of characters consumed
- * in <TT>fmt</TT>. Do nothing, if the argument isn't variable width.
- * @param argConverted The number of arguments converted and assigned, or -1 if an
- * error occurred.
- * @return The number of code points consumed during reading.
- */
- typedef int32_t (*u_scanf_handler) (UFILE *stream,
- u_scanf_spec_info *info,
- ufmt_args *args,
- const char16_t *fmt,
- int32_t *fmtConsumed,
- int32_t *argConverted);
- typedef struct u_scanf_info {
- ufmt_type_info info;
- u_scanf_handler handler;
- } u_scanf_info;
- #define USCANF_NUM_FMT_HANDLERS 108
- #define USCANF_SYMBOL_BUFFER_SIZE 8
- /* We do not use handlers for 0-0x1f */
- #define USCANF_BASE_FMT_HANDLERS 0x20
- static int32_t
- u_scanf_skip_leading_ws(UFILE *input,
- char16_t pad)
- {
- char16_t c;
- int32_t count = 0;
- UBool isNotEOF;
- /* skip all leading ws in the input */
- while (((isNotEOF = ufile_getch(input, &c)) == static_cast<UBool>(true)) && (c == pad || u_isWhitespace(c)))
- {
- count++;
- }
- /* put the final character back on the input */
- if(isNotEOF)
- u_fungetc(c, input);
- return count;
- }
- /* TODO: Is always skipping the prefix symbol as a positive sign a good idea in all locales? */
- static int32_t
- u_scanf_skip_leading_positive_sign(UFILE *input,
- UNumberFormat *format,
- UErrorCode *status)
- {
- char16_t c;
- int32_t count = 0;
- UBool isNotEOF;
- char16_t plusSymbol[USCANF_SYMBOL_BUFFER_SIZE];
- int32_t symbolLen;
- UErrorCode localStatus = U_ZERO_ERROR;
- if (U_SUCCESS(*status)) {
- symbolLen = unum_getSymbol(format,
- UNUM_PLUS_SIGN_SYMBOL,
- plusSymbol,
- UPRV_LENGTHOF(plusSymbol),
- &localStatus);
- if (U_SUCCESS(localStatus)) {
- /* skip all leading ws in the input */
- while (((isNotEOF = ufile_getch(input, &c)) == static_cast<UBool>(true)) && (count < symbolLen && c == plusSymbol[count]))
- {
- count++;
- }
- /* put the final character back on the input */
- if(isNotEOF) {
- u_fungetc(c, input);
- }
- }
- }
- return count;
- }
- static int32_t
- u_scanf_simple_percent_handler(UFILE *input,
- u_scanf_spec_info *info,
- ufmt_args *args,
- const char16_t *fmt,
- int32_t *fmtConsumed,
- int32_t *argConverted)
- {
- (void)info;
- (void)args;
- (void)fmt;
- (void)fmtConsumed;
- /* make sure the next character in the input is a percent */
- *argConverted = 0;
- if(u_fgetc(input) != 0x0025) {
- *argConverted = -1;
- }
- return 1;
- }
- static int32_t
- u_scanf_count_handler(UFILE *input,
- u_scanf_spec_info *info,
- ufmt_args *args,
- const char16_t *fmt,
- int32_t *fmtConsumed,
- int32_t *argConverted)
- {
- (void)input;
- (void)fmt;
- (void)fmtConsumed;
- /* in the special case of count, the u_scanf_spec_info's width */
- /* will contain the # of items converted thus far */
- if (!info->fSkipArg) {
- if (info->fIsShort)
- *static_cast<int16_t*>(args[0].ptrValue) = static_cast<int16_t>(UINT16_MAX & info->fWidth);
- else if (info->fIsLongLong)
- *static_cast<int64_t*>(args[0].ptrValue) = info->fWidth;
- else
- *static_cast<int32_t*>(args[0].ptrValue) = static_cast<int32_t>(UINT32_MAX & info->fWidth);
- }
- *argConverted = 0;
- /* we converted 0 args */
- return 0;
- }
- static int32_t
- u_scanf_double_handler(UFILE *input,
- u_scanf_spec_info *info,
- ufmt_args *args,
- const char16_t *fmt,
- int32_t *fmtConsumed,
- int32_t *argConverted)
- {
- (void)fmt;
- (void)fmtConsumed;
- int32_t len;
- double num;
- UNumberFormat *format;
- int32_t parsePos = 0;
- int32_t skipped;
- UErrorCode status = U_ZERO_ERROR;
- /* skip all ws in the input */
- skipped = u_scanf_skip_leading_ws(input, info->fPadChar);
- /* fill the input's internal buffer */
- ufile_fill_uchar_buffer(input);
- /* determine the size of the input's buffer */
- len = static_cast<int32_t>(input->str.fLimit - input->str.fPos);
- /* truncate to the width, if specified */
- if(info->fWidth != -1)
- len = ufmt_min(len, info->fWidth);
- /* get the formatter */
- format = u_locbund_getNumberFormat(&input->str.fBundle, UNUM_DECIMAL);
- /* handle error */
- if (format == nullptr)
- return 0;
- /* Skip the positive prefix. ICU normally can't handle this due to strict parsing. */
- skipped += u_scanf_skip_leading_positive_sign(input, format, &status);
- /* parse the number */
- num = unum_parseDouble(format, input->str.fPos, len, &parsePos, &status);
- if (!info->fSkipArg) {
- if (info->fIsLong)
- *static_cast<double*>(args[0].ptrValue) = num;
- else if (info->fIsLongDouble)
- *static_cast<long double*>(args[0].ptrValue) = num;
- else
- *static_cast<float*>(args[0].ptrValue) = static_cast<float>(num);
- }
- /* mask off any necessary bits */
- /* if(! info->fIsLong_double)
- num &= DBL_MAX;*/
- /* update the input's position to reflect consumed data */
- input->str.fPos += parsePos;
- /* we converted 1 arg */
- *argConverted = !info->fSkipArg;
- return parsePos + skipped;
- }
- #define UPRINTF_SYMBOL_BUFFER_SIZE 8
- static int32_t
- u_scanf_scientific_handler(UFILE *input,
- u_scanf_spec_info *info,
- ufmt_args *args,
- const char16_t *fmt,
- int32_t *fmtConsumed,
- int32_t *argConverted)
- {
- (void)fmt;
- (void)fmtConsumed;
- int32_t len;
- double num;
- UNumberFormat *format;
- int32_t parsePos = 0;
- int32_t skipped;
- UErrorCode status = U_ZERO_ERROR;
- char16_t srcExpBuf[UPRINTF_SYMBOL_BUFFER_SIZE];
- int32_t srcLen, expLen;
- char16_t expBuf[UPRINTF_SYMBOL_BUFFER_SIZE];
- /* skip all ws in the input */
- skipped = u_scanf_skip_leading_ws(input, info->fPadChar);
- /* fill the input's internal buffer */
- ufile_fill_uchar_buffer(input);
- /* determine the size of the input's buffer */
- len = static_cast<int32_t>(input->str.fLimit - input->str.fPos);
- /* truncate to the width, if specified */
- if(info->fWidth != -1)
- len = ufmt_min(len, info->fWidth);
- /* get the formatter */
- format = u_locbund_getNumberFormat(&input->str.fBundle, UNUM_SCIENTIFIC);
- /* handle error */
- if (format == nullptr)
- return 0;
- /* set the appropriate flags on the formatter */
- srcLen = unum_getSymbol(format,
- UNUM_EXPONENTIAL_SYMBOL,
- srcExpBuf,
- sizeof(srcExpBuf),
- &status);
- /* Upper/lower case the e */
- if (info->fSpec == static_cast<char16_t>(0x65) /* e */) {
- expLen = u_strToLower(expBuf, static_cast<int32_t>(sizeof(expBuf)),
- srcExpBuf, srcLen,
- input->str.fBundle.fLocale,
- &status);
- }
- else {
- expLen = u_strToUpper(expBuf, static_cast<int32_t>(sizeof(expBuf)),
- srcExpBuf, srcLen,
- input->str.fBundle.fLocale,
- &status);
- }
- unum_setSymbol(format,
- UNUM_EXPONENTIAL_SYMBOL,
- expBuf,
- expLen,
- &status);
- /* Skip the positive prefix. ICU normally can't handle this due to strict parsing. */
- skipped += u_scanf_skip_leading_positive_sign(input, format, &status);
- /* parse the number */
- num = unum_parseDouble(format, input->str.fPos, len, &parsePos, &status);
- if (!info->fSkipArg) {
- if (info->fIsLong)
- *static_cast<double*>(args[0].ptrValue) = num;
- else if (info->fIsLongDouble)
- *static_cast<long double*>(args[0].ptrValue) = num;
- else
- *static_cast<float*>(args[0].ptrValue) = static_cast<float>(num);
- }
- /* mask off any necessary bits */
- /* if(! info->fIsLong_double)
- num &= DBL_MAX;*/
- /* update the input's position to reflect consumed data */
- input->str.fPos += parsePos;
- /* we converted 1 arg */
- *argConverted = !info->fSkipArg;
- return parsePos + skipped;
- }
- static int32_t
- u_scanf_scidbl_handler(UFILE *input,
- u_scanf_spec_info *info,
- ufmt_args *args,
- const char16_t *fmt,
- int32_t *fmtConsumed,
- int32_t *argConverted)
- {
- (void)fmt;
- (void)fmtConsumed;
- int32_t len;
- double num;
- UNumberFormat *scientificFormat, *genericFormat;
- /*int32_t scientificResult, genericResult;*/
- double scientificResult, genericResult;
- int32_t scientificParsePos = 0, genericParsePos = 0, parsePos = 0;
- int32_t skipped;
- UErrorCode scientificStatus = U_ZERO_ERROR;
- UErrorCode genericStatus = U_ZERO_ERROR;
- /* since we can't determine by scanning the characters whether */
- /* a number was formatted in the 'f' or 'g' styles, parse the */
- /* string with both formatters, and assume whichever one */
- /* parsed the most is the correct formatter to use */
- /* skip all ws in the input */
- skipped = u_scanf_skip_leading_ws(input, info->fPadChar);
- /* fill the input's internal buffer */
- ufile_fill_uchar_buffer(input);
- /* determine the size of the input's buffer */
- len = static_cast<int32_t>(input->str.fLimit - input->str.fPos);
- /* truncate to the width, if specified */
- if(info->fWidth != -1)
- len = ufmt_min(len, info->fWidth);
- /* get the formatters */
- scientificFormat = u_locbund_getNumberFormat(&input->str.fBundle, UNUM_SCIENTIFIC);
- genericFormat = u_locbund_getNumberFormat(&input->str.fBundle, UNUM_DECIMAL);
- /* handle error */
- if (scientificFormat == nullptr || genericFormat == nullptr)
- return 0;
- /* Skip the positive prefix. ICU normally can't handle this due to strict parsing. */
- skipped += u_scanf_skip_leading_positive_sign(input, genericFormat, &genericStatus);
- /* parse the number using each format*/
- scientificResult = unum_parseDouble(scientificFormat, input->str.fPos, len,
- &scientificParsePos, &scientificStatus);
- genericResult = unum_parseDouble(genericFormat, input->str.fPos, len,
- &genericParsePos, &genericStatus);
- /* determine which parse made it farther */
- if(scientificParsePos > genericParsePos) {
- /* stash the result in num */
- num = scientificResult;
- /* update the input's position to reflect consumed data */
- parsePos += scientificParsePos;
- }
- else {
- /* stash the result in num */
- num = genericResult;
- /* update the input's position to reflect consumed data */
- parsePos += genericParsePos;
- }
- input->str.fPos += parsePos;
- if (!info->fSkipArg) {
- if (info->fIsLong)
- *static_cast<double*>(args[0].ptrValue) = num;
- else if (info->fIsLongDouble)
- *static_cast<long double*>(args[0].ptrValue) = num;
- else
- *static_cast<float*>(args[0].ptrValue) = static_cast<float>(num);
- }
- /* mask off any necessary bits */
- /* if(! info->fIsLong_double)
- num &= DBL_MAX;*/
- /* we converted 1 arg */
- *argConverted = !info->fSkipArg;
- return parsePos + skipped;
- }
- static int32_t
- u_scanf_integer_handler(UFILE *input,
- u_scanf_spec_info *info,
- ufmt_args *args,
- const char16_t *fmt,
- int32_t *fmtConsumed,
- int32_t *argConverted)
- {
- (void)fmt;
- (void)fmtConsumed;
- int32_t len;
- void* num = args[0].ptrValue;
- UNumberFormat *format, *localFormat;
- int32_t parsePos = 0;
- int32_t skipped;
- int32_t parseIntOnly = 0;
- UErrorCode status = U_ZERO_ERROR;
- int64_t result;
- /* skip all ws in the input */
- skipped = u_scanf_skip_leading_ws(input, info->fPadChar);
- /* fill the input's internal buffer */
- ufile_fill_uchar_buffer(input);
- /* determine the size of the input's buffer */
- len = static_cast<int32_t>(input->str.fLimit - input->str.fPos);
- /* truncate to the width, if specified */
- if(info->fWidth != -1)
- len = ufmt_min(len, info->fWidth);
- /* get the formatter */
- format = u_locbund_getNumberFormat(&input->str.fBundle, UNUM_DECIMAL);
- /* handle error */
- if (format == nullptr)
- return 0;
- /* for integer types, do not attempt to parse fractions */
- localFormat = unum_clone(format, &status);
- if(U_FAILURE(status))
- return 0;
- if(info->fSpec == 'd' || info->fSpec == 'i' || info->fSpec == 'u')
- parseIntOnly = 1;
- unum_setAttribute(localFormat, UNUM_PARSE_INT_ONLY, parseIntOnly);
- /* Skip the positive prefix. ICU normally can't handle this due to strict parsing. */
- skipped += u_scanf_skip_leading_positive_sign(input, localFormat, &status);
- /* parse the number */
- result = unum_parseInt64(localFormat, input->str.fPos, len, &parsePos, &status);
- /* mask off any necessary bits */
- if (!info->fSkipArg) {
- if (info->fIsShort)
- *static_cast<int16_t*>(num) = static_cast<int16_t>(UINT16_MAX & result);
- else if (info->fIsLongLong)
- *static_cast<int64_t*>(num) = result;
- else
- *static_cast<int32_t*>(num) = static_cast<int32_t>(UINT32_MAX & result);
- }
- /* update the input's position to reflect consumed data */
- input->str.fPos += parsePos;
- /* cleanup cloned formatter */
- unum_close(localFormat);
- /* we converted 1 arg */
- *argConverted = !info->fSkipArg;
- return parsePos + skipped;
- }
- static int32_t
- u_scanf_uinteger_handler(UFILE *input,
- u_scanf_spec_info *info,
- ufmt_args *args,
- const char16_t *fmt,
- int32_t *fmtConsumed,
- int32_t *argConverted)
- {
- /* TODO Fix this when Numberformat handles uint64_t */
- return u_scanf_integer_handler(input, info, args, fmt, fmtConsumed, argConverted);
- }
- static int32_t
- u_scanf_percent_handler(UFILE *input,
- u_scanf_spec_info *info,
- ufmt_args *args,
- const char16_t *fmt,
- int32_t *fmtConsumed,
- int32_t *argConverted)
- {
- (void)fmt;
- (void)fmtConsumed;
- int32_t len;
- double num;
- UNumberFormat *format;
- int32_t parsePos = 0;
- UErrorCode status = U_ZERO_ERROR;
- /* skip all ws in the input */
- u_scanf_skip_leading_ws(input, info->fPadChar);
- /* fill the input's internal buffer */
- ufile_fill_uchar_buffer(input);
- /* determine the size of the input's buffer */
- len = static_cast<int32_t>(input->str.fLimit - input->str.fPos);
- /* truncate to the width, if specified */
- if(info->fWidth != -1)
- len = ufmt_min(len, info->fWidth);
- /* get the formatter */
- format = u_locbund_getNumberFormat(&input->str.fBundle, UNUM_PERCENT);
- /* handle error */
- if (format == nullptr)
- return 0;
- /* Skip the positive prefix. ICU normally can't handle this due to strict parsing. */
- u_scanf_skip_leading_positive_sign(input, format, &status);
- /* parse the number */
- num = unum_parseDouble(format, input->str.fPos, len, &parsePos, &status);
- if (!info->fSkipArg) {
- *static_cast<double*>(args[0].ptrValue) = num;
- }
- /* mask off any necessary bits */
- /* if(! info->fIsLong_double)
- num &= DBL_MAX;*/
- /* update the input's position to reflect consumed data */
- input->str.fPos += parsePos;
- /* we converted 1 arg */
- *argConverted = !info->fSkipArg;
- return parsePos;
- }
- static int32_t
- u_scanf_string_handler(UFILE *input,
- u_scanf_spec_info *info,
- ufmt_args *args,
- const char16_t *fmt,
- int32_t *fmtConsumed,
- int32_t *argConverted)
- {
- (void)fmt;
- (void)fmtConsumed;
- const char16_t *source;
- UConverter *conv;
- char* arg = static_cast<char*>(args[0].ptrValue);
- char *alias = arg;
- char *limit;
- UErrorCode status = U_ZERO_ERROR;
- int32_t count;
- int32_t skipped = 0;
- char16_t c;
- UBool isNotEOF = false;
- /* skip all ws in the input */
- if (info->fIsString) {
- skipped = u_scanf_skip_leading_ws(input, info->fPadChar);
- }
- /* get the string one character at a time, truncating to the width */
- count = 0;
- /* open the default converter */
- conv = u_getDefaultConverter(&status);
- if(U_FAILURE(status))
- return -1;
- while( (info->fWidth == -1 || count < info->fWidth)
- && ((isNotEOF = ufile_getch(input, &c)) == static_cast<UBool>(true))
- && (!info->fIsString || (c != info->fPadChar && !u_isWhitespace(c))))
- {
- if (!info->fSkipArg) {
- /* put the character from the input onto the target */
- source = &c;
- /* Since we do this one character at a time, do it this way. */
- if (info->fWidth > 0) {
- limit = alias + info->fWidth - count;
- }
- else {
- limit = alias + ucnv_getMaxCharSize(conv);
- }
- /* convert the character to the default codepage */
- ucnv_fromUnicode(conv, &alias, limit, &source, source + 1,
- nullptr, true, &status);
- if(U_FAILURE(status)) {
- /* clean up */
- u_releaseDefaultConverter(conv);
- return -1;
- }
- }
- /* increment the count */
- ++count;
- }
- /* put the final character we read back on the input */
- if (!info->fSkipArg) {
- if ((info->fWidth == -1 || count < info->fWidth) && isNotEOF)
- u_fungetc(c, input);
- /* add the terminator */
- if (info->fIsString) {
- *alias = 0x00;
- }
- }
- /* clean up */
- u_releaseDefaultConverter(conv);
- /* we converted 1 arg */
- *argConverted = !info->fSkipArg;
- return count + skipped;
- }
- static int32_t
- u_scanf_char_handler(UFILE *input,
- u_scanf_spec_info *info,
- ufmt_args *args,
- const char16_t *fmt,
- int32_t *fmtConsumed,
- int32_t *argConverted)
- {
- if (info->fWidth < 0) {
- info->fWidth = 1;
- }
- info->fIsString = false;
- return u_scanf_string_handler(input, info, args, fmt, fmtConsumed, argConverted);
- }
- static int32_t
- u_scanf_ustring_handler(UFILE *input,
- u_scanf_spec_info *info,
- ufmt_args *args,
- const char16_t *fmt,
- int32_t *fmtConsumed,
- int32_t *argConverted)
- {
- (void)fmt;
- (void)fmtConsumed;
- char16_t* arg = static_cast<char16_t*>(args[0].ptrValue);
- char16_t *alias = arg;
- int32_t count;
- int32_t skipped = 0;
- char16_t c;
- UBool isNotEOF = false;
- /* skip all ws in the input */
- if (info->fIsString) {
- skipped = u_scanf_skip_leading_ws(input, info->fPadChar);
- }
- /* get the string one character at a time, truncating to the width */
- count = 0;
- while( (info->fWidth == -1 || count < info->fWidth)
- && ((isNotEOF = ufile_getch(input, &c)) == static_cast<UBool>(true))
- && (!info->fIsString || (c != info->fPadChar && !u_isWhitespace(c))))
- {
- /* put the character from the input onto the target */
- if (!info->fSkipArg) {
- *alias++ = c;
- }
- /* increment the count */
- ++count;
- }
- /* put the final character we read back on the input */
- if (!info->fSkipArg) {
- if((info->fWidth == -1 || count < info->fWidth) && isNotEOF) {
- u_fungetc(c, input);
- }
- /* add the terminator */
- if (info->fIsString) {
- *alias = 0x0000;
- }
- }
- /* we converted 1 arg */
- *argConverted = !info->fSkipArg;
- return count + skipped;
- }
- static int32_t
- u_scanf_uchar_handler(UFILE *input,
- u_scanf_spec_info *info,
- ufmt_args *args,
- const char16_t *fmt,
- int32_t *fmtConsumed,
- int32_t *argConverted)
- {
- if (info->fWidth < 0) {
- info->fWidth = 1;
- }
- info->fIsString = false;
- return u_scanf_ustring_handler(input, info, args, fmt, fmtConsumed, argConverted);
- }
- static int32_t
- u_scanf_spellout_handler(UFILE *input,
- u_scanf_spec_info *info,
- ufmt_args *args,
- const char16_t *fmt,
- int32_t *fmtConsumed,
- int32_t *argConverted)
- {
- (void)fmt;
- (void)fmtConsumed;
- int32_t len;
- double num;
- UNumberFormat *format;
- int32_t parsePos = 0;
- int32_t skipped;
- UErrorCode status = U_ZERO_ERROR;
- /* skip all ws in the input */
- skipped = u_scanf_skip_leading_ws(input, info->fPadChar);
- /* fill the input's internal buffer */
- ufile_fill_uchar_buffer(input);
- /* determine the size of the input's buffer */
- len = static_cast<int32_t>(input->str.fLimit - input->str.fPos);
- /* truncate to the width, if specified */
- if(info->fWidth != -1)
- len = ufmt_min(len, info->fWidth);
- /* get the formatter */
- format = u_locbund_getNumberFormat(&input->str.fBundle, UNUM_SPELLOUT);
- /* handle error */
- if (format == nullptr)
- return 0;
- /* Skip the positive prefix. ICU normally can't handle this due to strict parsing. */
- /* This is not applicable to RBNF. */
- /*skipped += u_scanf_skip_leading_positive_sign(input, format, &status);*/
- /* parse the number */
- num = unum_parseDouble(format, input->str.fPos, len, &parsePos, &status);
- if (!info->fSkipArg) {
- *static_cast<double*>(args[0].ptrValue) = num;
- }
- /* mask off any necessary bits */
- /* if(! info->fIsLong_double)
- num &= DBL_MAX;*/
- /* update the input's position to reflect consumed data */
- input->str.fPos += parsePos;
- /* we converted 1 arg */
- *argConverted = !info->fSkipArg;
- return parsePos + skipped;
- }
- static int32_t
- u_scanf_hex_handler(UFILE *input,
- u_scanf_spec_info *info,
- ufmt_args *args,
- const char16_t *fmt,
- int32_t *fmtConsumed,
- int32_t *argConverted)
- {
- (void)fmt;
- (void)fmtConsumed;
- int32_t len;
- int32_t skipped;
- void* num = args[0].ptrValue;
- int64_t result;
- /* skip all ws in the input */
- skipped = u_scanf_skip_leading_ws(input, info->fPadChar);
- /* fill the input's internal buffer */
- ufile_fill_uchar_buffer(input);
- /* determine the size of the input's buffer */
- len = static_cast<int32_t>(input->str.fLimit - input->str.fPos);
- /* truncate to the width, if specified */
- if(info->fWidth != -1)
- len = ufmt_min(len, info->fWidth);
- /* check for alternate form */
- if( *(input->str.fPos) == 0x0030 &&
- (*(input->str.fPos + 1) == 0x0078 || *(input->str.fPos + 1) == 0x0058) ) {
- /* skip the '0' and 'x' or 'X' if present */
- input->str.fPos += 2;
- len -= 2;
- }
- /* parse the number */
- result = ufmt_uto64(input->str.fPos, &len, 16);
- /* update the input's position to reflect consumed data */
- input->str.fPos += len;
- /* mask off any necessary bits */
- if (!info->fSkipArg) {
- if (info->fIsShort)
- *static_cast<int16_t*>(num) = static_cast<int16_t>(UINT16_MAX & result);
- else if (info->fIsLongLong)
- *static_cast<int64_t*>(num) = result;
- else
- *static_cast<int32_t*>(num) = static_cast<int32_t>(UINT32_MAX & result);
- }
- /* we converted 1 arg */
- *argConverted = !info->fSkipArg;
- return len + skipped;
- }
- static int32_t
- u_scanf_octal_handler(UFILE *input,
- u_scanf_spec_info *info,
- ufmt_args *args,
- const char16_t *fmt,
- int32_t *fmtConsumed,
- int32_t *argConverted)
- {
- (void)fmt;
- (void)fmtConsumed;
- int32_t len;
- int32_t skipped;
- void* num = args[0].ptrValue;
- int64_t result;
- /* skip all ws in the input */
- skipped = u_scanf_skip_leading_ws(input, info->fPadChar);
- /* fill the input's internal buffer */
- ufile_fill_uchar_buffer(input);
- /* determine the size of the input's buffer */
- len = static_cast<int32_t>(input->str.fLimit - input->str.fPos);
- /* truncate to the width, if specified */
- if(info->fWidth != -1)
- len = ufmt_min(len, info->fWidth);
- /* parse the number */
- result = ufmt_uto64(input->str.fPos, &len, 8);
- /* update the input's position to reflect consumed data */
- input->str.fPos += len;
- /* mask off any necessary bits */
- if (!info->fSkipArg) {
- if (info->fIsShort)
- *static_cast<int16_t*>(num) = static_cast<int16_t>(UINT16_MAX & result);
- else if (info->fIsLongLong)
- *static_cast<int64_t*>(num) = result;
- else
- *static_cast<int32_t*>(num) = static_cast<int32_t>(UINT32_MAX & result);
- }
- /* we converted 1 arg */
- *argConverted = !info->fSkipArg;
- return len + skipped;
- }
- static int32_t
- u_scanf_pointer_handler(UFILE *input,
- u_scanf_spec_info *info,
- ufmt_args *args,
- const char16_t *fmt,
- int32_t *fmtConsumed,
- int32_t *argConverted)
- {
- (void)fmt;
- (void)fmtConsumed;
- int32_t len;
- int32_t skipped;
- void *result;
- void** p = static_cast<void**>(args[0].ptrValue);
- /* skip all ws in the input */
- skipped = u_scanf_skip_leading_ws(input, info->fPadChar);
- /* fill the input's internal buffer */
- ufile_fill_uchar_buffer(input);
- /* determine the size of the input's buffer */
- len = static_cast<int32_t>(input->str.fLimit - input->str.fPos);
- /* truncate to the width, if specified */
- if(info->fWidth != -1) {
- len = ufmt_min(len, info->fWidth);
- }
- /* Make sure that we don't consume too much */
- if (len > static_cast<int32_t>(sizeof(void*) * 2)) {
- len = static_cast<int32_t>(sizeof(void*) * 2);
- }
- /* parse the pointer - assign to temporary value */
- result = ufmt_utop(input->str.fPos, &len);
- if (!info->fSkipArg) {
- *p = result;
- }
- /* update the input's position to reflect consumed data */
- input->str.fPos += len;
- /* we converted 1 arg */
- *argConverted = !info->fSkipArg;
- return len + skipped;
- }
- static int32_t
- u_scanf_scanset_handler(UFILE *input,
- u_scanf_spec_info *info,
- ufmt_args *args,
- const char16_t *fmt,
- int32_t *fmtConsumed,
- int32_t *argConverted)
- {
- USet *scanset;
- UErrorCode status = U_ZERO_ERROR;
- int32_t chLeft = INT32_MAX;
- UChar32 c;
- char16_t* alias = static_cast<char16_t*>(args[0].ptrValue);
- UBool isNotEOF = false;
- UBool readCharacter = false;
- /* Create an empty set */
- scanset = uset_open(0, -1);
- /* Back up one to get the [ */
- fmt--;
- /* truncate to the width, if specified and alias the target */
- if(info->fWidth >= 0) {
- chLeft = info->fWidth;
- }
- /* parse the scanset from the fmt string */
- *fmtConsumed = uset_applyPattern(scanset, fmt, -1, 0, &status);
- /* verify that the parse was successful */
- if (U_SUCCESS(status)) {
- c=0;
- /* grab characters one at a time and make sure they are in the scanset */
- while(chLeft > 0) {
- if (((isNotEOF = ufile_getch32(input, &c)) == static_cast<UBool>(true)) && uset_contains(scanset, c)) {
- readCharacter = true;
- if (!info->fSkipArg) {
- int32_t idx = 0;
- UBool isError = false;
- U16_APPEND(alias, idx, chLeft, c, isError);
- if (isError) {
- break;
- }
- alias += idx;
- }
- chLeft -= (1 + U_IS_SUPPLEMENTARY(c));
- }
- else {
- /* if the character's not in the scanset, break out */
- break;
- }
- }
- /* put the final character we read back on the input */
- if(isNotEOF && chLeft > 0) {
- u_fungetc(c, input);
- }
- }
- uset_close(scanset);
- /* if we didn't match at least 1 character, fail */
- if(!readCharacter)
- return -1;
- /* otherwise, add the terminator */
- else if (!info->fSkipArg) {
- *alias = 0x00;
- }
- /* we converted 1 arg */
- *argConverted = !info->fSkipArg;
- return (info->fWidth >= 0 ? info->fWidth : INT32_MAX) - chLeft;
- }
- /* Use US-ASCII characters only for formatting. Most codepages have
- characters 20-7F from Unicode. Using any other codepage specific
- characters will make it very difficult to format the string on
- non-Unicode machines */
- static const u_scanf_info g_u_scanf_infos[USCANF_NUM_FMT_HANDLERS] = {
- /* 0x20 */
- UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY,
- UFMT_EMPTY, UFMT_SIMPLE_PERCENT,UFMT_EMPTY, UFMT_EMPTY,
- UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY,
- UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY,
- /* 0x30 */
- UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY,
- UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY,
- UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY,
- UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY,
- /* 0x40 */
- UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_UCHAR,
- UFMT_EMPTY, UFMT_SCIENTIFIC, UFMT_EMPTY, UFMT_SCIDBL,
- #ifdef U_USE_OBSOLETE_IO_FORMATTING
- UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_UCHAR/*deprecated*/,
- #else
- UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY,
- #endif
- UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY,
- /* 0x50 */
- UFMT_PERCENT, UFMT_EMPTY, UFMT_EMPTY, UFMT_USTRING,
- #ifdef U_USE_OBSOLETE_IO_FORMATTING
- UFMT_EMPTY, UFMT_USTRING/*deprecated*/,UFMT_SPELLOUT, UFMT_EMPTY,
- #else
- UFMT_EMPTY, UFMT_EMPTY, UFMT_SPELLOUT, UFMT_EMPTY,
- #endif
- UFMT_HEX, UFMT_EMPTY, UFMT_EMPTY, UFMT_SCANSET,
- UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY,
- /* 0x60 */
- UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_CHAR,
- UFMT_INT, UFMT_SCIENTIFIC, UFMT_DOUBLE, UFMT_SCIDBL,
- UFMT_EMPTY, UFMT_INT, UFMT_EMPTY, UFMT_EMPTY,
- UFMT_EMPTY, UFMT_EMPTY, UFMT_COUNT, UFMT_OCTAL,
- /* 0x70 */
- UFMT_POINTER, UFMT_EMPTY, UFMT_EMPTY, UFMT_STRING,
- UFMT_EMPTY, UFMT_UINT, UFMT_EMPTY, UFMT_EMPTY,
- UFMT_HEX, UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY,
- UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY, UFMT_EMPTY,
- };
- U_CFUNC int32_t
- u_scanf_parse(UFILE *f,
- const char16_t *patternSpecification,
- va_list ap)
- {
- const char16_t *alias;
- int32_t count, converted, argConsumed, cpConsumed;
- uint16_t handlerNum;
- ufmt_args args;
- u_scanf_spec spec;
- ufmt_type_info info;
- u_scanf_handler handler;
- /* alias the pattern */
- alias = patternSpecification;
- /* haven't converted anything yet */
- argConsumed = 0;
- converted = 0;
- cpConsumed = 0;
- /* iterate through the pattern */
- for(;;) {
- /* match any characters up to the next '%' */
- while(*alias != UP_PERCENT && *alias != 0x0000 && u_fgetc(f) == *alias) {
- alias++;
- }
- /* if we aren't at a '%', or if we're at end of string, break*/
- if(*alias != UP_PERCENT || *alias == 0x0000)
- break;
- /* parse the specifier */
- count = u_scanf_parse_spec(alias, &spec);
- /* update the pointer in pattern */
- alias += count;
- handlerNum = (uint16_t)(spec.fInfo.fSpec - USCANF_BASE_FMT_HANDLERS);
- if (handlerNum < USCANF_NUM_FMT_HANDLERS) {
- /* skip the argument, if necessary */
- /* query the info function for argument information */
- info = g_u_scanf_infos[ handlerNum ].info;
- if (info != ufmt_count && u_feof(f)) {
- break;
- }
- else if(spec.fInfo.fSkipArg) {
- args.ptrValue = nullptr;
- }
- else {
- switch(info) {
- case ufmt_count:
- /* set the spec's width to the # of items converted */
- spec.fInfo.fWidth = cpConsumed;
- U_FALLTHROUGH;
- case ufmt_char:
- case ufmt_uchar:
- case ufmt_int:
- case ufmt_string:
- case ufmt_ustring:
- case ufmt_pointer:
- case ufmt_float:
- case ufmt_double:
- args.ptrValue = va_arg(ap, void*);
- break;
- default:
- /* else args is ignored */
- args.ptrValue = nullptr;
- break;
- }
- }
- /* call the handler function */
- handler = g_u_scanf_infos[ handlerNum ].handler;
- if (handler != nullptr) {
- /* reset count to 1 so that += for alias works. */
- count = 1;
- cpConsumed += (*handler)(f, &spec.fInfo, &args, alias, &count, &argConsumed);
- /* if the handler encountered an error condition, break */
- if(argConsumed < 0) {
- converted = -1;
- break;
- }
- /* add to the # of items converted */
- converted += argConsumed;
- /* update the pointer in pattern */
- alias += count-1;
- }
- /* else do nothing */
- }
- /* else do nothing */
- /* just ignore unknown tags */
- }
- /* return # of items converted */
- return converted;
- }
- #endif /* #if !UCONFIG_NO_FORMATTING */
|