12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732 |
- // © 2016 and later: Unicode, Inc. and others.
- // License & terms of use: http://www.unicode.org/copyright.html
- /*
- ******************************************************************************
- *
- * Copyright (C) 1998-2016, International Business Machines
- * Corporation and others. All Rights Reserved.
- *
- ******************************************************************************
- *
- * File ustdio.c
- *
- * Modification History:
- *
- * Date Name Description
- * 11/18/98 stephen Creation.
- * 03/12/99 stephen Modified for new C API.
- * 07/19/99 stephen Fixed read() and gets()
- ******************************************************************************
- */
- #include "unicode/ustdio.h"
- #if !UCONFIG_NO_CONVERSION
- #include "unicode/putil.h"
- #include "cmemory.h"
- #include "cstring.h"
- #include "ufile.h"
- #include "ufmt_cmn.h"
- #include "unicode/ucnv.h"
- #include "unicode/ustring.h"
- #include <string.h>
/* Code units that this file recognizes as line delimiters when reading. */
#define DELIM_LF 0x000A
#define DELIM_VT 0x000B
#define DELIM_FF 0x000C
#define DELIM_CR 0x000D
#define DELIM_NEL 0x0085
#define DELIM_LS 0x2028
#define DELIM_PS 0x2029

/*
 * Line terminator appended by u_fputs(), plus its length in code units
 * (the trailing 0x0000 is not counted).
 */
/* TODO: is this correct for all codepages? Should we just use \n and let the converter handle it? */
/* TODO: Default newline writing should be detected based upon the converter being used. */
#if !U_PLATFORM_USES_ONLY_WIN32_API
static const char16_t DELIMITERS [] = { DELIM_LF, 0x0000 };
static const uint32_t DELIMITERS_LEN = 1;
#else
static const char16_t DELIMITERS [] = { DELIM_CR, DELIM_LF, 0x0000 };
static const uint32_t DELIMITERS_LEN = 2;
#endif

/* True when c1 is one of the delimiter code units listed above. */
#define IS_FIRST_STRING_DELIMITER(c1) \
    (UBool)((c1) == DELIM_LF \
         || (c1) == DELIM_VT \
         || (c1) == DELIM_FF \
         || (c1) == DELIM_CR \
         || (c1) == DELIM_NEL \
         || (c1) == DELIM_LS \
         || (c1) == DELIM_PS)

/* True when c1 may be the first half of a two-unit delimiter (only CR). */
#define CAN_HAVE_COMBINED_STRING_DELIMITER(c1) (UBool)((c1) == DELIM_CR)

/* True when c1 followed by c2 forms the CR LF pair. */
#define IS_COMBINED_STRING_DELIMITER(c1, c2) \
    (UBool)((c2) == DELIM_LF && (c1) == DELIM_CR)
- #if !UCONFIG_NO_TRANSLITERATION
- U_CAPI UTransliterator* U_EXPORT2
- u_fsettransliterator(UFILE *file, UFileDirection direction,
- UTransliterator *adopt, UErrorCode *status)
- {
- UTransliterator *old = nullptr;
- if(U_FAILURE(*status))
- {
- return adopt;
- }
- if(!file)
- {
- *status = U_ILLEGAL_ARGUMENT_ERROR;
- return adopt;
- }
- if(direction & U_READ)
- {
- /** TODO: implement */
- *status = U_UNSUPPORTED_ERROR;
- return adopt;
- }
- if(adopt == nullptr) /* they are clearing it */
- {
- if(file->fTranslit != nullptr)
- {
- /* TODO: Check side */
- old = file->fTranslit->translit;
- uprv_free(file->fTranslit->buffer);
- file->fTranslit->buffer=nullptr;
- uprv_free(file->fTranslit);
- file->fTranslit=nullptr;
- }
- }
- else
- {
- if(file->fTranslit == nullptr)
- {
- file->fTranslit = (UFILETranslitBuffer*) uprv_malloc(sizeof(UFILETranslitBuffer));
- if(!file->fTranslit)
- {
- *status = U_MEMORY_ALLOCATION_ERROR;
- return adopt;
- }
- file->fTranslit->capacity = 0;
- file->fTranslit->length = 0;
- file->fTranslit->pos = 0;
- file->fTranslit->buffer = nullptr;
- }
- else
- {
- old = file->fTranslit->translit;
- ufile_flush_translit(file);
- }
- file->fTranslit->translit = adopt;
- }
- return old;
- }
- static const char16_t * u_file_translit(UFILE *f, const char16_t *src, int32_t *count, UBool flush)
- {
- int32_t newlen;
- int32_t junkCount = 0;
- int32_t textLength;
- int32_t textLimit;
- UTransPosition pos;
- UErrorCode status = U_ZERO_ERROR;
- if(count == nullptr)
- {
- count = &junkCount;
- }
- if ((!f)||(!f->fTranslit)||(!f->fTranslit->translit))
- {
- /* fast path */
- return src;
- }
- /* First: slide over everything */
- if(f->fTranslit->length > f->fTranslit->pos)
- {
- memmove(f->fTranslit->buffer, f->fTranslit->buffer + f->fTranslit->pos,
- (f->fTranslit->length - f->fTranslit->pos)*sizeof(char16_t));
- }
- f->fTranslit->length -= f->fTranslit->pos; /* always */
- f->fTranslit->pos = 0;
- /* Calculate new buffer size needed */
- newlen = (*count + f->fTranslit->length) * 4;
- if(newlen > f->fTranslit->capacity)
- {
- if(f->fTranslit->buffer == nullptr)
- {
- f->fTranslit->buffer = (char16_t*)uprv_malloc(newlen * sizeof(char16_t));
- }
- else
- {
- f->fTranslit->buffer = (char16_t*)uprv_realloc(f->fTranslit->buffer, newlen * sizeof(char16_t));
- }
- /* Check for malloc/realloc failure. */
- if (f->fTranslit->buffer == nullptr) {
- return nullptr;
- }
- f->fTranslit->capacity = newlen;
- }
- /* Now, copy any data over */
- u_strncpy(f->fTranslit->buffer + f->fTranslit->length,
- src,
- *count);
- f->fTranslit->length += *count;
- /* Now, translit in place as much as we can */
- if(flush == false)
- {
- textLength = f->fTranslit->length;
- pos.contextStart = 0;
- pos.contextLimit = textLength;
- pos.start = 0;
- pos.limit = textLength;
- utrans_transIncrementalUChars(f->fTranslit->translit,
- f->fTranslit->buffer, /* because we shifted */
- &textLength,
- f->fTranslit->capacity,
- &pos,
- &status);
- /* now: start/limit point to the transliterated text */
- /* Transliterated is [buffer..pos.start) */
- *count = pos.start;
- f->fTranslit->pos = pos.start;
- f->fTranslit->length = pos.limit;
- return f->fTranslit->buffer;
- }
- else
- {
- textLength = f->fTranslit->length;
- textLimit = f->fTranslit->length;
- utrans_transUChars(f->fTranslit->translit,
- f->fTranslit->buffer,
- &textLength,
- f->fTranslit->capacity,
- 0,
- &textLimit,
- &status);
- /* out: converted len */
- *count = textLimit;
- /* Set pointers to 0 */
- f->fTranslit->pos = 0;
- f->fTranslit->length = 0;
- return f->fTranslit->buffer;
- }
- }
- #endif
- void
- ufile_flush_translit(UFILE *f)
- {
- #if !UCONFIG_NO_TRANSLITERATION
- if((!f)||(!f->fTranslit))
- return;
- #endif
- u_file_write_flush(nullptr, 0, f, false, true);
- }
- void
- ufile_flush_io(UFILE *f)
- {
- if((!f) || (!f->fFile)) {
- return; /* skip if no file */
- }
- u_file_write_flush(nullptr, 0, f, true, false);
- }
- void
- ufile_close_translit(UFILE *f)
- {
- #if !UCONFIG_NO_TRANSLITERATION
- if((!f)||(!f->fTranslit))
- return;
- #endif
- ufile_flush_translit(f);
- #if !UCONFIG_NO_TRANSLITERATION
- if(f->fTranslit->translit)
- utrans_close(f->fTranslit->translit);
- if(f->fTranslit->buffer)
- {
- uprv_free(f->fTranslit->buffer);
- }
- uprv_free(f->fTranslit);
- f->fTranslit = nullptr;
- #endif
- }
- /* Input/output */
- U_CAPI int32_t U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */
- u_fputs(const char16_t *s,
- UFILE *f)
- {
- int32_t count = u_file_write(s, u_strlen(s), f);
- count += u_file_write(DELIMITERS, DELIMITERS_LEN, f);
- return count;
- }
- U_CAPI UChar32 U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */
- u_fputc(UChar32 uc,
- UFILE *f)
- {
- char16_t buf[2];
- int32_t idx = 0;
- UBool isError = false;
- U16_APPEND(buf, idx, UPRV_LENGTHOF(buf), uc, isError);
- if (isError) {
- return U_EOF;
- }
- return u_file_write(buf, idx, f) == idx ? uc : U_EOF;
- }
- U_CFUNC int32_t U_EXPORT2
- u_file_write_flush(const char16_t *chars,
- int32_t count,
- UFILE *f,
- UBool flushIO,
- UBool flushTranslit)
- {
- /* Set up conversion parameters */
- UErrorCode status = U_ZERO_ERROR;
- const char16_t *mySource = chars;
- const char16_t *mySourceBegin;
- const char16_t *mySourceEnd;
- char charBuffer[UFILE_CHARBUFFER_SIZE];
- char *myTarget = charBuffer;
- int32_t written = 0;
- int32_t numConverted = 0;
- if (count < 0) {
- count = u_strlen(chars);
- }
- #if !UCONFIG_NO_TRANSLITERATION
- if((f->fTranslit) && (f->fTranslit->translit))
- {
- /* Do the transliteration */
- mySource = u_file_translit(f, chars, &count, flushTranslit);
- }
- #endif
- /* Write to a string. */
- if (!f->fFile) {
- int32_t charsLeft = (int32_t)(f->str.fLimit - f->str.fPos);
- if (flushIO && charsLeft > count) {
- count++;
- }
- written = ufmt_min(count, charsLeft);
- u_strncpy(f->str.fPos, mySource, written);
- f->str.fPos += written;
- return written;
- }
- mySourceEnd = mySource + count;
- /* Perform the conversion in a loop */
- do {
- mySourceBegin = mySource; /* beginning location for this loop */
- status = U_ZERO_ERROR;
- if(f->fConverter != nullptr) { /* We have a valid converter */
- ucnv_fromUnicode(f->fConverter,
- &myTarget,
- charBuffer + UFILE_CHARBUFFER_SIZE,
- &mySource,
- mySourceEnd,
- nullptr,
- flushIO,
- &status);
- } else { /*weiv: do the invariant conversion */
- int32_t convertChars = (int32_t) (mySourceEnd - mySource);
- if (convertChars > UFILE_CHARBUFFER_SIZE) {
- convertChars = UFILE_CHARBUFFER_SIZE;
- status = U_BUFFER_OVERFLOW_ERROR;
- }
- u_UCharsToChars(mySource, myTarget, convertChars);
- mySource += convertChars;
- myTarget += convertChars;
- }
- numConverted = (int32_t)(myTarget - charBuffer);
- if (numConverted > 0) {
- /* write the converted bytes */
- fwrite(charBuffer,
- sizeof(char),
- numConverted,
- f->fFile);
- written += (int32_t) (mySource - mySourceBegin);
- }
- myTarget = charBuffer;
- }
- while(status == U_BUFFER_OVERFLOW_ERROR);
- /* return # of chars written */
- return written;
- }
- U_CAPI int32_t U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */
- u_file_write( const char16_t *chars,
- int32_t count,
- UFILE *f)
- {
- return u_file_write_flush(chars,count,f,false,false);
- }
- /* private function used for buffering input */
- void
- ufile_fill_uchar_buffer(UFILE *f)
- {
- UErrorCode status;
- const char *mySource;
- const char *mySourceEnd;
- char16_t *myTarget;
- int32_t bufferSize;
- int32_t maxCPBytes;
- int32_t bytesRead;
- int32_t availLength;
- int32_t dataSize;
- char charBuffer[UFILE_CHARBUFFER_SIZE];
- u_localized_string *str;
- if (f->fFile == nullptr) {
- /* There is nothing to do. It's a string. */
- return;
- }
- str = &f->str;
- dataSize = (int32_t)(str->fLimit - str->fPos);
- if (f->fFileno == 0 && dataSize > 0) {
- /* Don't read from stdin too many times. There is still some data. */
- return;
- }
- /* shift the buffer if it isn't empty */
- if(dataSize != 0) {
- u_memmove(f->fUCBuffer, str->fPos, dataSize); /* not accessing beyond memory */
- }
- /* record how much buffer space is available */
- availLength = UFILE_UCHARBUFFER_SIZE - dataSize;
- /* Determine the # of codepage bytes needed to fill our char16_t buffer */
- /* weiv: if converter is nullptr, we use invariant converter with charwidth = 1)*/
- maxCPBytes = availLength / (f->fConverter!=nullptr?(2*ucnv_getMinCharSize(f->fConverter)):1);
- /* Read in the data to convert */
- if (f->fFileno == 0) {
- /* Special case. Read from stdin one line at a time. */
- char *retStr = fgets(charBuffer, ufmt_min(maxCPBytes, UFILE_CHARBUFFER_SIZE), f->fFile);
- bytesRead = (int32_t)(retStr ? uprv_strlen(charBuffer) : 0);
- }
- else {
- /* A normal file */
- bytesRead = (int32_t)fread(charBuffer,
- sizeof(char),
- ufmt_min(maxCPBytes, UFILE_CHARBUFFER_SIZE),
- f->fFile);
- }
- /* Set up conversion parameters */
- status = U_ZERO_ERROR;
- mySource = charBuffer;
- mySourceEnd = charBuffer + bytesRead;
- myTarget = f->fUCBuffer + dataSize;
- bufferSize = UFILE_UCHARBUFFER_SIZE;
- if(f->fConverter != nullptr) { /* We have a valid converter */
- /* Perform the conversion */
- ucnv_toUnicode(f->fConverter,
- &myTarget,
- f->fUCBuffer + bufferSize,
- &mySource,
- mySourceEnd,
- nullptr,
- (UBool)(feof(f->fFile) != 0),
- &status);
- } else { /*weiv: do the invariant conversion */
- u_charsToUChars(mySource, myTarget, bytesRead);
- myTarget += bytesRead;
- }
- /* update the pointers into our array */
- str->fPos = str->fBuffer;
- str->fLimit = myTarget;
- }
- U_CAPI char16_t* U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */
- u_fgets(char16_t *s,
- int32_t n,
- UFILE *f)
- {
- int32_t dataSize;
- int32_t count;
- char16_t *alias;
- const char16_t *limit;
- char16_t *sItr;
- char16_t currDelim = 0;
- u_localized_string *str;
- if (n <= 0) {
- /* Caller screwed up. We need to write the null terminatior. */
- return nullptr;
- }
- /* fill the buffer if needed */
- str = &f->str;
- if (str->fPos >= str->fLimit) {
- ufile_fill_uchar_buffer(f);
- }
- /* subtract 1 from n to compensate for the terminator */
- --n;
- /* determine the amount of data in the buffer */
- dataSize = (int32_t)(str->fLimit - str->fPos);
- /* if 0 characters were left, return 0 */
- if (dataSize == 0)
- return nullptr;
- /* otherwise, iteratively fill the buffer and copy */
- count = 0;
- sItr = s;
- currDelim = 0;
- while (dataSize > 0 && count < n) {
- alias = str->fPos;
- /* Find how much to copy */
- if (dataSize < (n - count)) {
- limit = str->fLimit;
- }
- else {
- limit = alias + (n - count);
- }
- if (!currDelim) {
- /* Copy UChars until we find the first occurrence of a delimiter character */
- while (alias < limit && !IS_FIRST_STRING_DELIMITER(*alias)) {
- count++;
- *(sItr++) = *(alias++);
- }
- /* Preserve the newline */
- if (alias < limit && IS_FIRST_STRING_DELIMITER(*alias)) {
- if (CAN_HAVE_COMBINED_STRING_DELIMITER(*alias)) {
- currDelim = *alias;
- }
- else {
- currDelim = 1; /* This isn't a newline, but it's used to say
- that we should break later. We've checked all
- possible newline combinations even across buffer
- boundaries. */
- }
- count++;
- *(sItr++) = *(alias++);
- }
- }
- /* If we have a CRLF combination, preserve that too. */
- if (alias < limit) {
- if (currDelim && IS_COMBINED_STRING_DELIMITER(currDelim, *alias)) {
- count++;
- *(sItr++) = *(alias++);
- }
- currDelim = 1; /* This isn't a newline, but it's used to say
- that we should break later. We've checked all
- possible newline combinations even across buffer
- boundaries. */
- }
- /* update the current buffer position */
- str->fPos = alias;
- /* if we found a delimiter */
- if (currDelim == 1) {
- /* break out */
- break;
- }
- /* refill the buffer */
- ufile_fill_uchar_buffer(f);
- /* determine the amount of data in the buffer */
- dataSize = (int32_t)(str->fLimit - str->fPos);
- }
- /* add the terminator and return s */
- *sItr = 0x0000;
- return s;
- }
- U_CFUNC UBool U_EXPORT2
- ufile_getch(UFILE *f, char16_t *ch)
- {
- UBool isValidChar = false;
- *ch = U_EOF;
- /* if we have an available character in the buffer, return it */
- if(f->str.fPos < f->str.fLimit){
- *ch = *(f->str.fPos)++;
- isValidChar = true;
- }
- else {
- /* otherwise, fill the buffer and return the next character */
- if(f->str.fPos >= f->str.fLimit) {
- ufile_fill_uchar_buffer(f);
- }
- if(f->str.fPos < f->str.fLimit) {
- *ch = *(f->str.fPos)++;
- isValidChar = true;
- }
- }
- return isValidChar;
- }
- U_CAPI char16_t U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */
- u_fgetc(UFILE *f)
- {
- char16_t ch;
- ufile_getch(f, &ch);
- return ch;
- }
- U_CFUNC UBool U_EXPORT2
- ufile_getch32(UFILE *f, UChar32 *c32)
- {
- UBool isValidChar = false;
- u_localized_string *str;
- *c32 = U_EOF;
- /* Fill the buffer if it is empty */
- str = &f->str;
- if (str->fPos + 1 >= str->fLimit) {
- ufile_fill_uchar_buffer(f);
- }
- /* Get the next character in the buffer */
- if (str->fPos < str->fLimit) {
- *c32 = *(str->fPos)++;
- if (U_IS_LEAD(*c32)) {
- if (str->fPos < str->fLimit) {
- char16_t c16 = *(str->fPos)++;
- *c32 = U16_GET_SUPPLEMENTARY(*c32, c16);
- isValidChar = true;
- }
- else {
- *c32 = U_EOF;
- }
- }
- else {
- isValidChar = true;
- }
- }
- return isValidChar;
- }
- U_CAPI UChar32 U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */
- u_fgetcx(UFILE *f)
- {
- UChar32 ch;
- ufile_getch32(f, &ch);
- return ch;
- }
- U_CAPI UChar32 U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */
- u_fungetc(UChar32 ch,
- UFILE *f)
- {
- u_localized_string *str;
- str = &f->str;
- /* if we're at the beginning of the buffer, sorry! */
- if (str->fPos == str->fBuffer
- || (U_IS_LEAD(ch) && (str->fPos - 1) == str->fBuffer))
- {
- ch = U_EOF;
- }
- else {
- /* otherwise, put the character back */
- /* Remember, read them back on in the reverse order. */
- if (U_IS_LEAD(ch)) {
- if (*--(str->fPos) != U16_TRAIL(ch)
- || *--(str->fPos) != U16_LEAD(ch))
- {
- ch = U_EOF;
- }
- }
- else if (*--(str->fPos) != ch) {
- ch = U_EOF;
- }
- }
- return ch;
- }
- U_CAPI int32_t U_EXPORT2 /* U_CAPI ... U_EXPORT2 added by Peter Kirk 17 Nov 2001 */
- u_file_read( char16_t *chars,
- int32_t count,
- UFILE *f)
- {
- int32_t dataSize;
- int32_t read = 0;
- u_localized_string *str = &f->str;
- do {
- /* determine the amount of data in the buffer */
- dataSize = (int32_t)(str->fLimit - str->fPos);
- if (dataSize <= 0) {
- /* fill the buffer */
- ufile_fill_uchar_buffer(f);
- dataSize = (int32_t)(str->fLimit - str->fPos);
- }
- /* Make sure that we don't read too much */
- if (dataSize > (count - read)) {
- dataSize = count - read;
- }
- /* copy the current data in the buffer */
- memcpy(chars + read, str->fPos, dataSize * sizeof(char16_t));
- /* update number of items read */
- read += dataSize;
- /* update the current buffer position */
- str->fPos += dataSize;
- }
- while (dataSize != 0 && read < count);
- return read;
- }
- #endif
|