123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482 |
- // © 2016 and later: Unicode, Inc. and others.
- // License & terms of use: http://www.unicode.org/copyright.html
- /*
- *****************************************************************************
- *
- * Copyright (C) 1998-2016, International Business Machines
- * Corporation and others. All Rights Reserved.
- *
- *****************************************************************************
- *
- * ucnv_err.c
- * Implements error behaviour functions called by T_UConverter_{from,to}Unicode
- *
- *
- * Change history:
- *
- * 06/29/2000 helena Major rewrite of the callback APIs.
- */
- #include "unicode/utypes.h"
- #if !UCONFIG_NO_CONVERSION
- #include "unicode/ucnv_err.h"
- #include "unicode/ucnv_cb.h"
- #include "ucnv_cnv.h"
- #include "cmemory.h"
- #include "unicode/ucnv.h"
- #include "ustrfmt.h"
/*
 * Capacity, in char16_t units, of the scratch buffers in which the ESCAPE
 * callbacks below assemble an escape sequence before writing it out.
 * The longest per-byte escapes built in this file ("&#255;", "&#xFF;") take
 * six units; with the 8-byte maximum character length cited by the original
 * note on this constant that gives 6 * 8 = 48.
 */
enum { VALUE_STRING_LENGTH = 48 };

/* Code points of the ASCII characters used to spell the escape sequences. */
enum {
    UNICODE_SPACE_CODEPOINT       = 0x0020, /* space */
    UNICODE_HASH_CODEPOINT        = 0x0023, /* # */
    UNICODE_PERCENT_SIGN_CODEPOINT = 0x0025, /* % */
    UNICODE_AMP_CODEPOINT         = 0x0026, /* & */
    UNICODE_PLUS_CODEPOINT        = 0x002B, /* + */
    UNICODE_SEMICOLON_CODEPOINT   = 0x003B, /* ; */
    UNICODE_U_CODEPOINT           = 0x0055, /* U */
    UNICODE_X_CODEPOINT           = 0x0058, /* X */
    UNICODE_RS_CODEPOINT          = 0x005C, /* reverse solidus (backslash) */
    UNICODE_U_LOW_CODEPOINT       = 0x0075, /* u */
    UNICODE_X_LOW_CODEPOINT       = 0x0078, /* x */
    UNICODE_LEFT_CURLY_CODEPOINT  = 0x007B, /* { */
    UNICODE_RIGHT_CURLY_CODEPOINT = 0x007D  /* } */
};

/*
 * Option selectors. The callbacks in this file compare these against the
 * first char of a non-null callback context to pick a behaviour variant.
 */
enum {
    UCNV_PRV_ESCAPE_ICU      = 0,
    UCNV_PRV_ESCAPE_C        = 'C',
    UCNV_PRV_ESCAPE_XML_DEC  = 'D',
    UCNV_PRV_ESCAPE_XML_HEX  = 'X',
    UCNV_PRV_ESCAPE_JAVA     = 'J',
    UCNV_PRV_ESCAPE_UNICODE  = 'U',
    UCNV_PRV_ESCAPE_CSS2     = 'S',
    UCNV_PRV_STOP_ON_ILLEGAL = 'i'
};
/*
 * IS_DEFAULT_IGNORABLE_CODE_POINT(c)
 *
 * Evaluates to non-zero when code point c is in the hard-coded list of
 * code points that have the Default_Ignorable_Code_Point property.
 * The from-Unicode callbacks in this file use it to silently drop such
 * code points when they are unmappable.
 *
 * The list is hard-coded here to avoid a dependency on other code, so it
 * must be updated whenever the set of default ignorable code points changes.
 * For the current list see:
 * https://util.unicode.org/UnicodeJsps/list-unicodeset.jsp?a=%5B%3ADI%3A%5D&abb=on&g=&i=
 *
 * This list should be kept in sync with the one in CharsetCallback.java.
 *
 * Every use of the parameter is parenthesized so that an expression
 * argument (for example "x & 0xFFFF" or "a ? b : c") is tested as a whole.
 * The argument is still evaluated more than once, so it must not have
 * side effects.
 */
#define IS_DEFAULT_IGNORABLE_CODE_POINT(c) ( \
    ((c) == 0x00AD) || \
    ((c) == 0x034F) || \
    ((c) == 0x061C) || \
    ((c) == 0x115F) || \
    ((c) == 0x1160) || \
    (0x17B4 <= (c) && (c) <= 0x17B5) || \
    (0x180B <= (c) && (c) <= 0x180F) || \
    (0x200B <= (c) && (c) <= 0x200F) || \
    (0x202A <= (c) && (c) <= 0x202E) || \
    (0x2060 <= (c) && (c) <= 0x206F) || \
    ((c) == 0x3164) || \
    (0xFE00 <= (c) && (c) <= 0xFE0F) || \
    ((c) == 0xFEFF) || \
    ((c) == 0xFFA0) || \
    (0xFFF0 <= (c) && (c) <= 0xFFF8) || \
    (0x1BCA0 <= (c) && (c) <= 0x1BCA3) || \
    (0x1D173 <= (c) && (c) <= 0x1D17A) || \
    (0xE0000 <= (c) && (c) <= 0xE0FFF))
- /*Function Pointer STOPS at the ILLEGAL_SEQUENCE */
- U_CAPI void U_EXPORT2
- UCNV_FROM_U_CALLBACK_STOP (
- const void *context,
- UConverterFromUnicodeArgs *fromUArgs,
- const char16_t* codeUnits,
- int32_t length,
- UChar32 codePoint,
- UConverterCallbackReason reason,
- UErrorCode * err)
- {
- (void)context;
- (void)fromUArgs;
- (void)codeUnits;
- (void)length;
- if (reason == UCNV_UNASSIGNED && IS_DEFAULT_IGNORABLE_CODE_POINT(codePoint))
- {
- /*
- * Skip if the codepoint has unicode property of default ignorable.
- */
- *err = U_ZERO_ERROR;
- }
- /* the caller must have set the error code accordingly */
- }
- /*Function Pointer STOPS at the ILLEGAL_SEQUENCE */
- U_CAPI void U_EXPORT2
- UCNV_TO_U_CALLBACK_STOP (
- const void *context,
- UConverterToUnicodeArgs *toUArgs,
- const char* codePoints,
- int32_t length,
- UConverterCallbackReason reason,
- UErrorCode * err)
- {
- /* the caller must have set the error code accordingly */
- (void)context; (void)toUArgs; (void)codePoints; (void)length; (void)reason; (void)err;
- }
- U_CAPI void U_EXPORT2
- UCNV_FROM_U_CALLBACK_SKIP (
- const void *context,
- UConverterFromUnicodeArgs *fromUArgs,
- const char16_t* codeUnits,
- int32_t length,
- UChar32 codePoint,
- UConverterCallbackReason reason,
- UErrorCode * err)
- {
- (void)fromUArgs;
- (void)codeUnits;
- (void)length;
- if (reason <= UCNV_IRREGULAR)
- {
- if (reason == UCNV_UNASSIGNED && IS_DEFAULT_IGNORABLE_CODE_POINT(codePoint))
- {
- /*
- * Skip if the codepoint has unicode property of default ignorable.
- */
- *err = U_ZERO_ERROR;
- }
- else if (context == nullptr || (*((char*)context) == UCNV_PRV_STOP_ON_ILLEGAL && reason == UCNV_UNASSIGNED))
- {
- *err = U_ZERO_ERROR;
- }
- /* else the caller must have set the error code accordingly. */
- }
- /* else ignore the reset, close and clone calls. */
- }
- U_CAPI void U_EXPORT2
- UCNV_FROM_U_CALLBACK_SUBSTITUTE (
- const void *context,
- UConverterFromUnicodeArgs *fromArgs,
- const char16_t* codeUnits,
- int32_t length,
- UChar32 codePoint,
- UConverterCallbackReason reason,
- UErrorCode * err)
- {
- (void)codeUnits;
- (void)length;
- if (reason <= UCNV_IRREGULAR)
- {
- if (reason == UCNV_UNASSIGNED && IS_DEFAULT_IGNORABLE_CODE_POINT(codePoint))
- {
- /*
- * Skip if the codepoint has unicode property of default ignorable.
- */
- *err = U_ZERO_ERROR;
- }
- else if (context == nullptr || (*((char*)context) == UCNV_PRV_STOP_ON_ILLEGAL && reason == UCNV_UNASSIGNED))
- {
- *err = U_ZERO_ERROR;
- ucnv_cbFromUWriteSub(fromArgs, 0, err);
- }
- /* else the caller must have set the error code accordingly. */
- }
- /* else ignore the reset, close and clone calls. */
- }
- /*uses uprv_itou to get a unicode escape sequence of the offensive sequence,
- *uses a clean copy (resetted) of the converter, to convert that unicode
- *escape sequence to the target codepage (if conversion failure happens then
- *we revert to substituting with subchar)
- */
- U_CAPI void U_EXPORT2
- UCNV_FROM_U_CALLBACK_ESCAPE (
- const void *context,
- UConverterFromUnicodeArgs *fromArgs,
- const char16_t *codeUnits,
- int32_t length,
- UChar32 codePoint,
- UConverterCallbackReason reason,
- UErrorCode * err)
- {
- char16_t valueString[VALUE_STRING_LENGTH];
- int32_t valueStringLength = 0;
- int32_t i = 0;
- const char16_t *myValueSource = nullptr;
- UErrorCode err2 = U_ZERO_ERROR;
- UConverterFromUCallback original = nullptr;
- const void *originalContext;
- UConverterFromUCallback ignoredCallback = nullptr;
- const void *ignoredContext;
-
- if (reason > UCNV_IRREGULAR)
- {
- return;
- }
- else if (reason == UCNV_UNASSIGNED && IS_DEFAULT_IGNORABLE_CODE_POINT(codePoint))
- {
- /*
- * Skip if the codepoint has unicode property of default ignorable.
- */
- *err = U_ZERO_ERROR;
- return;
- }
- ucnv_setFromUCallBack (fromArgs->converter,
- (UConverterFromUCallback) UCNV_FROM_U_CALLBACK_SUBSTITUTE,
- nullptr,
- &original,
- &originalContext,
- &err2);
-
- if (U_FAILURE (err2))
- {
- *err = err2;
- return;
- }
- if(context==nullptr)
- {
- while (i < length)
- {
- valueString[valueStringLength++] = (char16_t) UNICODE_PERCENT_SIGN_CODEPOINT; /* adding % */
- valueString[valueStringLength++] = (char16_t) UNICODE_U_CODEPOINT; /* adding U */
- valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint16_t)codeUnits[i++], 16, 4);
- }
- }
- else
- {
- switch(*((char*)context))
- {
- case UCNV_PRV_ESCAPE_JAVA:
- while (i < length)
- {
- valueString[valueStringLength++] = (char16_t) UNICODE_RS_CODEPOINT; /* adding \ */
- valueString[valueStringLength++] = (char16_t) UNICODE_U_LOW_CODEPOINT; /* adding u */
- valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint16_t)codeUnits[i++], 16, 4);
- }
- break;
- case UCNV_PRV_ESCAPE_C:
- valueString[valueStringLength++] = (char16_t) UNICODE_RS_CODEPOINT; /* adding \ */
- if(length==2){
- valueString[valueStringLength++] = (char16_t) UNICODE_U_CODEPOINT; /* adding U */
- valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, codePoint, 16, 8);
- }
- else{
- valueString[valueStringLength++] = (char16_t) UNICODE_U_LOW_CODEPOINT; /* adding u */
- valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint16_t)codeUnits[0], 16, 4);
- }
- break;
- case UCNV_PRV_ESCAPE_XML_DEC:
- valueString[valueStringLength++] = (char16_t) UNICODE_AMP_CODEPOINT; /* adding & */
- valueString[valueStringLength++] = (char16_t) UNICODE_HASH_CODEPOINT; /* adding # */
- if(length==2){
- valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, codePoint, 10, 0);
- }
- else{
- valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint16_t)codeUnits[0], 10, 0);
- }
- valueString[valueStringLength++] = (char16_t) UNICODE_SEMICOLON_CODEPOINT; /* adding ; */
- break;
- case UCNV_PRV_ESCAPE_XML_HEX:
- valueString[valueStringLength++] = (char16_t) UNICODE_AMP_CODEPOINT; /* adding & */
- valueString[valueStringLength++] = (char16_t) UNICODE_HASH_CODEPOINT; /* adding # */
- valueString[valueStringLength++] = (char16_t) UNICODE_X_LOW_CODEPOINT; /* adding x */
- if(length==2){
- valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, codePoint, 16, 0);
- }
- else{
- valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint16_t)codeUnits[0], 16, 0);
- }
- valueString[valueStringLength++] = (char16_t) UNICODE_SEMICOLON_CODEPOINT; /* adding ; */
- break;
- case UCNV_PRV_ESCAPE_UNICODE:
- valueString[valueStringLength++] = (char16_t) UNICODE_LEFT_CURLY_CODEPOINT; /* adding { */
- valueString[valueStringLength++] = (char16_t) UNICODE_U_CODEPOINT; /* adding U */
- valueString[valueStringLength++] = (char16_t) UNICODE_PLUS_CODEPOINT; /* adding + */
- if (length == 2) {
- valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, codePoint, 16, 4);
- } else {
- valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint16_t)codeUnits[0], 16, 4);
- }
- valueString[valueStringLength++] = (char16_t) UNICODE_RIGHT_CURLY_CODEPOINT; /* adding } */
- break;
- case UCNV_PRV_ESCAPE_CSS2:
- valueString[valueStringLength++] = (char16_t) UNICODE_RS_CODEPOINT; /* adding \ */
- valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, codePoint, 16, 0);
- /* Always add space character, because the next character might be whitespace,
- which would erroneously be considered the termination of the escape sequence. */
- valueString[valueStringLength++] = (char16_t) UNICODE_SPACE_CODEPOINT;
- break;
- default:
- while (i < length)
- {
- valueString[valueStringLength++] = (char16_t) UNICODE_PERCENT_SIGN_CODEPOINT; /* adding % */
- valueString[valueStringLength++] = (char16_t) UNICODE_U_CODEPOINT; /* adding U */
- valueStringLength += uprv_itou (valueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint16_t)codeUnits[i++], 16, 4);
- }
- }
- }
- myValueSource = valueString;
- /* reset the error */
- *err = U_ZERO_ERROR;
- ucnv_cbFromUWriteUChars(fromArgs, &myValueSource, myValueSource+valueStringLength, 0, err);
- ucnv_setFromUCallBack (fromArgs->converter,
- original,
- originalContext,
- &ignoredCallback,
- &ignoredContext,
- &err2);
- if (U_FAILURE (err2))
- {
- *err = err2;
- return;
- }
- }
- U_CAPI void U_EXPORT2
- UCNV_TO_U_CALLBACK_SKIP (
- const void *context,
- UConverterToUnicodeArgs *toArgs,
- const char* codeUnits,
- int32_t length,
- UConverterCallbackReason reason,
- UErrorCode * err)
- {
- (void)toArgs;
- (void)codeUnits;
- (void)length;
- if (reason <= UCNV_IRREGULAR)
- {
- if (context == nullptr || (*((char*)context) == UCNV_PRV_STOP_ON_ILLEGAL && reason == UCNV_UNASSIGNED))
- {
- *err = U_ZERO_ERROR;
- }
- /* else the caller must have set the error code accordingly. */
- }
- /* else ignore the reset, close and clone calls. */
- }
- U_CAPI void U_EXPORT2
- UCNV_TO_U_CALLBACK_SUBSTITUTE (
- const void *context,
- UConverterToUnicodeArgs *toArgs,
- const char* codeUnits,
- int32_t length,
- UConverterCallbackReason reason,
- UErrorCode * err)
- {
- (void)codeUnits;
- (void)length;
- if (reason <= UCNV_IRREGULAR)
- {
- if (context == nullptr || (*((char*)context) == UCNV_PRV_STOP_ON_ILLEGAL && reason == UCNV_UNASSIGNED))
- {
- *err = U_ZERO_ERROR;
- ucnv_cbToUWriteSub(toArgs,0,err);
- }
- /* else the caller must have set the error code accordingly. */
- }
- /* else ignore the reset, close and clone calls. */
- }
- /*uses uprv_itou to get a unicode escape sequence of the offensive sequence,
- *and uses that as the substitution sequence
- */
- U_CAPI void U_EXPORT2
- UCNV_TO_U_CALLBACK_ESCAPE (
- const void *context,
- UConverterToUnicodeArgs *toArgs,
- const char* codeUnits,
- int32_t length,
- UConverterCallbackReason reason,
- UErrorCode * err)
- {
- char16_t uniValueString[VALUE_STRING_LENGTH];
- int32_t valueStringLength = 0;
- int32_t i = 0;
- if (reason > UCNV_IRREGULAR)
- {
- return;
- }
- if(context==nullptr)
- {
- while (i < length)
- {
- uniValueString[valueStringLength++] = (char16_t) UNICODE_PERCENT_SIGN_CODEPOINT; /* adding % */
- uniValueString[valueStringLength++] = (char16_t) UNICODE_X_CODEPOINT; /* adding X */
- valueStringLength += uprv_itou (uniValueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint8_t) codeUnits[i++], 16, 2);
- }
- }
- else
- {
- switch(*((char*)context))
- {
- case UCNV_PRV_ESCAPE_XML_DEC:
- while (i < length)
- {
- uniValueString[valueStringLength++] = (char16_t) UNICODE_AMP_CODEPOINT; /* adding & */
- uniValueString[valueStringLength++] = (char16_t) UNICODE_HASH_CODEPOINT; /* adding # */
- valueStringLength += uprv_itou (uniValueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint8_t)codeUnits[i++], 10, 0);
- uniValueString[valueStringLength++] = (char16_t) UNICODE_SEMICOLON_CODEPOINT; /* adding ; */
- }
- break;
- case UCNV_PRV_ESCAPE_XML_HEX:
- while (i < length)
- {
- uniValueString[valueStringLength++] = (char16_t) UNICODE_AMP_CODEPOINT; /* adding & */
- uniValueString[valueStringLength++] = (char16_t) UNICODE_HASH_CODEPOINT; /* adding # */
- uniValueString[valueStringLength++] = (char16_t) UNICODE_X_LOW_CODEPOINT; /* adding x */
- valueStringLength += uprv_itou (uniValueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint8_t)codeUnits[i++], 16, 0);
- uniValueString[valueStringLength++] = (char16_t) UNICODE_SEMICOLON_CODEPOINT; /* adding ; */
- }
- break;
- case UCNV_PRV_ESCAPE_C:
- while (i < length)
- {
- uniValueString[valueStringLength++] = (char16_t) UNICODE_RS_CODEPOINT; /* adding \ */
- uniValueString[valueStringLength++] = (char16_t) UNICODE_X_LOW_CODEPOINT; /* adding x */
- valueStringLength += uprv_itou (uniValueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint8_t)codeUnits[i++], 16, 2);
- }
- break;
- default:
- while (i < length)
- {
- uniValueString[valueStringLength++] = (char16_t) UNICODE_PERCENT_SIGN_CODEPOINT; /* adding % */
- uniValueString[valueStringLength++] = (char16_t) UNICODE_X_CODEPOINT; /* adding X */
- uprv_itou (uniValueString + valueStringLength, VALUE_STRING_LENGTH - valueStringLength, (uint8_t) codeUnits[i++], 16, 2);
- valueStringLength += 2;
- }
- }
- }
- /* reset the error */
- *err = U_ZERO_ERROR;
- ucnv_cbToUWriteUChars(toArgs, uniValueString, valueStringLength, 0, err);
- }
- #endif
|