123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627 |
- // © 2016 and later: Unicode, Inc. and others.
- // License & terms of use: http://www.unicode.org/copyright.html
- /*
- *******************************************************************************
- * Copyright (C) 1996-2015, International Business Machines
- * Corporation and others. All Rights Reserved.
- *******************************************************************************
- * file name: ucol.cpp
- * encoding: UTF-8
- * tab size: 8 (not used)
- * indentation:4
- *
- * Modification history
- * Date Name Comments
- * 1996-1999 various members of ICU team maintained C API for collation framework
- * 02/16/2001 synwee Added internal method getPrevSpecialCE
- * 03/01/2001 synwee Added maxexpansion functionality.
- * 03/16/2001 weiv Collation framework is rewritten in C and made UCA compliant
- * 2012-2014 markus Rewritten in C++ again.
- */
- #include "unicode/utypes.h"
- #if !UCONFIG_NO_COLLATION
- #include "unicode/coll.h"
- #include "unicode/tblcoll.h"
- #include "unicode/bytestream.h"
- #include "unicode/coleitr.h"
- #include "unicode/ucoleitr.h"
- #include "unicode/ustring.h"
- #include "cmemory.h"
- #include "collation.h"
- #include "cstring.h"
- #include "putilimp.h"
- #include "uassert.h"
- #include "utracimp.h"
- U_NAMESPACE_USE
- U_CAPI UCollator* U_EXPORT2
- ucol_openBinary(const uint8_t *bin, int32_t length,
- const UCollator *base,
- UErrorCode *status)
- {
- if(U_FAILURE(*status)) { return nullptr; }
- RuleBasedCollator *coll = new RuleBasedCollator(
- bin, length,
- RuleBasedCollator::rbcFromUCollator(base),
- *status);
- if(coll == nullptr) {
- *status = U_MEMORY_ALLOCATION_ERROR;
- return nullptr;
- }
- if(U_FAILURE(*status)) {
- delete coll;
- return nullptr;
- }
- return coll->toUCollator();
- }
- U_CAPI int32_t U_EXPORT2
- ucol_cloneBinary(const UCollator *coll,
- uint8_t *buffer, int32_t capacity,
- UErrorCode *status)
- {
- if(U_FAILURE(*status)) {
- return 0;
- }
- const RuleBasedCollator *rbc = RuleBasedCollator::rbcFromUCollator(coll);
- if(rbc == nullptr && coll != nullptr) {
- *status = U_UNSUPPORTED_ERROR;
- return 0;
- }
- return rbc->cloneBinary(buffer, capacity, *status);
- }
- U_CAPI UCollator* U_EXPORT2
- ucol_safeClone(const UCollator *coll, void * /*stackBuffer*/, int32_t * pBufferSize, UErrorCode *status)
- {
- if (status == nullptr || U_FAILURE(*status)){
- return nullptr;
- }
- if (coll == nullptr) {
- *status = U_ILLEGAL_ARGUMENT_ERROR;
- return nullptr;
- }
- if (pBufferSize != nullptr) {
- int32_t inputSize = *pBufferSize;
- *pBufferSize = 1;
- if (inputSize == 0) {
- return nullptr; // preflighting for deprecated functionality
- }
- }
- Collator *newColl = Collator::fromUCollator(coll)->clone();
- if (newColl == nullptr) {
- *status = U_MEMORY_ALLOCATION_ERROR;
- return nullptr;
- } else if (pBufferSize != nullptr) {
- *status = U_SAFECLONE_ALLOCATED_WARNING;
- }
- return newColl->toUCollator();
- }
- U_CAPI UCollator* U_EXPORT2
- ucol_clone(const UCollator *coll, UErrorCode *status)
- {
- return ucol_safeClone(coll, nullptr, nullptr, status);
- }
- U_CAPI void U_EXPORT2
- ucol_close(UCollator *coll)
- {
- UTRACE_ENTRY_OC(UTRACE_UCOL_CLOSE);
- UTRACE_DATA1(UTRACE_INFO, "coll = %p", coll);
- if(coll != nullptr) {
- delete Collator::fromUCollator(coll);
- }
- UTRACE_EXIT();
- }
- U_CAPI int32_t U_EXPORT2
- ucol_mergeSortkeys(const uint8_t *src1, int32_t src1Length,
- const uint8_t *src2, int32_t src2Length,
- uint8_t *dest, int32_t destCapacity) {
- /* check arguments */
- if( src1==nullptr || src1Length<-1 || src1Length==0 || (src1Length>0 && src1[src1Length-1]!=0) ||
- src2==nullptr || src2Length<-1 || src2Length==0 || (src2Length>0 && src2[src2Length-1]!=0) ||
- destCapacity<0 || (destCapacity>0 && dest==nullptr)
- ) {
- /* error, attempt to write a zero byte and return 0 */
- if(dest!=nullptr && destCapacity>0) {
- *dest=0;
- }
- return 0;
- }
- /* check lengths and capacity */
- if(src1Length<0) {
- src1Length=(int32_t)uprv_strlen((const char *)src1)+1;
- }
- if(src2Length<0) {
- src2Length=(int32_t)uprv_strlen((const char *)src2)+1;
- }
- int32_t destLength=src1Length+src2Length;
- if(destLength>destCapacity) {
- /* the merged sort key does not fit into the destination */
- return destLength;
- }
- /* merge the sort keys with the same number of levels */
- uint8_t *p=dest;
- for(;;) {
- /* copy level from src1 not including 00 or 01 */
- uint8_t b;
- while((b=*src1)>=2) {
- ++src1;
- *p++=b;
- }
- /* add a 02 merge separator */
- *p++=2;
- /* copy level from src2 not including 00 or 01 */
- while((b=*src2)>=2) {
- ++src2;
- *p++=b;
- }
- /* if both sort keys have another level, then add a 01 level separator and continue */
- if(*src1==1 && *src2==1) {
- ++src1;
- ++src2;
- *p++=1;
- } else {
- break;
- }
- }
- /*
- * here, at least one sort key is finished now, but the other one
- * might have some contents left from containing more levels;
- * that contents is just appended to the result
- */
- if(*src1!=0) {
- /* src1 is not finished, therefore *src2==0, and src1 is appended */
- src2=src1;
- }
- /* append src2, "the other, unfinished sort key" */
- while((*p++=*src2++)!=0) {}
- /* the actual length might be less than destLength if either sort key contained illegally embedded zero bytes */
- return (int32_t)(p-dest);
- }
- U_CAPI int32_t U_EXPORT2
- ucol_getSortKey(const UCollator *coll,
- const char16_t *source,
- int32_t sourceLength,
- uint8_t *result,
- int32_t resultLength)
- {
- UTRACE_ENTRY(UTRACE_UCOL_GET_SORTKEY);
- if (UTRACE_LEVEL(UTRACE_VERBOSE)) {
- UTRACE_DATA3(UTRACE_VERBOSE, "coll=%p, source string = %vh ", coll, source,
- ((sourceLength==-1 && source!=nullptr) ? u_strlen(source) : sourceLength));
- }
- int32_t keySize = Collator::fromUCollator(coll)->
- getSortKey(source, sourceLength, result, resultLength);
- UTRACE_DATA2(UTRACE_VERBOSE, "Sort Key = %vb", result, keySize);
- UTRACE_EXIT_VALUE(keySize);
- return keySize;
- }
- U_CAPI int32_t U_EXPORT2
- ucol_nextSortKeyPart(const UCollator *coll,
- UCharIterator *iter,
- uint32_t state[2],
- uint8_t *dest, int32_t count,
- UErrorCode *status)
- {
- /* error checking */
- if(status==nullptr || U_FAILURE(*status)) {
- return 0;
- }
- UTRACE_ENTRY(UTRACE_UCOL_NEXTSORTKEYPART);
- UTRACE_DATA6(UTRACE_VERBOSE, "coll=%p, iter=%p, state=%d %d, dest=%p, count=%d",
- coll, iter, state[0], state[1], dest, count);
- int32_t i = Collator::fromUCollator(coll)->
- internalNextSortKeyPart(iter, state, dest, count, *status);
- // Return number of meaningful sortkey bytes.
- UTRACE_DATA4(UTRACE_VERBOSE, "dest = %vb, state=%d %d",
- dest,i, state[0], state[1]);
- UTRACE_EXIT_VALUE_STATUS(i, *status);
- return i;
- }
- /**
- * Produce a bound for a given sortkey and a number of levels.
- */
- U_CAPI int32_t U_EXPORT2
- ucol_getBound(const uint8_t *source,
- int32_t sourceLength,
- UColBoundMode boundType,
- uint32_t noOfLevels,
- uint8_t *result,
- int32_t resultLength,
- UErrorCode *status)
- {
- // consistency checks
- if(status == nullptr || U_FAILURE(*status)) {
- return 0;
- }
- if(source == nullptr) {
- *status = U_ILLEGAL_ARGUMENT_ERROR;
- return 0;
- }
- int32_t sourceIndex = 0;
- // Scan the string until we skip enough of the key OR reach the end of the key
- do {
- sourceIndex++;
- if(source[sourceIndex] == Collation::LEVEL_SEPARATOR_BYTE) {
- noOfLevels--;
- }
- } while (noOfLevels > 0
- && (source[sourceIndex] != 0 || sourceIndex < sourceLength));
- if((source[sourceIndex] == 0 || sourceIndex == sourceLength)
- && noOfLevels > 0) {
- *status = U_SORT_KEY_TOO_SHORT_WARNING;
- }
- // READ ME: this code assumes that the values for boundType
- // enum will not changes. They are set so that the enum value
- // corresponds to the number of extra bytes each bound type
- // needs.
- if(result != nullptr && resultLength >= sourceIndex+boundType) {
- uprv_memcpy(result, source, sourceIndex);
- switch(boundType) {
- // Lower bound just gets terminated. No extra bytes
- case UCOL_BOUND_LOWER: // = 0
- break;
- // Upper bound needs one extra byte
- case UCOL_BOUND_UPPER: // = 1
- result[sourceIndex++] = 2;
- break;
- // Upper long bound needs two extra bytes
- case UCOL_BOUND_UPPER_LONG: // = 2
- result[sourceIndex++] = 0xFF;
- result[sourceIndex++] = 0xFF;
- break;
- default:
- *status = U_ILLEGAL_ARGUMENT_ERROR;
- return 0;
- }
- result[sourceIndex++] = 0;
- return sourceIndex;
- } else {
- return sourceIndex+boundType+1;
- }
- }
- U_CAPI void U_EXPORT2
- ucol_setMaxVariable(UCollator *coll, UColReorderCode group, UErrorCode *pErrorCode) {
- if(U_FAILURE(*pErrorCode)) { return; }
- Collator::fromUCollator(coll)->setMaxVariable(group, *pErrorCode);
- }
- U_CAPI UColReorderCode U_EXPORT2
- ucol_getMaxVariable(const UCollator *coll) {
- return Collator::fromUCollator(coll)->getMaxVariable();
- }
- U_CAPI uint32_t U_EXPORT2
- ucol_setVariableTop(UCollator *coll, const char16_t *varTop, int32_t len, UErrorCode *status) {
- if(U_FAILURE(*status) || coll == nullptr) {
- return 0;
- }
- return Collator::fromUCollator(coll)->setVariableTop(varTop, len, *status);
- }
- U_CAPI uint32_t U_EXPORT2 ucol_getVariableTop(const UCollator *coll, UErrorCode *status) {
- if(U_FAILURE(*status) || coll == nullptr) {
- return 0;
- }
- return Collator::fromUCollator(coll)->getVariableTop(*status);
- }
- U_CAPI void U_EXPORT2
- ucol_restoreVariableTop(UCollator *coll, const uint32_t varTop, UErrorCode *status) {
- if(U_FAILURE(*status) || coll == nullptr) {
- return;
- }
- Collator::fromUCollator(coll)->setVariableTop(varTop, *status);
- }
- U_CAPI void U_EXPORT2
- ucol_setAttribute(UCollator *coll, UColAttribute attr, UColAttributeValue value, UErrorCode *status) {
- if(U_FAILURE(*status) || coll == nullptr) {
- return;
- }
- Collator::fromUCollator(coll)->setAttribute(attr, value, *status);
- }
- U_CAPI UColAttributeValue U_EXPORT2
- ucol_getAttribute(const UCollator *coll, UColAttribute attr, UErrorCode *status) {
- if(U_FAILURE(*status) || coll == nullptr) {
- return UCOL_DEFAULT;
- }
- return Collator::fromUCollator(coll)->getAttribute(attr, *status);
- }
- U_CAPI void U_EXPORT2
- ucol_setStrength( UCollator *coll,
- UCollationStrength strength)
- {
- UErrorCode status = U_ZERO_ERROR;
- ucol_setAttribute(coll, UCOL_STRENGTH, strength, &status);
- }
- U_CAPI UCollationStrength U_EXPORT2
- ucol_getStrength(const UCollator *coll)
- {
- UErrorCode status = U_ZERO_ERROR;
- return ucol_getAttribute(coll, UCOL_STRENGTH, &status);
- }
- U_CAPI int32_t U_EXPORT2
- ucol_getReorderCodes(const UCollator *coll,
- int32_t *dest,
- int32_t destCapacity,
- UErrorCode *status) {
- if (U_FAILURE(*status)) {
- return 0;
- }
- return Collator::fromUCollator(coll)->getReorderCodes(dest, destCapacity, *status);
- }
- U_CAPI void U_EXPORT2
- ucol_setReorderCodes(UCollator* coll,
- const int32_t* reorderCodes,
- int32_t reorderCodesLength,
- UErrorCode *status) {
- if (U_FAILURE(*status)) {
- return;
- }
- Collator::fromUCollator(coll)->setReorderCodes(reorderCodes, reorderCodesLength, *status);
- }
- U_CAPI int32_t U_EXPORT2
- ucol_getEquivalentReorderCodes(int32_t reorderCode,
- int32_t* dest,
- int32_t destCapacity,
- UErrorCode *pErrorCode) {
- return Collator::getEquivalentReorderCodes(reorderCode, dest, destCapacity, *pErrorCode);
- }
- U_CAPI void U_EXPORT2
- ucol_getVersion(const UCollator* coll,
- UVersionInfo versionInfo)
- {
- Collator::fromUCollator(coll)->getVersion(versionInfo);
- }
- U_CAPI UCollationResult U_EXPORT2
- ucol_strcollIter( const UCollator *coll,
- UCharIterator *sIter,
- UCharIterator *tIter,
- UErrorCode *status)
- {
- if(!status || U_FAILURE(*status)) {
- return UCOL_EQUAL;
- }
- UTRACE_ENTRY(UTRACE_UCOL_STRCOLLITER);
- UTRACE_DATA3(UTRACE_VERBOSE, "coll=%p, sIter=%p, tIter=%p", coll, sIter, tIter);
- if(sIter == nullptr || tIter == nullptr || coll == nullptr) {
- *status = U_ILLEGAL_ARGUMENT_ERROR;
- UTRACE_EXIT_VALUE_STATUS(UCOL_EQUAL, *status);
- return UCOL_EQUAL;
- }
- UCollationResult result = Collator::fromUCollator(coll)->compare(*sIter, *tIter, *status);
- UTRACE_EXIT_VALUE_STATUS(result, *status);
- return result;
- }
- /* */
- /* ucol_strcoll Main public API string comparison function */
- /* */
- U_CAPI UCollationResult U_EXPORT2
- ucol_strcoll( const UCollator *coll,
- const char16_t *source,
- int32_t sourceLength,
- const char16_t *target,
- int32_t targetLength)
- {
- UTRACE_ENTRY(UTRACE_UCOL_STRCOLL);
- if (UTRACE_LEVEL(UTRACE_VERBOSE)) {
- UTRACE_DATA3(UTRACE_VERBOSE, "coll=%p, source=%p, target=%p", coll, source, target);
- UTRACE_DATA2(UTRACE_VERBOSE, "source string = %vh ", source, sourceLength);
- UTRACE_DATA2(UTRACE_VERBOSE, "target string = %vh ", target, targetLength);
- }
- UErrorCode status = U_ZERO_ERROR;
- UCollationResult returnVal = Collator::fromUCollator(coll)->
- compare(source, sourceLength, target, targetLength, status);
- UTRACE_EXIT_VALUE_STATUS(returnVal, status);
- return returnVal;
- }
- U_CAPI UCollationResult U_EXPORT2
- ucol_strcollUTF8(
- const UCollator *coll,
- const char *source,
- int32_t sourceLength,
- const char *target,
- int32_t targetLength,
- UErrorCode *status)
- {
- UTRACE_ENTRY(UTRACE_UCOL_STRCOLLUTF8);
- if (UTRACE_LEVEL(UTRACE_VERBOSE)) {
- UTRACE_DATA3(UTRACE_VERBOSE, "coll=%p, source=%p, target=%p", coll, source, target);
- UTRACE_DATA2(UTRACE_VERBOSE, "source string = %vb ", source, sourceLength);
- UTRACE_DATA2(UTRACE_VERBOSE, "target string = %vb ", target, targetLength);
- }
- if (U_FAILURE(*status)) {
- /* do nothing */
- UTRACE_EXIT_VALUE_STATUS(UCOL_EQUAL, *status);
- return UCOL_EQUAL;
- }
- UCollationResult returnVal = Collator::fromUCollator(coll)->internalCompareUTF8(
- source, sourceLength, target, targetLength, *status);
- UTRACE_EXIT_VALUE_STATUS(returnVal, *status);
- return returnVal;
- }
- /* convenience function for comparing strings */
- U_CAPI UBool U_EXPORT2
- ucol_greater( const UCollator *coll,
- const char16_t *source,
- int32_t sourceLength,
- const char16_t *target,
- int32_t targetLength)
- {
- return (ucol_strcoll(coll, source, sourceLength, target, targetLength)
- == UCOL_GREATER);
- }
- /* convenience function for comparing strings */
- U_CAPI UBool U_EXPORT2
- ucol_greaterOrEqual( const UCollator *coll,
- const char16_t *source,
- int32_t sourceLength,
- const char16_t *target,
- int32_t targetLength)
- {
- return (ucol_strcoll(coll, source, sourceLength, target, targetLength)
- != UCOL_LESS);
- }
- /* convenience function for comparing strings */
- U_CAPI UBool U_EXPORT2
- ucol_equal( const UCollator *coll,
- const char16_t *source,
- int32_t sourceLength,
- const char16_t *target,
- int32_t targetLength)
- {
- return (ucol_strcoll(coll, source, sourceLength, target, targetLength)
- == UCOL_EQUAL);
- }
- U_CAPI void U_EXPORT2
- ucol_getUCAVersion(const UCollator* coll, UVersionInfo info) {
- const Collator *c = Collator::fromUCollator(coll);
- if(c != nullptr) {
- UVersionInfo v;
- c->getVersion(v);
- // Note: This is tied to how the current implementation encodes the UCA version
- // in the overall getVersion().
- // Alternatively, we could load the root collator and get at lower-level data from there.
- // Either way, it will reflect the input collator's UCA version only
- // if it is a known implementation.
- // It would be cleaner to make this a virtual Collator method.
- info[0] = v[1] >> 3;
- info[1] = v[1] & 7;
- info[2] = v[2] >> 6;
- info[3] = 0;
- }
- }
- U_CAPI const char16_t * U_EXPORT2
- ucol_getRules(const UCollator *coll, int32_t *length) {
- const RuleBasedCollator *rbc = RuleBasedCollator::rbcFromUCollator(coll);
- // OK to crash if coll==nullptr: We do not want to check "this" pointers.
- if(rbc != nullptr || coll == nullptr) {
- const UnicodeString &rules = rbc->getRules();
- U_ASSERT(rules.getBuffer()[rules.length()] == 0);
- *length = rules.length();
- return rules.getBuffer();
- }
- static const char16_t _NUL = 0;
- *length = 0;
- return &_NUL;
- }
- U_CAPI int32_t U_EXPORT2
- ucol_getRulesEx(const UCollator *coll, UColRuleOption delta, char16_t *buffer, int32_t bufferLen) {
- UnicodeString rules;
- const RuleBasedCollator *rbc = RuleBasedCollator::rbcFromUCollator(coll);
- if(rbc != nullptr || coll == nullptr) {
- rbc->getRules(delta, rules);
- }
- if(buffer != nullptr && bufferLen > 0) {
- UErrorCode errorCode = U_ZERO_ERROR;
- return rules.extract(buffer, bufferLen, errorCode);
- } else {
- return rules.length();
- }
- }
- U_CAPI const char * U_EXPORT2
- ucol_getLocale(const UCollator *coll, ULocDataLocaleType type, UErrorCode *status) {
- return ucol_getLocaleByType(coll, type, status);
- }
- U_CAPI const char * U_EXPORT2
- ucol_getLocaleByType(const UCollator *coll, ULocDataLocaleType type, UErrorCode *status) {
- if(U_FAILURE(*status)) {
- return nullptr;
- }
- UTRACE_ENTRY(UTRACE_UCOL_GETLOCALE);
- UTRACE_DATA1(UTRACE_INFO, "coll=%p", coll);
- const char *result;
- const RuleBasedCollator *rbc = RuleBasedCollator::rbcFromUCollator(coll);
- if(rbc == nullptr && coll != nullptr) {
- *status = U_UNSUPPORTED_ERROR;
- result = nullptr;
- } else {
- result = rbc->internalGetLocaleID(type, *status);
- }
- UTRACE_DATA1(UTRACE_INFO, "result = %s", result);
- UTRACE_EXIT_STATUS(*status);
- return result;
- }
- U_CAPI USet * U_EXPORT2
- ucol_getTailoredSet(const UCollator *coll, UErrorCode *status) {
- if(U_FAILURE(*status)) {
- return nullptr;
- }
- UnicodeSet *set = Collator::fromUCollator(coll)->getTailoredSet(*status);
- if(U_FAILURE(*status)) {
- delete set;
- return nullptr;
- }
- return set->toUSet();
- }
- U_CAPI UBool U_EXPORT2
- ucol_equals(const UCollator *source, const UCollator *target) {
- return source == target ||
- (*Collator::fromUCollator(source)) == (*Collator::fromUCollator(target));
- }
- #endif /* #if !UCONFIG_NO_COLLATION */
|