123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144 |
- // © 2016 and later: Unicode, Inc. and others.
- // License & terms of use: http://www.unicode.org/copyright.html
- /*
- *******************************************************************************
- * Copyright (C) 2013-2014, International Business Machines
- * Corporation and others. All Rights Reserved.
- *******************************************************************************
- * collationsets.h
- *
- * created on: 2013feb09
- * created by: Markus W. Scherer
- */
- #ifndef __COLLATIONSETS_H__
- #define __COLLATIONSETS_H__
- #include "unicode/utypes.h"
- #if !UCONFIG_NO_COLLATION
- #include "unicode/uniset.h"
- #include "collation.h"
- U_NAMESPACE_BEGIN
- struct CollationData;
- /**
- * Finds the set of characters and strings that sort differently in the tailoring
- * from the base data.
- *
- * Every mapping in the tailoring needs to be compared to the base,
- * because some mappings are copied for optimization, and
- * all contractions for a character are copied if any contractions for that character
- * are added, modified or removed.
- *
- * It might be simpler to re-parse the rule string, but:
- * - That would require duplicating some of the from-rules builder code.
- * - That would make the runtime code depend on the builder.
- * - That would only work if we have the rule string, and we allow users to
- * omit the rule string from data files.
- */
- class TailoredSet : public UMemory {
- public:
- TailoredSet(UnicodeSet *t)
- : data(nullptr), baseData(nullptr),
- tailored(t),
- suffix(nullptr),
- errorCode(U_ZERO_ERROR) {}
- void forData(const CollationData *d, UErrorCode &errorCode);
- /**
- * @return U_SUCCESS(errorCode) in C++, void in Java
- * @internal only public for access by callback
- */
- UBool handleCE32(UChar32 start, UChar32 end, uint32_t ce32);
- private:
- void compare(UChar32 c, uint32_t ce32, uint32_t baseCE32);
- void comparePrefixes(UChar32 c, const char16_t *p, const char16_t *q);
- void compareContractions(UChar32 c, const char16_t *p, const char16_t *q);
- void addPrefixes(const CollationData *d, UChar32 c, const char16_t *p);
- void addPrefix(const CollationData *d, const UnicodeString &pfx, UChar32 c, uint32_t ce32);
- void addContractions(UChar32 c, const char16_t *p);
- void addSuffix(UChar32 c, const UnicodeString &sfx);
- void add(UChar32 c);
- /** Prefixes are reversed in the data structure. */
- void setPrefix(const UnicodeString &pfx) {
- unreversedPrefix = pfx;
- unreversedPrefix.reverse();
- }
- void resetPrefix() {
- unreversedPrefix.remove();
- }
- const CollationData *data;
- const CollationData *baseData;
- UnicodeSet *tailored;
- UnicodeString unreversedPrefix;
- const UnicodeString *suffix;
- UErrorCode errorCode;
- };
- class ContractionsAndExpansions : public UMemory {
- public:
- class CESink : public UMemory {
- public:
- virtual ~CESink();
- virtual void handleCE(int64_t ce) = 0;
- virtual void handleExpansion(const int64_t ces[], int32_t length) = 0;
- };
- ContractionsAndExpansions(UnicodeSet *con, UnicodeSet *exp, CESink *s, UBool prefixes)
- : data(nullptr),
- contractions(con), expansions(exp),
- sink(s),
- addPrefixes(prefixes),
- checkTailored(0),
- suffix(nullptr),
- errorCode(U_ZERO_ERROR) {}
- void forData(const CollationData *d, UErrorCode &errorCode);
- void forCodePoint(const CollationData *d, UChar32 c, UErrorCode &ec);
- // all following: @internal, only public for access by callback
- void handleCE32(UChar32 start, UChar32 end, uint32_t ce32);
- void handlePrefixes(UChar32 start, UChar32 end, uint32_t ce32);
- void handleContractions(UChar32 start, UChar32 end, uint32_t ce32);
- void addExpansions(UChar32 start, UChar32 end);
- void addStrings(UChar32 start, UChar32 end, UnicodeSet *set);
- /** Prefixes are reversed in the data structure. */
- void setPrefix(const UnicodeString &pfx) {
- unreversedPrefix = pfx;
- unreversedPrefix.reverse();
- }
- void resetPrefix() {
- unreversedPrefix.remove();
- }
- const CollationData *data;
- UnicodeSet *contractions;
- UnicodeSet *expansions;
- CESink *sink;
- UBool addPrefixes;
- int8_t checkTailored; // -1: collected tailored +1: exclude tailored
- UnicodeSet tailored;
- UnicodeSet ranges;
- UnicodeString unreversedPrefix;
- const UnicodeString *suffix;
- int64_t ces[Collation::MAX_EXPANSION_LENGTH];
- UErrorCode errorCode;
- };
- U_NAMESPACE_END
- #endif // !UCONFIG_NO_COLLATION
- #endif // __COLLATIONSETS_H__
|