|
@@ -64,6 +64,7 @@ U_NAMESPACE_BEGIN
|
|
|
|
|
|
UOBJECT_DEFINE_RTTI_IMPLEMENTATION(CanonicalIterator)
|
|
|
|
|
|
+
|
|
|
/**
|
|
|
*@param source string to get results for
|
|
|
*/
|
|
@@ -73,10 +74,10 @@ CanonicalIterator::CanonicalIterator(const UnicodeString &sourceStr, UErrorCode
|
|
|
pieces_lengths(nullptr),
|
|
|
current(nullptr),
|
|
|
current_length(0),
|
|
|
- nfd(*Normalizer2::getNFDInstance(status)),
|
|
|
- nfcImpl(*Normalizer2Factory::getNFCImpl(status))
|
|
|
+ nfd(Normalizer2::getNFDInstance(status)),
|
|
|
+ nfcImpl(Normalizer2Factory::getNFCImpl(status))
|
|
|
{
|
|
|
- if(U_SUCCESS(status) && nfcImpl.ensureCanonIterData(status)) {
|
|
|
+ if(U_SUCCESS(status) && nfcImpl->ensureCanonIterData(status)) {
|
|
|
setSource(sourceStr, status);
|
|
|
}
|
|
|
}
|
|
@@ -172,7 +173,7 @@ void CanonicalIterator::setSource(const UnicodeString &newSource, UErrorCode &st
|
|
|
int32_t i = 0;
|
|
|
UnicodeString *list = nullptr;
|
|
|
|
|
|
- nfd.normalize(newSource, source, status);
|
|
|
+ nfd->normalize(newSource, source, status);
|
|
|
if(U_FAILURE(status)) {
|
|
|
return;
|
|
|
}
|
|
@@ -194,7 +195,7 @@ void CanonicalIterator::setSource(const UnicodeString &newSource, UErrorCode &st
|
|
|
current[0] = 0;
|
|
|
pieces[0] = new UnicodeString[1];
|
|
|
pieces_lengths[0] = 1;
|
|
|
- if (pieces[0] == 0) {
|
|
|
+ if (pieces[0] == nullptr) {
|
|
|
status = U_MEMORY_ALLOCATION_ERROR;
|
|
|
goto CleanPartialInitialization;
|
|
|
}
|
|
@@ -203,7 +204,7 @@ void CanonicalIterator::setSource(const UnicodeString &newSource, UErrorCode &st
|
|
|
|
|
|
|
|
|
list = new UnicodeString[source.length()];
|
|
|
- if (list == 0) {
|
|
|
+ if (list == nullptr) {
|
|
|
status = U_MEMORY_ALLOCATION_ERROR;
|
|
|
goto CleanPartialInitialization;
|
|
|
}
|
|
@@ -219,7 +220,7 @@ void CanonicalIterator::setSource(const UnicodeString &newSource, UErrorCode &st
|
|
|
// on the NFD form - see above).
|
|
|
for (; i < source.length(); i += U16_LENGTH(cp)) {
|
|
|
cp = source.char32At(i);
|
|
|
- if (nfcImpl.isCanonSegmentStarter(cp)) {
|
|
|
+ if (nfcImpl->isCanonSegmentStarter(cp)) {
|
|
|
source.extract(start, i-start, list[list_length++]); // add up to i
|
|
|
start = i;
|
|
|
}
|
|
@@ -252,9 +253,7 @@ void CanonicalIterator::setSource(const UnicodeString &newSource, UErrorCode &st
|
|
|
return;
|
|
|
// Common section to cleanup all local variables and reset object variables.
|
|
|
CleanPartialInitialization:
|
|
|
- if (list != nullptr) {
|
|
|
- delete[] list;
|
|
|
- }
|
|
|
+ delete[] list;
|
|
|
cleanPieces();
|
|
|
}
|
|
|
|
|
@@ -264,10 +263,19 @@ CleanPartialInitialization:
|
|
|
* @param source the string to find permutations for
|
|
|
* @return the results in a set.
|
|
|
*/
|
|
|
-void U_EXPORT2 CanonicalIterator::permute(UnicodeString &source, UBool skipZeros, Hashtable *result, UErrorCode &status) {
|
|
|
+void U_EXPORT2 CanonicalIterator::permute(UnicodeString &source, UBool skipZeros, Hashtable *result, UErrorCode &status, int32_t depth) {
|
|
|
if(U_FAILURE(status)) {
|
|
|
return;
|
|
|
}
|
|
|
+ // To avoid an infinite loop caused by permute, we limit the depth of recursive
|
|
|
+ // call to permute and return U_UNSUPPORTED_ERROR.
|
|
|
+ // We know in some unit test we need at least 4. Set to 8 just in case some
|
|
|
+ // unforeseen use cases arise.
|
|
|
+ constexpr int32_t kPermuteDepthLimit = 8;
|
|
|
+ if (depth > kPermuteDepthLimit) {
|
|
|
+ status = U_UNSUPPORTED_ERROR;
|
|
|
+ return;
|
|
|
+ }
|
|
|
//if (PROGRESS) printf("Permute: %s\n", UToS(Tr(source)));
|
|
|
int32_t i = 0;
|
|
|
|
|
@@ -277,7 +285,7 @@ void U_EXPORT2 CanonicalIterator::permute(UnicodeString &source, UBool skipZeros
|
|
|
if (source.length() <= 2 && source.countChar32() <= 1) {
|
|
|
UnicodeString *toPut = new UnicodeString(source);
|
|
|
/* test for nullptr */
|
|
|
- if (toPut == 0) {
|
|
|
+ if (toPut == nullptr) {
|
|
|
status = U_MEMORY_ALLOCATION_ERROR;
|
|
|
return;
|
|
|
}
|
|
@@ -311,7 +319,7 @@ void U_EXPORT2 CanonicalIterator::permute(UnicodeString &source, UBool skipZeros
|
|
|
|
|
|
// see what the permutations of the characters before and after this one are
|
|
|
//Hashtable *subpermute = permute(source.substring(0,i) + source.substring(i + UTF16.getCharCount(cp)));
|
|
|
- permute(subPermuteString.remove(i, U16_LENGTH(cp)), skipZeros, &subpermute, status);
|
|
|
+ permute(subPermuteString.remove(i, U16_LENGTH(cp)), skipZeros, &subpermute, status, depth+1);
|
|
|
/* Test for buffer overflows */
|
|
|
if(U_FAILURE(status)) {
|
|
|
return;
|
|
@@ -346,7 +354,7 @@ UnicodeString* CanonicalIterator::getEquivalents(const UnicodeString &segment, i
|
|
|
Hashtable permutations(status);
|
|
|
Hashtable basic(status);
|
|
|
if (U_FAILURE(status)) {
|
|
|
- return 0;
|
|
|
+ return nullptr;
|
|
|
}
|
|
|
result.setValueDeleter(uprv_deleteUObject);
|
|
|
permutations.setValueDeleter(uprv_deleteUObject);
|
|
@@ -381,7 +389,7 @@ UnicodeString* CanonicalIterator::getEquivalents(const UnicodeString &segment, i
|
|
|
//UnicodeString *possible = new UnicodeString(*((UnicodeString *)(ne2->value.pointer)));
|
|
|
UnicodeString possible(*((UnicodeString *)(ne2->value.pointer)));
|
|
|
UnicodeString attempt;
|
|
|
- nfd.normalize(possible, attempt, status);
|
|
|
+ nfd->normalize(possible, attempt, status);
|
|
|
|
|
|
// TODO: check if operator == is semanticaly the same as attempt.equals(segment)
|
|
|
if (attempt==segment) {
|
|
@@ -399,7 +407,7 @@ UnicodeString* CanonicalIterator::getEquivalents(const UnicodeString &segment, i
|
|
|
|
|
|
/* Test for buffer overflows */
|
|
|
if(U_FAILURE(status)) {
|
|
|
- return 0;
|
|
|
+ return nullptr;
|
|
|
}
|
|
|
// convert into a String[] to clean up storage
|
|
|
//String[] finalResult = new String[result.size()];
|
|
@@ -407,7 +415,7 @@ UnicodeString* CanonicalIterator::getEquivalents(const UnicodeString &segment, i
|
|
|
int32_t resultCount;
|
|
|
if((resultCount = result.count()) != 0) {
|
|
|
finalResult = new UnicodeString[resultCount];
|
|
|
- if (finalResult == 0) {
|
|
|
+ if (finalResult == nullptr) {
|
|
|
status = U_MEMORY_ALLOCATION_ERROR;
|
|
|
return nullptr;
|
|
|
}
|
|
@@ -448,7 +456,7 @@ Hashtable *CanonicalIterator::getEquivalents2(Hashtable *fillinResult, const cha
|
|
|
for (int32_t i = 0; i < segLen; i += U16_LENGTH(cp)) {
|
|
|
// see if any character is at the start of some decomposition
|
|
|
U16_GET(segment, 0, i, segLen, cp);
|
|
|
- if (!nfcImpl.getCanonStartSet(cp, starts)) {
|
|
|
+ if (!nfcImpl->getCanonStartSet(cp, starts)) {
|
|
|
continue;
|
|
|
}
|
|
|
// if so, see which decompositions match
|
|
@@ -471,7 +479,7 @@ Hashtable *CanonicalIterator::getEquivalents2(Hashtable *fillinResult, const cha
|
|
|
UnicodeString item = *((UnicodeString *)(ne->value.pointer));
|
|
|
UnicodeString *toAdd = new UnicodeString(prefix);
|
|
|
/* test for nullptr */
|
|
|
- if (toAdd == 0) {
|
|
|
+ if (toAdd == nullptr) {
|
|
|
status = U_MEMORY_ALLOCATION_ERROR;
|
|
|
return nullptr;
|
|
|
}
|
|
@@ -509,7 +517,7 @@ Hashtable *CanonicalIterator::extract(Hashtable *fillinResult, UChar32 comp, con
|
|
|
UnicodeString temp(comp);
|
|
|
int32_t inputLen=temp.length();
|
|
|
UnicodeString decompString;
|
|
|
- nfd.normalize(temp, decompString, status);
|
|
|
+ nfd->normalize(temp, decompString, status);
|
|
|
if (U_FAILURE(status)) {
|
|
|
return nullptr;
|
|
|
}
|
|
@@ -573,7 +581,7 @@ Hashtable *CanonicalIterator::extract(Hashtable *fillinResult, UChar32 comp, con
|
|
|
// brute force approach
|
|
|
// check to make sure result is canonically equivalent
|
|
|
UnicodeString trial;
|
|
|
- nfd.normalize(temp, trial, status);
|
|
|
+ nfd->normalize(temp, trial, status);
|
|
|
if(U_FAILURE(status) || trial.compare(segment+segmentPos, segLen - segmentPos) != 0) {
|
|
|
return nullptr;
|
|
|
}
|