123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470 |
- // © 2017 and later: Unicode, Inc. and others.
- // License & terms of use: http://www.unicode.org/copyright.html
- #include "unicode/utypes.h"
- #if !UCONFIG_NO_FORMATTING
- #include "formatted_string_builder.h"
- #include "putilimp.h"
- #include "unicode/ustring.h"
- #include "unicode/utf16.h"
- #include "unicode/unum.h" // for UNumberFormatFields literals
- namespace {
- // A version of uprv_memcpy that checks for length 0.
- // By default, uprv_memcpy requires a length of at least 1.
- inline void uprv_memcpy2(void* dest, const void* src, size_t len) {
- if (len > 0) {
- uprv_memcpy(dest, src, len);
- }
- }
- // A version of uprv_memmove that checks for length 0.
- // By default, uprv_memmove requires a length of at least 1.
- inline void uprv_memmove2(void* dest, const void* src, size_t len) {
- if (len > 0) {
- uprv_memmove(dest, src, len);
- }
- }
- } // namespace
- U_NAMESPACE_BEGIN
- FormattedStringBuilder::FormattedStringBuilder() {
- #if U_DEBUG
- // Initializing the memory to non-zero helps catch some bugs that involve
- // reading from an improperly terminated string.
- for (int32_t i=0; i<getCapacity(); i++) {
- getCharPtr()[i] = 1;
- }
- #endif
- }
- FormattedStringBuilder::~FormattedStringBuilder() {
- if (fUsingHeap) {
- uprv_free(fChars.heap.ptr);
- uprv_free(fFields.heap.ptr);
- }
- }
- FormattedStringBuilder::FormattedStringBuilder(const FormattedStringBuilder &other) {
- *this = other;
- }
- FormattedStringBuilder &FormattedStringBuilder::operator=(const FormattedStringBuilder &other) {
- // Check for self-assignment
- if (this == &other) {
- return *this;
- }
- // Continue with deallocation and copying
- if (fUsingHeap) {
- uprv_free(fChars.heap.ptr);
- uprv_free(fFields.heap.ptr);
- fUsingHeap = false;
- }
- int32_t capacity = other.getCapacity();
- if (capacity > DEFAULT_CAPACITY) {
- // FIXME: uprv_malloc
- // C++ note: malloc appears in two places: here and in prepareForInsertHelper.
- auto newChars = static_cast<char16_t *> (uprv_malloc(sizeof(char16_t) * capacity));
- auto newFields = static_cast<Field *>(uprv_malloc(sizeof(Field) * capacity));
- if (newChars == nullptr || newFields == nullptr) {
- // UErrorCode is not available; fail silently.
- uprv_free(newChars);
- uprv_free(newFields);
- *this = FormattedStringBuilder(); // can't fail
- return *this;
- }
- fUsingHeap = true;
- fChars.heap.capacity = capacity;
- fChars.heap.ptr = newChars;
- fFields.heap.capacity = capacity;
- fFields.heap.ptr = newFields;
- }
- uprv_memcpy2(getCharPtr(), other.getCharPtr(), sizeof(char16_t) * capacity);
- uprv_memcpy2(getFieldPtr(), other.getFieldPtr(), sizeof(Field) * capacity);
- fZero = other.fZero;
- fLength = other.fLength;
- return *this;
- }
- int32_t FormattedStringBuilder::length() const {
- return fLength;
- }
- int32_t FormattedStringBuilder::codePointCount() const {
- return u_countChar32(getCharPtr() + fZero, fLength);
- }
- UChar32 FormattedStringBuilder::getFirstCodePoint() const {
- if (fLength == 0) {
- return -1;
- }
- UChar32 cp;
- U16_GET(getCharPtr() + fZero, 0, 0, fLength, cp);
- return cp;
- }
- UChar32 FormattedStringBuilder::getLastCodePoint() const {
- if (fLength == 0) {
- return -1;
- }
- int32_t offset = fLength;
- U16_BACK_1(getCharPtr() + fZero, 0, offset);
- UChar32 cp;
- U16_GET(getCharPtr() + fZero, 0, offset, fLength, cp);
- return cp;
- }
- UChar32 FormattedStringBuilder::codePointAt(int32_t index) const {
- UChar32 cp;
- U16_GET(getCharPtr() + fZero, 0, index, fLength, cp);
- return cp;
- }
- UChar32 FormattedStringBuilder::codePointBefore(int32_t index) const {
- int32_t offset = index;
- U16_BACK_1(getCharPtr() + fZero, 0, offset);
- UChar32 cp;
- U16_GET(getCharPtr() + fZero, 0, offset, fLength, cp);
- return cp;
- }
- FormattedStringBuilder &FormattedStringBuilder::clear() {
- // TODO: Reset the heap here?
- fZero = getCapacity() / 2;
- fLength = 0;
- return *this;
- }
- int32_t
- FormattedStringBuilder::insertCodePoint(int32_t index, UChar32 codePoint, Field field, UErrorCode &status) {
- int32_t count = U16_LENGTH(codePoint);
- int32_t position = prepareForInsert(index, count, status);
- if (U_FAILURE(status)) {
- return count;
- }
- if (count == 1) {
- getCharPtr()[position] = (char16_t) codePoint;
- getFieldPtr()[position] = field;
- } else {
- getCharPtr()[position] = U16_LEAD(codePoint);
- getCharPtr()[position + 1] = U16_TRAIL(codePoint);
- getFieldPtr()[position] = getFieldPtr()[position + 1] = field;
- }
- return count;
- }
- int32_t FormattedStringBuilder::insert(int32_t index, const UnicodeString &unistr, Field field,
- UErrorCode &status) {
- if (unistr.length() == 0) {
- // Nothing to insert.
- return 0;
- } else if (unistr.length() == 1) {
- // Fast path: insert using insertCodePoint.
- return insertCodePoint(index, unistr.charAt(0), field, status);
- } else {
- return insert(index, unistr, 0, unistr.length(), field, status);
- }
- }
- int32_t
- FormattedStringBuilder::insert(int32_t index, const UnicodeString &unistr, int32_t start, int32_t end,
- Field field, UErrorCode &status) {
- int32_t count = end - start;
- int32_t position = prepareForInsert(index, count, status);
- if (U_FAILURE(status)) {
- return count;
- }
- for (int32_t i = 0; i < count; i++) {
- getCharPtr()[position + i] = unistr.charAt(start + i);
- getFieldPtr()[position + i] = field;
- }
- return count;
- }
- int32_t
- FormattedStringBuilder::splice(int32_t startThis, int32_t endThis, const UnicodeString &unistr,
- int32_t startOther, int32_t endOther, Field field, UErrorCode& status) {
- int32_t thisLength = endThis - startThis;
- int32_t otherLength = endOther - startOther;
- int32_t count = otherLength - thisLength;
- if (U_FAILURE(status)) {
- return count;
- }
- int32_t position;
- if (count > 0) {
- // Overall, chars need to be added.
- position = prepareForInsert(startThis, count, status);
- } else {
- // Overall, chars need to be removed or kept the same.
- position = remove(startThis, -count);
- }
- if (U_FAILURE(status)) {
- return count;
- }
- for (int32_t i = 0; i < otherLength; i++) {
- getCharPtr()[position + i] = unistr.charAt(startOther + i);
- getFieldPtr()[position + i] = field;
- }
- return count;
- }
- int32_t FormattedStringBuilder::append(const FormattedStringBuilder &other, UErrorCode &status) {
- return insert(fLength, other, status);
- }
- int32_t
- FormattedStringBuilder::insert(int32_t index, const FormattedStringBuilder &other, UErrorCode &status) {
- if (U_FAILURE(status)) {
- return 0;
- }
- if (this == &other) {
- status = U_ILLEGAL_ARGUMENT_ERROR;
- return 0;
- }
- int32_t count = other.fLength;
- if (count == 0) {
- // Nothing to insert.
- return 0;
- }
- int32_t position = prepareForInsert(index, count, status);
- if (U_FAILURE(status)) {
- return count;
- }
- for (int32_t i = 0; i < count; i++) {
- getCharPtr()[position + i] = other.charAt(i);
- getFieldPtr()[position + i] = other.fieldAt(i);
- }
- return count;
- }
- void FormattedStringBuilder::writeTerminator(UErrorCode& status) {
- int32_t position = prepareForInsert(fLength, 1, status);
- if (U_FAILURE(status)) {
- return;
- }
- getCharPtr()[position] = 0;
- getFieldPtr()[position] = kUndefinedField;
- fLength--;
- }
- int32_t FormattedStringBuilder::prepareForInsert(int32_t index, int32_t count, UErrorCode &status) {
- U_ASSERT(index >= 0);
- U_ASSERT(index <= fLength);
- U_ASSERT(count >= 0);
- U_ASSERT(fZero >= 0);
- U_ASSERT(fLength >= 0);
- U_ASSERT(getCapacity() - fZero >= fLength);
- if (U_FAILURE(status)) {
- return count;
- }
- if (index == 0 && fZero - count >= 0) {
- // Append to start
- fZero -= count;
- fLength += count;
- return fZero;
- } else if (index == fLength && count <= getCapacity() - fZero - fLength) {
- // Append to end
- fLength += count;
- return fZero + fLength - count;
- } else {
- // Move chars around and/or allocate more space
- return prepareForInsertHelper(index, count, status);
- }
- }
- int32_t FormattedStringBuilder::prepareForInsertHelper(int32_t index, int32_t count, UErrorCode &status) {
- int32_t oldCapacity = getCapacity();
- int32_t oldZero = fZero;
- char16_t *oldChars = getCharPtr();
- Field *oldFields = getFieldPtr();
- int32_t newLength;
- if (uprv_add32_overflow(fLength, count, &newLength)) {
- status = U_INPUT_TOO_LONG_ERROR;
- return -1;
- }
- int32_t newZero;
- if (newLength > oldCapacity) {
- if (newLength > INT32_MAX / 2) {
- // We do not support more than 1G char16_t in this code because
- // dealing with >2G *bytes* can cause subtle bugs.
- status = U_INPUT_TOO_LONG_ERROR;
- return -1;
- }
- // Keep newCapacity also to at most 1G char16_t.
- int32_t newCapacity = newLength * 2;
- newZero = (newCapacity - newLength) / 2;
- // C++ note: malloc appears in two places: here and in the assignment operator.
- auto newChars = static_cast<char16_t *> (uprv_malloc(sizeof(char16_t) * static_cast<size_t>(newCapacity)));
- auto newFields = static_cast<Field *>(uprv_malloc(sizeof(Field) * static_cast<size_t>(newCapacity)));
- if (newChars == nullptr || newFields == nullptr) {
- uprv_free(newChars);
- uprv_free(newFields);
- status = U_MEMORY_ALLOCATION_ERROR;
- return -1;
- }
- // First copy the prefix and then the suffix, leaving room for the new chars that the
- // caller wants to insert.
- // C++ note: memcpy is OK because the src and dest do not overlap.
- uprv_memcpy2(newChars + newZero, oldChars + oldZero, sizeof(char16_t) * index);
- uprv_memcpy2(newChars + newZero + index + count,
- oldChars + oldZero + index,
- sizeof(char16_t) * (fLength - index));
- uprv_memcpy2(newFields + newZero, oldFields + oldZero, sizeof(Field) * index);
- uprv_memcpy2(newFields + newZero + index + count,
- oldFields + oldZero + index,
- sizeof(Field) * (fLength - index));
- if (fUsingHeap) {
- uprv_free(oldChars);
- uprv_free(oldFields);
- }
- fUsingHeap = true;
- fChars.heap.ptr = newChars;
- fChars.heap.capacity = newCapacity;
- fFields.heap.ptr = newFields;
- fFields.heap.capacity = newCapacity;
- } else {
- newZero = (oldCapacity - newLength) / 2;
- // C++ note: memmove is required because src and dest may overlap.
- // First copy the entire string to the location of the prefix, and then move the suffix
- // to make room for the new chars that the caller wants to insert.
- uprv_memmove2(oldChars + newZero, oldChars + oldZero, sizeof(char16_t) * fLength);
- uprv_memmove2(oldChars + newZero + index + count,
- oldChars + newZero + index,
- sizeof(char16_t) * (fLength - index));
- uprv_memmove2(oldFields + newZero, oldFields + oldZero, sizeof(Field) * fLength);
- uprv_memmove2(oldFields + newZero + index + count,
- oldFields + newZero + index,
- sizeof(Field) * (fLength - index));
- }
- fZero = newZero;
- fLength = newLength;
- return fZero + index;
- }
- int32_t FormattedStringBuilder::remove(int32_t index, int32_t count) {
- U_ASSERT(0 <= index);
- U_ASSERT(index <= fLength);
- U_ASSERT(count <= (fLength - index));
- U_ASSERT(index <= getCapacity() - fZero);
- int32_t position = index + fZero;
- // TODO: Reset the heap here? (If the string after removal can fit on stack?)
- uprv_memmove2(getCharPtr() + position,
- getCharPtr() + position + count,
- sizeof(char16_t) * (fLength - index - count));
- uprv_memmove2(getFieldPtr() + position,
- getFieldPtr() + position + count,
- sizeof(Field) * (fLength - index - count));
- fLength -= count;
- return position;
- }
- UnicodeString FormattedStringBuilder::toUnicodeString() const {
- return UnicodeString(getCharPtr() + fZero, fLength);
- }
- const UnicodeString FormattedStringBuilder::toTempUnicodeString() const {
- // Readonly-alias constructor:
- return UnicodeString(false, getCharPtr() + fZero, fLength);
- }
- UnicodeString FormattedStringBuilder::toDebugString() const {
- UnicodeString sb;
- sb.append(u"<FormattedStringBuilder [", -1);
- sb.append(toUnicodeString());
- sb.append(u"] [", -1);
- for (int i = 0; i < fLength; i++) {
- if (fieldAt(i) == kUndefinedField) {
- sb.append(u'n');
- } else if (fieldAt(i).getCategory() == UFIELD_CATEGORY_NUMBER) {
- char16_t c;
- switch (fieldAt(i).getField()) {
- case UNUM_SIGN_FIELD:
- c = u'-';
- break;
- case UNUM_INTEGER_FIELD:
- c = u'i';
- break;
- case UNUM_FRACTION_FIELD:
- c = u'f';
- break;
- case UNUM_EXPONENT_FIELD:
- c = u'e';
- break;
- case UNUM_EXPONENT_SIGN_FIELD:
- c = u'+';
- break;
- case UNUM_EXPONENT_SYMBOL_FIELD:
- c = u'E';
- break;
- case UNUM_DECIMAL_SEPARATOR_FIELD:
- c = u'.';
- break;
- case UNUM_GROUPING_SEPARATOR_FIELD:
- c = u',';
- break;
- case UNUM_PERCENT_FIELD:
- c = u'%';
- break;
- case UNUM_PERMILL_FIELD:
- c = u'‰';
- break;
- case UNUM_CURRENCY_FIELD:
- c = u'$';
- break;
- default:
- c = u'0' + fieldAt(i).getField();
- break;
- }
- sb.append(c);
- } else {
- sb.append(u'0' + fieldAt(i).getCategory());
- }
- }
- sb.append(u"]>", -1);
- return sb;
- }
- const char16_t *FormattedStringBuilder::chars() const {
- return getCharPtr() + fZero;
- }
- bool FormattedStringBuilder::contentEquals(const FormattedStringBuilder &other) const {
- if (fLength != other.fLength) {
- return false;
- }
- for (int32_t i = 0; i < fLength; i++) {
- if (charAt(i) != other.charAt(i) || fieldAt(i) != other.fieldAt(i)) {
- return false;
- }
- }
- return true;
- }
- bool FormattedStringBuilder::containsField(Field field) const {
- for (int32_t i = 0; i < fLength; i++) {
- if (field == fieldAt(i)) {
- return true;
- }
- }
- return false;
- }
- U_NAMESPACE_END
- #endif /* #if !UCONFIG_NO_FORMATTING */
|