formatted_string_builder.cpp 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470
  1. // © 2017 and later: Unicode, Inc. and others.
  2. // License & terms of use: http://www.unicode.org/copyright.html
  3. #include "unicode/utypes.h"
  4. #if !UCONFIG_NO_FORMATTING
  5. #include "formatted_string_builder.h"
  6. #include "putilimp.h"
  7. #include "unicode/ustring.h"
  8. #include "unicode/utf16.h"
  9. #include "unicode/unum.h" // for UNumberFormatFields literals
  10. namespace {
  11. // A version of uprv_memcpy that checks for length 0.
  12. // By default, uprv_memcpy requires a length of at least 1.
  13. inline void uprv_memcpy2(void* dest, const void* src, size_t len) {
  14. if (len > 0) {
  15. uprv_memcpy(dest, src, len);
  16. }
  17. }
  18. // A version of uprv_memmove that checks for length 0.
  19. // By default, uprv_memmove requires a length of at least 1.
  20. inline void uprv_memmove2(void* dest, const void* src, size_t len) {
  21. if (len > 0) {
  22. uprv_memmove(dest, src, len);
  23. }
  24. }
  25. } // namespace
  26. U_NAMESPACE_BEGIN
  27. FormattedStringBuilder::FormattedStringBuilder() {
  28. #if U_DEBUG
  29. // Initializing the memory to non-zero helps catch some bugs that involve
  30. // reading from an improperly terminated string.
  31. for (int32_t i=0; i<getCapacity(); i++) {
  32. getCharPtr()[i] = 1;
  33. }
  34. #endif
  35. }
  36. FormattedStringBuilder::~FormattedStringBuilder() {
  37. if (fUsingHeap) {
  38. uprv_free(fChars.heap.ptr);
  39. uprv_free(fFields.heap.ptr);
  40. }
  41. }
  42. FormattedStringBuilder::FormattedStringBuilder(const FormattedStringBuilder &other) {
  43. *this = other;
  44. }
  45. FormattedStringBuilder &FormattedStringBuilder::operator=(const FormattedStringBuilder &other) {
  46. // Check for self-assignment
  47. if (this == &other) {
  48. return *this;
  49. }
  50. // Continue with deallocation and copying
  51. if (fUsingHeap) {
  52. uprv_free(fChars.heap.ptr);
  53. uprv_free(fFields.heap.ptr);
  54. fUsingHeap = false;
  55. }
  56. int32_t capacity = other.getCapacity();
  57. if (capacity > DEFAULT_CAPACITY) {
  58. // FIXME: uprv_malloc
  59. // C++ note: malloc appears in two places: here and in prepareForInsertHelper.
  60. auto newChars = static_cast<char16_t *> (uprv_malloc(sizeof(char16_t) * capacity));
  61. auto newFields = static_cast<Field *>(uprv_malloc(sizeof(Field) * capacity));
  62. if (newChars == nullptr || newFields == nullptr) {
  63. // UErrorCode is not available; fail silently.
  64. uprv_free(newChars);
  65. uprv_free(newFields);
  66. *this = FormattedStringBuilder(); // can't fail
  67. return *this;
  68. }
  69. fUsingHeap = true;
  70. fChars.heap.capacity = capacity;
  71. fChars.heap.ptr = newChars;
  72. fFields.heap.capacity = capacity;
  73. fFields.heap.ptr = newFields;
  74. }
  75. uprv_memcpy2(getCharPtr(), other.getCharPtr(), sizeof(char16_t) * capacity);
  76. uprv_memcpy2(getFieldPtr(), other.getFieldPtr(), sizeof(Field) * capacity);
  77. fZero = other.fZero;
  78. fLength = other.fLength;
  79. return *this;
  80. }
  81. int32_t FormattedStringBuilder::length() const {
  82. return fLength;
  83. }
  84. int32_t FormattedStringBuilder::codePointCount() const {
  85. return u_countChar32(getCharPtr() + fZero, fLength);
  86. }
  87. UChar32 FormattedStringBuilder::getFirstCodePoint() const {
  88. if (fLength == 0) {
  89. return -1;
  90. }
  91. UChar32 cp;
  92. U16_GET(getCharPtr() + fZero, 0, 0, fLength, cp);
  93. return cp;
  94. }
  95. UChar32 FormattedStringBuilder::getLastCodePoint() const {
  96. if (fLength == 0) {
  97. return -1;
  98. }
  99. int32_t offset = fLength;
  100. U16_BACK_1(getCharPtr() + fZero, 0, offset);
  101. UChar32 cp;
  102. U16_GET(getCharPtr() + fZero, 0, offset, fLength, cp);
  103. return cp;
  104. }
  105. UChar32 FormattedStringBuilder::codePointAt(int32_t index) const {
  106. UChar32 cp;
  107. U16_GET(getCharPtr() + fZero, 0, index, fLength, cp);
  108. return cp;
  109. }
  110. UChar32 FormattedStringBuilder::codePointBefore(int32_t index) const {
  111. int32_t offset = index;
  112. U16_BACK_1(getCharPtr() + fZero, 0, offset);
  113. UChar32 cp;
  114. U16_GET(getCharPtr() + fZero, 0, offset, fLength, cp);
  115. return cp;
  116. }
  117. FormattedStringBuilder &FormattedStringBuilder::clear() {
  118. // TODO: Reset the heap here?
  119. fZero = getCapacity() / 2;
  120. fLength = 0;
  121. return *this;
  122. }
  123. int32_t
  124. FormattedStringBuilder::insertCodePoint(int32_t index, UChar32 codePoint, Field field, UErrorCode &status) {
  125. int32_t count = U16_LENGTH(codePoint);
  126. int32_t position = prepareForInsert(index, count, status);
  127. if (U_FAILURE(status)) {
  128. return count;
  129. }
  130. if (count == 1) {
  131. getCharPtr()[position] = (char16_t) codePoint;
  132. getFieldPtr()[position] = field;
  133. } else {
  134. getCharPtr()[position] = U16_LEAD(codePoint);
  135. getCharPtr()[position + 1] = U16_TRAIL(codePoint);
  136. getFieldPtr()[position] = getFieldPtr()[position + 1] = field;
  137. }
  138. return count;
  139. }
  140. int32_t FormattedStringBuilder::insert(int32_t index, const UnicodeString &unistr, Field field,
  141. UErrorCode &status) {
  142. if (unistr.length() == 0) {
  143. // Nothing to insert.
  144. return 0;
  145. } else if (unistr.length() == 1) {
  146. // Fast path: insert using insertCodePoint.
  147. return insertCodePoint(index, unistr.charAt(0), field, status);
  148. } else {
  149. return insert(index, unistr, 0, unistr.length(), field, status);
  150. }
  151. }
  152. int32_t
  153. FormattedStringBuilder::insert(int32_t index, const UnicodeString &unistr, int32_t start, int32_t end,
  154. Field field, UErrorCode &status) {
  155. int32_t count = end - start;
  156. int32_t position = prepareForInsert(index, count, status);
  157. if (U_FAILURE(status)) {
  158. return count;
  159. }
  160. for (int32_t i = 0; i < count; i++) {
  161. getCharPtr()[position + i] = unistr.charAt(start + i);
  162. getFieldPtr()[position + i] = field;
  163. }
  164. return count;
  165. }
  166. int32_t
  167. FormattedStringBuilder::splice(int32_t startThis, int32_t endThis, const UnicodeString &unistr,
  168. int32_t startOther, int32_t endOther, Field field, UErrorCode& status) {
  169. int32_t thisLength = endThis - startThis;
  170. int32_t otherLength = endOther - startOther;
  171. int32_t count = otherLength - thisLength;
  172. if (U_FAILURE(status)) {
  173. return count;
  174. }
  175. int32_t position;
  176. if (count > 0) {
  177. // Overall, chars need to be added.
  178. position = prepareForInsert(startThis, count, status);
  179. } else {
  180. // Overall, chars need to be removed or kept the same.
  181. position = remove(startThis, -count);
  182. }
  183. if (U_FAILURE(status)) {
  184. return count;
  185. }
  186. for (int32_t i = 0; i < otherLength; i++) {
  187. getCharPtr()[position + i] = unistr.charAt(startOther + i);
  188. getFieldPtr()[position + i] = field;
  189. }
  190. return count;
  191. }
  192. int32_t FormattedStringBuilder::append(const FormattedStringBuilder &other, UErrorCode &status) {
  193. return insert(fLength, other, status);
  194. }
  195. int32_t
  196. FormattedStringBuilder::insert(int32_t index, const FormattedStringBuilder &other, UErrorCode &status) {
  197. if (U_FAILURE(status)) {
  198. return 0;
  199. }
  200. if (this == &other) {
  201. status = U_ILLEGAL_ARGUMENT_ERROR;
  202. return 0;
  203. }
  204. int32_t count = other.fLength;
  205. if (count == 0) {
  206. // Nothing to insert.
  207. return 0;
  208. }
  209. int32_t position = prepareForInsert(index, count, status);
  210. if (U_FAILURE(status)) {
  211. return count;
  212. }
  213. for (int32_t i = 0; i < count; i++) {
  214. getCharPtr()[position + i] = other.charAt(i);
  215. getFieldPtr()[position + i] = other.fieldAt(i);
  216. }
  217. return count;
  218. }
  219. void FormattedStringBuilder::writeTerminator(UErrorCode& status) {
  220. int32_t position = prepareForInsert(fLength, 1, status);
  221. if (U_FAILURE(status)) {
  222. return;
  223. }
  224. getCharPtr()[position] = 0;
  225. getFieldPtr()[position] = kUndefinedField;
  226. fLength--;
  227. }
  228. int32_t FormattedStringBuilder::prepareForInsert(int32_t index, int32_t count, UErrorCode &status) {
  229. U_ASSERT(index >= 0);
  230. U_ASSERT(index <= fLength);
  231. U_ASSERT(count >= 0);
  232. U_ASSERT(fZero >= 0);
  233. U_ASSERT(fLength >= 0);
  234. U_ASSERT(getCapacity() - fZero >= fLength);
  235. if (U_FAILURE(status)) {
  236. return count;
  237. }
  238. if (index == 0 && fZero - count >= 0) {
  239. // Append to start
  240. fZero -= count;
  241. fLength += count;
  242. return fZero;
  243. } else if (index == fLength && count <= getCapacity() - fZero - fLength) {
  244. // Append to end
  245. fLength += count;
  246. return fZero + fLength - count;
  247. } else {
  248. // Move chars around and/or allocate more space
  249. return prepareForInsertHelper(index, count, status);
  250. }
  251. }
  252. int32_t FormattedStringBuilder::prepareForInsertHelper(int32_t index, int32_t count, UErrorCode &status) {
  253. int32_t oldCapacity = getCapacity();
  254. int32_t oldZero = fZero;
  255. char16_t *oldChars = getCharPtr();
  256. Field *oldFields = getFieldPtr();
  257. int32_t newLength;
  258. if (uprv_add32_overflow(fLength, count, &newLength)) {
  259. status = U_INPUT_TOO_LONG_ERROR;
  260. return -1;
  261. }
  262. int32_t newZero;
  263. if (newLength > oldCapacity) {
  264. if (newLength > INT32_MAX / 2) {
  265. // We do not support more than 1G char16_t in this code because
  266. // dealing with >2G *bytes* can cause subtle bugs.
  267. status = U_INPUT_TOO_LONG_ERROR;
  268. return -1;
  269. }
  270. // Keep newCapacity also to at most 1G char16_t.
  271. int32_t newCapacity = newLength * 2;
  272. newZero = (newCapacity - newLength) / 2;
  273. // C++ note: malloc appears in two places: here and in the assignment operator.
  274. auto newChars = static_cast<char16_t *> (uprv_malloc(sizeof(char16_t) * static_cast<size_t>(newCapacity)));
  275. auto newFields = static_cast<Field *>(uprv_malloc(sizeof(Field) * static_cast<size_t>(newCapacity)));
  276. if (newChars == nullptr || newFields == nullptr) {
  277. uprv_free(newChars);
  278. uprv_free(newFields);
  279. status = U_MEMORY_ALLOCATION_ERROR;
  280. return -1;
  281. }
  282. // First copy the prefix and then the suffix, leaving room for the new chars that the
  283. // caller wants to insert.
  284. // C++ note: memcpy is OK because the src and dest do not overlap.
  285. uprv_memcpy2(newChars + newZero, oldChars + oldZero, sizeof(char16_t) * index);
  286. uprv_memcpy2(newChars + newZero + index + count,
  287. oldChars + oldZero + index,
  288. sizeof(char16_t) * (fLength - index));
  289. uprv_memcpy2(newFields + newZero, oldFields + oldZero, sizeof(Field) * index);
  290. uprv_memcpy2(newFields + newZero + index + count,
  291. oldFields + oldZero + index,
  292. sizeof(Field) * (fLength - index));
  293. if (fUsingHeap) {
  294. uprv_free(oldChars);
  295. uprv_free(oldFields);
  296. }
  297. fUsingHeap = true;
  298. fChars.heap.ptr = newChars;
  299. fChars.heap.capacity = newCapacity;
  300. fFields.heap.ptr = newFields;
  301. fFields.heap.capacity = newCapacity;
  302. } else {
  303. newZero = (oldCapacity - newLength) / 2;
  304. // C++ note: memmove is required because src and dest may overlap.
  305. // First copy the entire string to the location of the prefix, and then move the suffix
  306. // to make room for the new chars that the caller wants to insert.
  307. uprv_memmove2(oldChars + newZero, oldChars + oldZero, sizeof(char16_t) * fLength);
  308. uprv_memmove2(oldChars + newZero + index + count,
  309. oldChars + newZero + index,
  310. sizeof(char16_t) * (fLength - index));
  311. uprv_memmove2(oldFields + newZero, oldFields + oldZero, sizeof(Field) * fLength);
  312. uprv_memmove2(oldFields + newZero + index + count,
  313. oldFields + newZero + index,
  314. sizeof(Field) * (fLength - index));
  315. }
  316. fZero = newZero;
  317. fLength = newLength;
  318. return fZero + index;
  319. }
  320. int32_t FormattedStringBuilder::remove(int32_t index, int32_t count) {
  321. U_ASSERT(0 <= index);
  322. U_ASSERT(index <= fLength);
  323. U_ASSERT(count <= (fLength - index));
  324. U_ASSERT(index <= getCapacity() - fZero);
  325. int32_t position = index + fZero;
  326. // TODO: Reset the heap here? (If the string after removal can fit on stack?)
  327. uprv_memmove2(getCharPtr() + position,
  328. getCharPtr() + position + count,
  329. sizeof(char16_t) * (fLength - index - count));
  330. uprv_memmove2(getFieldPtr() + position,
  331. getFieldPtr() + position + count,
  332. sizeof(Field) * (fLength - index - count));
  333. fLength -= count;
  334. return position;
  335. }
  336. UnicodeString FormattedStringBuilder::toUnicodeString() const {
  337. return UnicodeString(getCharPtr() + fZero, fLength);
  338. }
  339. const UnicodeString FormattedStringBuilder::toTempUnicodeString() const {
  340. // Readonly-alias constructor:
  341. return UnicodeString(false, getCharPtr() + fZero, fLength);
  342. }
  343. UnicodeString FormattedStringBuilder::toDebugString() const {
  344. UnicodeString sb;
  345. sb.append(u"<FormattedStringBuilder [", -1);
  346. sb.append(toUnicodeString());
  347. sb.append(u"] [", -1);
  348. for (int i = 0; i < fLength; i++) {
  349. if (fieldAt(i) == kUndefinedField) {
  350. sb.append(u'n');
  351. } else if (fieldAt(i).getCategory() == UFIELD_CATEGORY_NUMBER) {
  352. char16_t c;
  353. switch (fieldAt(i).getField()) {
  354. case UNUM_SIGN_FIELD:
  355. c = u'-';
  356. break;
  357. case UNUM_INTEGER_FIELD:
  358. c = u'i';
  359. break;
  360. case UNUM_FRACTION_FIELD:
  361. c = u'f';
  362. break;
  363. case UNUM_EXPONENT_FIELD:
  364. c = u'e';
  365. break;
  366. case UNUM_EXPONENT_SIGN_FIELD:
  367. c = u'+';
  368. break;
  369. case UNUM_EXPONENT_SYMBOL_FIELD:
  370. c = u'E';
  371. break;
  372. case UNUM_DECIMAL_SEPARATOR_FIELD:
  373. c = u'.';
  374. break;
  375. case UNUM_GROUPING_SEPARATOR_FIELD:
  376. c = u',';
  377. break;
  378. case UNUM_PERCENT_FIELD:
  379. c = u'%';
  380. break;
  381. case UNUM_PERMILL_FIELD:
  382. c = u'‰';
  383. break;
  384. case UNUM_CURRENCY_FIELD:
  385. c = u'$';
  386. break;
  387. default:
  388. c = u'0' + fieldAt(i).getField();
  389. break;
  390. }
  391. sb.append(c);
  392. } else {
  393. sb.append(u'0' + fieldAt(i).getCategory());
  394. }
  395. }
  396. sb.append(u"]>", -1);
  397. return sb;
  398. }
  399. const char16_t *FormattedStringBuilder::chars() const {
  400. return getCharPtr() + fZero;
  401. }
  402. bool FormattedStringBuilder::contentEquals(const FormattedStringBuilder &other) const {
  403. if (fLength != other.fLength) {
  404. return false;
  405. }
  406. for (int32_t i = 0; i < fLength; i++) {
  407. if (charAt(i) != other.charAt(i) || fieldAt(i) != other.fieldAt(i)) {
  408. return false;
  409. }
  410. }
  411. return true;
  412. }
  413. bool FormattedStringBuilder::containsField(Field field) const {
  414. for (int32_t i = 0; i < fLength; i++) {
  415. if (field == fieldAt(i)) {
  416. return true;
  417. }
  418. }
  419. return false;
  420. }
  421. U_NAMESPACE_END
  422. #endif /* #if !UCONFIG_NO_FORMATTING */