formattedval_sbimpl.cpp 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350
  1. // © 2018 and later: Unicode, Inc. and others.
  2. // License & terms of use: http://www.unicode.org/copyright.html
  3. #include "unicode/utypes.h"
  4. #if !UCONFIG_NO_FORMATTING
  5. // This file contains one implementation of FormattedValue.
  6. // Other independent implementations should go into their own cpp file for
  7. // better dependency modularization.
  8. #include "unicode/ustring.h"
  9. #include "formattedval_impl.h"
  10. #include "number_types.h"
  11. #include "formatted_string_builder.h"
  12. #include "number_utils.h"
  13. #include "static_unicode_sets.h"
  14. #include "unicode/listformatter.h"
  15. U_NAMESPACE_BEGIN
  16. typedef FormattedStringBuilder::Field Field;
  17. FormattedValueStringBuilderImpl::FormattedValueStringBuilderImpl(Field numericField)
  18. : fNumericField(numericField) {
  19. }
  20. FormattedValueStringBuilderImpl::~FormattedValueStringBuilderImpl() {
  21. }
  22. UnicodeString FormattedValueStringBuilderImpl::toString(UErrorCode&) const {
  23. return fString.toUnicodeString();
  24. }
  25. UnicodeString FormattedValueStringBuilderImpl::toTempString(UErrorCode&) const {
  26. return fString.toTempUnicodeString();
  27. }
  28. Appendable& FormattedValueStringBuilderImpl::appendTo(Appendable& appendable, UErrorCode&) const {
  29. appendable.appendString(fString.chars(), fString.length());
  30. return appendable;
  31. }
  32. UBool FormattedValueStringBuilderImpl::nextPosition(ConstrainedFieldPosition& cfpos, UErrorCode& status) const {
  33. // NOTE: MSVC sometimes complains when implicitly converting between bool and UBool
  34. return nextPositionImpl(cfpos, fNumericField, status) ? true : false;
  35. }
  36. UBool FormattedValueStringBuilderImpl::nextFieldPosition(FieldPosition& fp, UErrorCode& status) const {
  37. int32_t rawField = fp.getField();
  38. if (rawField == FieldPosition::DONT_CARE) {
  39. return false;
  40. }
  41. if (rawField < 0 || rawField >= UNUM_FIELD_COUNT) {
  42. status = U_ILLEGAL_ARGUMENT_ERROR;
  43. return false;
  44. }
  45. ConstrainedFieldPosition cfpos;
  46. cfpos.constrainField(UFIELD_CATEGORY_NUMBER, rawField);
  47. cfpos.setState(UFIELD_CATEGORY_NUMBER, rawField, fp.getBeginIndex(), fp.getEndIndex());
  48. if (nextPositionImpl(cfpos, kUndefinedField, status)) {
  49. fp.setBeginIndex(cfpos.getStart());
  50. fp.setEndIndex(cfpos.getLimit());
  51. return true;
  52. }
  53. // Special case: fraction should start after integer if fraction is not present
  54. if (rawField == UNUM_FRACTION_FIELD && fp.getEndIndex() == 0) {
  55. bool inside = false;
  56. int32_t i = fString.fZero;
  57. for (; i < fString.fZero + fString.fLength; i++) {
  58. if (isIntOrGroup(fString.getFieldPtr()[i]) || fString.getFieldPtr()[i] == Field(UFIELD_CATEGORY_NUMBER, UNUM_DECIMAL_SEPARATOR_FIELD)) {
  59. inside = true;
  60. } else if (inside) {
  61. break;
  62. }
  63. }
  64. fp.setBeginIndex(i - fString.fZero);
  65. fp.setEndIndex(i - fString.fZero);
  66. }
  67. return false;
  68. }
  69. void FormattedValueStringBuilderImpl::getAllFieldPositions(FieldPositionIteratorHandler& fpih,
  70. UErrorCode& status) const {
  71. ConstrainedFieldPosition cfpos;
  72. while (nextPositionImpl(cfpos, kUndefinedField, status)) {
  73. fpih.addAttribute(cfpos.getField(), cfpos.getStart(), cfpos.getLimit());
  74. }
  75. }
  76. void FormattedValueStringBuilderImpl::resetString() {
  77. fString.clear();
  78. spanIndicesCount = 0;
  79. }
  80. // Signal the end of the string using a field that doesn't exist and that is
  81. // different from kUndefinedField, which is used for "null field".
  82. static constexpr Field kEndField = Field(0xf, 0xf);
  83. bool FormattedValueStringBuilderImpl::nextPositionImpl(ConstrainedFieldPosition& cfpos, Field numericField, UErrorCode& /*status*/) const {
  84. int32_t fieldStart = -1;
  85. Field currField = kUndefinedField;
  86. bool prevIsSpan = false;
  87. int32_t nextSpanStart = -1;
  88. if (spanIndicesCount > 0) {
  89. int64_t si = cfpos.getInt64IterationContext();
  90. U_ASSERT(si <= spanIndicesCount);
  91. if (si < spanIndicesCount) {
  92. nextSpanStart = spanIndices[si].start;
  93. }
  94. if (si > 0) {
  95. prevIsSpan = cfpos.getCategory() == spanIndices[si-1].category
  96. && cfpos.getField() == spanIndices[si-1].spanValue;
  97. }
  98. }
  99. bool prevIsNumeric = false;
  100. if (numericField != kUndefinedField) {
  101. prevIsNumeric = cfpos.getCategory() == numericField.getCategory()
  102. && cfpos.getField() == numericField.getField();
  103. }
  104. bool prevIsInteger = cfpos.getCategory() == UFIELD_CATEGORY_NUMBER
  105. && cfpos.getField() == UNUM_INTEGER_FIELD;
  106. for (int32_t i = fString.fZero + cfpos.getLimit(); i <= fString.fZero + fString.fLength; i++) {
  107. Field _field = (i < fString.fZero + fString.fLength) ? fString.getFieldPtr()[i] : kEndField;
  108. // Case 1: currently scanning a field.
  109. if (currField != kUndefinedField) {
  110. if (currField != _field) {
  111. int32_t end = i - fString.fZero;
  112. // Grouping separators can be whitespace; don't throw them out!
  113. if (isTrimmable(currField)) {
  114. end = trimBack(i - fString.fZero);
  115. }
  116. if (end <= fieldStart) {
  117. // Entire field position is ignorable; skip.
  118. fieldStart = -1;
  119. currField = kUndefinedField;
  120. i--; // look at this index again
  121. continue;
  122. }
  123. int32_t start = fieldStart;
  124. if (isTrimmable(currField)) {
  125. start = trimFront(start);
  126. }
  127. cfpos.setState(currField.getCategory(), currField.getField(), start, end);
  128. return true;
  129. }
  130. continue;
  131. }
  132. // Special case: emit normalField if we are pointing at the end of spanField.
  133. if (i > fString.fZero && prevIsSpan) {
  134. int64_t si = cfpos.getInt64IterationContext() - 1;
  135. U_ASSERT(si >= 0);
  136. int32_t previ = i - spanIndices[si].length;
  137. U_ASSERT(previ >= fString.fZero);
  138. Field prevField = fString.getFieldPtr()[previ];
  139. if (prevField == Field(UFIELD_CATEGORY_LIST, ULISTFMT_ELEMENT_FIELD)) {
  140. // Special handling for ULISTFMT_ELEMENT_FIELD
  141. if (cfpos.matchesField(UFIELD_CATEGORY_LIST, ULISTFMT_ELEMENT_FIELD)) {
  142. fieldStart = i - fString.fZero - spanIndices[si].length;
  143. int32_t end = fieldStart + spanIndices[si].length;
  144. cfpos.setState(
  145. UFIELD_CATEGORY_LIST,
  146. ULISTFMT_ELEMENT_FIELD,
  147. fieldStart,
  148. end);
  149. return true;
  150. } else {
  151. prevIsSpan = false;
  152. }
  153. } else {
  154. // Re-wind, since there may be multiple fields in the span.
  155. i = previ;
  156. _field = prevField;
  157. }
  158. }
  159. // Special case: coalesce the INTEGER if we are pointing at the end of the INTEGER.
  160. if (cfpos.matchesField(UFIELD_CATEGORY_NUMBER, UNUM_INTEGER_FIELD)
  161. && i > fString.fZero
  162. && !prevIsInteger
  163. && !prevIsNumeric
  164. && isIntOrGroup(fString.getFieldPtr()[i - 1])
  165. && !isIntOrGroup(_field)) {
  166. int j = i - 1;
  167. for (; j >= fString.fZero && isIntOrGroup(fString.getFieldPtr()[j]); j--) {}
  168. cfpos.setState(
  169. UFIELD_CATEGORY_NUMBER,
  170. UNUM_INTEGER_FIELD,
  171. j - fString.fZero + 1,
  172. i - fString.fZero);
  173. return true;
  174. }
  175. // Special case: coalesce NUMERIC if we are pointing at the end of the NUMERIC.
  176. if (numericField != kUndefinedField
  177. && cfpos.matchesField(numericField.getCategory(), numericField.getField())
  178. && i > fString.fZero
  179. && !prevIsNumeric
  180. && fString.getFieldPtr()[i - 1].isNumeric()
  181. && !_field.isNumeric()) {
  182. // Re-wind to the beginning of the field and then emit it
  183. int32_t j = i - 1;
  184. for (; j >= fString.fZero && fString.getFieldPtr()[j].isNumeric(); j--) {}
  185. cfpos.setState(
  186. numericField.getCategory(),
  187. numericField.getField(),
  188. j - fString.fZero + 1,
  189. i - fString.fZero);
  190. return true;
  191. }
  192. // Check for span field
  193. if (!prevIsSpan && (
  194. _field == Field(UFIELD_CATEGORY_LIST, ULISTFMT_ELEMENT_FIELD) ||
  195. i - fString.fZero == nextSpanStart)) {
  196. int64_t si = cfpos.getInt64IterationContext();
  197. if (si >= spanIndicesCount) {
  198. break;
  199. }
  200. UFieldCategory spanCategory = spanIndices[si].category;
  201. int32_t spanValue = spanIndices[si].spanValue;
  202. int32_t length = spanIndices[si].length;
  203. cfpos.setInt64IterationContext(si + 1);
  204. if (si + 1 < spanIndicesCount) {
  205. nextSpanStart = spanIndices[si + 1].start;
  206. }
  207. if (length == 0) {
  208. // ICU-21871: Don't return fields on empty spans
  209. i--;
  210. continue;
  211. }
  212. if (cfpos.matchesField(spanCategory, spanValue)) {
  213. fieldStart = i - fString.fZero;
  214. int32_t end = fieldStart + length;
  215. cfpos.setState(
  216. spanCategory,
  217. spanValue,
  218. fieldStart,
  219. end);
  220. return true;
  221. } else if (_field == Field(UFIELD_CATEGORY_LIST, ULISTFMT_ELEMENT_FIELD)) {
  222. // Special handling for ULISTFMT_ELEMENT_FIELD
  223. if (cfpos.matchesField(UFIELD_CATEGORY_LIST, ULISTFMT_ELEMENT_FIELD)) {
  224. fieldStart = i - fString.fZero;
  225. int32_t end = fieldStart + length;
  226. cfpos.setState(
  227. UFIELD_CATEGORY_LIST,
  228. ULISTFMT_ELEMENT_FIELD,
  229. fieldStart,
  230. end);
  231. return true;
  232. } else {
  233. // Failed to match; jump ahead
  234. i += length - 1;
  235. // goto loopend
  236. }
  237. }
  238. }
  239. // Special case: skip over INTEGER; will be coalesced later.
  240. else if (_field == Field(UFIELD_CATEGORY_NUMBER, UNUM_INTEGER_FIELD)) {
  241. _field = kUndefinedField;
  242. }
  243. // No field starting at this position.
  244. else if (_field.isUndefined() || _field == kEndField) {
  245. // goto loopend
  246. }
  247. // No SpanField
  248. else if (cfpos.matchesField(_field.getCategory(), _field.getField())) {
  249. fieldStart = i - fString.fZero;
  250. currField = _field;
  251. }
  252. // loopend:
  253. prevIsSpan = false;
  254. prevIsNumeric = false;
  255. prevIsInteger = false;
  256. }
  257. U_ASSERT(currField == kUndefinedField);
  258. // Always set the position to the end so that we don't revisit previous sections
  259. cfpos.setState(
  260. cfpos.getCategory(),
  261. cfpos.getField(),
  262. fString.fLength,
  263. fString.fLength);
  264. return false;
  265. }
  266. void FormattedValueStringBuilderImpl::appendSpanInfo(UFieldCategory category, int32_t spanValue, int32_t start, int32_t length, UErrorCode& status) {
  267. if (U_FAILURE(status)) { return; }
  268. U_ASSERT(spanIndices.getCapacity() >= spanIndicesCount);
  269. if (spanIndices.getCapacity() == spanIndicesCount) {
  270. if (!spanIndices.resize(spanIndicesCount * 2, spanIndicesCount)) {
  271. status = U_MEMORY_ALLOCATION_ERROR;
  272. return;
  273. }
  274. }
  275. spanIndices[spanIndicesCount] = {category, spanValue, start, length};
  276. spanIndicesCount++;
  277. }
  278. void FormattedValueStringBuilderImpl::prependSpanInfo(UFieldCategory category, int32_t spanValue, int32_t start, int32_t length, UErrorCode& status) {
  279. if (U_FAILURE(status)) { return; }
  280. U_ASSERT(spanIndices.getCapacity() >= spanIndicesCount);
  281. if (spanIndices.getCapacity() == spanIndicesCount) {
  282. if (!spanIndices.resize(spanIndicesCount * 2, spanIndicesCount)) {
  283. status = U_MEMORY_ALLOCATION_ERROR;
  284. return;
  285. }
  286. }
  287. for (int32_t i = spanIndicesCount - 1; i >= 0; i--) {
  288. spanIndices[i+1] = spanIndices[i];
  289. }
  290. spanIndices[0] = {category, spanValue, start, length};
  291. spanIndicesCount++;
  292. }
  293. bool FormattedValueStringBuilderImpl::isIntOrGroup(Field field) {
  294. return field == Field(UFIELD_CATEGORY_NUMBER, UNUM_INTEGER_FIELD)
  295. || field == Field(UFIELD_CATEGORY_NUMBER, UNUM_GROUPING_SEPARATOR_FIELD);
  296. }
  297. bool FormattedValueStringBuilderImpl::isTrimmable(Field field) {
  298. return field != Field(UFIELD_CATEGORY_NUMBER, UNUM_GROUPING_SEPARATOR_FIELD)
  299. && field.getCategory() != UFIELD_CATEGORY_LIST;
  300. }
  301. int32_t FormattedValueStringBuilderImpl::trimBack(int32_t limit) const {
  302. return unisets::get(unisets::DEFAULT_IGNORABLES)->spanBack(
  303. fString.getCharPtr() + fString.fZero,
  304. limit,
  305. USET_SPAN_CONTAINED);
  306. }
  307. int32_t FormattedValueStringBuilderImpl::trimFront(int32_t start) const {
  308. return start + unisets::get(unisets::DEFAULT_IGNORABLES)->span(
  309. fString.getCharPtr() + fString.fZero + start,
  310. fString.fLength - start,
  311. USET_SPAN_CONTAINED);
  312. }
  313. U_NAMESPACE_END
  314. #endif /* #if !UCONFIG_NO_FORMATTING */