simpleformatter.h 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339
  1. // © 2016 and later: Unicode, Inc. and others.
  2. // License & terms of use: http://www.unicode.org/copyright.html
  3. /*
  4. ******************************************************************************
  5. * Copyright (C) 2014-2016, International Business Machines
  6. * Corporation and others. All Rights Reserved.
  7. ******************************************************************************
  8. * simpleformatter.h
  9. */
  10. #ifndef __SIMPLEFORMATTER_H__
  11. #define __SIMPLEFORMATTER_H__
  12. /**
  13. * \file
  14. * \brief C++ API: Simple formatter, minimal subset of MessageFormat.
  15. */
  16. #include "unicode/utypes.h"
  17. #if U_SHOW_CPLUSPLUS_API
  18. #include "unicode/unistr.h"
  19. U_NAMESPACE_BEGIN
  20. // Forward declaration:
  21. namespace number::impl {
  22. class SimpleModifier;
  23. }
  24. /**
  25. * Formats simple patterns like "{1} was born in {0}".
  26. * Minimal subset of MessageFormat; fast, simple, minimal dependencies.
  27. * Supports only numbered arguments with no type nor style parameters,
  28. * and formats only string values.
  29. * Quoting via ASCII apostrophe compatible with ICU MessageFormat default behavior.
  30. *
  31. * Factory methods set error codes for syntax errors
  32. * and for too few or too many arguments/placeholders.
  33. *
  34. * SimpleFormatter objects are thread-safe except for assignment and applying new patterns.
  35. *
  36. * Example:
  37. * <pre>
  38. * UErrorCode errorCode = U_ZERO_ERROR;
  39. * SimpleFormatter fmt("{1} '{born}' in {0}", errorCode);
  40. * UnicodeString result;
  41. *
  42. * // Output: "paul {born} in england"
  43. * fmt.format("england", "paul", result, errorCode);
  44. * </pre>
  45. *
  46. * This class is not intended for public subclassing.
  47. *
  48. * @see MessageFormat
  49. * @see UMessagePatternApostropheMode
  50. * @stable ICU 57
  51. */
  52. class U_COMMON_API SimpleFormatter final : public UMemory {
  53. public:
  54. /**
  55. * Default constructor.
  56. * @stable ICU 57
  57. */
  58. SimpleFormatter() : compiledPattern((char16_t)0) {}
  59. /**
  60. * Constructs a formatter from the pattern string.
  61. *
  62. * @param pattern The pattern string.
  63. * @param errorCode ICU error code in/out parameter.
  64. * Must fulfill U_SUCCESS before the function call.
  65. * Set to U_ILLEGAL_ARGUMENT_ERROR for bad argument syntax.
  66. * @stable ICU 57
  67. */
  68. SimpleFormatter(const UnicodeString& pattern, UErrorCode &errorCode) {
  69. applyPattern(pattern, errorCode);
  70. }
  71. /**
  72. * Constructs a formatter from the pattern string.
  73. * The number of arguments checked against the given limits is the
  74. * highest argument number plus one, not the number of occurrences of arguments.
  75. *
  76. * @param pattern The pattern string.
  77. * @param min The pattern must have at least this many arguments.
  78. * @param max The pattern must have at most this many arguments.
  79. * @param errorCode ICU error code in/out parameter.
  80. * Must fulfill U_SUCCESS before the function call.
  81. * Set to U_ILLEGAL_ARGUMENT_ERROR for bad argument syntax and
  82. * too few or too many arguments.
  83. * @stable ICU 57
  84. */
  85. SimpleFormatter(const UnicodeString& pattern, int32_t min, int32_t max,
  86. UErrorCode &errorCode) {
  87. applyPatternMinMaxArguments(pattern, min, max, errorCode);
  88. }
  89. /**
  90. * Copy constructor.
  91. * @stable ICU 57
  92. */
  93. SimpleFormatter(const SimpleFormatter& other)
  94. : compiledPattern(other.compiledPattern) {}
  95. /**
  96. * Assignment operator.
  97. * @stable ICU 57
  98. */
  99. SimpleFormatter &operator=(const SimpleFormatter& other);
  100. /**
  101. * Destructor.
  102. * @stable ICU 57
  103. */
  104. ~SimpleFormatter();
  105. /**
  106. * Changes this object according to the new pattern.
  107. *
  108. * @param pattern The pattern string.
  109. * @param errorCode ICU error code in/out parameter.
  110. * Must fulfill U_SUCCESS before the function call.
  111. * Set to U_ILLEGAL_ARGUMENT_ERROR for bad argument syntax.
  112. * @return true if U_SUCCESS(errorCode).
  113. * @stable ICU 57
  114. */
  115. UBool applyPattern(const UnicodeString &pattern, UErrorCode &errorCode) {
  116. return applyPatternMinMaxArguments(pattern, 0, INT32_MAX, errorCode);
  117. }
  118. /**
  119. * Changes this object according to the new pattern.
  120. * The number of arguments checked against the given limits is the
  121. * highest argument number plus one, not the number of occurrences of arguments.
  122. *
  123. * @param pattern The pattern string.
  124. * @param min The pattern must have at least this many arguments.
  125. * @param max The pattern must have at most this many arguments.
  126. * @param errorCode ICU error code in/out parameter.
  127. * Must fulfill U_SUCCESS before the function call.
  128. * Set to U_ILLEGAL_ARGUMENT_ERROR for bad argument syntax and
  129. * too few or too many arguments.
  130. * @return true if U_SUCCESS(errorCode).
  131. * @stable ICU 57
  132. */
  133. UBool applyPatternMinMaxArguments(const UnicodeString &pattern,
  134. int32_t min, int32_t max, UErrorCode &errorCode);
  135. /**
  136. * @return The max argument number + 1.
  137. * @stable ICU 57
  138. */
  139. int32_t getArgumentLimit() const {
  140. return getArgumentLimit(compiledPattern.getBuffer(), compiledPattern.length());
  141. }
  142. /**
  143. * Formats the given value, appending to the appendTo builder.
  144. * The argument value must not be the same object as appendTo.
  145. * getArgumentLimit() must be at most 1.
  146. *
  147. * @param value0 Value for argument {0}.
  148. * @param appendTo Gets the formatted pattern and value appended.
  149. * @param errorCode ICU error code in/out parameter.
  150. * Must fulfill U_SUCCESS before the function call.
  151. * @return appendTo
  152. * @stable ICU 57
  153. */
  154. UnicodeString &format(
  155. const UnicodeString &value0,
  156. UnicodeString &appendTo, UErrorCode &errorCode) const;
  157. /**
  158. * Formats the given values, appending to the appendTo builder.
  159. * An argument value must not be the same object as appendTo.
  160. * getArgumentLimit() must be at most 2.
  161. *
  162. * @param value0 Value for argument {0}.
  163. * @param value1 Value for argument {1}.
  164. * @param appendTo Gets the formatted pattern and values appended.
  165. * @param errorCode ICU error code in/out parameter.
  166. * Must fulfill U_SUCCESS before the function call.
  167. * @return appendTo
  168. * @stable ICU 57
  169. */
  170. UnicodeString &format(
  171. const UnicodeString &value0,
  172. const UnicodeString &value1,
  173. UnicodeString &appendTo, UErrorCode &errorCode) const;
  174. /**
  175. * Formats the given values, appending to the appendTo builder.
  176. * An argument value must not be the same object as appendTo.
  177. * getArgumentLimit() must be at most 3.
  178. *
  179. * @param value0 Value for argument {0}.
  180. * @param value1 Value for argument {1}.
  181. * @param value2 Value for argument {2}.
  182. * @param appendTo Gets the formatted pattern and values appended.
  183. * @param errorCode ICU error code in/out parameter.
  184. * Must fulfill U_SUCCESS before the function call.
  185. * @return appendTo
  186. * @stable ICU 57
  187. */
  188. UnicodeString &format(
  189. const UnicodeString &value0,
  190. const UnicodeString &value1,
  191. const UnicodeString &value2,
  192. UnicodeString &appendTo, UErrorCode &errorCode) const;
  193. /**
  194. * Formats the given values, appending to the appendTo string.
  195. *
  196. * @param values The argument values.
  197. * An argument value must not be the same object as appendTo.
  198. * Can be nullptr if valuesLength==getArgumentLimit()==0.
  199. * @param valuesLength The length of the values array.
  200. * Must be at least getArgumentLimit().
  201. * @param appendTo Gets the formatted pattern and values appended.
  202. * @param offsets offsets[i] receives the offset of where
  203. * values[i] replaced pattern argument {i}.
  204. * Can be shorter or longer than values. Can be nullptr if offsetsLength==0.
  205. * If there is no {i} in the pattern, then offsets[i] is set to -1.
  206. * @param offsetsLength The length of the offsets array.
  207. * @param errorCode ICU error code in/out parameter.
  208. * Must fulfill U_SUCCESS before the function call.
  209. * @return appendTo
  210. * @stable ICU 57
  211. */
  212. UnicodeString &formatAndAppend(
  213. const UnicodeString *const *values, int32_t valuesLength,
  214. UnicodeString &appendTo,
  215. int32_t *offsets, int32_t offsetsLength, UErrorCode &errorCode) const;
  216. /**
  217. * Formats the given values, replacing the contents of the result string.
  218. * May optimize by actually appending to the result if it is the same object
  219. * as the value corresponding to the initial argument in the pattern.
  220. *
  221. * @param values The argument values.
  222. * An argument value may be the same object as result.
  223. * Can be nullptr if valuesLength==getArgumentLimit()==0.
  224. * @param valuesLength The length of the values array.
  225. * Must be at least getArgumentLimit().
  226. * @param result Gets its contents replaced by the formatted pattern and values.
  227. * @param offsets offsets[i] receives the offset of where
  228. * values[i] replaced pattern argument {i}.
  229. * Can be shorter or longer than values. Can be nullptr if offsetsLength==0.
  230. * If there is no {i} in the pattern, then offsets[i] is set to -1.
  231. * @param offsetsLength The length of the offsets array.
  232. * @param errorCode ICU error code in/out parameter.
  233. * Must fulfill U_SUCCESS before the function call.
  234. * @return result
  235. * @stable ICU 57
  236. */
  237. UnicodeString &formatAndReplace(
  238. const UnicodeString *const *values, int32_t valuesLength,
  239. UnicodeString &result,
  240. int32_t *offsets, int32_t offsetsLength, UErrorCode &errorCode) const;
  241. /**
  242. * Returns the pattern text with none of the arguments.
  243. * Like formatting with all-empty string values.
  244. * @stable ICU 57
  245. */
  246. UnicodeString getTextWithNoArguments() const {
  247. return getTextWithNoArguments(
  248. compiledPattern.getBuffer(),
  249. compiledPattern.length(),
  250. nullptr,
  251. 0);
  252. }
  253. #ifndef U_HIDE_INTERNAL_API
  254. /**
  255. * Returns the pattern text with none of the arguments.
  256. * Like formatting with all-empty string values.
  257. *
  258. * TODO(ICU-20406): Replace this with an Iterator interface.
  259. *
  260. * @param offsets offsets[i] receives the offset of where {i} was located
  261. * before it was replaced by an empty string.
  262. * For example, "a{0}b{1}" produces offset 1 for i=0 and 2 for i=1.
  263. * Can be nullptr if offsetsLength==0.
  264. * If there is no {i} in the pattern, then offsets[i] is set to -1.
  265. * @param offsetsLength The length of the offsets array.
  266. *
  267. * @internal
  268. */
  269. UnicodeString getTextWithNoArguments(int32_t *offsets, int32_t offsetsLength) const {
  270. return getTextWithNoArguments(
  271. compiledPattern.getBuffer(),
  272. compiledPattern.length(),
  273. offsets,
  274. offsetsLength);
  275. }
  276. #endif // U_HIDE_INTERNAL_API
  277. private:
  278. /**
  279. * Binary representation of the compiled pattern.
  280. * Index 0: One more than the highest argument number.
  281. * Followed by zero or more arguments or literal-text segments.
  282. *
  283. * An argument is stored as its number, less than ARG_NUM_LIMIT.
  284. * A literal-text segment is stored as its length (at least 1) offset by ARG_NUM_LIMIT,
  285. * followed by that many chars.
  286. */
  287. UnicodeString compiledPattern;
  288. static inline int32_t getArgumentLimit(const char16_t *compiledPattern,
  289. int32_t compiledPatternLength) {
  290. return compiledPatternLength == 0 ? 0 : compiledPattern[0];
  291. }
  292. static UnicodeString getTextWithNoArguments(
  293. const char16_t *compiledPattern,
  294. int32_t compiledPatternLength,
  295. int32_t *offsets,
  296. int32_t offsetsLength);
  297. static UnicodeString &format(
  298. const char16_t *compiledPattern, int32_t compiledPatternLength,
  299. const UnicodeString *const *values,
  300. UnicodeString &result, const UnicodeString *resultCopy, UBool forbidResultAsValue,
  301. int32_t *offsets, int32_t offsetsLength,
  302. UErrorCode &errorCode);
  303. // Give access to internals to SimpleModifier for number formatting
  304. friend class number::impl::SimpleModifier;
  305. };
  306. U_NAMESPACE_END
  307. #endif /* U_SHOW_CPLUSPLUS_API */
  308. #endif // __SIMPLEFORMATTER_H__