messageformat2.h 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399
  1. // © 2024 and later: Unicode, Inc. and others.
  2. // License & terms of use: http://www.unicode.org/copyright.html
  3. #include "unicode/utypes.h"
  4. #ifndef MESSAGEFORMAT2_H
  5. #define MESSAGEFORMAT2_H
  6. #if U_SHOW_CPLUSPLUS_API
  7. #if !UCONFIG_NO_FORMATTING
  8. #if !UCONFIG_NO_MF2
  9. /**
  10. * \file
  11. * \brief C++ API: Formats messages using the draft MessageFormat 2.0.
  12. */
  13. #include "unicode/messageformat2_arguments.h"
  14. #include "unicode/messageformat2_data_model.h"
  15. #include "unicode/messageformat2_function_registry.h"
  16. #include "unicode/unistr.h"
  17. #ifndef U_HIDE_DEPRECATED_API
  18. U_NAMESPACE_BEGIN
  19. namespace message2 {
  20. class Environment;
  21. class MessageContext;
  22. class ResolvedSelector;
  23. class StaticErrors;
  24. /**
  25. * <p>MessageFormatter is a Technical Preview API implementing MessageFormat 2.0.
  26. *
  27. * <p>See <a target="github" href="https://github.com/unicode-org/message-format-wg/blob/main/spec/syntax.md">the
  28. * description of the syntax with examples and use cases</a> and the corresponding
  29. * <a target="github" href="https://github.com/unicode-org/message-format-wg/blob/main/spec/message.abnf">ABNF</a> grammar.</p>
  30. *
  31. * The MessageFormatter class is mutable and movable. It is not copyable.
  32. * (It is mutable because if it has a custom function registry, the registry may include
  33. * `FormatterFactory` objects implementing custom formatters, which are allowed to contain
  34. * mutable state.)
  35. *
  36. * @internal ICU 75 technology preview
  37. * @deprecated This API is for technology preview only.
  38. */
  39. class U_I18N_API MessageFormatter : public UObject {
  40. // Note: This class does not currently inherit from the existing
  41. // `Format` class.
  42. public:
  43. /**
  44. * Move assignment operator:
  45. * The source MessageFormatter will be left in a valid but undefined state.
  46. *
  47. * @internal ICU 75 technology preview
  48. * @deprecated This API is for technology preview only.
  49. */
  50. MessageFormatter& operator=(MessageFormatter&&) noexcept;
  51. /**
  52. * Destructor.
  53. *
  54. * @internal ICU 75 technology preview
  55. * @deprecated This API is for technology preview only.
  56. */
  57. virtual ~MessageFormatter();
  58. /**
  59. * Formats the message to a string, using the data model that was previously set or parsed,
  60. * and the given `arguments` object.
  61. *
  62. * @param arguments Reference to message arguments
  63. * @param status Input/output error code used to indicate syntax errors, data model
  64. * errors, resolution errors, formatting errors, selection errors, as well
  65. * as other errors (such as memory allocation failures). Partial output
  66. * is still provided in the presence of most error types.
  67. * @return The string result of formatting the message with the given arguments.
  68. *
  69. * @internal ICU 75 technology preview
  70. * @deprecated This API is for technology preview only.
  71. */
  72. UnicodeString formatToString(const MessageArguments& arguments, UErrorCode &status);
  73. /**
  74. * Not yet implemented; formats the message to a `FormattedMessage` object,
  75. * using the data model that was previously set or parsed,
  76. * and the given `arguments` object.
  77. *
  78. * @param arguments Reference to message arguments
  79. * @param status Input/output error code used to indicate syntax errors, data model
  80. * errors, resolution errors, formatting errors, selection errors, as well
  81. * as other errors (such as memory allocation failures). Partial output
  82. * is still provided in the presence of most error types.
  83. * @return The `FormattedMessage` representing the formatted message.
  84. *
  85. * @internal ICU 75 technology preview
  86. * @deprecated This API is for technology preview only.
  87. */
  88. FormattedMessage format(const MessageArguments& arguments, UErrorCode &status) const {
  89. (void) arguments;
  90. if (U_SUCCESS(status)) {
  91. status = U_UNSUPPORTED_ERROR;
  92. }
  93. return FormattedMessage(status);
  94. }
  95. /**
  96. * Accesses the locale that this `MessageFormatter` object was created with.
  97. *
  98. * @return A reference to the locale.
  99. *
  100. * @internal ICU 75 technology preview
  101. * @deprecated This API is for technology preview only.
  102. */
  103. const Locale& getLocale() const { return locale; }
  104. /**
  105. * Serializes the data model as a string in MessageFormat 2.0 syntax.
  106. *
  107. * @return result A string representation of the data model.
  108. * The string is a valid MessageFormat 2.0 message.
  109. *
  110. * @internal ICU 75 technology preview
  111. * @deprecated This API is for technology preview only.
  112. */
  113. UnicodeString getPattern() const;
  114. /**
  115. * Accesses the data model referred to by this
  116. * `MessageFormatter` object.
  117. *
  118. * @return A reference to the data model.
  119. *
  120. * @internal ICU 75 technology preview
  121. * @deprecated This API is for technology preview only.
  122. */
  123. const MFDataModel& getDataModel() const;
  124. /**
  125. * The mutable Builder class allows each part of the MessageFormatter to be initialized
  126. * separately; calling its `build()` method yields an immutable MessageFormatter.
  127. *
  128. * Not copyable or movable.
  129. */
  130. class U_I18N_API Builder : public UObject {
  131. private:
  132. friend class MessageFormatter;
  133. // The pattern to be parsed to generate the formatted message
  134. UnicodeString pattern;
  135. bool hasPattern = false;
  136. bool hasDataModel = false;
  137. // The data model to be used to generate the formatted message
  138. // Initialized either by `setDataModel()`, or by the parser
  139. // through a call to `setPattern()`
  140. MFDataModel dataModel;
  141. // Normalized representation of the pattern;
  142. // ignored if `setPattern()` wasn't called
  143. UnicodeString normalizedInput;
  144. // Errors (internal representation of parse errors)
  145. // Ignored if `setPattern()` wasn't called
  146. StaticErrors* errors;
  147. Locale locale;
  148. // Not owned
  149. const MFFunctionRegistry* customMFFunctionRegistry;
  150. public:
  151. /**
  152. * Sets the locale to use for formatting.
  153. *
  154. * @param locale The desired locale.
  155. * @return A reference to the builder.
  156. *
  157. * @internal ICU 75 technology preview
  158. * @deprecated This API is for technology preview only.
  159. */
  160. Builder& setLocale(const Locale& locale);
  161. /**
  162. * Sets the pattern (contents of the message) and parses it
  163. * into a data model. If a data model was
  164. * previously set, it is removed.
  165. *
  166. * @param pattern A string in MessageFormat 2.0 syntax.
  167. * @param parseError Struct to receive information on the position
  168. * of an error within the pattern.
  169. * @param status Input/output error code. If the
  170. * pattern cannot be parsed, set to failure code.
  171. * @return A reference to the builder.
  172. *
  173. * @internal ICU 75 technology preview
  174. * @deprecated This API is for technology preview only.
  175. */
  176. Builder& setPattern(const UnicodeString& pattern, UParseError& parseError, UErrorCode& status);
  177. /**
  178. * Sets a custom function registry.
  179. *
  180. * @param functionRegistry Reference to the function registry to use.
  181. * `functionRegistry` is not copied,
  182. * and the caller must ensure its lifetime contains
  183. * the lifetime of the `MessageFormatter` object built by this
  184. * builder.
  185. * @return A reference to the builder.
  186. *
  187. * @internal ICU 75 technology preview
  188. * @deprecated This API is for technology preview only.
  189. */
  190. Builder& setFunctionRegistry(const MFFunctionRegistry& functionRegistry);
  191. /**
  192. * Sets a data model. If a pattern was previously set, it is removed.
  193. *
  194. * @param dataModel Data model to format. Passed by move.
  195. * @return A reference to the builder.
  196. *
  197. * @internal ICU 75 technology preview
  198. * @deprecated This API is for technology preview only.
  199. */
  200. Builder& setDataModel(MFDataModel&& dataModel);
  201. /**
  202. * Constructs a new immutable MessageFormatter using the pattern or data model
  203. * that was previously set, and the locale (if it was previously set)
  204. * or default locale (otherwise).
  205. *
  206. * The builder object (`this`) can still be used after calling `build()`.
  207. *
  208. * @param status Input/output error code. If neither the pattern
  209. * nor the data model is set, set to failure code.
  210. * @return The new MessageFormatter object
  211. *
  212. * @internal ICU 75 technology preview
  213. * @deprecated This API is for technology preview only.
  214. */
  215. MessageFormatter build(UErrorCode& status) const;
  216. /**
  217. * Default constructor.
  218. * Returns a Builder with the default locale and with no
  219. * data model or pattern set. Either `setPattern()`
  220. * or `setDataModel()` has to be called before calling `build()`.
  221. *
  222. * @param status Input/output error code.
  223. *
  224. * @internal ICU 75 technology preview
  225. * @deprecated This API is for technology preview only.
  226. */
  227. Builder(UErrorCode& status);
  228. /**
  229. * Destructor.
  230. *
  231. * @internal ICU 75 technology preview
  232. * @deprecated This API is for technology preview only.
  233. */
  234. virtual ~Builder();
  235. }; // class MessageFormatter::Builder
  236. // TODO: Shouldn't be public; only used for testing
  237. /**
  238. * Returns a string consisting of the input with optional spaces removed.
  239. *
  240. * @return A normalized string representation of the input
  241. *
  242. * @internal ICU 75 technology preview
  243. * @deprecated This API is for technology preview only.
  244. */
  245. const UnicodeString& getNormalizedPattern() const { return normalizedInput; }
  246. private:
  247. friend class Builder;
  248. friend class MessageContext;
  249. MessageFormatter(const MessageFormatter::Builder& builder, UErrorCode &status);
  250. MessageFormatter() = delete; // default constructor not implemented
  251. // Do not define default assignment operator
  252. const MessageFormatter &operator=(const MessageFormatter &) = delete;
  253. ResolvedSelector resolveVariables(const Environment& env, const data_model::Operand&, MessageContext&, UErrorCode &) const;
  254. ResolvedSelector resolveVariables(const Environment& env, const data_model::Expression&, MessageContext&, UErrorCode &) const;
  255. // Selection methods
  256. // Takes a vector of FormattedPlaceholders
  257. void resolveSelectors(MessageContext&, const Environment& env, UErrorCode&, UVector&) const;
  258. // Takes a vector of vectors of strings (input) and a vector of PrioritizedVariants (output)
  259. void filterVariants(const UVector&, UVector&, UErrorCode&) const;
  260. // Takes a vector of vectors of strings (input) and a vector of PrioritizedVariants (input/output)
  261. void sortVariants(const UVector&, UVector&, UErrorCode&) const;
  262. // Takes a vector of strings (input) and a vector of strings (output)
  263. void matchSelectorKeys(const UVector&, MessageContext&, ResolvedSelector&& rv, UVector&, UErrorCode&) const;
  264. // Takes a vector of FormattedPlaceholders (input),
  265. // and a vector of vectors of strings (output)
  266. void resolvePreferences(MessageContext&, UVector&, UVector&, UErrorCode&) const;
  267. // Formatting methods
  268. [[nodiscard]] FormattedPlaceholder formatLiteral(const data_model::Literal&) const;
  269. void formatPattern(MessageContext&, const Environment&, const data_model::Pattern&, UErrorCode&, UnicodeString&) const;
  270. // Formats a call to a formatting function
  271. // Dispatches on argument type
  272. [[nodiscard]] FormattedPlaceholder evalFormatterCall(FormattedPlaceholder&& argument,
  273. MessageContext& context,
  274. UErrorCode& status) const;
  275. // Dispatches on function name
  276. [[nodiscard]] FormattedPlaceholder evalFormatterCall(const FunctionName& functionName,
  277. FormattedPlaceholder&& argument,
  278. FunctionOptions&& options,
  279. MessageContext& context,
  280. UErrorCode& status) const;
  281. // Formats an expression that appears as a selector
  282. ResolvedSelector formatSelectorExpression(const Environment& env, const data_model::Expression&, MessageContext&, UErrorCode&) const;
  283. // Formats an expression that appears in a pattern or as the definition of a local variable
  284. [[nodiscard]] FormattedPlaceholder formatExpression(const Environment&, const data_model::Expression&, MessageContext&, UErrorCode&) const;
  285. [[nodiscard]] FunctionOptions resolveOptions(const Environment& env, const OptionMap&, MessageContext&, UErrorCode&) const;
  286. [[nodiscard]] FormattedPlaceholder formatOperand(const Environment&, const data_model::Operand&, MessageContext&, UErrorCode&) const;
  287. [[nodiscard]] FormattedPlaceholder evalArgument(const data_model::VariableName&, MessageContext&, UErrorCode&) const;
  288. void formatSelectors(MessageContext& context, const Environment& env, UErrorCode &status, UnicodeString& result) const;
  289. // Function registry methods
  290. bool hasCustomMFFunctionRegistry() const {
  291. return (customMFFunctionRegistry != nullptr);
  292. }
  293. // Precondition: custom function registry exists
  294. // Note: this is non-const because the values in the MFFunctionRegistry are mutable
  295. // (a FormatterFactory can have mutable state)
  296. const MFFunctionRegistry& getCustomMFFunctionRegistry() const;
  297. bool isCustomFormatter(const FunctionName&) const;
  298. FormatterFactory* lookupFormatterFactory(const FunctionName&, UErrorCode& status) const;
  299. bool isBuiltInSelector(const FunctionName&) const;
  300. bool isBuiltInFormatter(const FunctionName&) const;
  301. bool isCustomSelector(const FunctionName&) const;
  302. const SelectorFactory* lookupSelectorFactory(MessageContext&, const FunctionName&, UErrorCode&) const;
  303. bool isSelector(const FunctionName& fn) const { return isBuiltInSelector(fn) || isCustomSelector(fn); }
  304. bool isFormatter(const FunctionName& fn) const { return isBuiltInFormatter(fn) || isCustomFormatter(fn); }
  305. const Formatter* lookupFormatter(const FunctionName&, UErrorCode&) const;
  306. Selector* getSelector(MessageContext&, const FunctionName&, UErrorCode&) const;
  307. Formatter* getFormatter(const FunctionName&, UErrorCode&) const;
  308. bool getDefaultFormatterNameByType(const UnicodeString&, FunctionName&) const;
  309. // Checking for resolution errors
  310. void checkDeclarations(MessageContext&, Environment*&, UErrorCode&) const;
  311. void check(MessageContext&, const Environment&, const data_model::Expression&, UErrorCode&) const;
  312. void check(MessageContext&, const Environment&, const data_model::Operand&, UErrorCode&) const;
  313. void check(MessageContext&, const Environment&, const OptionMap&, UErrorCode&) const;
  314. void initErrors(UErrorCode&);
  315. void clearErrors() const;
  316. void cleanup() noexcept;
  317. // The locale this MessageFormatter was created with
  318. /* const */ Locale locale;
  319. // Registry for built-in functions
  320. MFFunctionRegistry standardMFFunctionRegistry;
  321. // Registry for custom functions; may be null if no custom registry supplied
  322. // Note: this is *not* owned by the MessageFormatter object
  323. // The reason for this choice is to have a non-destructive MessageFormatter::Builder,
  324. // while also not requiring the function registry to be deeply-copyable. Making the
  325. // function registry copyable would impose a requirement on any implementations
  326. // of the FormatterFactory and SelectorFactory interfaces to implement a custom
  327. // clone() method, which is necessary to avoid sharing between copies of the
  328. // function registry (and thus double-frees)
  329. // Not deeply immutable (the values in the function registry are mutable,
  330. // as a FormatterFactory can have mutable state
  331. const MFFunctionRegistry* customMFFunctionRegistry;
  332. // Data model, representing the parsed message
  333. MFDataModel dataModel;
  334. // Normalized version of the input string (optional whitespace removed)
  335. UnicodeString normalizedInput;
  336. // Errors -- only used while parsing and checking for data model errors; then
  337. // the MessageContext keeps track of errors
  338. // Must be a raw pointer to avoid including the internal header file
  339. // defining StaticErrors
  340. // Owned by `this`
  341. StaticErrors* errors;
  342. }; // class MessageFormatter
  343. } // namespace message2
  344. U_NAMESPACE_END
  345. #endif // U_HIDE_DEPRECATED_API
  346. #endif /* #if !UCONFIG_NO_MF2 */
  347. #endif /* #if !UCONFIG_NO_FORMATTING */
  348. #endif /* U_SHOW_CPLUSPLUS_API */
  349. #endif // MESSAGEFORMAT2_H
  350. // eof