// messageformat2.h
  1. // © 2024 and later: Unicode, Inc. and others.
  2. // License & terms of use: http://www.unicode.org/copyright.html
  3. #include "unicode/utypes.h"
  4. #ifndef MESSAGEFORMAT2_H
  5. #define MESSAGEFORMAT2_H
  6. #if U_SHOW_CPLUSPLUS_API
  7. #if !UCONFIG_NO_FORMATTING
  8. #if !UCONFIG_NO_MF2
  9. /**
  10. * \file
  11. * \brief C++ API: Formats messages using the draft MessageFormat 2.0.
  12. */
  13. #include "unicode/messageformat2_arguments.h"
  14. #include "unicode/messageformat2_data_model.h"
  15. #include "unicode/messageformat2_function_registry.h"
  16. #include "unicode/unistr.h"
  17. #ifndef U_HIDE_DEPRECATED_API
  18. U_NAMESPACE_BEGIN
  19. namespace message2 {
  20. class Environment;
  21. class MessageContext;
  22. class ResolvedSelector;
  23. class StaticErrors;
  24. /**
  25. * <p>MessageFormatter is a Technical Preview API implementing MessageFormat 2.0.
  26. *
  27. * <p>See <a target="github" href="https://github.com/unicode-org/message-format-wg/blob/main/spec/syntax.md">the
  28. * description of the syntax with examples and use cases</a> and the corresponding
  29. * <a target="github" href="https://github.com/unicode-org/message-format-wg/blob/main/spec/message.abnf">ABNF</a> grammar.</p>
  30. *
  31. * The MessageFormatter class is mutable and movable. It is not copyable.
  32. * (It is mutable because if it has a custom function registry, the registry may include
  33. * `FormatterFactory` objects implementing custom formatters, which are allowed to contain
  34. * mutable state.)
  35. *
  36. * @internal ICU 75 technology preview
  37. * @deprecated This API is for technology preview only.
  38. */
  39. class U_I18N_API MessageFormatter : public UObject {
  40. // Note: This class does not currently inherit from the existing
  41. // `Format` class.
  42. public:
  43. /**
  44. * Move assignment operator:
  45. * The source MessageFormatter will be left in a valid but undefined state.
  46. *
  47. * @internal ICU 75 technology preview
  48. * @deprecated This API is for technology preview only.
  49. */
  50. MessageFormatter& operator=(MessageFormatter&&) noexcept;
  51. /**
  52. * Destructor.
  53. *
  54. * @internal ICU 75 technology preview
  55. * @deprecated This API is for technology preview only.
  56. */
  57. virtual ~MessageFormatter();
  58. /**
  59. * Formats the message to a string, using the data model that was previously set or parsed,
  60. * and the given `arguments` object.
  61. *
  62. * @param arguments Reference to message arguments
  63. * @param status Input/output error code used to indicate syntax errors, data model
  64. * errors, resolution errors, formatting errors, selection errors, as well
  65. * as other errors (such as memory allocation failures). Partial output
  66. * is still provided in the presence of most error types.
  67. * @return The string result of formatting the message with the given arguments.
  68. *
  69. * @internal ICU 75 technology preview
  70. * @deprecated This API is for technology preview only.
  71. */
  72. UnicodeString formatToString(const MessageArguments& arguments, UErrorCode &status);
  73. /**
  74. * Not yet implemented; formats the message to a `FormattedMessage` object,
  75. * using the data model that was previously set or parsed,
  76. * and the given `arguments` object.
  77. *
  78. * @param arguments Reference to message arguments
  79. * @param status Input/output error code used to indicate syntax errors, data model
  80. * errors, resolution errors, formatting errors, selection errors, as well
  81. * as other errors (such as memory allocation failures). Partial output
  82. * is still provided in the presence of most error types.
  83. * @return The `FormattedMessage` representing the formatted message.
  84. *
  85. * @internal ICU 75 technology preview
  86. * @deprecated This API is for technology preview only.
  87. */
  88. FormattedMessage format(const MessageArguments& arguments, UErrorCode &status) const {
  89. (void) arguments;
  90. if (U_SUCCESS(status)) {
  91. status = U_UNSUPPORTED_ERROR;
  92. }
  93. return FormattedMessage(status);
  94. }
  95. /**
  96. * Accesses the locale that this `MessageFormatter` object was created with.
  97. *
  98. * @return A reference to the locale.
  99. *
  100. * @internal ICU 75 technology preview
  101. * @deprecated This API is for technology preview only.
  102. */
  103. const Locale& getLocale() const { return locale; }
  104. /**
  105. * Serializes the data model as a string in MessageFormat 2.0 syntax.
  106. *
  107. * @return result A string representation of the data model.
  108. * The string is a valid MessageFormat 2.0 message.
  109. *
  110. * @internal ICU 75 technology preview
  111. * @deprecated This API is for technology preview only.
  112. */
  113. UnicodeString getPattern() const;
  114. /**
  115. * Accesses the data model referred to by this
  116. * `MessageFormatter` object.
  117. *
  118. * @return A reference to the data model.
  119. *
  120. * @internal ICU 75 technology preview
  121. * @deprecated This API is for technology preview only.
  122. */
  123. const MFDataModel& getDataModel() const;
  124. /**
  125. * Used in conjunction with the
  126. * MessageFormatter::Builder::setErrorHandlingBehavior() method.
  127. *
  128. * @internal ICU 76 technology preview
  129. * @deprecated This API is for technology preview only.
  130. */
  131. typedef enum UMFErrorHandlingBehavior {
  132. /**
  133. * Suppress errors and return best-effort output.
  134. *
  135. * @internal ICU 76 technology preview
  136. * @deprecated This API is for technology preview only.
  137. */
  138. U_MF_BEST_EFFORT = 0,
  139. /**
  140. * Signal all MessageFormat errors using the UErrorCode
  141. * argument.
  142. *
  143. * @internal ICU 76 technology preview
  144. * @deprecated This API is for technology preview only.
  145. */
  146. U_MF_STRICT
  147. } UMFErrorHandlingBehavior;
  148. /**
  149. * The mutable Builder class allows each part of the MessageFormatter to be initialized
  150. * separately; calling its `build()` method yields an immutable MessageFormatter.
  151. *
  152. * Not copyable or movable.
  153. */
  154. class U_I18N_API Builder : public UObject {
  155. private:
  156. friend class MessageFormatter;
  157. // The pattern to be parsed to generate the formatted message
  158. UnicodeString pattern;
  159. bool hasPattern = false;
  160. bool hasDataModel = false;
  161. // The data model to be used to generate the formatted message
  162. // Initialized either by `setDataModel()`, or by the parser
  163. // through a call to `setPattern()`
  164. MFDataModel dataModel;
  165. // Normalized representation of the pattern;
  166. // ignored if `setPattern()` wasn't called
  167. UnicodeString normalizedInput;
  168. // Errors (internal representation of parse errors)
  169. // Ignored if `setPattern()` wasn't called
  170. StaticErrors* errors;
  171. Locale locale;
  172. // Not owned
  173. const MFFunctionRegistry* customMFFunctionRegistry;
  174. // Error behavior; see comment in `MessageFormatter` class
  175. bool signalErrors = false;
  176. void clearState();
  177. public:
  178. /**
  179. * Sets the locale to use for formatting.
  180. *
  181. * @param locale The desired locale.
  182. * @return A reference to the builder.
  183. *
  184. * @internal ICU 75 technology preview
  185. * @deprecated This API is for technology preview only.
  186. */
  187. Builder& setLocale(const Locale& locale);
  188. /**
  189. * Sets the pattern (contents of the message) and parses it
  190. * into a data model. If a data model was
  191. * previously set, it is removed.
  192. *
  193. * @param pattern A string in MessageFormat 2.0 syntax.
  194. * @param parseError Struct to receive information on the position
  195. * of an error within the pattern.
  196. * @param status Input/output error code. If the
  197. * pattern cannot be parsed, set to failure code.
  198. * @return A reference to the builder.
  199. *
  200. * @internal ICU 75 technology preview
  201. * @deprecated This API is for technology preview only.
  202. */
  203. Builder& setPattern(const UnicodeString& pattern, UParseError& parseError, UErrorCode& status);
  204. /**
  205. * Sets a custom function registry.
  206. *
  207. * @param functionRegistry Reference to the function registry to use.
  208. * `functionRegistry` is not copied,
  209. * and the caller must ensure its lifetime contains
  210. * the lifetime of the `MessageFormatter` object built by this
  211. * builder.
  212. * @return A reference to the builder.
  213. *
  214. * @internal ICU 75 technology preview
  215. * @deprecated This API is for technology preview only.
  216. */
  217. Builder& setFunctionRegistry(const MFFunctionRegistry& functionRegistry);
  218. /**
  219. * Sets a data model. If a pattern was previously set, it is removed.
  220. *
  221. * @param dataModel Data model to format. Passed by move.
  222. * @return A reference to the builder.
  223. *
  224. * @internal ICU 75 technology preview
  225. * @deprecated This API is for technology preview only.
  226. */
  227. Builder& setDataModel(MFDataModel&& dataModel);
  228. /**
  229. * Set the error handling behavior for this formatter.
  230. *
  231. * "Strict" error behavior means that that formatting methods
  232. * will set their UErrorCode arguments to signal MessageFormat
  233. * data model, resolution, and runtime errors. Syntax errors are
  234. * always signaled.
  235. *
  236. * "Best effort" error behavior means that MessageFormat errors are
  237. * suppressed: formatting methods will _not_ set their
  238. * UErrorCode arguments to signal MessageFormat data model,
  239. * resolution, or runtime errors. Best-effort output
  240. * will be returned. Syntax errors are always signaled.
  241. * This is the default behavior.
  242. *
  243. * @param type An enum with type UMFErrorHandlingBehavior;
  244. * if type == `U_MF_STRICT`, then
  245. * errors are handled strictly.
  246. * If type == `U_MF_BEST_EFFORT`, then
  247. * best-effort output is returned.
  248. *
  249. * The default is to suppress all MessageFormat errors
  250. * and return best-effort output.
  251. *
  252. * @return A reference to the builder.
  253. *
  254. * @internal ICU 76 technology preview
  255. * @deprecated This API is for technology preview only.
  256. */
  257. Builder& setErrorHandlingBehavior(UMFErrorHandlingBehavior type);
  258. /**
  259. * Constructs a new immutable MessageFormatter using the pattern or data model
  260. * that was previously set, and the locale (if it was previously set)
  261. * or default locale (otherwise).
  262. *
  263. * The builder object (`this`) can still be used after calling `build()`.
  264. *
  265. * @param status Input/output error code. If neither the pattern
  266. * nor the data model is set, set to failure code.
  267. * @return The new MessageFormatter object
  268. *
  269. * @internal ICU 75 technology preview
  270. * @deprecated This API is for technology preview only.
  271. */
  272. MessageFormatter build(UErrorCode& status) const;
  273. /**
  274. * Default constructor.
  275. * Returns a Builder with the default locale and with no
  276. * data model or pattern set. Either `setPattern()`
  277. * or `setDataModel()` has to be called before calling `build()`.
  278. *
  279. * @param status Input/output error code.
  280. *
  281. * @internal ICU 75 technology preview
  282. * @deprecated This API is for technology preview only.
  283. */
  284. Builder(UErrorCode& status);
  285. /**
  286. * Destructor.
  287. *
  288. * @internal ICU 75 technology preview
  289. * @deprecated This API is for technology preview only.
  290. */
  291. virtual ~Builder();
  292. }; // class MessageFormatter::Builder
  293. // TODO: Shouldn't be public; only used for testing
  294. /**
  295. * Returns a string consisting of the input with optional spaces removed.
  296. *
  297. * @return A normalized string representation of the input
  298. *
  299. * @internal ICU 75 technology preview
  300. * @deprecated This API is for technology preview only.
  301. */
  302. const UnicodeString& getNormalizedPattern() const { return normalizedInput; }
  303. private:
  304. friend class Builder;
  305. friend class MessageContext;
  306. MessageFormatter(const MessageFormatter::Builder& builder, UErrorCode &status);
  307. MessageFormatter() = delete; // default constructor not implemented
  308. // Do not define default assignment operator
  309. const MessageFormatter &operator=(const MessageFormatter &) = delete;
  310. ResolvedSelector resolveVariables(const Environment& env, const data_model::Operand&, MessageContext&, UErrorCode &) const;
  311. ResolvedSelector resolveVariables(const Environment& env, const data_model::Expression&, MessageContext&, UErrorCode &) const;
  312. // Selection methods
  313. // Takes a vector of FormattedPlaceholders
  314. void resolveSelectors(MessageContext&, const Environment& env, UErrorCode&, UVector&) const;
  315. // Takes a vector of vectors of strings (input) and a vector of PrioritizedVariants (output)
  316. void filterVariants(const UVector&, UVector&, UErrorCode&) const;
  317. // Takes a vector of vectors of strings (input) and a vector of PrioritizedVariants (input/output)
  318. void sortVariants(const UVector&, UVector&, UErrorCode&) const;
  319. // Takes a vector of strings (input) and a vector of strings (output)
  320. void matchSelectorKeys(const UVector&, MessageContext&, ResolvedSelector&& rv, UVector&, UErrorCode&) const;
  321. // Takes a vector of FormattedPlaceholders (input),
  322. // and a vector of vectors of strings (output)
  323. void resolvePreferences(MessageContext&, UVector&, UVector&, UErrorCode&) const;
  324. // Formatting methods
  325. [[nodiscard]] FormattedPlaceholder formatLiteral(const data_model::Literal&) const;
  326. void formatPattern(MessageContext&, const Environment&, const data_model::Pattern&, UErrorCode&, UnicodeString&) const;
  327. // Formats a call to a formatting function
  328. // Dispatches on argument type
  329. [[nodiscard]] FormattedPlaceholder evalFormatterCall(FormattedPlaceholder&& argument,
  330. MessageContext& context,
  331. UErrorCode& status) const;
  332. // Dispatches on function name
  333. [[nodiscard]] FormattedPlaceholder evalFormatterCall(const FunctionName& functionName,
  334. FormattedPlaceholder&& argument,
  335. FunctionOptions&& options,
  336. MessageContext& context,
  337. UErrorCode& status) const;
  338. // Formats an expression that appears as a selector
  339. ResolvedSelector formatSelectorExpression(const Environment& env, const data_model::Expression&, MessageContext&, UErrorCode&) const;
  340. // Formats an expression that appears in a pattern or as the definition of a local variable
  341. [[nodiscard]] FormattedPlaceholder formatExpression(const Environment&, const data_model::Expression&, MessageContext&, UErrorCode&) const;
  342. [[nodiscard]] FunctionOptions resolveOptions(const Environment& env, const OptionMap&, MessageContext&, UErrorCode&) const;
  343. [[nodiscard]] FormattedPlaceholder formatOperand(const Environment&, const data_model::Operand&, MessageContext&, UErrorCode&) const;
  344. [[nodiscard]] FormattedPlaceholder evalArgument(const data_model::VariableName&, MessageContext&, UErrorCode&) const;
  345. void formatSelectors(MessageContext& context, const Environment& env, UErrorCode &status, UnicodeString& result) const;
  346. // Function registry methods
  347. bool hasCustomMFFunctionRegistry() const {
  348. return (customMFFunctionRegistry != nullptr);
  349. }
  350. // Precondition: custom function registry exists
  351. // Note: this is non-const because the values in the MFFunctionRegistry are mutable
  352. // (a FormatterFactory can have mutable state)
  353. const MFFunctionRegistry& getCustomMFFunctionRegistry() const;
  354. bool isCustomFormatter(const FunctionName&) const;
  355. FormatterFactory* lookupFormatterFactory(const FunctionName&, UErrorCode& status) const;
  356. bool isBuiltInSelector(const FunctionName&) const;
  357. bool isBuiltInFormatter(const FunctionName&) const;
  358. bool isCustomSelector(const FunctionName&) const;
  359. const SelectorFactory* lookupSelectorFactory(MessageContext&, const FunctionName&, UErrorCode&) const;
  360. bool isSelector(const FunctionName& fn) const { return isBuiltInSelector(fn) || isCustomSelector(fn); }
  361. bool isFormatter(const FunctionName& fn) const { return isBuiltInFormatter(fn) || isCustomFormatter(fn); }
  362. const Formatter* lookupFormatter(const FunctionName&, UErrorCode&) const;
  363. Selector* getSelector(MessageContext&, const FunctionName&, UErrorCode&) const;
  364. Formatter* getFormatter(const FunctionName&, UErrorCode&) const;
  365. bool getDefaultFormatterNameByType(const UnicodeString&, FunctionName&) const;
  366. // Checking for resolution errors
  367. void checkDeclarations(MessageContext&, Environment*&, UErrorCode&) const;
  368. void check(MessageContext&, const Environment&, const data_model::Expression&, UErrorCode&) const;
  369. void check(MessageContext&, const Environment&, const data_model::Operand&, UErrorCode&) const;
  370. void check(MessageContext&, const Environment&, const OptionMap&, UErrorCode&) const;
  371. void initErrors(UErrorCode&);
  372. void clearErrors() const;
  373. void cleanup() noexcept;
  374. // The locale this MessageFormatter was created with
  375. /* const */ Locale locale;
  376. // Registry for built-in functions
  377. MFFunctionRegistry standardMFFunctionRegistry;
  378. // Registry for custom functions; may be null if no custom registry supplied
  379. // Note: this is *not* owned by the MessageFormatter object
  380. // The reason for this choice is to have a non-destructive MessageFormatter::Builder,
  381. // while also not requiring the function registry to be deeply-copyable. Making the
  382. // function registry copyable would impose a requirement on any implementations
  383. // of the FormatterFactory and SelectorFactory interfaces to implement a custom
  384. // clone() method, which is necessary to avoid sharing between copies of the
  385. // function registry (and thus double-frees)
  386. // Not deeply immutable (the values in the function registry are mutable,
  387. // as a FormatterFactory can have mutable state
  388. const MFFunctionRegistry* customMFFunctionRegistry;
  389. // Data model, representing the parsed message
  390. MFDataModel dataModel;
  391. // Normalized version of the input string (optional whitespace removed)
  392. UnicodeString normalizedInput;
  393. // Errors -- only used while parsing and checking for data model errors; then
  394. // the MessageContext keeps track of errors
  395. // Must be a raw pointer to avoid including the internal header file
  396. // defining StaticErrors
  397. // Owned by `this`
  398. StaticErrors* errors = nullptr;
  399. // Error handling behavior.
  400. // If true, then formatting methods set their UErrorCode arguments
  401. // to signal MessageFormat errors, and no useful output is returned.
  402. // If false, then MessageFormat errors are not signaled and the
  403. // formatting methods return best-effort output.
  404. // The default is false.
  405. bool signalErrors = false;
  406. }; // class MessageFormatter
  407. } // namespace message2
  408. U_NAMESPACE_END
  409. #endif // U_HIDE_DEPRECATED_API
  410. #endif /* #if !UCONFIG_NO_MF2 */
  411. #endif /* #if !UCONFIG_NO_FORMATTING */
  412. #endif /* U_SHOW_CPLUSPLUS_API */
  413. #endif // MESSAGEFORMAT2_H
  414. // eof