plurfmt.h 25 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601
  1. // © 2016 and later: Unicode, Inc. and others.
  2. // License & terms of use: http://www.unicode.org/copyright.html
  3. /*
  4. *******************************************************************************
  5. * Copyright (C) 2007-2014, International Business Machines Corporation and
  6. * others. All Rights Reserved.
  7. *******************************************************************************
  8. *
  9. * File PLURFMT.H
  10. ********************************************************************************
  11. */
  12. #ifndef PLURFMT
  13. #define PLURFMT
  14. #include "unicode/utypes.h"
  15. #if U_SHOW_CPLUSPLUS_API
  16. /**
  17. * \file
  18. * \brief C++ API: PluralFormat object
  19. */
  20. #if !UCONFIG_NO_FORMATTING
  21. #include "unicode/messagepattern.h"
  22. #include "unicode/numfmt.h"
  23. #include "unicode/plurrule.h"
  24. U_NAMESPACE_BEGIN
  25. class Hashtable;
  26. class NFRule;
  27. /**
  28. * <p>
  29. * <code>PluralFormat</code> supports the creation of internationalized
  30. * messages with plural inflection. It is based on <i>plural
  31. * selection</i>, i.e. the caller specifies messages for each
  32. * plural case that can appear in the user's language and the
  33. * <code>PluralFormat</code> selects the appropriate message based on
  34. * the number.
  35. * </p>
  36. * <h4>The Problem of Plural Forms in Internationalized Messages</h4>
  37. * <p>
  38. * Different languages have different ways to inflect
  39. * plurals. Creating internationalized messages that include plural
  40. * forms is only feasible when the framework is able to handle plural
  41. * forms of <i>all</i> languages correctly. <code>ChoiceFormat</code>
  42. * doesn't handle this well, because it attaches a number interval to
  43. * each message and selects the message whose interval contains a
  44. * given number. This can only handle a finite number of
  45. * intervals. But in some languages, like Polish, one plural case
  46. * applies to infinitely many intervals (e.g., the plural case applies to
  47. * numbers ending with 2, 3, or 4 except those ending with 12, 13, or
  48. * 14). Thus <code>ChoiceFormat</code> is not adequate.
  49. * </p><p>
  50. * <code>PluralFormat</code> deals with this by breaking the problem
  51. * into two parts:
  52. * <ul>
  53. * <li>It uses <code>PluralRules</code> that can define more complex
  54. * conditions for a plural case than just a single interval. These plural
  55. * rules define both what plural cases exist in a language, and to
  56. * which numbers these cases apply.
  57. * <li>It provides predefined plural rules for many languages. Thus, the programmer
  58. * need not worry about the plural cases of a language and
  59. * does not have to define the plural cases; they can simply
  60. * use the predefined keywords. The whole plural formatting of messages can
  61. * be done using localized patterns from resource bundles. For predefined plural
  62. * rules, see the CLDR <i>Language Plural Rules</i> page at
  63. * https://unicode-org.github.io/cldr-staging/charts/latest/supplemental/language_plural_rules.html
  64. * </ul>
  65. * </p>
  66. * <h4>Usage of <code>PluralFormat</code></h4>
  67. * <p>Note: Typically, plural formatting is done via <code>MessageFormat</code>
  68. * with a <code>plural</code> argument type,
  69. * rather than using a stand-alone <code>PluralFormat</code>.
  70. * </p><p>
  71. * This discussion assumes that you use <code>PluralFormat</code> with
  72. * a predefined set of plural rules. You can create one using one of
  73. * the constructors that takes a <code>locale</code> object. To
  74. * specify the message pattern, you can either pass it to the
  75. * constructor or set it explicitly using the
  76. * <code>applyPattern()</code> method. The <code>format()</code>
  77. * method takes a number object and selects the message of the
  78. * matching plural case. This message will be returned.
  79. * </p>
  80. * <h5>Patterns and Their Interpretation</h5>
  81. * <p>
  82. * The pattern text defines the message output for each plural case of the
  83. * specified locale. Syntax:
  84. * <pre>
  85. * pluralStyle = [offsetValue] (selector '{' message '}')+
  86. * offsetValue = "offset:" number
  87. * selector = explicitValue | keyword
  88. * explicitValue = '=' number // adjacent, no white space in between
  89. * keyword = [^[[:Pattern_Syntax:][:Pattern_White_Space:]]]+
  90. * message: see {@link MessageFormat}
  91. * </pre>
  92. * Pattern_White_Space between syntax elements is ignored, except
  93. * between the {curly braces} and their sub-message,
  94. * and between the '=' and the number of an explicitValue.
  95. *
  96. * </p><p>
  97. * There are 6 predefined case keywords in CLDR/ICU - 'zero', 'one', 'two', 'few', 'many' and
  98. * 'other'. You always have to define a message text for the default plural case
  99. * <code>other</code> which is contained in every rule set.
  100. * If you do not specify a message text for a particular plural case, the
  101. * message text of the plural case <code>other</code> gets assigned to this
  102. * plural case.
  103. * </p><p>
  104. * When formatting, the input number is first matched against the explicitValue clauses.
  105. * If there is no exact-number match, then a keyword is selected by calling
  106. * the <code>PluralRules</code> with the input number <em>minus the offset</em>.
  107. * (The offset defaults to 0 if it is omitted from the pattern string.)
  108. * If there is no clause with that keyword, then the "other" clause is returned.
  109. * </p><p>
  110. * An unquoted pound sign (<code>#</code>) in the selected sub-message
  111. * itself (i.e., outside of arguments nested in the sub-message)
  112. * is replaced by the input number minus the offset.
  113. * The number-minus-offset value is formatted using a
  114. * <code>NumberFormat</code> for the <code>PluralFormat</code>'s locale. If you
  115. * need special number formatting, you have to use a <code>MessageFormat</code>
  116. * and explicitly specify a <code>NumberFormat</code> argument.
  117. * <strong>Note:</strong> That argument is formatted without subtracting the offset!
  118. * If you need a custom format and have a non-zero offset, then you need to pass the
  119. * number-minus-offset value as a separate parameter.
  120. * </p>
  121. * For a usage example, see the {@link MessageFormat} class documentation.
  122. *
  123. * <h4>Defining Custom Plural Rules</h4>
  124. * <p>If you need to use <code>PluralFormat</code> with custom rules, you can
  125. * create a <code>PluralRules</code> object and pass it to
  126. * <code>PluralFormat</code>'s constructor. If you also specify a locale in this
  127. * constructor, this locale will be used to format the number in the message
  128. * texts.
  129. * </p><p>
  130. * For more information about <code>PluralRules</code>, see
  131. * {@link PluralRules}.
  132. * </p>
  133. *
  134. * ported from Java
  135. * @stable ICU 4.0
  136. */
  137. class U_I18N_API PluralFormat : public Format {
  138. public:
  139. /**
  140. * Creates a new cardinal-number <code>PluralFormat</code> for the default locale.
  141. * This locale will be used to get the set of plural rules and for standard
  142. * number formatting.
  143. * @param status output param set to success/failure code on exit, which
  144. * must not indicate a failure before the function call.
  145. * @stable ICU 4.0
  146. */
  147. PluralFormat(UErrorCode& status);
  148. /**
  149. * Creates a new cardinal-number <code>PluralFormat</code> for a given locale.
  150. * @param locale the <code>PluralFormat</code> will be configured with
  151. * rules for this locale. This locale will also be used for
  152. * standard number formatting.
  153. * @param status output param set to success/failure code on exit, which
  154. * must not indicate a failure before the function call.
  155. * @stable ICU 4.0
  156. */
  157. PluralFormat(const Locale& locale, UErrorCode& status);
  158. /**
  159. * Creates a new <code>PluralFormat</code> for a given set of rules.
  160. * The standard number formatting will be done using the default locale.
  161. * @param rules defines the behavior of the <code>PluralFormat</code>
  162. * object.
  163. * @param status output param set to success/failure code on exit, which
  164. * must not indicate a failure before the function call.
  165. * @stable ICU 4.0
  166. */
  167. PluralFormat(const PluralRules& rules, UErrorCode& status);
  168. /**
  169. * Creates a new <code>PluralFormat</code> for a given set of rules.
  170. * The standard number formatting will be done using the given locale.
  171. * @param locale the default number formatting will be done using this
  172. * locale.
  173. * @param rules defines the behavior of the <code>PluralFormat</code>
  174. * object.
  175. * @param status output param set to success/failure code on exit, which
  176. * must not indicate a failure before the function call.
  177. * @stable ICU 4.0
  178. */
  179. PluralFormat(const Locale& locale, const PluralRules& rules, UErrorCode& status);
  180. /**
  181. * Creates a new <code>PluralFormat</code> for the plural type.
  182. * The standard number formatting will be done using the given locale.
  183. * @param locale the default number formatting will be done using this
  184. * locale.
  185. * @param type The plural type (e.g., cardinal or ordinal).
  186. * @param status output param set to success/failure code on exit, which
  187. * must not indicate a failure before the function call.
  188. * @stable ICU 50
  189. */
  190. PluralFormat(const Locale& locale, UPluralType type, UErrorCode& status);
  191. /**
  192. * Creates a new cardinal-number <code>PluralFormat</code> for a given pattern string.
  193. * The default locale will be used to get the set of plural rules and for
  194. * standard number formatting.
  195. * @param pattern the pattern for this <code>PluralFormat</code>.
  196. * errors are returned to status if the pattern is invalid.
  197. * @param status output param set to success/failure code on exit, which
  198. * must not indicate a failure before the function call.
  199. * @stable ICU 4.0
  200. */
  201. PluralFormat(const UnicodeString& pattern, UErrorCode& status);
  202. /**
  203. * Creates a new cardinal-number <code>PluralFormat</code> for a given pattern string and
  204. * locale.
  205. * The locale will be used to get the set of plural rules and for
  206. * standard number formatting.
  207. * @param locale the <code>PluralFormat</code> will be configured with
  208. * rules for this locale. This locale will also be used for
  209. * standard number formatting.
  210. * @param pattern the pattern for this <code>PluralFormat</code>.
  211. * errors are returned to status if the pattern is invalid.
  212. * @param status output param set to success/failure code on exit, which
  213. * must not indicate a failure before the function call.
  214. * @stable ICU 4.0
  215. */
  216. PluralFormat(const Locale& locale, const UnicodeString& pattern, UErrorCode& status);
  217. /**
  218. * Creates a new <code>PluralFormat</code> for a given set of rules and a
  219. * pattern.
  220. * @param rules defines the behavior of the <code>PluralFormat</code>
  221. * object.
  222. * @param pattern the pattern for this <code>PluralFormat</code>.
  223. * errors are returned to status if the pattern is invalid.
  224. * @param status output param set to success/failure code on exit, which
  225. * must not indicate a failure before the function call.
  226. * @stable ICU 4.0
  227. */
  228. PluralFormat(const PluralRules& rules,
  229. const UnicodeString& pattern,
  230. UErrorCode& status);
  231. /**
  232. * Creates a new <code>PluralFormat</code> for a given set of rules, a
  233. * pattern and a locale.
  234. * @param locale the <code>PluralFormat</code> will be configured with
  235. * rules for this locale. This locale will also be used for
  236. * standard number formatting.
  237. * @param rules defines the behavior of the <code>PluralFormat</code>
  238. * object.
  239. * @param pattern the pattern for this <code>PluralFormat</code>.
  240. * errors are returned to status if the pattern is invalid.
  241. * @param status output param set to success/failure code on exit, which
  242. * must not indicate a failure before the function call.
  243. * @stable ICU 4.0
  244. */
  245. PluralFormat(const Locale& locale,
  246. const PluralRules& rules,
  247. const UnicodeString& pattern,
  248. UErrorCode& status);
  249. /**
  250. * Creates a new <code>PluralFormat</code> for a plural type, a
  251. * pattern and a locale.
  252. * @param locale the <code>PluralFormat</code> will be configured with
  253. * rules for this locale. This locale will also be used for
  254. * standard number formatting.
  255. * @param type The plural type (e.g., cardinal or ordinal).
  256. * @param pattern the pattern for this <code>PluralFormat</code>.
  257. * errors are returned to status if the pattern is invalid.
  258. * @param status output param set to success/failure code on exit, which
  259. * must not indicate a failure before the function call.
  260. * @stable ICU 50
  261. */
  262. PluralFormat(const Locale& locale,
  263. UPluralType type,
  264. const UnicodeString& pattern,
  265. UErrorCode& status);
  266. /**
  267. * Copy constructor.
  268. * @stable ICU 4.0
  269. */
  270. PluralFormat(const PluralFormat& other);
  271. /**
  272. * Destructor.
  273. * @stable ICU 4.0
  274. */
  275. virtual ~PluralFormat();
  276. /**
  277. * Sets the pattern used by this plural format.
  278. * The method parses the pattern and creates a map of format strings
  279. * for the plural rules.
  280. * Patterns and their interpretation are specified in the class description.
  281. *
  282. * @param pattern the pattern for this plural format
  283. * errors are returned to status if the pattern is invalid.
  284. * @param status output param set to success/failure code on exit, which
  285. * must not indicate a failure before the function call.
  286. * @stable ICU 4.0
  287. */
  288. void applyPattern(const UnicodeString& pattern, UErrorCode& status);
  289. using Format::format;
  290. /**
  291. * Formats a plural message for a given number.
  292. *
  293. * @param number a number for which the plural message should be formatted.
  294. * If no pattern has been applied to this
  295. * <code>PluralFormat</code> object yet, the formatted number
  296. * will be returned.
  297. * @param status output param set to success/failure code on exit, which
  298. * must not indicate a failure before the function call.
  299. * @return the string containing the formatted plural message.
  300. * @stable ICU 4.0
  301. */
  302. UnicodeString format(int32_t number, UErrorCode& status) const;
  303. /**
  304. * Formats a plural message for a given number.
  305. *
  306. * @param number a number for which the plural message should be formatted.
  307. * If no pattern has been applied to this
  308. * PluralFormat object yet, the formatted number
  309. * will be returned.
  310. * @param status output param set to success or failure code on exit, which
  311. * must not indicate a failure before the function call.
  312. * @return the string containing the formatted plural message.
  313. * @stable ICU 4.0
  314. */
  315. UnicodeString format(double number, UErrorCode& status) const;
  316. /**
  317. * Formats a plural message for a given number.
  318. *
  319. * @param number a number for which the plural message should be formatted.
  320. * If no pattern has been applied to this
  321. * <code>PluralFormat</code> object yet, the formatted number
  322. * will be returned.
  323. * @param appendTo output parameter to receive result.
  324. * result is appended to existing contents.
  325. * @param pos On input: an alignment field, if desired.
  326. * On output: the offsets of the alignment field.
  327. * @param status output param set to success/failure code on exit, which
  328. * must not indicate a failure before the function call.
  329. * @return the string containing the formatted plural message.
  330. * @stable ICU 4.0
  331. */
  332. UnicodeString& format(int32_t number,
  333. UnicodeString& appendTo,
  334. FieldPosition& pos,
  335. UErrorCode& status) const;
  336. /**
  337. * Formats a plural message for a given number.
  338. *
  339. * @param number a number for which the plural message should be formatted.
  340. * If no pattern has been applied to this
  341. * PluralFormat object yet, the formatted number
  342. * will be returned.
  343. * @param appendTo output parameter to receive result.
  344. * result is appended to existing contents.
  345. * @param pos On input: an alignment field, if desired.
  346. * On output: the offsets of the alignment field.
  347. * @param status output param set to success/failure code on exit, which
  348. * must not indicate a failure before the function call.
  349. * @return the string containing the formatted plural message.
  350. * @stable ICU 4.0
  351. */
  352. UnicodeString& format(double number,
  353. UnicodeString& appendTo,
  354. FieldPosition& pos,
  355. UErrorCode& status) const;
  356. #ifndef U_HIDE_DEPRECATED_API
  357. /**
  358. * Sets the locale used by this <code>PluralFormat</code> object.
  359. * Note: Calling this method resets this <code>PluralFormat</code> object,
  360. * i.e., a pattern that was applied previously will be removed,
  361. * and the NumberFormat is set to the default number format for
  362. * the locale. The resulting format behaves the same as one
  363. * constructed from {@link #PluralFormat(const Locale& locale, UPluralType type, UErrorCode& status)}
  364. * with UPLURAL_TYPE_CARDINAL.
  365. * @param locale the <code>locale</code> to use to configure the formatter.
  366. * @param status output param set to success/failure code on exit, which
  367. * must not indicate a failure before the function call.
  368. * @deprecated ICU 50 This method clears the pattern and might create
  369. * a different kind of PluralRules instance;
  370. * use one of the constructors to create a new instance instead.
  371. */
  372. void setLocale(const Locale& locale, UErrorCode& status);
  373. #endif /* U_HIDE_DEPRECATED_API */
  374. /**
  375. * Sets the number format used by this formatter. You only need to
  376. * call this if you want a different number format than the default
  377. * formatter for the locale.
  378. * @param format the number format to use.
  379. * @param status output param set to success/failure code on exit, which
  380. * must not indicate a failure before the function call.
  381. * @stable ICU 4.0
  382. */
  383. void setNumberFormat(const NumberFormat* format, UErrorCode& status);
  384. /**
  385. * Assignment operator
  386. *
  387. * @param other the PluralFormat object to copy from.
  388. * @stable ICU 4.0
  389. */
  390. PluralFormat& operator=(const PluralFormat& other);
  391. /**
  392. * Return true if another object is semantically equal to this one.
  393. *
  394. * @param other the Format object to be compared with.
  395. * @return true if other is semantically equal to this.
  396. * @stable ICU 4.0
  397. */
  398. virtual bool operator==(const Format& other) const override;
  399. /**
  400. * Return true if another object is semantically unequal to this one.
  401. *
  402. * @param other the Format object to be compared with.
  403. * @return true if other is semantically unequal to this.
  404. * @stable ICU 4.0
  405. */
  406. virtual bool operator!=(const Format& other) const;
  407. /**
  408. * Clones this Format object polymorphically. The caller owns the
  409. * result and should delete it when done.
  410. * @stable ICU 4.0
  411. */
  412. virtual PluralFormat* clone() const override;
  413. /**
  414. * Formats a plural message for a number taken from a Formattable object.
  415. *
  416. * @param obj The object containing a number for which the
  417. * plural message should be formatted.
  418. * The object must be of a numeric type.
  419. * @param appendTo output parameter to receive result.
  420. * Result is appended to existing contents.
  421. * @param pos On input: an alignment field, if desired.
  422. * On output: the offsets of the alignment field.
  423. * @param status output param filled with success/failure status.
  424. * @return Reference to 'appendTo' parameter.
  425. * @stable ICU 4.0
  426. */
  427. UnicodeString& format(const Formattable& obj,
  428. UnicodeString& appendTo,
  429. FieldPosition& pos,
  430. UErrorCode& status) const override;
  431. /**
  432. * Returns the pattern that was set via applyPattern() or a constructor.
  433. *
  434. * @param appendTo output parameter to receive result.
  435. * Result is appended to existing contents.
  436. * @return the UnicodeString with inserted pattern.
  437. * @stable ICU 4.0
  438. */
  439. UnicodeString& toPattern(UnicodeString& appendTo);
  440. /**
  441. * This method is not yet supported by <code>PluralFormat</code>.
  442. * <P>
  443. * Before calling, set parse_pos.index to the offset you want to start
  444. * parsing at in the source. After calling, parse_pos.index is the end of
  445. * the text you parsed. If error occurs, index is unchanged.
  446. * <P>
  447. * When parsing, leading whitespace is discarded (with a successful parse),
  448. * while trailing whitespace is left as is.
  449. * <P>
  450. * See Format::parseObject() for more.
  451. *
  452. * @param source The string to be parsed into an object.
  453. * @param result Formattable to be set to the parse result.
  454. * If parse fails, return contents are undefined.
  455. * @param parse_pos The position to start parsing at. Upon return
  456. * this param is set to the position after the
  457. * last character successfully parsed. If the
  458. * source is not parsed successfully, this param
  459. * will remain unchanged.
  460. * @stable ICU 4.0
  461. */
  462. virtual void parseObject(const UnicodeString& source,
  463. Formattable& result,
  464. ParsePosition& parse_pos) const override;
  465. /**
  466. * ICU "poor man's RTTI", returns a UClassID for this class.
  467. *
  468. * @stable ICU 4.0
  469. *
  470. */
  471. static UClassID U_EXPORT2 getStaticClassID();
  472. /**
  473. * ICU "poor man's RTTI", returns a UClassID for the actual class.
  474. *
  475. * @stable ICU 4.0
  476. */
  477. virtual UClassID getDynamicClassID() const override;
  478. private:
  479. /**
  480. * Abstract selector that maps a number to a plural keyword. @internal (private)
  481. */
  482. class U_I18N_API PluralSelector : public UMemory {
  483. public:
  484. virtual ~PluralSelector();
  485. /**
  486. * Given a number, returns the appropriate PluralFormat keyword.
  487. *
  488. * @param context worker object for the selector.
  489. * @param number The number to be plural-formatted.
  490. * @param ec Error code.
  491. * @return The selected PluralFormat keyword.
  492. * @internal (private)
  493. */
  494. virtual UnicodeString select(void *context, double number, UErrorCode& ec) const = 0;
  495. };
  496. class U_I18N_API PluralSelectorAdapter : public PluralSelector { // PluralSelector implementation that holds a PluralRules pointer
  497. public:
  498. PluralSelectorAdapter() : pluralRules(nullptr) {
  499. }
  500. virtual ~PluralSelectorAdapter();
  501. virtual UnicodeString select(void *context, double number, UErrorCode& /*ec*/) const override;
  502. void reset(); // NOTE(review): presumably releases/clears pluralRules; confirm in plurfmt.cpp
  503. PluralRules* pluralRules; // starts as nullptr (set by the constructor above)
  504. };
  505. Locale locale; // NOTE(review): presumably the locale passed to the constructor / setLocale(); confirm
  506. MessagePattern msgPattern; // NOTE(review): presumably the parsed form of the applied pattern; confirm
  507. NumberFormat* numberFormat; // pointer field; see copyObjects() below
  508. double offset; // NOTE(review): presumably the "offset:" value from the pattern; confirm
  509. PluralSelectorAdapter pluralRulesWrapper; // adapter object wrapping the PluralRules pointer
  510. PluralFormat() = delete; // default constructor not implemented
  511. void init(const PluralRules* rules, UPluralType type, UErrorCode& status); // NOTE(review): presumably the shared constructor helper; confirm
  512. /**
  513. * Copies dynamically allocated values (pointer fields).
  514. * Others are copied using their copy constructors and assignment operators.
  515. */
  516. void copyObjects(const PluralFormat& other);
  517. UnicodeString& format(const Formattable& numberObject, double number,
  518. UnicodeString& appendTo,
  519. FieldPosition& pos,
  520. UErrorCode& status) const; // private overload taking both the Formattable and its double value
  521. /**
  522. * Finds the PluralFormat sub-message for the given number, or the "other" sub-message.
  523. * @param pattern A MessagePattern.
  524. * @param partIndex the index of the first PluralFormat argument style part.
  525. * @param selector the PluralSelector for mapping the number (minus offset) to a keyword.
  526. * @param context worker object for the selector.
  527. * @param number a number to be matched to one of the PluralFormat argument's explicit values,
  528. * or mapped via the PluralSelector.
  529. * @param ec ICU error code.
  530. * @return the sub-message start part index.
  531. */
  532. static int32_t findSubMessage(
  533. const MessagePattern& pattern, int32_t partIndex,
  534. const PluralSelector& selector, void *context, double number, UErrorCode& ec);
  535. void parseType(const UnicodeString& source, const NFRule *rbnfLenientScanner,
  536. Formattable& result, FieldPosition& pos) const; // NOTE(review): presumably used by the friend class NFRule; confirm
  537. friend class MessageFormat;
  538. friend class NFRule;
  539. };
  540. U_NAMESPACE_END
  541. #endif /* #if !UCONFIG_NO_FORMATTING */
  542. #endif /* U_SHOW_CPLUSPLUS_API */
  543. #endif // PLURFMT
  544. //eof