msgfmt.cpp 71 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009
  1. // © 2016 and later: Unicode, Inc. and others.
  2. // License & terms of use: http://www.unicode.org/copyright.html
  3. /********************************************************************
  4. * COPYRIGHT:
  5. * Copyright (c) 1997-2015, International Business Machines Corporation and
  6. * others. All Rights Reserved.
  7. ********************************************************************
  8. *
  9. * File MSGFMT.CPP
  10. *
  11. * Modification History:
  12. *
  13. * Date Name Description
  14. * 02/19/97 aliu Converted from java.
  15. * 03/20/97 helena Finished first cut of implementation.
  16. * 04/10/97 aliu Made to work on AIX. Added stoi to replace wtoi.
  17. * 06/11/97 helena Fixed addPattern to take the pattern correctly.
  18. * 06/17/97 helena Fixed the getPattern to return the correct pattern.
  19. * 07/09/97 helena Made ParsePosition into a class.
  20. * 02/22/99 stephen Removed character literals for EBCDIC safety
  21. * 11/01/09 kirtig Added SelectFormat
  22. ********************************************************************/
  23. #include "unicode/utypes.h"
  24. #if !UCONFIG_NO_FORMATTING
  25. #include "unicode/appendable.h"
  26. #include "unicode/choicfmt.h"
  27. #include "unicode/datefmt.h"
  28. #include "unicode/decimfmt.h"
  29. #include "unicode/localpointer.h"
  30. #include "unicode/msgfmt.h"
  31. #include "unicode/numberformatter.h"
  32. #include "unicode/plurfmt.h"
  33. #include "unicode/rbnf.h"
  34. #include "unicode/selfmt.h"
  35. #include "unicode/smpdtfmt.h"
  36. #include "unicode/umsg.h"
  37. #include "unicode/ustring.h"
  38. #include "cmemory.h"
  39. #include "patternprops.h"
  40. #include "messageimpl.h"
  41. #include "msgfmt_impl.h"
  42. #include "plurrule_impl.h"
  43. #include "uassert.h"
  44. #include "uelement.h"
  45. #include "uhash.h"
  46. #include "ustrfmt.h"
  47. #include "util.h"
  48. #include "uvector.h"
  49. #include "number_decimalquantity.h"
  50. // *****************************************************************************
  51. // class MessageFormat
  52. // *****************************************************************************
  // Pattern syntax characters, as char16_t values.
  #define SINGLE_QUOTE      (static_cast<char16_t>(0x0027))
  #define COMMA             (static_cast<char16_t>(0x002C))
  #define LEFT_CURLY_BRACE  (static_cast<char16_t>(0x007B))
  #define RIGHT_CURLY_BRACE (static_cast<char16_t>(0x007D))

  //---------------------------------------
  // static data

  // Argument type keywords (NUL-terminated UTF-16 strings).
  static const char16_t ID_NUMBER[]   = u"number";
  static const char16_t ID_DATE[]     = u"date";
  static const char16_t ID_TIME[]     = u"time";
  static const char16_t ID_SPELLOUT[] = u"spellout";
  static const char16_t ID_ORDINAL[]  = u"ordinal";
  static const char16_t ID_DURATION[] = u"duration";

  // nullptr-terminated list of the argument type keywords.
  static const char16_t * const TYPE_IDS[] = {
      ID_NUMBER, ID_DATE, ID_TIME, ID_SPELLOUT, ID_ORDINAL, ID_DURATION,
      nullptr
  };

  // The empty string stands for the default style, so that nullptr can be
  // reserved for marking the end of each style list.
  static const char16_t ID_EMPTY[]    = u"";
  static const char16_t ID_CURRENCY[] = u"currency";
  static const char16_t ID_PERCENT[]  = u"percent";
  static const char16_t ID_INTEGER[]  = u"integer";

  // nullptr-terminated NumberFormat style keywords:
  // default, currency, percent, integer.
  static const char16_t * const NUMBER_STYLE_IDS[] = {
      ID_EMPTY, ID_CURRENCY, ID_PERCENT, ID_INTEGER,
      nullptr
  };

  static const char16_t ID_SHORT[]  = u"short";
  static const char16_t ID_MEDIUM[] = u"medium";
  static const char16_t ID_LONG[]   = u"long";
  static const char16_t ID_FULL[]   = u"full";

  // nullptr-terminated DateFormat style keywords:
  // default, short, medium, long, full.
  static const char16_t * const DATE_STYLE_IDS[] = {
      ID_EMPTY, ID_SHORT, ID_MEDIUM, ID_LONG, ID_FULL,
      nullptr
  };
  128. static const icu::DateFormat::EStyle DATE_STYLES[] = {
  129. icu::DateFormat::kDefault,
  130. icu::DateFormat::kShort,
  131. icu::DateFormat::kMedium,
  132. icu::DateFormat::kLong,
  133. icu::DateFormat::kFull,
  134. };
  // Smallest capacity that allocateArgTypes() will allocate.
  static constexpr int32_t DEFAULT_INITIAL_CAPACITY = 10;

  // "null" and "other" as NUL-terminated UTF-16 strings.
  static const char16_t NULL_STRING[]  = u"null";
  static const char16_t OTHER_STRING[] = u"other";
  142. U_CDECL_BEGIN
  143. static UBool U_CALLCONV equalFormatsForHash(const UHashTok key1,
  144. const UHashTok key2) {
  145. return icu::MessageFormat::equalFormats(key1.pointer, key2.pointer);
  146. }
  147. U_CDECL_END
  148. U_NAMESPACE_BEGIN
  149. // -------------------------------------
  150. UOBJECT_DEFINE_RTTI_IMPLEMENTATION(MessageFormat)
  151. UOBJECT_DEFINE_RTTI_IMPLEMENTATION(FormatNameEnumeration)
  152. //--------------------------------------------------------------------
  153. /**
  154. * Convert an integer value to a string and append the result to
  155. * the given UnicodeString.
  156. */
  157. static UnicodeString& itos(int32_t i, UnicodeString& appendTo) {
  158. char16_t temp[16];
  159. uprv_itou(temp,16,i,10,0); // 10 == radix
  160. appendTo.append(temp, -1);
  161. return appendTo;
  162. }
  163. // AppendableWrapper: encapsulates the result of formatting, keeping track
  164. // of the string and its length.
  165. class AppendableWrapper : public UMemory {
  166. public:
  167. AppendableWrapper(Appendable& appendable) : app(appendable), len(0) {
  168. }
  169. void append(const UnicodeString& s) {
  170. app.appendString(s.getBuffer(), s.length());
  171. len += s.length();
  172. }
  173. void append(const char16_t* s, const int32_t sLength) {
  174. app.appendString(s, sLength);
  175. len += sLength;
  176. }
  177. void append(const UnicodeString& s, int32_t start, int32_t length) {
  178. append(s.tempSubString(start, length));
  179. }
  180. void formatAndAppend(const Format* formatter, const Formattable& arg, UErrorCode& ec) {
  181. UnicodeString s;
  182. formatter->format(arg, s, ec);
  183. if (U_SUCCESS(ec)) {
  184. append(s);
  185. }
  186. }
  187. void formatAndAppend(const Format* formatter, const Formattable& arg,
  188. const UnicodeString &argString, UErrorCode& ec) {
  189. if (!argString.isEmpty()) {
  190. if (U_SUCCESS(ec)) {
  191. append(argString);
  192. }
  193. } else {
  194. formatAndAppend(formatter, arg, ec);
  195. }
  196. }
  197. int32_t length() {
  198. return len;
  199. }
  200. private:
  201. Appendable& app;
  202. int32_t len;
  203. };
  204. // -------------------------------------
  205. // Creates a MessageFormat instance based on the pattern.
  206. MessageFormat::MessageFormat(const UnicodeString& pattern,
  207. UErrorCode& success)
  208. : fLocale(Locale::getDefault()), // Uses the default locale
  209. msgPattern(success),
  210. formatAliases(nullptr),
  211. formatAliasesCapacity(0),
  212. argTypes(nullptr),
  213. argTypeCount(0),
  214. argTypeCapacity(0),
  215. hasArgTypeConflicts(false),
  216. defaultNumberFormat(nullptr),
  217. defaultDateFormat(nullptr),
  218. cachedFormatters(nullptr),
  219. customFormatArgStarts(nullptr),
  220. pluralProvider(*this, UPLURAL_TYPE_CARDINAL),
  221. ordinalProvider(*this, UPLURAL_TYPE_ORDINAL)
  222. {
  223. setLocaleIDs(fLocale.getName(), fLocale.getName());
  224. applyPattern(pattern, success);
  225. }
  226. MessageFormat::MessageFormat(const UnicodeString& pattern,
  227. const Locale& newLocale,
  228. UErrorCode& success)
  229. : fLocale(newLocale),
  230. msgPattern(success),
  231. formatAliases(nullptr),
  232. formatAliasesCapacity(0),
  233. argTypes(nullptr),
  234. argTypeCount(0),
  235. argTypeCapacity(0),
  236. hasArgTypeConflicts(false),
  237. defaultNumberFormat(nullptr),
  238. defaultDateFormat(nullptr),
  239. cachedFormatters(nullptr),
  240. customFormatArgStarts(nullptr),
  241. pluralProvider(*this, UPLURAL_TYPE_CARDINAL),
  242. ordinalProvider(*this, UPLURAL_TYPE_ORDINAL)
  243. {
  244. setLocaleIDs(fLocale.getName(), fLocale.getName());
  245. applyPattern(pattern, success);
  246. }
  247. MessageFormat::MessageFormat(const UnicodeString& pattern,
  248. const Locale& newLocale,
  249. UParseError& parseError,
  250. UErrorCode& success)
  251. : fLocale(newLocale),
  252. msgPattern(success),
  253. formatAliases(nullptr),
  254. formatAliasesCapacity(0),
  255. argTypes(nullptr),
  256. argTypeCount(0),
  257. argTypeCapacity(0),
  258. hasArgTypeConflicts(false),
  259. defaultNumberFormat(nullptr),
  260. defaultDateFormat(nullptr),
  261. cachedFormatters(nullptr),
  262. customFormatArgStarts(nullptr),
  263. pluralProvider(*this, UPLURAL_TYPE_CARDINAL),
  264. ordinalProvider(*this, UPLURAL_TYPE_ORDINAL)
  265. {
  266. setLocaleIDs(fLocale.getName(), fLocale.getName());
  267. applyPattern(pattern, parseError, success);
  268. }
  269. MessageFormat::MessageFormat(const MessageFormat& that)
  270. :
  271. Format(that),
  272. fLocale(that.fLocale),
  273. msgPattern(that.msgPattern),
  274. formatAliases(nullptr),
  275. formatAliasesCapacity(0),
  276. argTypes(nullptr),
  277. argTypeCount(0),
  278. argTypeCapacity(0),
  279. hasArgTypeConflicts(that.hasArgTypeConflicts),
  280. defaultNumberFormat(nullptr),
  281. defaultDateFormat(nullptr),
  282. cachedFormatters(nullptr),
  283. customFormatArgStarts(nullptr),
  284. pluralProvider(*this, UPLURAL_TYPE_CARDINAL),
  285. ordinalProvider(*this, UPLURAL_TYPE_ORDINAL)
  286. {
  287. // This will take care of creating the hash tables (since they are nullptr).
  288. UErrorCode ec = U_ZERO_ERROR;
  289. copyObjects(that, ec);
  290. if (U_FAILURE(ec)) {
  291. resetPattern();
  292. }
  293. }
  294. MessageFormat::~MessageFormat()
  295. {
  296. uhash_close(cachedFormatters);
  297. uhash_close(customFormatArgStarts);
  298. uprv_free(argTypes);
  299. uprv_free(formatAliases);
  300. delete defaultNumberFormat;
  301. delete defaultDateFormat;
  302. }
  303. //--------------------------------------------------------------------
  304. // Variable-size array management
  305. /**
  306. * Allocate argTypes[] to at least the given capacity and return
  307. * true if successful. If not, leave argTypes[] unchanged.
  308. *
  309. * If argTypes is nullptr, allocate it. If it is not nullptr, enlarge it
  310. * if necessary to be at least as large as specified.
  311. */
  312. UBool MessageFormat::allocateArgTypes(int32_t capacity, UErrorCode& status) {
  313. if (U_FAILURE(status)) {
  314. return false;
  315. }
  316. if (argTypeCapacity >= capacity) {
  317. return true;
  318. }
  319. if (capacity < DEFAULT_INITIAL_CAPACITY) {
  320. capacity = DEFAULT_INITIAL_CAPACITY;
  321. } else if (capacity < 2*argTypeCapacity) {
  322. capacity = 2*argTypeCapacity;
  323. }
  324. Formattable::Type* a = (Formattable::Type*)
  325. uprv_realloc(argTypes, sizeof(*argTypes) * capacity);
  326. if (a == nullptr) {
  327. status = U_MEMORY_ALLOCATION_ERROR;
  328. return false;
  329. }
  330. argTypes = a;
  331. argTypeCapacity = capacity;
  332. return true;
  333. }
  334. // -------------------------------------
  335. // assignment operator
  336. const MessageFormat&
  337. MessageFormat::operator=(const MessageFormat& that)
  338. {
  339. if (this != &that) {
  340. // Calls the super class for assignment first.
  341. Format::operator=(that);
  342. setLocale(that.fLocale);
  343. msgPattern = that.msgPattern;
  344. hasArgTypeConflicts = that.hasArgTypeConflicts;
  345. UErrorCode ec = U_ZERO_ERROR;
  346. copyObjects(that, ec);
  347. if (U_FAILURE(ec)) {
  348. resetPattern();
  349. }
  350. }
  351. return *this;
  352. }
  353. bool
  354. MessageFormat::operator==(const Format& rhs) const
  355. {
  356. if (this == &rhs) return true;
  357. // Check class ID before checking MessageFormat members
  358. if (!Format::operator==(rhs)) return false;
  359. const MessageFormat& that = static_cast<const MessageFormat&>(rhs);
  360. if (msgPattern != that.msgPattern ||
  361. fLocale != that.fLocale) {
  362. return false;
  363. }
  364. // Compare hashtables.
  365. if ((customFormatArgStarts == nullptr) != (that.customFormatArgStarts == nullptr)) {
  366. return false;
  367. }
  368. if (customFormatArgStarts == nullptr) {
  369. return true;
  370. }
  371. UErrorCode ec = U_ZERO_ERROR;
  372. const int32_t count = uhash_count(customFormatArgStarts);
  373. const int32_t rhs_count = uhash_count(that.customFormatArgStarts);
  374. if (count != rhs_count) {
  375. return false;
  376. }
  377. int32_t idx = 0, rhs_idx = 0, pos = UHASH_FIRST, rhs_pos = UHASH_FIRST;
  378. for (; idx < count && rhs_idx < rhs_count && U_SUCCESS(ec); ++idx, ++rhs_idx) {
  379. const UHashElement* cur = uhash_nextElement(customFormatArgStarts, &pos);
  380. const UHashElement* rhs_cur = uhash_nextElement(that.customFormatArgStarts, &rhs_pos);
  381. if (cur->key.integer != rhs_cur->key.integer) {
  382. return false;
  383. }
  384. const Format* format = (const Format*)uhash_iget(cachedFormatters, cur->key.integer);
  385. const Format* rhs_format = (const Format*)uhash_iget(that.cachedFormatters, rhs_cur->key.integer);
  386. if (*format != *rhs_format) {
  387. return false;
  388. }
  389. }
  390. return true;
  391. }
  392. // -------------------------------------
  393. // Creates a copy of this MessageFormat, the caller owns the copy.
  394. MessageFormat*
  395. MessageFormat::clone() const
  396. {
  397. return new MessageFormat(*this);
  398. }
  399. // -------------------------------------
  400. // Sets the locale of this MessageFormat object to theLocale.
  401. void
  402. MessageFormat::setLocale(const Locale& theLocale)
  403. {
  404. if (fLocale != theLocale) {
  405. delete defaultNumberFormat;
  406. defaultNumberFormat = nullptr;
  407. delete defaultDateFormat;
  408. defaultDateFormat = nullptr;
  409. fLocale = theLocale;
  410. setLocaleIDs(fLocale.getName(), fLocale.getName());
  411. pluralProvider.reset();
  412. ordinalProvider.reset();
  413. }
  414. }
  415. // -------------------------------------
  416. // Gets the locale of this MessageFormat object.
  417. const Locale&
  418. MessageFormat::getLocale() const
  419. {
  420. return fLocale;
  421. }
  422. void
  423. MessageFormat::applyPattern(const UnicodeString& newPattern,
  424. UErrorCode& status)
  425. {
  426. UParseError parseError;
  427. applyPattern(newPattern,parseError,status);
  428. }
  429. // -------------------------------------
  430. // Applies the new pattern and returns an error if the pattern
  431. // is not correct.
  432. void
  433. MessageFormat::applyPattern(const UnicodeString& pattern,
  434. UParseError& parseError,
  435. UErrorCode& ec)
  436. {
  437. if(U_FAILURE(ec)) {
  438. return;
  439. }
  440. msgPattern.parse(pattern, &parseError, ec);
  441. cacheExplicitFormats(ec);
  442. if (U_FAILURE(ec)) {
  443. resetPattern();
  444. }
  445. }
  446. void MessageFormat::resetPattern() {
  447. msgPattern.clear();
  448. uhash_close(cachedFormatters);
  449. cachedFormatters = nullptr;
  450. uhash_close(customFormatArgStarts);
  451. customFormatArgStarts = nullptr;
  452. argTypeCount = 0;
  453. hasArgTypeConflicts = false;
  454. }
  455. void
  456. MessageFormat::applyPattern(const UnicodeString& pattern,
  457. UMessagePatternApostropheMode aposMode,
  458. UParseError* parseError,
  459. UErrorCode& status) {
  460. if (aposMode != msgPattern.getApostropheMode()) {
  461. msgPattern.clearPatternAndSetApostropheMode(aposMode);
  462. }
  463. UParseError tempParseError;
  464. applyPattern(pattern, (parseError == nullptr) ? tempParseError : *parseError, status);
  465. }
  466. // -------------------------------------
  467. // Converts this MessageFormat instance to a pattern.
  468. UnicodeString&
  469. MessageFormat::toPattern(UnicodeString& appendTo) const {
  470. if ((customFormatArgStarts != nullptr && 0 != uhash_count(customFormatArgStarts)) ||
  471. 0 == msgPattern.countParts()
  472. ) {
  473. appendTo.setToBogus();
  474. return appendTo;
  475. }
  476. return appendTo.append(msgPattern.getPatternString());
  477. }
  478. int32_t MessageFormat::nextTopLevelArgStart(int32_t partIndex) const {
  479. if (partIndex != 0) {
  480. partIndex = msgPattern.getLimitPartIndex(partIndex);
  481. }
  482. for (;;) {
  483. UMessagePatternPartType type = msgPattern.getPartType(++partIndex);
  484. if (type == UMSGPAT_PART_TYPE_ARG_START) {
  485. return partIndex;
  486. }
  487. if (type == UMSGPAT_PART_TYPE_MSG_LIMIT) {
  488. return -1;
  489. }
  490. }
  491. }
  492. void MessageFormat::setArgStartFormat(int32_t argStart,
  493. Format* formatter,
  494. UErrorCode& status) {
  495. if (U_FAILURE(status)) {
  496. delete formatter;
  497. return;
  498. }
  499. if (cachedFormatters == nullptr) {
  500. cachedFormatters=uhash_open(uhash_hashLong, uhash_compareLong,
  501. equalFormatsForHash, &status);
  502. if (U_FAILURE(status)) {
  503. delete formatter;
  504. return;
  505. }
  506. uhash_setValueDeleter(cachedFormatters, uprv_deleteUObject);
  507. }
  508. if (formatter == nullptr) {
  509. formatter = new DummyFormat();
  510. }
  511. uhash_iput(cachedFormatters, argStart, formatter, &status);
  512. }
  513. UBool MessageFormat::argNameMatches(int32_t partIndex, const UnicodeString& argName, int32_t argNumber) {
  514. const MessagePattern::Part& part = msgPattern.getPart(partIndex);
  515. return part.getType() == UMSGPAT_PART_TYPE_ARG_NAME ?
  516. msgPattern.partSubstringMatches(part, argName) :
  517. part.getValue() == argNumber; // ARG_NUMBER
  518. }
  519. // Sets a custom formatter for a MessagePattern ARG_START part index.
  520. // "Custom" formatters are provided by the user via setFormat() or similar APIs.
  521. void MessageFormat::setCustomArgStartFormat(int32_t argStart,
  522. Format* formatter,
  523. UErrorCode& status) {
  524. setArgStartFormat(argStart, formatter, status);
  525. if (customFormatArgStarts == nullptr) {
  526. customFormatArgStarts=uhash_open(uhash_hashLong, uhash_compareLong,
  527. nullptr, &status);
  528. }
  529. uhash_iputi(customFormatArgStarts, argStart, 1, &status);
  530. }
  531. Format* MessageFormat::getCachedFormatter(int32_t argumentNumber) const {
  532. if (cachedFormatters == nullptr) {
  533. return nullptr;
  534. }
  535. void* ptr = uhash_iget(cachedFormatters, argumentNumber);
  536. if (ptr != nullptr && dynamic_cast<DummyFormat*>((Format*)ptr) == nullptr) {
  537. return (Format*) ptr;
  538. } else {
  539. // Not cached, or a DummyFormat representing setFormat(nullptr).
  540. return nullptr;
  541. }
  542. }
  543. // -------------------------------------
  544. // Adopts the new formats array and updates the array count.
  545. // This MessageFormat instance owns the new formats.
  546. void
  547. MessageFormat::adoptFormats(Format** newFormats,
  548. int32_t count) {
  549. if (newFormats == nullptr || count < 0) {
  550. return;
  551. }
  552. // Throw away any cached formatters.
  553. if (cachedFormatters != nullptr) {
  554. uhash_removeAll(cachedFormatters);
  555. }
  556. if (customFormatArgStarts != nullptr) {
  557. uhash_removeAll(customFormatArgStarts);
  558. }
  559. int32_t formatNumber = 0;
  560. UErrorCode status = U_ZERO_ERROR;
  561. for (int32_t partIndex = 0;
  562. formatNumber < count && U_SUCCESS(status) &&
  563. (partIndex = nextTopLevelArgStart(partIndex)) >= 0;) {
  564. setCustomArgStartFormat(partIndex, newFormats[formatNumber], status);
  565. ++formatNumber;
  566. }
  567. // Delete those that didn't get used (if any).
  568. for (; formatNumber < count; ++formatNumber) {
  569. delete newFormats[formatNumber];
  570. }
  571. }
  572. // -------------------------------------
  573. // Sets the new formats array and updates the array count.
  574. // This MessageFormat instance makes a copy of the new formats.
  575. void
  576. MessageFormat::setFormats(const Format** newFormats,
  577. int32_t count) {
  578. if (newFormats == nullptr || count < 0) {
  579. return;
  580. }
  581. // Throw away any cached formatters.
  582. if (cachedFormatters != nullptr) {
  583. uhash_removeAll(cachedFormatters);
  584. }
  585. if (customFormatArgStarts != nullptr) {
  586. uhash_removeAll(customFormatArgStarts);
  587. }
  588. UErrorCode status = U_ZERO_ERROR;
  589. int32_t formatNumber = 0;
  590. for (int32_t partIndex = 0;
  591. formatNumber < count && U_SUCCESS(status) && (partIndex = nextTopLevelArgStart(partIndex)) >= 0;) {
  592. Format* newFormat = nullptr;
  593. if (newFormats[formatNumber] != nullptr) {
  594. newFormat = newFormats[formatNumber]->clone();
  595. if (newFormat == nullptr) {
  596. status = U_MEMORY_ALLOCATION_ERROR;
  597. }
  598. }
  599. setCustomArgStartFormat(partIndex, newFormat, status);
  600. ++formatNumber;
  601. }
  602. if (U_FAILURE(status)) {
  603. resetPattern();
  604. }
  605. }
  606. // -------------------------------------
  607. // Adopt a single format by format number.
  608. // Do nothing if the format number is not less than the array count.
  609. void
  610. MessageFormat::adoptFormat(int32_t n, Format *newFormat) {
  611. LocalPointer<Format> p(newFormat);
  612. if (n >= 0) {
  613. int32_t formatNumber = 0;
  614. for (int32_t partIndex = 0; (partIndex = nextTopLevelArgStart(partIndex)) >= 0;) {
  615. if (n == formatNumber) {
  616. UErrorCode status = U_ZERO_ERROR;
  617. setCustomArgStartFormat(partIndex, p.orphan(), status);
  618. return;
  619. }
  620. ++formatNumber;
  621. }
  622. }
  623. }
  624. // -------------------------------------
  625. // Adopt a single format by format name.
  626. // Do nothing if there is no match of formatName.
  627. void
  628. MessageFormat::adoptFormat(const UnicodeString& formatName,
  629. Format* formatToAdopt,
  630. UErrorCode& status) {
  631. LocalPointer<Format> p(formatToAdopt);
  632. if (U_FAILURE(status)) {
  633. return;
  634. }
  635. int32_t argNumber = MessagePattern::validateArgumentName(formatName);
  636. if (argNumber < UMSGPAT_ARG_NAME_NOT_NUMBER) {
  637. status = U_ILLEGAL_ARGUMENT_ERROR;
  638. return;
  639. }
  640. for (int32_t partIndex = 0;
  641. (partIndex = nextTopLevelArgStart(partIndex)) >= 0 && U_SUCCESS(status);
  642. ) {
  643. if (argNameMatches(partIndex + 1, formatName, argNumber)) {
  644. Format* f;
  645. if (p.isValid()) {
  646. f = p.orphan();
  647. } else if (formatToAdopt == nullptr) {
  648. f = nullptr;
  649. } else {
  650. f = formatToAdopt->clone();
  651. if (f == nullptr) {
  652. status = U_MEMORY_ALLOCATION_ERROR;
  653. return;
  654. }
  655. }
  656. setCustomArgStartFormat(partIndex, f, status);
  657. }
  658. }
  659. }
  660. // -------------------------------------
  661. // Set a single format.
  662. // Do nothing if the variable is not less than the array count.
  663. void
  664. MessageFormat::setFormat(int32_t n, const Format& newFormat) {
  665. if (n >= 0) {
  666. int32_t formatNumber = 0;
  667. for (int32_t partIndex = 0;
  668. (partIndex = nextTopLevelArgStart(partIndex)) >= 0;) {
  669. if (n == formatNumber) {
  670. Format* new_format = newFormat.clone();
  671. if (new_format) {
  672. UErrorCode status = U_ZERO_ERROR;
  673. setCustomArgStartFormat(partIndex, new_format, status);
  674. }
  675. return;
  676. }
  677. ++formatNumber;
  678. }
  679. }
  680. }
  681. // -------------------------------------
  682. // Get a single format by format name.
  683. // Do nothing if the variable is not less than the array count.
  684. Format *
  685. MessageFormat::getFormat(const UnicodeString& formatName, UErrorCode& status) {
  686. if (U_FAILURE(status) || cachedFormatters == nullptr) return nullptr;
  687. int32_t argNumber = MessagePattern::validateArgumentName(formatName);
  688. if (argNumber < UMSGPAT_ARG_NAME_NOT_NUMBER) {
  689. status = U_ILLEGAL_ARGUMENT_ERROR;
  690. return nullptr;
  691. }
  692. for (int32_t partIndex = 0; (partIndex = nextTopLevelArgStart(partIndex)) >= 0;) {
  693. if (argNameMatches(partIndex + 1, formatName, argNumber)) {
  694. return getCachedFormatter(partIndex);
  695. }
  696. }
  697. return nullptr;
  698. }
  699. // -------------------------------------
  700. // Set a single format by format name
  701. // Do nothing if the variable is not less than the array count.
  702. void
  703. MessageFormat::setFormat(const UnicodeString& formatName,
  704. const Format& newFormat,
  705. UErrorCode& status) {
  706. if (U_FAILURE(status)) return;
  707. int32_t argNumber = MessagePattern::validateArgumentName(formatName);
  708. if (argNumber < UMSGPAT_ARG_NAME_NOT_NUMBER) {
  709. status = U_ILLEGAL_ARGUMENT_ERROR;
  710. return;
  711. }
  712. for (int32_t partIndex = 0;
  713. (partIndex = nextTopLevelArgStart(partIndex)) >= 0 && U_SUCCESS(status);
  714. ) {
  715. if (argNameMatches(partIndex + 1, formatName, argNumber)) {
  716. Format* new_format = newFormat.clone();
  717. if (new_format == nullptr) {
  718. status = U_MEMORY_ALLOCATION_ERROR;
  719. return;
  720. }
  721. setCustomArgStartFormat(partIndex, new_format, status);
  722. }
  723. }
  724. }
  725. // -------------------------------------
  726. // Gets the format array.
  727. const Format**
  728. MessageFormat::getFormats(int32_t& cnt) const
  729. {
  730. // This old API returns an array (which we hold) of Format*
  731. // pointers. The array is valid up to the next call to any
  732. // method on this object. We construct and resize an array
  733. // on demand that contains aliases to the subformats[i].format
  734. // pointers.
  735. // Get total required capacity first (it's refreshed on each call).
  736. int32_t totalCapacity = 0;
  737. for (int32_t partIndex = 0; (partIndex = nextTopLevelArgStart(partIndex)) >= 0; ++totalCapacity) {}
  738. MessageFormat* t = const_cast<MessageFormat*> (this);
  739. cnt = 0;
  740. if (formatAliases == nullptr) {
  741. t->formatAliasesCapacity = totalCapacity;
  742. Format** a = (Format**)
  743. uprv_malloc(sizeof(Format*) * formatAliasesCapacity);
  744. if (a == nullptr) {
  745. t->formatAliasesCapacity = 0;
  746. return nullptr;
  747. }
  748. t->formatAliases = a;
  749. } else if (totalCapacity > formatAliasesCapacity) {
  750. Format** a = (Format**)
  751. uprv_realloc(formatAliases, sizeof(Format*) * totalCapacity);
  752. if (a == nullptr) {
  753. t->formatAliasesCapacity = 0;
  754. return nullptr;
  755. }
  756. t->formatAliases = a;
  757. t->formatAliasesCapacity = totalCapacity;
  758. }
  759. for (int32_t partIndex = 0; (partIndex = nextTopLevelArgStart(partIndex)) >= 0;) {
  760. t->formatAliases[cnt++] = getCachedFormatter(partIndex);
  761. }
  762. return (const Format**)formatAliases;
  763. }
  764. UnicodeString MessageFormat::getArgName(int32_t partIndex) {
  765. const MessagePattern::Part& part = msgPattern.getPart(partIndex);
  766. return msgPattern.getSubstring(part);
  767. }
  768. StringEnumeration*
  769. MessageFormat::getFormatNames(UErrorCode& status) {
  770. if (U_FAILURE(status)) return nullptr;
  771. LocalPointer<UVector> formatNames(new UVector(status), status);
  772. if (U_FAILURE(status)) {
  773. return nullptr;
  774. }
  775. formatNames->setDeleter(uprv_deleteUObject);
  776. for (int32_t partIndex = 0; (partIndex = nextTopLevelArgStart(partIndex)) >= 0;) {
  777. LocalPointer<UnicodeString> name(getArgName(partIndex + 1).clone(), status);
  778. formatNames->adoptElement(name.orphan(), status);
  779. if (U_FAILURE(status)) return nullptr;
  780. }
  781. LocalPointer<StringEnumeration> nameEnumerator(
  782. new FormatNameEnumeration(std::move(formatNames), status), status);
  783. return U_SUCCESS(status) ? nameEnumerator.orphan() : nullptr;
  784. }
  785. // -------------------------------------
  786. // Formats the source Formattable array and copy into the result buffer.
  787. // Ignore the FieldPosition result for error checking.
  788. UnicodeString&
  789. MessageFormat::format(const Formattable* source,
  790. int32_t cnt,
  791. UnicodeString& appendTo,
  792. FieldPosition& ignore,
  793. UErrorCode& success) const
  794. {
  795. return format(source, nullptr, cnt, appendTo, &ignore, success);
  796. }
  797. // -------------------------------------
  798. // Internally creates a MessageFormat instance based on the
  799. // pattern and formats the arguments Formattable array and
  800. // copy into the appendTo buffer.
  801. UnicodeString&
  802. MessageFormat::format( const UnicodeString& pattern,
  803. const Formattable* arguments,
  804. int32_t cnt,
  805. UnicodeString& appendTo,
  806. UErrorCode& success)
  807. {
  808. MessageFormat temp(pattern, success);
  809. return temp.format(arguments, nullptr, cnt, appendTo, nullptr, success);
  810. }
  811. // -------------------------------------
  812. // Formats the source Formattable object and copy into the
  813. // appendTo buffer. The Formattable object must be an array
  814. // of Formattable instances, returns error otherwise.
  815. UnicodeString&
  816. MessageFormat::format(const Formattable& source,
  817. UnicodeString& appendTo,
  818. FieldPosition& ignore,
  819. UErrorCode& success) const
  820. {
  821. if (U_FAILURE(success))
  822. return appendTo;
  823. if (source.getType() != Formattable::kArray) {
  824. success = U_ILLEGAL_ARGUMENT_ERROR;
  825. return appendTo;
  826. }
  827. int32_t cnt;
  828. const Formattable* tmpPtr = source.getArray(cnt);
  829. return format(tmpPtr, nullptr, cnt, appendTo, &ignore, success);
  830. }
  831. UnicodeString&
  832. MessageFormat::format(const UnicodeString* argumentNames,
  833. const Formattable* arguments,
  834. int32_t count,
  835. UnicodeString& appendTo,
  836. UErrorCode& success) const {
  837. return format(arguments, argumentNames, count, appendTo, nullptr, success);
  838. }
  839. // Does linear search to find the match for an ArgName.
  840. const Formattable* MessageFormat::getArgFromListByName(const Formattable* arguments,
  841. const UnicodeString *argumentNames,
  842. int32_t cnt, UnicodeString& name) const {
  843. for (int32_t i = 0; i < cnt; ++i) {
  844. if (0 == argumentNames[i].compare(name)) {
  845. return arguments + i;
  846. }
  847. }
  848. return nullptr;
  849. }
  850. UnicodeString&
  851. MessageFormat::format(const Formattable* arguments,
  852. const UnicodeString *argumentNames,
  853. int32_t cnt,
  854. UnicodeString& appendTo,
  855. FieldPosition* pos,
  856. UErrorCode& status) const {
  857. if (U_FAILURE(status)) {
  858. return appendTo;
  859. }
  860. UnicodeStringAppendable usapp(appendTo);
  861. AppendableWrapper app(usapp);
  862. format(0, nullptr, arguments, argumentNames, cnt, app, pos, status);
  863. return appendTo;
  864. }
  865. namespace {
  866. /**
  867. * Mutable input/output values for the PluralSelectorProvider.
  868. * Separate so that it is possible to make MessageFormat Freezable.
  869. */
  870. class PluralSelectorContext {
  871. public:
  872. PluralSelectorContext(int32_t start, const UnicodeString &name,
  873. const Formattable &num, double off, UErrorCode &errorCode)
  874. : startIndex(start), argName(name), offset(off),
  875. numberArgIndex(-1), formatter(nullptr), forReplaceNumber(false) {
  876. // number needs to be set even when select() is not called.
  877. // Keep it as a Number/Formattable:
  878. // For format() methods, and to preserve information (e.g., BigDecimal).
  879. if(off == 0) {
  880. number = num;
  881. } else {
  882. number = num.getDouble(errorCode) - off;
  883. }
  884. }
  885. // Input values for plural selection with decimals.
  886. int32_t startIndex;
  887. const UnicodeString &argName;
  888. /** argument number - plural offset */
  889. Formattable number;
  890. double offset;
  891. // Output values for plural selection with decimals.
  892. /** -1 if REPLACE_NUMBER, 0 arg not found, >0 ARG_START index */
  893. int32_t numberArgIndex;
  894. const Format *formatter;
  895. /** formatted argument number - plural offset */
  896. UnicodeString numberString;
  897. /** true if number-offset was formatted with the stock number formatter */
  898. UBool forReplaceNumber;
  899. };
  900. } // namespace
  // Core formatting loop: formats the (sub-)message that starts at part index
  // msgStart and appends the output to appendTo, returning when the matching
  // MSG_LIMIT part is reached or success becomes a failure.
  //
  // if argumentNames is nullptr, this means arguments is a numeric array.
  // arguments can not be nullptr.
  // We use const void *plNumber rather than const PluralSelectorContext *pluralNumber
  // so that we need not declare the PluralSelectorContext in the public header file.
  //
  // plNumber is dereferenced whenever a REPLACE_NUMBER part is encountered, so
  // callers formatting a plural sub-message must pass its context.
  void MessageFormat::format(int32_t msgStart, const void *plNumber,
                             const Formattable* arguments,
                             const UnicodeString *argumentNames,
                             int32_t cnt,
                             AppendableWrapper& appendTo,
                             FieldPosition* ignore,
                             UErrorCode& success) const {
      if (U_FAILURE(success)) {
          return;
      }
      const UnicodeString& msgString = msgPattern.getPatternString();
      // prevIndex: pattern-string offset up to which text has been consumed.
      int32_t prevIndex = msgPattern.getPart(msgStart).getLimit();
      for (int32_t i = msgStart + 1; U_SUCCESS(success) ; ++i) {
          const MessagePattern::Part* part = &msgPattern.getPart(i);
          const UMessagePatternPartType type = part->getType();
          int32_t index = part->getIndex();
          // Copy the literal text between the previous part and this one.
          appendTo.append(msgString, prevIndex, index - prevIndex);
          if (type == UMSGPAT_PART_TYPE_MSG_LIMIT) {
              return;
          }
          prevIndex = part->getLimit();
          if (type == UMSGPAT_PART_TYPE_REPLACE_NUMBER) {
              const PluralSelectorContext &pluralNumber =
                  *static_cast<const PluralSelectorContext *>(plNumber);
              if(pluralNumber.forReplaceNumber) {
                  // number-offset was already formatted.
                  appendTo.formatAndAppend(pluralNumber.formatter,
                          pluralNumber.number, pluralNumber.numberString, success);
              } else {
                  const NumberFormat* nf = getDefaultNumberFormat(success);
                  appendTo.formatAndAppend(nf, pluralNumber.number, success);
              }
              continue;
          }
          // Any other part that is not ARG_START only advances prevIndex.
          if (type != UMSGPAT_PART_TYPE_ARG_START) {
              continue;
          }
          // From here on: i indexes ARG_START, then the name/number part, then
          // the part after it. Expressions using (i - 2) below refer back to the
          // ARG_START index.
          int32_t argLimit = msgPattern.getLimitPartIndex(i);
          UMessagePatternArgType argType = part->getArgType();
          part = &msgPattern.getPart(++i);
          const Formattable* arg;
          UBool noArg = false;
          UnicodeString argName = msgPattern.getSubstring(*part);
          if (argumentNames == nullptr) {
              int32_t argNumber = part->getValue();  // ARG_NUMBER
              if (0 <= argNumber && argNumber < cnt) {
                  arg = arguments + argNumber;
              } else {
                  arg = nullptr;
                  noArg = true;
              }
          } else {
              arg = getArgFromListByName(arguments, argumentNames, cnt, argName);
              if (arg == nullptr) {
                  noArg = true;
              }
          }
          ++i;
          int32_t prevDestLength = appendTo.length();
          const Format* formatter = nullptr;
          if (noArg) {
              // No value supplied for this argument: emit "{name}" unchanged.
              appendTo.append(
                  UnicodeString(LEFT_CURLY_BRACE).append(argName).append(RIGHT_CURLY_BRACE));
          } else if (arg == nullptr) {
              // NOTE(review): this branch looks unreachable here, because both
              // lookup paths above set noArg whenever arg is nullptr.
              appendTo.append(NULL_STRING, 4);
          } else if(plNumber!=nullptr &&
                  static_cast<const PluralSelectorContext *>(plNumber)->numberArgIndex==(i-2)) {
              // This argument is the one recorded in the plural context.
              const PluralSelectorContext &pluralNumber =
                  *static_cast<const PluralSelectorContext *>(plNumber);
              if(pluralNumber.offset == 0) {
                  // The number was already formatted with this formatter.
                  appendTo.formatAndAppend(pluralNumber.formatter, pluralNumber.number,
                                           pluralNumber.numberString, success);
              } else {
                  // Do not use the formatted (number-offset) string for a named argument
                  // that formats the number without subtracting the offset.
                  appendTo.formatAndAppend(pluralNumber.formatter, *arg, success);
              }
          } else if ((formatter = getCachedFormatter(i -2)) != 0) {
              // Handles all ArgType.SIMPLE, and formatters from setFormat() and its siblings.
              if (dynamic_cast<const ChoiceFormat*>(formatter) ||
                  dynamic_cast<const PluralFormat*>(formatter) ||
                  dynamic_cast<const SelectFormat*>(formatter)) {
                  // We only handle nested formats here if they were provided via
                  // setFormat() or its siblings. Otherwise they are not cached and instead
                  // handled below according to argType.
                  UnicodeString subMsgString;
                  formatter->format(*arg, subMsgString, success);
                  // If the output still looks like a pattern ('{', or an
                  // apostrophe outside JDK apostrophe mode), format it again as
                  // a message with the same arguments.
                  if (subMsgString.indexOf(LEFT_CURLY_BRACE) >= 0 ||
                      (subMsgString.indexOf(SINGLE_QUOTE) >= 0 && !MessageImpl::jdkAposMode(msgPattern))
                  ) {
                      MessageFormat subMsgFormat(subMsgString, fLocale, success);
                      subMsgFormat.format(0, nullptr, arguments, argumentNames, cnt, appendTo, ignore, success);
                  } else {
                      appendTo.append(subMsgString);
                  }
              } else {
                  appendTo.formatAndAppend(formatter, *arg, success);
              }
          } else if (argType == UMSGPAT_ARG_TYPE_NONE || (cachedFormatters && uhash_iget(cachedFormatters, i - 2))) {
              // We arrive here if getCachedFormatter returned nullptr, but there was actually an element in the hash table.
              // This can only happen if the hash table contained a DummyFormat, so the if statement above is a check
              // for the hash table containing DummyFormat.
              // Format by the runtime type of the argument: number, date, or string.
              if (arg->isNumeric()) {
                  const NumberFormat* nf = getDefaultNumberFormat(success);
                  appendTo.formatAndAppend(nf, *arg, success);
              } else if (arg->getType() == Formattable::kDate) {
                  const DateFormat* df = getDefaultDateFormat(success);
                  appendTo.formatAndAppend(df, *arg, success);
              } else {
                  appendTo.append(arg->getString(success));
              }
          } else if (argType == UMSGPAT_ARG_TYPE_CHOICE) {
              if (!arg->isNumeric()) {
                  success = U_ILLEGAL_ARGUMENT_ERROR;
                  return;
              }
              // We must use the Formattable::getDouble() variant with the UErrorCode parameter
              // because only this one converts non-double numeric types to double.
              const double number = arg->getDouble(success);
              int32_t subMsgStart = ChoiceFormat::findSubMessage(msgPattern, i, number);
              formatComplexSubMessage(subMsgStart, nullptr, arguments, argumentNames,
                                      cnt, appendTo, success);
          } else if (UMSGPAT_ARG_TYPE_HAS_PLURAL_STYLE(argType)) {
              if (!arg->isNumeric()) {
                  success = U_ILLEGAL_ARGUMENT_ERROR;
                  return;
              }
              // plural vs. selectordinal use different selector providers.
              const PluralSelectorProvider &selector =
                  argType == UMSGPAT_ARG_TYPE_PLURAL ? pluralProvider : ordinalProvider;
              double offset = msgPattern.getPluralOffset(i);
              PluralSelectorContext context(i, argName, *arg, offset, success);
              // We must use the Formattable::getDouble() variant with the UErrorCode parameter
              // because only this one converts non-double numeric types to double.
              int32_t subMsgStart = PluralFormat::findSubMessage(
                      msgPattern, i, selector, &context, arg->getDouble(success), success);
              formatComplexSubMessage(subMsgStart, &context, arguments, argumentNames,
                                      cnt, appendTo, success);
          } else if (argType == UMSGPAT_ARG_TYPE_SELECT) {
              int32_t subMsgStart = SelectFormat::findSubMessage(msgPattern, i, arg->getString(success), success);
              formatComplexSubMessage(subMsgStart, nullptr, arguments, argumentNames,
                                      cnt, appendTo, success);
          } else {
              // This should never happen.
              success = U_INTERNAL_PROGRAM_ERROR;
              return;
          }
          ignore = updateMetaData(appendTo, prevDestLength, ignore, arg);
          // Skip everything up to and including this argument's limit part.
          prevIndex = msgPattern.getPart(argLimit).getLimit();
          i = argLimit;
      }
  }
  1057. void MessageFormat::formatComplexSubMessage(int32_t msgStart,
  1058. const void *plNumber,
  1059. const Formattable* arguments,
  1060. const UnicodeString *argumentNames,
  1061. int32_t cnt,
  1062. AppendableWrapper& appendTo,
  1063. UErrorCode& success) const {
  1064. if (U_FAILURE(success)) {
  1065. return;
  1066. }
  1067. if (!MessageImpl::jdkAposMode(msgPattern)) {
  1068. format(msgStart, plNumber, arguments, argumentNames, cnt, appendTo, nullptr, success);
  1069. return;
  1070. }
  1071. // JDK compatibility mode: (see JDK MessageFormat.format() API docs)
  1072. // - remove SKIP_SYNTAX; that is, remove half of the apostrophes
  1073. // - if the result string contains an open curly brace '{' then
  1074. // instantiate a temporary MessageFormat object and format again;
  1075. // otherwise just append the result string
  1076. const UnicodeString& msgString = msgPattern.getPatternString();
  1077. UnicodeString sb;
  1078. int32_t prevIndex = msgPattern.getPart(msgStart).getLimit();
  1079. for (int32_t i = msgStart;;) {
  1080. const MessagePattern::Part& part = msgPattern.getPart(++i);
  1081. const UMessagePatternPartType type = part.getType();
  1082. int32_t index = part.getIndex();
  1083. if (type == UMSGPAT_PART_TYPE_MSG_LIMIT) {
  1084. sb.append(msgString, prevIndex, index - prevIndex);
  1085. break;
  1086. } else if (type == UMSGPAT_PART_TYPE_REPLACE_NUMBER || type == UMSGPAT_PART_TYPE_SKIP_SYNTAX) {
  1087. sb.append(msgString, prevIndex, index - prevIndex);
  1088. if (type == UMSGPAT_PART_TYPE_REPLACE_NUMBER) {
  1089. const PluralSelectorContext &pluralNumber =
  1090. *static_cast<const PluralSelectorContext *>(plNumber);
  1091. if(pluralNumber.forReplaceNumber) {
  1092. // number-offset was already formatted.
  1093. sb.append(pluralNumber.numberString);
  1094. } else {
  1095. const NumberFormat* nf = getDefaultNumberFormat(success);
  1096. sb.append(nf->format(pluralNumber.number, sb, success));
  1097. }
  1098. }
  1099. prevIndex = part.getLimit();
  1100. } else if (type == UMSGPAT_PART_TYPE_ARG_START) {
  1101. sb.append(msgString, prevIndex, index - prevIndex);
  1102. prevIndex = index;
  1103. i = msgPattern.getLimitPartIndex(i);
  1104. index = msgPattern.getPart(i).getLimit();
  1105. MessageImpl::appendReducedApostrophes(msgString, prevIndex, index, sb);
  1106. prevIndex = index;
  1107. }
  1108. }
  1109. if (sb.indexOf(LEFT_CURLY_BRACE) >= 0) {
  1110. UnicodeString emptyPattern; // gcc 3.3.3 fails with "UnicodeString()" as the first parameter.
  1111. MessageFormat subMsgFormat(emptyPattern, fLocale, success);
  1112. subMsgFormat.applyPattern(sb, UMSGPAT_APOS_DOUBLE_REQUIRED, nullptr, success);
  1113. subMsgFormat.format(0, nullptr, arguments, argumentNames, cnt, appendTo, nullptr, success);
  1114. } else {
  1115. appendTo.append(sb);
  1116. }
  1117. }
  1118. UnicodeString MessageFormat::getLiteralStringUntilNextArgument(int32_t from) const {
  1119. const UnicodeString& msgString=msgPattern.getPatternString();
  1120. int32_t prevIndex=msgPattern.getPart(from).getLimit();
  1121. UnicodeString b;
  1122. for (int32_t i = from + 1; ; ++i) {
  1123. const MessagePattern::Part& part = msgPattern.getPart(i);
  1124. const UMessagePatternPartType type=part.getType();
  1125. int32_t index=part.getIndex();
  1126. b.append(msgString, prevIndex, index - prevIndex);
  1127. if(type==UMSGPAT_PART_TYPE_ARG_START || type==UMSGPAT_PART_TYPE_MSG_LIMIT) {
  1128. return b;
  1129. }
  1130. // Unexpected Part "part" in parsed message.
  1131. U_ASSERT(type==UMSGPAT_PART_TYPE_SKIP_SYNTAX || type==UMSGPAT_PART_TYPE_INSERT_CHAR);
  1132. prevIndex=part.getLimit();
  1133. }
  1134. }
  1135. FieldPosition* MessageFormat::updateMetaData(AppendableWrapper& /*dest*/, int32_t /*prevLength*/,
  1136. FieldPosition* /*fp*/, const Formattable* /*argId*/) const {
  1137. // Unlike in Java, there are no field attributes defined for MessageFormat. Do nothing.
  1138. return nullptr;
  1139. /*
  1140. if (fp != nullptr && Field.ARGUMENT.equals(fp.getFieldAttribute())) {
  1141. fp->setBeginIndex(prevLength);
  1142. fp->setEndIndex(dest.get_length());
  1143. return nullptr;
  1144. }
  1145. return fp;
  1146. */
  1147. }
  1148. int32_t
  1149. MessageFormat::findOtherSubMessage(int32_t partIndex) const {
  1150. int32_t count=msgPattern.countParts();
  1151. const MessagePattern::Part *part = &msgPattern.getPart(partIndex);
  1152. if(MessagePattern::Part::hasNumericValue(part->getType())) {
  1153. ++partIndex;
  1154. }
  1155. // Iterate over (ARG_SELECTOR [ARG_INT|ARG_DOUBLE] message) tuples
  1156. // until ARG_LIMIT or end of plural-only pattern.
  1157. UnicodeString other(false, OTHER_STRING, 5);
  1158. do {
  1159. part=&msgPattern.getPart(partIndex++);
  1160. UMessagePatternPartType type=part->getType();
  1161. if(type==UMSGPAT_PART_TYPE_ARG_LIMIT) {
  1162. break;
  1163. }
  1164. U_ASSERT(type==UMSGPAT_PART_TYPE_ARG_SELECTOR);
  1165. // part is an ARG_SELECTOR followed by an optional explicit value, and then a message
  1166. if(msgPattern.partSubstringMatches(*part, other)) {
  1167. return partIndex;
  1168. }
  1169. if(MessagePattern::Part::hasNumericValue(msgPattern.getPartType(partIndex))) {
  1170. ++partIndex; // skip the numeric-value part of "=1" etc.
  1171. }
  1172. partIndex=msgPattern.getLimitPartIndex(partIndex);
  1173. } while(++partIndex<count);
  1174. return 0;
  1175. }
  1176. int32_t
  1177. MessageFormat::findFirstPluralNumberArg(int32_t msgStart, const UnicodeString &argName) const {
  1178. for(int32_t i=msgStart+1;; ++i) {
  1179. const MessagePattern::Part &part=msgPattern.getPart(i);
  1180. UMessagePatternPartType type=part.getType();
  1181. if(type==UMSGPAT_PART_TYPE_MSG_LIMIT) {
  1182. return 0;
  1183. }
  1184. if(type==UMSGPAT_PART_TYPE_REPLACE_NUMBER) {
  1185. return -1;
  1186. }
  1187. if(type==UMSGPAT_PART_TYPE_ARG_START) {
  1188. UMessagePatternArgType argType=part.getArgType();
  1189. if(!argName.isEmpty() && (argType==UMSGPAT_ARG_TYPE_NONE || argType==UMSGPAT_ARG_TYPE_SIMPLE)) {
  1190. // ARG_NUMBER or ARG_NAME
  1191. if(msgPattern.partSubstringMatches(msgPattern.getPart(i+1), argName)) {
  1192. return i;
  1193. }
  1194. }
  1195. i=msgPattern.getLimitPartIndex(i);
  1196. }
  1197. }
  1198. }
  1199. void MessageFormat::copyObjects(const MessageFormat& that, UErrorCode& ec) {
  1200. // Deep copy pointer fields.
  1201. // We need not copy the formatAliases because they are re-filled
  1202. // in each getFormats() call.
  1203. // The defaultNumberFormat, defaultDateFormat and pluralProvider.rules
  1204. // also get created on demand.
  1205. argTypeCount = that.argTypeCount;
  1206. if (argTypeCount > 0) {
  1207. if (!allocateArgTypes(argTypeCount, ec)) {
  1208. return;
  1209. }
  1210. uprv_memcpy(argTypes, that.argTypes, argTypeCount * sizeof(argTypes[0]));
  1211. }
  1212. if (cachedFormatters != nullptr) {
  1213. uhash_removeAll(cachedFormatters);
  1214. }
  1215. if (customFormatArgStarts != nullptr) {
  1216. uhash_removeAll(customFormatArgStarts);
  1217. }
  1218. if (that.cachedFormatters) {
  1219. if (cachedFormatters == nullptr) {
  1220. cachedFormatters=uhash_open(uhash_hashLong, uhash_compareLong,
  1221. equalFormatsForHash, &ec);
  1222. if (U_FAILURE(ec)) {
  1223. return;
  1224. }
  1225. uhash_setValueDeleter(cachedFormatters, uprv_deleteUObject);
  1226. }
  1227. const int32_t count = uhash_count(that.cachedFormatters);
  1228. int32_t pos, idx;
  1229. for (idx = 0, pos = UHASH_FIRST; idx < count && U_SUCCESS(ec); ++idx) {
  1230. const UHashElement* cur = uhash_nextElement(that.cachedFormatters, &pos);
  1231. Format* newFormat = ((Format*)(cur->value.pointer))->clone();
  1232. if (newFormat) {
  1233. uhash_iput(cachedFormatters, cur->key.integer, newFormat, &ec);
  1234. } else {
  1235. ec = U_MEMORY_ALLOCATION_ERROR;
  1236. return;
  1237. }
  1238. }
  1239. }
  1240. if (that.customFormatArgStarts) {
  1241. if (customFormatArgStarts == nullptr) {
  1242. customFormatArgStarts=uhash_open(uhash_hashLong, uhash_compareLong,
  1243. nullptr, &ec);
  1244. }
  1245. const int32_t count = uhash_count(that.customFormatArgStarts);
  1246. int32_t pos, idx;
  1247. for (idx = 0, pos = UHASH_FIRST; idx < count && U_SUCCESS(ec); ++idx) {
  1248. const UHashElement* cur = uhash_nextElement(that.customFormatArgStarts, &pos);
  1249. uhash_iputi(customFormatArgStarts, cur->key.integer, cur->value.integer, &ec);
  1250. }
  1251. }
  1252. }
  1253. Formattable*
  1254. MessageFormat::parse(int32_t msgStart,
  1255. const UnicodeString& source,
  1256. ParsePosition& pos,
  1257. int32_t& count,
  1258. UErrorCode& ec) const {
  1259. count = 0;
  1260. if (U_FAILURE(ec)) {
  1261. pos.setErrorIndex(pos.getIndex());
  1262. return nullptr;
  1263. }
  1264. // parse() does not work with named arguments.
  1265. if (msgPattern.hasNamedArguments()) {
  1266. ec = U_ARGUMENT_TYPE_MISMATCH;
  1267. pos.setErrorIndex(pos.getIndex());
  1268. return nullptr;
  1269. }
  1270. LocalArray<Formattable> resultArray(new Formattable[argTypeCount ? argTypeCount : 1]);
  1271. const UnicodeString& msgString=msgPattern.getPatternString();
  1272. int32_t prevIndex=msgPattern.getPart(msgStart).getLimit();
  1273. int32_t sourceOffset = pos.getIndex();
  1274. ParsePosition tempStatus(0);
  1275. for(int32_t i=msgStart+1; ; ++i) {
  1276. UBool haveArgResult = false;
  1277. const MessagePattern::Part* part=&msgPattern.getPart(i);
  1278. const UMessagePatternPartType type=part->getType();
  1279. int32_t index=part->getIndex();
  1280. // Make sure the literal string matches.
  1281. int32_t len = index - prevIndex;
  1282. if (len == 0 || (0 == msgString.compare(prevIndex, len, source, sourceOffset, len))) {
  1283. sourceOffset += len;
  1284. prevIndex += len;
  1285. } else {
  1286. pos.setErrorIndex(sourceOffset);
  1287. return nullptr; // leave index as is to signal error
  1288. }
  1289. if(type==UMSGPAT_PART_TYPE_MSG_LIMIT) {
  1290. // Things went well! Done.
  1291. pos.setIndex(sourceOffset);
  1292. return resultArray.orphan();
  1293. }
  1294. if(type==UMSGPAT_PART_TYPE_SKIP_SYNTAX || type==UMSGPAT_PART_TYPE_INSERT_CHAR) {
  1295. prevIndex=part->getLimit();
  1296. continue;
  1297. }
  1298. // We do not support parsing Plural formats. (No REPLACE_NUMBER here.)
  1299. // Unexpected Part "part" in parsed message.
  1300. U_ASSERT(type==UMSGPAT_PART_TYPE_ARG_START);
  1301. int32_t argLimit=msgPattern.getLimitPartIndex(i);
  1302. UMessagePatternArgType argType=part->getArgType();
  1303. part=&msgPattern.getPart(++i);
  1304. int32_t argNumber = part->getValue(); // ARG_NUMBER
  1305. UnicodeString key;
  1306. ++i;
  1307. const Format* formatter = nullptr;
  1308. Formattable& argResult = resultArray[argNumber];
  1309. if(cachedFormatters!=nullptr && (formatter = getCachedFormatter(i - 2))!=nullptr) {
  1310. // Just parse using the formatter.
  1311. tempStatus.setIndex(sourceOffset);
  1312. formatter->parseObject(source, argResult, tempStatus);
  1313. if (tempStatus.getIndex() == sourceOffset) {
  1314. pos.setErrorIndex(sourceOffset);
  1315. return nullptr; // leave index as is to signal error
  1316. }
  1317. sourceOffset = tempStatus.getIndex();
  1318. haveArgResult = true;
  1319. } else if(
  1320. argType==UMSGPAT_ARG_TYPE_NONE || (cachedFormatters && uhash_iget(cachedFormatters, i -2))) {
  1321. // We arrive here if getCachedFormatter returned nullptr, but there was actually an element in the hash table.
  1322. // This can only happen if the hash table contained a DummyFormat, so the if statement above is a check
  1323. // for the hash table containing DummyFormat.
  1324. // Match as a string.
  1325. // if at end, use longest possible match
  1326. // otherwise uses first match to intervening string
  1327. // does NOT recursively try all possibilities
  1328. UnicodeString stringAfterArgument = getLiteralStringUntilNextArgument(argLimit);
  1329. int32_t next;
  1330. if (!stringAfterArgument.isEmpty()) {
  1331. next = source.indexOf(stringAfterArgument, sourceOffset);
  1332. } else {
  1333. next = source.length();
  1334. }
  1335. if (next < 0) {
  1336. pos.setErrorIndex(sourceOffset);
  1337. return nullptr; // leave index as is to signal error
  1338. } else {
  1339. UnicodeString strValue(source.tempSubString(sourceOffset, next - sourceOffset));
  1340. UnicodeString compValue;
  1341. compValue.append(LEFT_CURLY_BRACE);
  1342. itos(argNumber, compValue);
  1343. compValue.append(RIGHT_CURLY_BRACE);
  1344. if (0 != strValue.compare(compValue)) {
  1345. argResult.setString(strValue);
  1346. haveArgResult = true;
  1347. }
  1348. sourceOffset = next;
  1349. }
  1350. } else if(argType==UMSGPAT_ARG_TYPE_CHOICE) {
  1351. tempStatus.setIndex(sourceOffset);
  1352. double choiceResult = ChoiceFormat::parseArgument(msgPattern, i, source, tempStatus);
  1353. if (tempStatus.getIndex() == sourceOffset) {
  1354. pos.setErrorIndex(sourceOffset);
  1355. return nullptr; // leave index as is to signal error
  1356. }
  1357. argResult.setDouble(choiceResult);
  1358. haveArgResult = true;
  1359. sourceOffset = tempStatus.getIndex();
  1360. } else if(UMSGPAT_ARG_TYPE_HAS_PLURAL_STYLE(argType) || argType==UMSGPAT_ARG_TYPE_SELECT) {
  1361. // Parsing not supported.
  1362. ec = U_UNSUPPORTED_ERROR;
  1363. return nullptr;
  1364. } else {
  1365. // This should never happen.
  1366. ec = U_INTERNAL_PROGRAM_ERROR;
  1367. return nullptr;
  1368. }
  1369. if (haveArgResult && count <= argNumber) {
  1370. count = argNumber + 1;
  1371. }
  1372. prevIndex=msgPattern.getPart(argLimit).getLimit();
  1373. i=argLimit;
  1374. }
  1375. }
  1376. // -------------------------------------
  1377. // Parses the source pattern and returns the Formattable objects array,
  1378. // the array count and the ending parse position. The caller of this method
  1379. // owns the array.
  1380. Formattable*
  1381. MessageFormat::parse(const UnicodeString& source,
  1382. ParsePosition& pos,
  1383. int32_t& count) const {
  1384. UErrorCode ec = U_ZERO_ERROR;
  1385. return parse(0, source, pos, count, ec);
  1386. }
  1387. // -------------------------------------
  1388. // Parses the source string and returns the array of
  1389. // Formattable objects and the array count. The caller
  1390. // owns the returned array.
  1391. Formattable*
  1392. MessageFormat::parse(const UnicodeString& source,
  1393. int32_t& cnt,
  1394. UErrorCode& success) const
  1395. {
  1396. if (msgPattern.hasNamedArguments()) {
  1397. success = U_ARGUMENT_TYPE_MISMATCH;
  1398. return nullptr;
  1399. }
  1400. ParsePosition status(0);
  1401. // Calls the actual implementation method and starts
  1402. // from zero offset of the source text.
  1403. Formattable* result = parse(source, status, cnt);
  1404. if (status.getIndex() == 0) {
  1405. success = U_MESSAGE_PARSE_ERROR;
  1406. delete[] result;
  1407. return nullptr;
  1408. }
  1409. return result;
  1410. }
  1411. // -------------------------------------
  1412. // Parses the source text and copy into the result buffer.
  1413. void
  1414. MessageFormat::parseObject( const UnicodeString& source,
  1415. Formattable& result,
  1416. ParsePosition& status) const
  1417. {
  1418. int32_t cnt = 0;
  1419. Formattable* tmpResult = parse(source, status, cnt);
  1420. if (tmpResult != nullptr)
  1421. result.adoptArray(tmpResult, cnt);
  1422. }
  1423. UnicodeString
  1424. MessageFormat::autoQuoteApostrophe(const UnicodeString& pattern, UErrorCode& status) {
  1425. UnicodeString result;
  1426. if (U_SUCCESS(status)) {
  1427. int32_t plen = pattern.length();
  1428. const char16_t* pat = pattern.getBuffer();
  1429. int32_t blen = plen * 2 + 1; // space for null termination, convenience
  1430. char16_t* buf = result.getBuffer(blen);
  1431. if (buf == nullptr) {
  1432. status = U_MEMORY_ALLOCATION_ERROR;
  1433. } else {
  1434. int32_t len = umsg_autoQuoteApostrophe(pat, plen, buf, blen, &status);
  1435. result.releaseBuffer(U_SUCCESS(status) ? len : 0);
  1436. }
  1437. }
  1438. if (U_FAILURE(status)) {
  1439. result.setToBogus();
  1440. }
  1441. return result;
  1442. }
  1443. // -------------------------------------
  1444. static Format* makeRBNF(URBNFRuleSetTag tag, const Locale& locale, const UnicodeString& defaultRuleSet, UErrorCode& ec) {
  1445. RuleBasedNumberFormat* fmt = new RuleBasedNumberFormat(tag, locale, ec);
  1446. if (fmt == nullptr) {
  1447. ec = U_MEMORY_ALLOCATION_ERROR;
  1448. } else if (U_SUCCESS(ec) && defaultRuleSet.length() > 0) {
  1449. UErrorCode localStatus = U_ZERO_ERROR; // ignore unrecognized default rule set
  1450. fmt->setDefaultRuleSet(defaultRuleSet, localStatus);
  1451. }
  1452. return fmt;
  1453. }
  1454. void MessageFormat::cacheExplicitFormats(UErrorCode& status) {
  1455. if (U_FAILURE(status)) {
  1456. return;
  1457. }
  1458. if (cachedFormatters != nullptr) {
  1459. uhash_removeAll(cachedFormatters);
  1460. }
  1461. if (customFormatArgStarts != nullptr) {
  1462. uhash_removeAll(customFormatArgStarts);
  1463. }
  1464. // The last two "parts" can at most be ARG_LIMIT and MSG_LIMIT
  1465. // which we need not examine.
  1466. int32_t limit = msgPattern.countParts() - 2;
  1467. argTypeCount = 0;
  1468. // We also need not look at the first two "parts"
  1469. // (at most MSG_START and ARG_START) in this loop.
  1470. // We determine the argTypeCount first so that we can allocateArgTypes
  1471. // so that the next loop can set argTypes[argNumber].
  1472. // (This is for the C API which needs the argTypes to read its va_arg list.)
  1473. for (int32_t i = 2; i < limit && U_SUCCESS(status); ++i) {
  1474. const MessagePattern::Part& part = msgPattern.getPart(i);
  1475. if (part.getType() == UMSGPAT_PART_TYPE_ARG_NUMBER) {
  1476. const int argNumber = part.getValue();
  1477. if (argNumber >= argTypeCount) {
  1478. argTypeCount = argNumber + 1;
  1479. }
  1480. }
  1481. }
  1482. if (!allocateArgTypes(argTypeCount, status)) {
  1483. return;
  1484. }
  1485. // Set all argTypes to kObject, as a "none" value, for lack of any better value.
  1486. // We never use kObject for real arguments.
  1487. // We use it as "no argument yet" for the check for hasArgTypeConflicts.
  1488. for (int32_t i = 0; i < argTypeCount; ++i) {
  1489. argTypes[i] = Formattable::kObject;
  1490. }
  1491. hasArgTypeConflicts = false;
  1492. // This loop starts at part index 1 because we do need to examine
  1493. // ARG_START parts. (But we can ignore the MSG_START.)
  1494. for (int32_t i = 1; i < limit && U_SUCCESS(status); ++i) {
  1495. const MessagePattern::Part* part = &msgPattern.getPart(i);
  1496. if (part->getType() != UMSGPAT_PART_TYPE_ARG_START) {
  1497. continue;
  1498. }
  1499. UMessagePatternArgType argType = part->getArgType();
  1500. int32_t argNumber = -1;
  1501. part = &msgPattern.getPart(i + 1);
  1502. if (part->getType() == UMSGPAT_PART_TYPE_ARG_NUMBER) {
  1503. argNumber = part->getValue();
  1504. }
  1505. Formattable::Type formattableType;
  1506. switch (argType) {
  1507. case UMSGPAT_ARG_TYPE_NONE:
  1508. formattableType = Formattable::kString;
  1509. break;
  1510. case UMSGPAT_ARG_TYPE_SIMPLE: {
  1511. int32_t index = i;
  1512. i += 2;
  1513. UnicodeString explicitType = msgPattern.getSubstring(msgPattern.getPart(i++));
  1514. UnicodeString style;
  1515. if ((part = &msgPattern.getPart(i))->getType() == UMSGPAT_PART_TYPE_ARG_STYLE) {
  1516. style = msgPattern.getSubstring(*part);
  1517. ++i;
  1518. }
  1519. UParseError parseError;
  1520. Format* formatter = createAppropriateFormat(explicitType, style, formattableType, parseError, status);
  1521. setArgStartFormat(index, formatter, status);
  1522. break;
  1523. }
  1524. case UMSGPAT_ARG_TYPE_CHOICE:
  1525. case UMSGPAT_ARG_TYPE_PLURAL:
  1526. case UMSGPAT_ARG_TYPE_SELECTORDINAL:
  1527. formattableType = Formattable::kDouble;
  1528. break;
  1529. case UMSGPAT_ARG_TYPE_SELECT:
  1530. formattableType = Formattable::kString;
  1531. break;
  1532. default:
  1533. status = U_INTERNAL_PROGRAM_ERROR; // Should be unreachable.
  1534. formattableType = Formattable::kString;
  1535. break;
  1536. }
  1537. if (argNumber != -1) {
  1538. if (argTypes[argNumber] != Formattable::kObject && argTypes[argNumber] != formattableType) {
  1539. hasArgTypeConflicts = true;
  1540. }
  1541. argTypes[argNumber] = formattableType;
  1542. }
  1543. }
  1544. }
  1545. Format* MessageFormat::createAppropriateFormat(UnicodeString& type, UnicodeString& style,
  1546. Formattable::Type& formattableType, UParseError& parseError,
  1547. UErrorCode& ec) {
  1548. if (U_FAILURE(ec)) {
  1549. return nullptr;
  1550. }
  1551. Format* fmt = nullptr;
  1552. int32_t typeID, styleID;
  1553. DateFormat::EStyle date_style;
  1554. int32_t firstNonSpace;
  1555. switch (typeID = findKeyword(type, TYPE_IDS)) {
  1556. case 0: // number
  1557. formattableType = Formattable::kDouble;
  1558. switch (findKeyword(style, NUMBER_STYLE_IDS)) {
  1559. case 0: // default
  1560. fmt = NumberFormat::createInstance(fLocale, ec);
  1561. break;
  1562. case 1: // currency
  1563. fmt = NumberFormat::createCurrencyInstance(fLocale, ec);
  1564. break;
  1565. case 2: // percent
  1566. fmt = NumberFormat::createPercentInstance(fLocale, ec);
  1567. break;
  1568. case 3: // integer
  1569. formattableType = Formattable::kLong;
  1570. fmt = createIntegerFormat(fLocale, ec);
  1571. break;
  1572. default: // pattern or skeleton
  1573. firstNonSpace = PatternProps::skipWhiteSpace(style, 0);
  1574. if (style.compare(firstNonSpace, 2, u"::", 0, 2) == 0) {
  1575. // Skeleton
  1576. UnicodeString skeleton = style.tempSubString(firstNonSpace + 2);
  1577. fmt = number::NumberFormatter::forSkeleton(skeleton, ec).locale(fLocale).toFormat(ec);
  1578. } else {
  1579. // Pattern
  1580. fmt = NumberFormat::createInstance(fLocale, ec);
  1581. if (fmt) {
  1582. auto* decfmt = dynamic_cast<DecimalFormat*>(fmt);
  1583. if (decfmt != nullptr) {
  1584. decfmt->applyPattern(style, parseError, ec);
  1585. }
  1586. }
  1587. }
  1588. break;
  1589. }
  1590. break;
  1591. case 1: // date
  1592. case 2: // time
  1593. formattableType = Formattable::kDate;
  1594. firstNonSpace = PatternProps::skipWhiteSpace(style, 0);
  1595. if (style.compare(firstNonSpace, 2, u"::", 0, 2) == 0) {
  1596. // Skeleton
  1597. UnicodeString skeleton = style.tempSubString(firstNonSpace + 2);
  1598. fmt = DateFormat::createInstanceForSkeleton(skeleton, fLocale, ec);
  1599. } else {
  1600. // Pattern
  1601. styleID = findKeyword(style, DATE_STYLE_IDS);
  1602. date_style = (styleID >= 0) ? DATE_STYLES[styleID] : DateFormat::kDefault;
  1603. if (typeID == 1) {
  1604. fmt = DateFormat::createDateInstance(date_style, fLocale);
  1605. } else {
  1606. fmt = DateFormat::createTimeInstance(date_style, fLocale);
  1607. }
  1608. if (styleID < 0 && fmt != nullptr) {
  1609. SimpleDateFormat* sdtfmt = dynamic_cast<SimpleDateFormat*>(fmt);
  1610. if (sdtfmt != nullptr) {
  1611. sdtfmt->applyPattern(style);
  1612. }
  1613. }
  1614. }
  1615. break;
  1616. case 3: // spellout
  1617. formattableType = Formattable::kDouble;
  1618. fmt = makeRBNF(URBNF_SPELLOUT, fLocale, style, ec);
  1619. break;
  1620. case 4: // ordinal
  1621. formattableType = Formattable::kDouble;
  1622. fmt = makeRBNF(URBNF_ORDINAL, fLocale, style, ec);
  1623. break;
  1624. case 5: // duration
  1625. formattableType = Formattable::kDouble;
  1626. fmt = makeRBNF(URBNF_DURATION, fLocale, style, ec);
  1627. break;
  1628. default:
  1629. formattableType = Formattable::kString;
  1630. ec = U_ILLEGAL_ARGUMENT_ERROR;
  1631. break;
  1632. }
  1633. return fmt;
  1634. }
  1635. //-------------------------------------
  1636. // Finds the string, s, in the string array, list.
  1637. int32_t MessageFormat::findKeyword(const UnicodeString& s,
  1638. const char16_t * const *list)
  1639. {
  1640. if (s.isEmpty()) {
  1641. return 0; // default
  1642. }
  1643. int32_t length = s.length();
  1644. const char16_t *ps = PatternProps::trimWhiteSpace(s.getBuffer(), length);
  1645. UnicodeString buffer(false, ps, length);
  1646. // Trims the space characters and turns all characters
  1647. // in s to lower case.
  1648. buffer.toLower("");
  1649. for (int32_t i = 0; list[i]; ++i) {
  1650. if (!buffer.compare(list[i], u_strlen(list[i]))) {
  1651. return i;
  1652. }
  1653. }
  1654. return -1;
  1655. }
  1656. /**
  1657. * Convenience method that ought to be in NumberFormat
  1658. */
  1659. NumberFormat*
  1660. MessageFormat::createIntegerFormat(const Locale& locale, UErrorCode& status) const {
  1661. NumberFormat *temp = NumberFormat::createInstance(locale, status);
  1662. DecimalFormat *temp2;
  1663. if (temp != nullptr && (temp2 = dynamic_cast<DecimalFormat*>(temp)) != nullptr) {
  1664. temp2->setMaximumFractionDigits(0);
  1665. temp2->setDecimalSeparatorAlwaysShown(false);
  1666. temp2->setParseIntegerOnly(true);
  1667. }
  1668. return temp;
  1669. }
  1670. /**
  1671. * Return the default number format. Used to format a numeric
  1672. * argument when subformats[i].format is nullptr. Returns nullptr
  1673. * on failure.
  1674. *
  1675. * Semantically const but may modify *this.
  1676. */
  1677. const NumberFormat* MessageFormat::getDefaultNumberFormat(UErrorCode& ec) const {
  1678. if (defaultNumberFormat == nullptr) {
  1679. MessageFormat* t = (MessageFormat*) this;
  1680. t->defaultNumberFormat = NumberFormat::createInstance(fLocale, ec);
  1681. if (U_FAILURE(ec)) {
  1682. delete t->defaultNumberFormat;
  1683. t->defaultNumberFormat = nullptr;
  1684. } else if (t->defaultNumberFormat == nullptr) {
  1685. ec = U_MEMORY_ALLOCATION_ERROR;
  1686. }
  1687. }
  1688. return defaultNumberFormat;
  1689. }
  1690. /**
  1691. * Return the default date format. Used to format a date
  1692. * argument when subformats[i].format is nullptr. Returns nullptr
  1693. * on failure.
  1694. *
  1695. * Semantically const but may modify *this.
  1696. */
  1697. const DateFormat* MessageFormat::getDefaultDateFormat(UErrorCode& ec) const {
  1698. if (defaultDateFormat == nullptr) {
  1699. MessageFormat* t = (MessageFormat*) this;
  1700. t->defaultDateFormat = DateFormat::createDateTimeInstance(DateFormat::kShort, DateFormat::kShort, fLocale);
  1701. if (t->defaultDateFormat == nullptr) {
  1702. ec = U_MEMORY_ALLOCATION_ERROR;
  1703. }
  1704. }
  1705. return defaultDateFormat;
  1706. }
  1707. UBool
  1708. MessageFormat::usesNamedArguments() const {
  1709. return msgPattern.hasNamedArguments();
  1710. }
  1711. int32_t
  1712. MessageFormat::getArgTypeCount() const {
  1713. return argTypeCount;
  1714. }
  1715. UBool MessageFormat::equalFormats(const void* left, const void* right) {
  1716. return *(const Format*)left==*(const Format*)right;
  1717. }
  1718. bool MessageFormat::DummyFormat::operator==(const Format&) const {
  1719. return true;
  1720. }
  1721. MessageFormat::DummyFormat* MessageFormat::DummyFormat::clone() const {
  1722. return new DummyFormat();
  1723. }
  1724. UnicodeString& MessageFormat::DummyFormat::format(const Formattable&,
  1725. UnicodeString& appendTo,
  1726. UErrorCode& status) const {
  1727. if (U_SUCCESS(status)) {
  1728. status = U_UNSUPPORTED_ERROR;
  1729. }
  1730. return appendTo;
  1731. }
  1732. UnicodeString& MessageFormat::DummyFormat::format(const Formattable&,
  1733. UnicodeString& appendTo,
  1734. FieldPosition&,
  1735. UErrorCode& status) const {
  1736. if (U_SUCCESS(status)) {
  1737. status = U_UNSUPPORTED_ERROR;
  1738. }
  1739. return appendTo;
  1740. }
  1741. UnicodeString& MessageFormat::DummyFormat::format(const Formattable&,
  1742. UnicodeString& appendTo,
  1743. FieldPositionIterator*,
  1744. UErrorCode& status) const {
  1745. if (U_SUCCESS(status)) {
  1746. status = U_UNSUPPORTED_ERROR;
  1747. }
  1748. return appendTo;
  1749. }
  1750. void MessageFormat::DummyFormat::parseObject(const UnicodeString&,
  1751. Formattable&,
  1752. ParsePosition& ) const {
  1753. }
  1754. FormatNameEnumeration::FormatNameEnumeration(LocalPointer<UVector> nameList, UErrorCode& /*status*/) {
  1755. pos=0;
  1756. fFormatNames = std::move(nameList);
  1757. }
  1758. const UnicodeString*
  1759. FormatNameEnumeration::snext(UErrorCode& status) {
  1760. if (U_SUCCESS(status) && pos < fFormatNames->size()) {
  1761. return (const UnicodeString*)fFormatNames->elementAt(pos++);
  1762. }
  1763. return nullptr;
  1764. }
  1765. void
  1766. FormatNameEnumeration::reset(UErrorCode& /*status*/) {
  1767. pos=0;
  1768. }
  1769. int32_t
  1770. FormatNameEnumeration::count(UErrorCode& /*status*/) const {
  1771. return (fFormatNames==nullptr) ? 0 : fFormatNames->size();
  1772. }
  1773. FormatNameEnumeration::~FormatNameEnumeration() {
  1774. }
  1775. MessageFormat::PluralSelectorProvider::PluralSelectorProvider(const MessageFormat &mf, UPluralType t)
  1776. : msgFormat(mf), rules(nullptr), type(t) {
  1777. }
  1778. MessageFormat::PluralSelectorProvider::~PluralSelectorProvider() {
  1779. delete rules;
  1780. }
  1781. UnicodeString MessageFormat::PluralSelectorProvider::select(void *ctx, double number,
  1782. UErrorCode& ec) const {
  1783. if (U_FAILURE(ec)) {
  1784. return UnicodeString(false, OTHER_STRING, 5);
  1785. }
  1786. MessageFormat::PluralSelectorProvider* t = const_cast<MessageFormat::PluralSelectorProvider*>(this);
  1787. if(rules == nullptr) {
  1788. t->rules = PluralRules::forLocale(msgFormat.fLocale, type, ec);
  1789. if (U_FAILURE(ec)) {
  1790. return UnicodeString(false, OTHER_STRING, 5);
  1791. }
  1792. }
  1793. // Select a sub-message according to how the number is formatted,
  1794. // which is specified in the selected sub-message.
  1795. // We avoid this circle by looking at how
  1796. // the number is formatted in the "other" sub-message
  1797. // which must always be present and usually contains the number.
  1798. // Message authors should be consistent across sub-messages.
  1799. PluralSelectorContext &context = *static_cast<PluralSelectorContext *>(ctx);
  1800. int32_t otherIndex = msgFormat.findOtherSubMessage(context.startIndex);
  1801. context.numberArgIndex = msgFormat.findFirstPluralNumberArg(otherIndex, context.argName);
  1802. if(context.numberArgIndex > 0 && msgFormat.cachedFormatters != nullptr) {
  1803. context.formatter =
  1804. (const Format*)uhash_iget(msgFormat.cachedFormatters, context.numberArgIndex);
  1805. }
  1806. if(context.formatter == nullptr) {
  1807. context.formatter = msgFormat.getDefaultNumberFormat(ec);
  1808. context.forReplaceNumber = true;
  1809. }
  1810. if (context.number.getDouble(ec) != number) {
  1811. ec = U_INTERNAL_PROGRAM_ERROR;
  1812. return UnicodeString(false, OTHER_STRING, 5);
  1813. }
  1814. context.formatter->format(context.number, context.numberString, ec);
  1815. auto* decFmt = dynamic_cast<const DecimalFormat *>(context.formatter);
  1816. if(decFmt != nullptr) {
  1817. number::impl::DecimalQuantity dq;
  1818. decFmt->formatToDecimalQuantity(context.number, dq, ec);
  1819. if (U_FAILURE(ec)) {
  1820. return UnicodeString(false, OTHER_STRING, 5);
  1821. }
  1822. return rules->select(dq);
  1823. } else {
  1824. return rules->select(number);
  1825. }
  1826. }
  1827. void MessageFormat::PluralSelectorProvider::reset() {
  1828. delete rules;
  1829. rules = nullptr;
  1830. }
  1831. U_NAMESPACE_END
  1832. #endif /* #if !UCONFIG_NO_FORMATTING */
  1833. //eof