messagepattern.cpp 43 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215121612171218121912201221122212231224122512261227122812291230123112321233
  1. // © 2016 and later: Unicode, Inc. and others.
  2. // License & terms of use: http://www.unicode.org/copyright.html
  3. /*
  4. *******************************************************************************
  5. * Copyright (C) 2011-2012, International Business Machines
  6. * Corporation and others. All Rights Reserved.
  7. *******************************************************************************
  8. * file name: messagepattern.cpp
  9. * encoding: UTF-8
  10. * tab size: 8 (not used)
  11. * indentation:4
  12. *
  13. * created on: 2011mar14
  14. * created by: Markus W. Scherer
  15. */
  16. #include "unicode/utypes.h"
  17. #if !UCONFIG_NO_FORMATTING
  18. #include "unicode/messagepattern.h"
  19. #include "unicode/unistr.h"
  20. #include "unicode/utf16.h"
  21. #include "cmemory.h"
  22. #include "cstring.h"
  23. #include "messageimpl.h"
  24. #include "patternprops.h"
  25. #include "putilimp.h"
  26. #include "uassert.h"
  27. U_NAMESPACE_BEGIN
  // Unicode character/code point constants ---------------------------------- ***

  // Punctuation used by the pattern syntax.
  static constexpr char16_t u_pound = u'#';
  static constexpr char16_t u_apos = u'\'';
  static constexpr char16_t u_plus = u'+';
  static constexpr char16_t u_comma = u',';
  static constexpr char16_t u_minus = u'-';
  static constexpr char16_t u_dot = u'.';
  static constexpr char16_t u_colon = u':';
  static constexpr char16_t u_lessThan = u'<';
  static constexpr char16_t u_equal = u'=';

  // Upper-case ASCII letters.
  static constexpr char16_t u_A = u'A';
  static constexpr char16_t u_C = u'C';
  static constexpr char16_t u_D = u'D';
  static constexpr char16_t u_E = u'E';
  static constexpr char16_t u_H = u'H';
  static constexpr char16_t u_I = u'I';
  static constexpr char16_t u_L = u'L';
  static constexpr char16_t u_N = u'N';
  static constexpr char16_t u_O = u'O';
  static constexpr char16_t u_P = u'P';
  static constexpr char16_t u_R = u'R';
  static constexpr char16_t u_S = u'S';
  static constexpr char16_t u_T = u'T';
  static constexpr char16_t u_U = u'U';
  static constexpr char16_t u_Z = u'Z';

  // Lower-case ASCII letters.
  static constexpr char16_t u_a = u'a';
  static constexpr char16_t u_c = u'c';
  static constexpr char16_t u_d = u'd';
  static constexpr char16_t u_e = u'e';
  static constexpr char16_t u_f = u'f';
  static constexpr char16_t u_h = u'h';
  static constexpr char16_t u_i = u'i';
  static constexpr char16_t u_l = u'l';
  static constexpr char16_t u_n = u'n';
  static constexpr char16_t u_o = u'o';
  static constexpr char16_t u_p = u'p';
  static constexpr char16_t u_r = u'r';
  static constexpr char16_t u_s = u's';
  static constexpr char16_t u_t = u't';
  static constexpr char16_t u_u = u'u';
  static constexpr char16_t u_z = u'z';

  // Braces, pipe and the non-ASCII "less than or equal" sign.
  static constexpr char16_t u_leftCurlyBrace = u'{';
  static constexpr char16_t u_pipe = u'|';
  static constexpr char16_t u_rightCurlyBrace = u'}';
  static constexpr char16_t u_lessOrEqual = u'\u2264';  // U+2264 is <=

  // "offset:" -- exactly 7 code units, deliberately not NUL-terminated.
  static constexpr char16_t kOffsetColon[] = {
      u_o, u_f, u_f, u_s, u_e, u_t, u_colon
  };

  // "other" -- exactly 5 code units, deliberately not NUL-terminated.
  static constexpr char16_t kOther[] = {
      u_o, u_t, u_h, u_e, u_r
  };
  79. // MessagePatternList ------------------------------------------------------ ***
  80. template<typename T, int32_t stackCapacity>
  81. class MessagePatternList : public UMemory {
  82. public:
  83. MessagePatternList() {}
  84. void copyFrom(const MessagePatternList<T, stackCapacity> &other,
  85. int32_t length,
  86. UErrorCode &errorCode);
  87. UBool ensureCapacityForOneMore(int32_t oldLength, UErrorCode &errorCode);
  88. UBool equals(const MessagePatternList<T, stackCapacity> &other, int32_t length) const {
  89. for(int32_t i=0; i<length; ++i) {
  90. if(a[i]!=other.a[i]) { return false; }
  91. }
  92. return true;
  93. }
  94. MaybeStackArray<T, stackCapacity> a;
  95. };
  96. template<typename T, int32_t stackCapacity>
  97. void
  98. MessagePatternList<T, stackCapacity>::copyFrom(
  99. const MessagePatternList<T, stackCapacity> &other,
  100. int32_t length,
  101. UErrorCode &errorCode) {
  102. if(U_SUCCESS(errorCode) && length>0) {
  103. if(length>a.getCapacity() && nullptr==a.resize(length)) {
  104. errorCode=U_MEMORY_ALLOCATION_ERROR;
  105. return;
  106. }
  107. uprv_memcpy(a.getAlias(), other.a.getAlias(), (size_t)length*sizeof(T));
  108. }
  109. }
  110. template<typename T, int32_t stackCapacity>
  111. UBool
  112. MessagePatternList<T, stackCapacity>::ensureCapacityForOneMore(int32_t oldLength, UErrorCode &errorCode) {
  113. if(U_FAILURE(errorCode)) {
  114. return false;
  115. }
  116. if(a.getCapacity()>oldLength || a.resize(2*oldLength, oldLength)!=nullptr) {
  117. return true;
  118. }
  119. errorCode=U_MEMORY_ALLOCATION_ERROR;
  120. return false;
  121. }
  122. // MessagePatternList specializations -------------------------------------- ***
  123. class MessagePatternDoubleList : public MessagePatternList<double, 8> {
  124. };
  125. class MessagePatternPartsList : public MessagePatternList<MessagePattern::Part, 32> {
  126. };
  127. // MessagePattern constructors etc. ---------------------------------------- ***
  128. MessagePattern::MessagePattern(UErrorCode &errorCode)
  129. : aposMode(UCONFIG_MSGPAT_DEFAULT_APOSTROPHE_MODE),
  130. partsList(nullptr), parts(nullptr), partsLength(0),
  131. numericValuesList(nullptr), numericValues(nullptr), numericValuesLength(0),
  132. hasArgNames(false), hasArgNumbers(false), needsAutoQuoting(false) {
  133. init(errorCode);
  134. }
  135. MessagePattern::MessagePattern(UMessagePatternApostropheMode mode, UErrorCode &errorCode)
  136. : aposMode(mode),
  137. partsList(nullptr), parts(nullptr), partsLength(0),
  138. numericValuesList(nullptr), numericValues(nullptr), numericValuesLength(0),
  139. hasArgNames(false), hasArgNumbers(false), needsAutoQuoting(false) {
  140. init(errorCode);
  141. }
  142. MessagePattern::MessagePattern(const UnicodeString &pattern, UParseError *parseError, UErrorCode &errorCode)
  143. : aposMode(UCONFIG_MSGPAT_DEFAULT_APOSTROPHE_MODE),
  144. partsList(nullptr), parts(nullptr), partsLength(0),
  145. numericValuesList(nullptr), numericValues(nullptr), numericValuesLength(0),
  146. hasArgNames(false), hasArgNumbers(false), needsAutoQuoting(false) {
  147. if(init(errorCode)) {
  148. parse(pattern, parseError, errorCode);
  149. }
  150. }
  151. UBool
  152. MessagePattern::init(UErrorCode &errorCode) {
  153. if(U_FAILURE(errorCode)) {
  154. return false;
  155. }
  156. partsList=new MessagePatternPartsList();
  157. if(partsList==nullptr) {
  158. errorCode=U_MEMORY_ALLOCATION_ERROR;
  159. return false;
  160. }
  161. parts=partsList->a.getAlias();
  162. return true;
  163. }
  164. MessagePattern::MessagePattern(const MessagePattern &other)
  165. : UObject(other), aposMode(other.aposMode), msg(other.msg),
  166. partsList(nullptr), parts(nullptr), partsLength(0),
  167. numericValuesList(nullptr), numericValues(nullptr), numericValuesLength(0),
  168. hasArgNames(other.hasArgNames), hasArgNumbers(other.hasArgNumbers),
  169. needsAutoQuoting(other.needsAutoQuoting) {
  170. UErrorCode errorCode=U_ZERO_ERROR;
  171. if(!copyStorage(other, errorCode)) {
  172. clear();
  173. }
  174. }
  175. MessagePattern &
  176. MessagePattern::operator=(const MessagePattern &other) {
  177. if(this==&other) {
  178. return *this;
  179. }
  180. aposMode=other.aposMode;
  181. msg=other.msg;
  182. hasArgNames=other.hasArgNames;
  183. hasArgNumbers=other.hasArgNumbers;
  184. needsAutoQuoting=other.needsAutoQuoting;
  185. UErrorCode errorCode=U_ZERO_ERROR;
  186. if(!copyStorage(other, errorCode)) {
  187. clear();
  188. }
  189. return *this;
  190. }
  191. UBool
  192. MessagePattern::copyStorage(const MessagePattern &other, UErrorCode &errorCode) {
  193. if(U_FAILURE(errorCode)) {
  194. return false;
  195. }
  196. parts=nullptr;
  197. partsLength=0;
  198. numericValues=nullptr;
  199. numericValuesLength=0;
  200. if(partsList==nullptr) {
  201. partsList=new MessagePatternPartsList();
  202. if(partsList==nullptr) {
  203. errorCode=U_MEMORY_ALLOCATION_ERROR;
  204. return false;
  205. }
  206. parts=partsList->a.getAlias();
  207. }
  208. if(other.partsLength>0) {
  209. partsList->copyFrom(*other.partsList, other.partsLength, errorCode);
  210. if(U_FAILURE(errorCode)) {
  211. return false;
  212. }
  213. parts=partsList->a.getAlias();
  214. partsLength=other.partsLength;
  215. }
  216. if(other.numericValuesLength>0) {
  217. if(numericValuesList==nullptr) {
  218. numericValuesList=new MessagePatternDoubleList();
  219. if(numericValuesList==nullptr) {
  220. errorCode=U_MEMORY_ALLOCATION_ERROR;
  221. return false;
  222. }
  223. numericValues=numericValuesList->a.getAlias();
  224. }
  225. numericValuesList->copyFrom(
  226. *other.numericValuesList, other.numericValuesLength, errorCode);
  227. if(U_FAILURE(errorCode)) {
  228. return false;
  229. }
  230. numericValues=numericValuesList->a.getAlias();
  231. numericValuesLength=other.numericValuesLength;
  232. }
  233. return true;
  234. }
  235. MessagePattern::~MessagePattern() {
  236. delete partsList;
  237. delete numericValuesList;
  238. }
  239. // MessagePattern API ------------------------------------------------------ ***
  240. MessagePattern &
  241. MessagePattern::parse(const UnicodeString &pattern, UParseError *parseError, UErrorCode &errorCode) {
  242. preParse(pattern, parseError, errorCode);
  243. parseMessage(0, 0, 0, UMSGPAT_ARG_TYPE_NONE, parseError, errorCode);
  244. postParse();
  245. return *this;
  246. }
  247. MessagePattern &
  248. MessagePattern::parseChoiceStyle(const UnicodeString &pattern,
  249. UParseError *parseError, UErrorCode &errorCode) {
  250. preParse(pattern, parseError, errorCode);
  251. parseChoiceStyle(0, 0, parseError, errorCode);
  252. postParse();
  253. return *this;
  254. }
  255. MessagePattern &
  256. MessagePattern::parsePluralStyle(const UnicodeString &pattern,
  257. UParseError *parseError, UErrorCode &errorCode) {
  258. preParse(pattern, parseError, errorCode);
  259. parsePluralOrSelectStyle(UMSGPAT_ARG_TYPE_PLURAL, 0, 0, parseError, errorCode);
  260. postParse();
  261. return *this;
  262. }
  263. MessagePattern &
  264. MessagePattern::parseSelectStyle(const UnicodeString &pattern,
  265. UParseError *parseError, UErrorCode &errorCode) {
  266. preParse(pattern, parseError, errorCode);
  267. parsePluralOrSelectStyle(UMSGPAT_ARG_TYPE_SELECT, 0, 0, parseError, errorCode);
  268. postParse();
  269. return *this;
  270. }
  271. void
  272. MessagePattern::clear() {
  273. // Mostly the same as preParse().
  274. msg.remove();
  275. hasArgNames=hasArgNumbers=false;
  276. needsAutoQuoting=false;
  277. partsLength=0;
  278. numericValuesLength=0;
  279. }
  280. bool
  281. MessagePattern::operator==(const MessagePattern &other) const {
  282. if(this==&other) {
  283. return true;
  284. }
  285. return
  286. aposMode==other.aposMode &&
  287. msg==other.msg &&
  288. // parts.equals(o.parts)
  289. partsLength==other.partsLength &&
  290. (partsLength==0 || partsList->equals(*other.partsList, partsLength));
  291. // No need to compare numericValues if msg and parts are the same.
  292. }
  293. int32_t
  294. MessagePattern::hashCode() const {
  295. int32_t hash=(aposMode*37+msg.hashCode())*37+partsLength;
  296. for(int32_t i=0; i<partsLength; ++i) {
  297. hash=hash*37+parts[i].hashCode();
  298. }
  299. return hash;
  300. }
  301. int32_t
  302. MessagePattern::validateArgumentName(const UnicodeString &name) {
  303. if(!PatternProps::isIdentifier(name.getBuffer(), name.length())) {
  304. return UMSGPAT_ARG_NAME_NOT_VALID;
  305. }
  306. return parseArgNumber(name, 0, name.length());
  307. }
  308. UnicodeString
  309. MessagePattern::autoQuoteApostropheDeep() const {
  310. if(!needsAutoQuoting) {
  311. return msg;
  312. }
  313. UnicodeString modified(msg);
  314. // Iterate backward so that the insertion indexes do not change.
  315. int32_t count=countParts();
  316. for(int32_t i=count; i>0;) {
  317. const Part &part=getPart(--i);
  318. if(part.getType()==UMSGPAT_PART_TYPE_INSERT_CHAR) {
  319. modified.insert(part.index, (char16_t)part.value);
  320. }
  321. }
  322. return modified;
  323. }
  324. double
  325. MessagePattern::getNumericValue(const Part &part) const {
  326. UMessagePatternPartType type=part.type;
  327. if(type==UMSGPAT_PART_TYPE_ARG_INT) {
  328. return part.value;
  329. } else if(type==UMSGPAT_PART_TYPE_ARG_DOUBLE) {
  330. return numericValues[part.value];
  331. } else {
  332. return UMSGPAT_NO_NUMERIC_VALUE;
  333. }
  334. }
  335. /**
  336. * Returns the "offset:" value of a PluralFormat argument, or 0 if none is specified.
  337. * @param pluralStart the index of the first PluralFormat argument style part. (0..countParts()-1)
  338. * @return the "offset:" value.
  339. * @draft ICU 4.8
  340. */
  341. double
  342. MessagePattern::getPluralOffset(int32_t pluralStart) const {
  343. const Part &part=getPart(pluralStart);
  344. if(Part::hasNumericValue(part.type)) {
  345. return getNumericValue(part);
  346. } else {
  347. return 0;
  348. }
  349. }
  350. // MessagePattern::Part ---------------------------------------------------- ***
  351. bool
  352. MessagePattern::Part::operator==(const Part &other) const {
  353. if(this==&other) {
  354. return true;
  355. }
  356. return
  357. type==other.type &&
  358. index==other.index &&
  359. length==other.length &&
  360. value==other.value &&
  361. limitPartIndex==other.limitPartIndex;
  362. }
  363. // MessagePattern parser --------------------------------------------------- ***
  364. void
  365. MessagePattern::preParse(const UnicodeString &pattern, UParseError *parseError, UErrorCode &errorCode) {
  366. if(U_FAILURE(errorCode)) {
  367. return;
  368. }
  369. if(parseError!=nullptr) {
  370. parseError->line=0;
  371. parseError->offset=0;
  372. parseError->preContext[0]=0;
  373. parseError->postContext[0]=0;
  374. }
  375. msg=pattern;
  376. hasArgNames=hasArgNumbers=false;
  377. needsAutoQuoting=false;
  378. partsLength=0;
  379. numericValuesLength=0;
  380. }
  381. void
  382. MessagePattern::postParse() {
  383. if(partsList!=nullptr) {
  384. parts=partsList->a.getAlias();
  385. }
  386. if(numericValuesList!=nullptr) {
  387. numericValues=numericValuesList->a.getAlias();
  388. }
  389. }
  390. int32_t
  391. MessagePattern::parseMessage(int32_t index, int32_t msgStartLength,
  392. int32_t nestingLevel, UMessagePatternArgType parentType,
  393. UParseError *parseError, UErrorCode &errorCode) {
  394. if(U_FAILURE(errorCode)) {
  395. return 0;
  396. }
  397. if(nestingLevel>Part::MAX_VALUE) {
  398. errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
  399. return 0;
  400. }
  401. int32_t msgStart=partsLength;
  402. addPart(UMSGPAT_PART_TYPE_MSG_START, index, msgStartLength, nestingLevel, errorCode);
  403. index+=msgStartLength;
  404. for(;;) { // while(index<msg.length()) with U_FAILURE(errorCode) check
  405. if(U_FAILURE(errorCode)) {
  406. return 0;
  407. }
  408. if(index>=msg.length()) {
  409. break;
  410. }
  411. char16_t c=msg.charAt(index++);
  412. if(c==u_apos) {
  413. if(index==msg.length()) {
  414. // The apostrophe is the last character in the pattern.
  415. // Add a Part for auto-quoting.
  416. addPart(UMSGPAT_PART_TYPE_INSERT_CHAR, index, 0,
  417. u_apos, errorCode); // value=char to be inserted
  418. needsAutoQuoting=true;
  419. } else {
  420. c=msg.charAt(index);
  421. if(c==u_apos) {
  422. // double apostrophe, skip the second one
  423. addPart(UMSGPAT_PART_TYPE_SKIP_SYNTAX, index++, 1, 0, errorCode);
  424. } else if(
  425. aposMode==UMSGPAT_APOS_DOUBLE_REQUIRED ||
  426. c==u_leftCurlyBrace || c==u_rightCurlyBrace ||
  427. (parentType==UMSGPAT_ARG_TYPE_CHOICE && c==u_pipe) ||
  428. (UMSGPAT_ARG_TYPE_HAS_PLURAL_STYLE(parentType) && c==u_pound)
  429. ) {
  430. // skip the quote-starting apostrophe
  431. addPart(UMSGPAT_PART_TYPE_SKIP_SYNTAX, index-1, 1, 0, errorCode);
  432. // find the end of the quoted literal text
  433. for(;;) {
  434. index=msg.indexOf(u_apos, index+1);
  435. if(index>=0) {
  436. if(/*(index+1)<msg.length() &&*/ msg.charAt(index+1)==u_apos) {
  437. // double apostrophe inside quoted literal text
  438. // still encodes a single apostrophe, skip the second one
  439. addPart(UMSGPAT_PART_TYPE_SKIP_SYNTAX, ++index, 1, 0, errorCode);
  440. } else {
  441. // skip the quote-ending apostrophe
  442. addPart(UMSGPAT_PART_TYPE_SKIP_SYNTAX, index++, 1, 0, errorCode);
  443. break;
  444. }
  445. } else {
  446. // The quoted text reaches to the end of the of the message.
  447. index=msg.length();
  448. // Add a Part for auto-quoting.
  449. addPart(UMSGPAT_PART_TYPE_INSERT_CHAR, index, 0,
  450. u_apos, errorCode); // value=char to be inserted
  451. needsAutoQuoting=true;
  452. break;
  453. }
  454. }
  455. } else {
  456. // Interpret the apostrophe as literal text.
  457. // Add a Part for auto-quoting.
  458. addPart(UMSGPAT_PART_TYPE_INSERT_CHAR, index, 0,
  459. u_apos, errorCode); // value=char to be inserted
  460. needsAutoQuoting=true;
  461. }
  462. }
  463. } else if(UMSGPAT_ARG_TYPE_HAS_PLURAL_STYLE(parentType) && c==u_pound) {
  464. // The unquoted # in a plural message fragment will be replaced
  465. // with the (number-offset).
  466. addPart(UMSGPAT_PART_TYPE_REPLACE_NUMBER, index-1, 1, 0, errorCode);
  467. } else if(c==u_leftCurlyBrace) {
  468. index=parseArg(index-1, 1, nestingLevel, parseError, errorCode);
  469. } else if((nestingLevel>0 && c==u_rightCurlyBrace) ||
  470. (parentType==UMSGPAT_ARG_TYPE_CHOICE && c==u_pipe)) {
  471. // Finish the message before the terminator.
  472. // In a choice style, report the "}" substring only for the following ARG_LIMIT,
  473. // not for this MSG_LIMIT.
  474. int32_t limitLength=(parentType==UMSGPAT_ARG_TYPE_CHOICE && c==u_rightCurlyBrace) ? 0 : 1;
  475. addLimitPart(msgStart, UMSGPAT_PART_TYPE_MSG_LIMIT, index-1, limitLength,
  476. nestingLevel, errorCode);
  477. if(parentType==UMSGPAT_ARG_TYPE_CHOICE) {
  478. // Let the choice style parser see the '}' or '|'.
  479. return index-1;
  480. } else {
  481. // continue parsing after the '}'
  482. return index;
  483. }
  484. } // else: c is part of literal text
  485. }
  486. if(nestingLevel>0 && !inTopLevelChoiceMessage(nestingLevel, parentType)) {
  487. setParseError(parseError, 0); // Unmatched '{' braces in message.
  488. errorCode=U_UNMATCHED_BRACES;
  489. return 0;
  490. }
  491. addLimitPart(msgStart, UMSGPAT_PART_TYPE_MSG_LIMIT, index, 0, nestingLevel, errorCode);
  492. return index;
  493. }
  494. int32_t
  495. MessagePattern::parseArg(int32_t index, int32_t argStartLength, int32_t nestingLevel,
  496. UParseError *parseError, UErrorCode &errorCode) {
  497. int32_t argStart=partsLength;
  498. UMessagePatternArgType argType=UMSGPAT_ARG_TYPE_NONE;
  499. addPart(UMSGPAT_PART_TYPE_ARG_START, index, argStartLength, argType, errorCode);
  500. if(U_FAILURE(errorCode)) {
  501. return 0;
  502. }
  503. int32_t nameIndex=index=skipWhiteSpace(index+argStartLength);
  504. if(index==msg.length()) {
  505. setParseError(parseError, 0); // Unmatched '{' braces in message.
  506. errorCode=U_UNMATCHED_BRACES;
  507. return 0;
  508. }
  509. // parse argument name or number
  510. index=skipIdentifier(index);
  511. int32_t number=parseArgNumber(nameIndex, index);
  512. if(number>=0) {
  513. int32_t length=index-nameIndex;
  514. if(length>Part::MAX_LENGTH || number>Part::MAX_VALUE) {
  515. setParseError(parseError, nameIndex); // Argument number too large.
  516. errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
  517. return 0;
  518. }
  519. hasArgNumbers=true;
  520. addPart(UMSGPAT_PART_TYPE_ARG_NUMBER, nameIndex, length, number, errorCode);
  521. } else if(number==UMSGPAT_ARG_NAME_NOT_NUMBER) {
  522. int32_t length=index-nameIndex;
  523. if(length>Part::MAX_LENGTH) {
  524. setParseError(parseError, nameIndex); // Argument name too long.
  525. errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
  526. return 0;
  527. }
  528. hasArgNames=true;
  529. addPart(UMSGPAT_PART_TYPE_ARG_NAME, nameIndex, length, 0, errorCode);
  530. } else { // number<-1 (ARG_NAME_NOT_VALID)
  531. setParseError(parseError, nameIndex); // Bad argument syntax.
  532. errorCode=U_PATTERN_SYNTAX_ERROR;
  533. return 0;
  534. }
  535. index=skipWhiteSpace(index);
  536. if(index==msg.length()) {
  537. setParseError(parseError, 0); // Unmatched '{' braces in message.
  538. errorCode=U_UNMATCHED_BRACES;
  539. return 0;
  540. }
  541. char16_t c=msg.charAt(index);
  542. if(c==u_rightCurlyBrace) {
  543. // all done
  544. } else if(c!=u_comma) {
  545. setParseError(parseError, nameIndex); // Bad argument syntax.
  546. errorCode=U_PATTERN_SYNTAX_ERROR;
  547. return 0;
  548. } else /* ',' */ {
  549. // parse argument type: case-sensitive a-zA-Z
  550. int32_t typeIndex=index=skipWhiteSpace(index+1);
  551. while(index<msg.length() && isArgTypeChar(msg.charAt(index))) {
  552. ++index;
  553. }
  554. int32_t length=index-typeIndex;
  555. index=skipWhiteSpace(index);
  556. if(index==msg.length()) {
  557. setParseError(parseError, 0); // Unmatched '{' braces in message.
  558. errorCode=U_UNMATCHED_BRACES;
  559. return 0;
  560. }
  561. if(length==0 || ((c=msg.charAt(index))!=u_comma && c!=u_rightCurlyBrace)) {
  562. setParseError(parseError, nameIndex); // Bad argument syntax.
  563. errorCode=U_PATTERN_SYNTAX_ERROR;
  564. return 0;
  565. }
  566. if(length>Part::MAX_LENGTH) {
  567. setParseError(parseError, nameIndex); // Argument type name too long.
  568. errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
  569. return 0;
  570. }
  571. argType=UMSGPAT_ARG_TYPE_SIMPLE;
  572. if(length==6) {
  573. // case-insensitive comparisons for complex-type names
  574. if(isChoice(typeIndex)) {
  575. argType=UMSGPAT_ARG_TYPE_CHOICE;
  576. } else if(isPlural(typeIndex)) {
  577. argType=UMSGPAT_ARG_TYPE_PLURAL;
  578. } else if(isSelect(typeIndex)) {
  579. argType=UMSGPAT_ARG_TYPE_SELECT;
  580. }
  581. } else if(length==13) {
  582. if(isSelect(typeIndex) && isOrdinal(typeIndex+6)) {
  583. argType=UMSGPAT_ARG_TYPE_SELECTORDINAL;
  584. }
  585. }
  586. // change the ARG_START type from NONE to argType
  587. partsList->a[argStart].value=(int16_t)argType;
  588. if(argType==UMSGPAT_ARG_TYPE_SIMPLE) {
  589. addPart(UMSGPAT_PART_TYPE_ARG_TYPE, typeIndex, length, 0, errorCode);
  590. }
  591. // look for an argument style (pattern)
  592. if(c==u_rightCurlyBrace) {
  593. if(argType!=UMSGPAT_ARG_TYPE_SIMPLE) {
  594. setParseError(parseError, nameIndex); // No style field for complex argument.
  595. errorCode=U_PATTERN_SYNTAX_ERROR;
  596. return 0;
  597. }
  598. } else /* ',' */ {
  599. ++index;
  600. if(argType==UMSGPAT_ARG_TYPE_SIMPLE) {
  601. index=parseSimpleStyle(index, parseError, errorCode);
  602. } else if(argType==UMSGPAT_ARG_TYPE_CHOICE) {
  603. index=parseChoiceStyle(index, nestingLevel, parseError, errorCode);
  604. } else {
  605. index=parsePluralOrSelectStyle(argType, index, nestingLevel, parseError, errorCode);
  606. }
  607. }
  608. }
  609. // Argument parsing stopped on the '}'.
  610. addLimitPart(argStart, UMSGPAT_PART_TYPE_ARG_LIMIT, index, 1, argType, errorCode);
  611. return index+1;
  612. }
  613. int32_t
  614. MessagePattern::parseSimpleStyle(int32_t index, UParseError *parseError, UErrorCode &errorCode) {
  615. if(U_FAILURE(errorCode)) {
  616. return 0;
  617. }
  618. int32_t start=index;
  619. int32_t nestedBraces=0;
  620. while(index<msg.length()) {
  621. char16_t c=msg.charAt(index++);
  622. if(c==u_apos) {
  623. // Treat apostrophe as quoting but include it in the style part.
  624. // Find the end of the quoted literal text.
  625. index=msg.indexOf(u_apos, index);
  626. if(index<0) {
  627. // Quoted literal argument style text reaches to the end of the message.
  628. setParseError(parseError, start);
  629. errorCode=U_PATTERN_SYNTAX_ERROR;
  630. return 0;
  631. }
  632. // skip the quote-ending apostrophe
  633. ++index;
  634. } else if(c==u_leftCurlyBrace) {
  635. ++nestedBraces;
  636. } else if(c==u_rightCurlyBrace) {
  637. if(nestedBraces>0) {
  638. --nestedBraces;
  639. } else {
  640. int32_t length=--index-start;
  641. if(length>Part::MAX_LENGTH) {
  642. setParseError(parseError, start); // Argument style text too long.
  643. errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
  644. return 0;
  645. }
  646. addPart(UMSGPAT_PART_TYPE_ARG_STYLE, start, length, 0, errorCode);
  647. return index;
  648. }
  649. } // c is part of literal text
  650. }
  651. setParseError(parseError, 0); // Unmatched '{' braces in message.
  652. errorCode=U_UNMATCHED_BRACES;
  653. return 0;
  654. }
  655. int32_t
  656. MessagePattern::parseChoiceStyle(int32_t index, int32_t nestingLevel,
  657. UParseError *parseError, UErrorCode &errorCode) {
  658. if(U_FAILURE(errorCode)) {
  659. return 0;
  660. }
  661. int32_t start=index;
  662. index=skipWhiteSpace(index);
  663. if(index==msg.length() || msg.charAt(index)==u_rightCurlyBrace) {
  664. setParseError(parseError, 0); // Missing choice argument pattern.
  665. errorCode=U_PATTERN_SYNTAX_ERROR;
  666. return 0;
  667. }
  668. for(;;) {
  669. // The choice argument style contains |-separated (number, separator, message) triples.
  670. // Parse the number.
  671. int32_t numberIndex=index;
  672. index=skipDouble(index);
  673. int32_t length=index-numberIndex;
  674. if(length==0) {
  675. setParseError(parseError, start); // Bad choice pattern syntax.
  676. errorCode=U_PATTERN_SYNTAX_ERROR;
  677. return 0;
  678. }
  679. if(length>Part::MAX_LENGTH) {
  680. setParseError(parseError, numberIndex); // Choice number too long.
  681. errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
  682. return 0;
  683. }
  684. parseDouble(numberIndex, index, true, parseError, errorCode); // adds ARG_INT or ARG_DOUBLE
  685. if(U_FAILURE(errorCode)) {
  686. return 0;
  687. }
  688. // Parse the separator.
  689. index=skipWhiteSpace(index);
  690. if(index==msg.length()) {
  691. setParseError(parseError, start); // Bad choice pattern syntax.
  692. errorCode=U_PATTERN_SYNTAX_ERROR;
  693. return 0;
  694. }
  695. char16_t c=msg.charAt(index);
  696. if(!(c==u_pound || c==u_lessThan || c==u_lessOrEqual)) { // U+2264 is <=
  697. setParseError(parseError, start); // Expected choice separator (#<\u2264) instead of c.
  698. errorCode=U_PATTERN_SYNTAX_ERROR;
  699. return 0;
  700. }
  701. addPart(UMSGPAT_PART_TYPE_ARG_SELECTOR, index, 1, 0, errorCode);
  702. // Parse the message fragment.
  703. index=parseMessage(++index, 0, nestingLevel+1, UMSGPAT_ARG_TYPE_CHOICE, parseError, errorCode);
  704. if(U_FAILURE(errorCode)) {
  705. return 0;
  706. }
  707. // parseMessage(..., CHOICE) returns the index of the terminator, or msg.length().
  708. if(index==msg.length()) {
  709. return index;
  710. }
  711. if(msg.charAt(index)==u_rightCurlyBrace) {
  712. if(!inMessageFormatPattern(nestingLevel)) {
  713. setParseError(parseError, start); // Bad choice pattern syntax.
  714. errorCode=U_PATTERN_SYNTAX_ERROR;
  715. return 0;
  716. }
  717. return index;
  718. } // else the terminator is '|'
  719. index=skipWhiteSpace(index+1);
  720. }
  721. }
  722. int32_t
  723. MessagePattern::parsePluralOrSelectStyle(UMessagePatternArgType argType,
  724. int32_t index, int32_t nestingLevel,
  725. UParseError *parseError, UErrorCode &errorCode) {
  726. if(U_FAILURE(errorCode)) {
  727. return 0;
  728. }
  729. int32_t start=index;
  730. UBool isEmpty=true;
  731. UBool hasOther=false;
  732. for(;;) {
  733. // First, collect the selector looking for a small set of terminators.
  734. // It would be a little faster to consider the syntax of each possible
  735. // token right here, but that makes the code too complicated.
  736. index=skipWhiteSpace(index);
  737. UBool eos=index==msg.length();
  738. if(eos || msg.charAt(index)==u_rightCurlyBrace) {
  739. if(eos==inMessageFormatPattern(nestingLevel)) {
  740. setParseError(parseError, start); // Bad plural/select pattern syntax.
  741. errorCode=U_PATTERN_SYNTAX_ERROR;
  742. return 0;
  743. }
  744. if(!hasOther) {
  745. setParseError(parseError, 0); // Missing 'other' keyword in plural/select pattern.
  746. errorCode=U_DEFAULT_KEYWORD_MISSING;
  747. return 0;
  748. }
  749. return index;
  750. }
  751. int32_t selectorIndex=index;
  752. if(UMSGPAT_ARG_TYPE_HAS_PLURAL_STYLE(argType) && msg.charAt(selectorIndex)==u_equal) {
  753. // explicit-value plural selector: =double
  754. index=skipDouble(index+1);
  755. int32_t length=index-selectorIndex;
  756. if(length==1) {
  757. setParseError(parseError, start); // Bad plural/select pattern syntax.
  758. errorCode=U_PATTERN_SYNTAX_ERROR;
  759. return 0;
  760. }
  761. if(length>Part::MAX_LENGTH) {
  762. setParseError(parseError, selectorIndex); // Argument selector too long.
  763. errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
  764. return 0;
  765. }
  766. addPart(UMSGPAT_PART_TYPE_ARG_SELECTOR, selectorIndex, length, 0, errorCode);
  767. parseDouble(selectorIndex+1, index, false,
  768. parseError, errorCode); // adds ARG_INT or ARG_DOUBLE
  769. } else {
  770. index=skipIdentifier(index);
  771. int32_t length=index-selectorIndex;
  772. if(length==0) {
  773. setParseError(parseError, start); // Bad plural/select pattern syntax.
  774. errorCode=U_PATTERN_SYNTAX_ERROR;
  775. return 0;
  776. }
  777. // Note: The ':' in "offset:" is just beyond the skipIdentifier() range.
  778. if( UMSGPAT_ARG_TYPE_HAS_PLURAL_STYLE(argType) && length==6 && index<msg.length() &&
  779. 0==msg.compare(selectorIndex, 7, kOffsetColon, 0, 7)
  780. ) {
  781. // plural offset, not a selector
  782. if(!isEmpty) {
  783. // Plural argument 'offset:' (if present) must precede key-message pairs.
  784. setParseError(parseError, start);
  785. errorCode=U_PATTERN_SYNTAX_ERROR;
  786. return 0;
  787. }
  788. // allow whitespace between offset: and its value
  789. int32_t valueIndex=skipWhiteSpace(index+1); // The ':' is at index.
  790. index=skipDouble(valueIndex);
  791. if(index==valueIndex) {
  792. setParseError(parseError, start); // Missing value for plural 'offset:'.
  793. errorCode=U_PATTERN_SYNTAX_ERROR;
  794. return 0;
  795. }
  796. if((index-valueIndex)>Part::MAX_LENGTH) {
  797. setParseError(parseError, valueIndex); // Plural offset value too long.
  798. errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
  799. return 0;
  800. }
  801. parseDouble(valueIndex, index, false,
  802. parseError, errorCode); // adds ARG_INT or ARG_DOUBLE
  803. if(U_FAILURE(errorCode)) {
  804. return 0;
  805. }
  806. isEmpty=false;
  807. continue; // no message fragment after the offset
  808. } else {
  809. // normal selector word
  810. if(length>Part::MAX_LENGTH) {
  811. setParseError(parseError, selectorIndex); // Argument selector too long.
  812. errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
  813. return 0;
  814. }
  815. addPart(UMSGPAT_PART_TYPE_ARG_SELECTOR, selectorIndex, length, 0, errorCode);
  816. if(0==msg.compare(selectorIndex, length, kOther, 0, 5)) {
  817. hasOther=true;
  818. }
  819. }
  820. }
  821. if(U_FAILURE(errorCode)) {
  822. return 0;
  823. }
  824. // parse the message fragment following the selector
  825. index=skipWhiteSpace(index);
  826. if(index==msg.length() || msg.charAt(index)!=u_leftCurlyBrace) {
  827. setParseError(parseError, selectorIndex); // No message fragment after plural/select selector.
  828. errorCode=U_PATTERN_SYNTAX_ERROR;
  829. return 0;
  830. }
  831. index=parseMessage(index, 1, nestingLevel+1, argType, parseError, errorCode);
  832. if(U_FAILURE(errorCode)) {
  833. return 0;
  834. }
  835. isEmpty=false;
  836. }
  837. }
  838. int32_t
  839. MessagePattern::parseArgNumber(const UnicodeString &s, int32_t start, int32_t limit) {
  840. // If the identifier contains only ASCII digits, then it is an argument _number_
  841. // and must not have leading zeros (except "0" itself).
  842. // Otherwise it is an argument _name_.
  843. if(start>=limit) {
  844. return UMSGPAT_ARG_NAME_NOT_VALID;
  845. }
  846. int32_t number;
  847. // Defer numeric errors until we know there are only digits.
  848. UBool badNumber;
  849. char16_t c=s.charAt(start++);
  850. if(c==0x30) {
  851. if(start==limit) {
  852. return 0;
  853. } else {
  854. number=0;
  855. badNumber=true; // leading zero
  856. }
  857. } else if(0x31<=c && c<=0x39) {
  858. number=c-0x30;
  859. badNumber=false;
  860. } else {
  861. return UMSGPAT_ARG_NAME_NOT_NUMBER;
  862. }
  863. while(start<limit) {
  864. c=s.charAt(start++);
  865. if(0x30<=c && c<=0x39) {
  866. if(number>=INT32_MAX/10) {
  867. badNumber=true; // overflow
  868. }
  869. number=number*10+(c-0x30);
  870. } else {
  871. return UMSGPAT_ARG_NAME_NOT_NUMBER;
  872. }
  873. }
  874. // There are only ASCII digits.
  875. if(badNumber) {
  876. return UMSGPAT_ARG_NAME_NOT_VALID;
  877. } else {
  878. return number;
  879. }
  880. }
  881. void
  882. MessagePattern::parseDouble(int32_t start, int32_t limit, UBool allowInfinity,
  883. UParseError *parseError, UErrorCode &errorCode) {
  884. if(U_FAILURE(errorCode)) {
  885. return;
  886. }
  887. U_ASSERT(start<limit);
  888. // fake loop for easy exit and single throw statement
  889. for(;;) { /*loop doesn't iterate*/
  890. // fast path for small integers and infinity
  891. int32_t value=0;
  892. int32_t isNegative=0; // not boolean so that we can easily add it to value
  893. int32_t index=start;
  894. char16_t c=msg.charAt(index++);
  895. if(c==u_minus) {
  896. isNegative=1;
  897. if(index==limit) {
  898. break; // no number
  899. }
  900. c=msg.charAt(index++);
  901. } else if(c==u_plus) {
  902. if(index==limit) {
  903. break; // no number
  904. }
  905. c=msg.charAt(index++);
  906. }
  907. if(c==0x221e) { // infinity
  908. if(allowInfinity && index==limit) {
  909. double infinity=uprv_getInfinity();
  910. addArgDoublePart(
  911. isNegative!=0 ? -infinity : infinity,
  912. start, limit-start, errorCode);
  913. return;
  914. } else {
  915. break;
  916. }
  917. }
  918. // try to parse the number as a small integer but fall back to a double
  919. while('0'<=c && c<='9') {
  920. value=value*10+(c-'0');
  921. if(value>(Part::MAX_VALUE+isNegative)) {
  922. break; // not a small-enough integer
  923. }
  924. if(index==limit) {
  925. addPart(UMSGPAT_PART_TYPE_ARG_INT, start, limit-start,
  926. isNegative!=0 ? -value : value, errorCode);
  927. return;
  928. }
  929. c=msg.charAt(index++);
  930. }
  931. // Let Double.parseDouble() throw a NumberFormatException.
  932. char numberChars[128];
  933. int32_t capacity=(int32_t)sizeof(numberChars);
  934. int32_t length=limit-start;
  935. if(length>=capacity) {
  936. break; // number too long
  937. }
  938. msg.extract(start, length, numberChars, capacity, US_INV);
  939. if((int32_t)uprv_strlen(numberChars)<length) {
  940. break; // contains non-invariant character that was turned into NUL
  941. }
  942. char *end;
  943. double numericValue=uprv_strtod(numberChars, &end);
  944. if(end!=(numberChars+length)) {
  945. break; // parsing error
  946. }
  947. addArgDoublePart(numericValue, start, length, errorCode);
  948. return;
  949. }
  950. setParseError(parseError, start /*, limit*/); // Bad syntax for numeric value.
  951. errorCode=U_PATTERN_SYNTAX_ERROR;
  952. return;
  953. }
  954. int32_t
  955. MessagePattern::skipWhiteSpace(int32_t index) {
  956. const char16_t *s=msg.getBuffer();
  957. int32_t msgLength=msg.length();
  958. const char16_t *t=PatternProps::skipWhiteSpace(s+index, msgLength-index);
  959. return (int32_t)(t-s);
  960. }
  961. int32_t
  962. MessagePattern::skipIdentifier(int32_t index) {
  963. const char16_t *s=msg.getBuffer();
  964. int32_t msgLength=msg.length();
  965. const char16_t *t=PatternProps::skipIdentifier(s+index, msgLength-index);
  966. return (int32_t)(t-s);
  967. }
  968. int32_t
  969. MessagePattern::skipDouble(int32_t index) {
  970. int32_t msgLength=msg.length();
  971. while(index<msgLength) {
  972. char16_t c=msg.charAt(index);
  973. // U+221E: Allow the infinity symbol, for ChoiceFormat patterns.
  974. if((c<0x30 && c!=u_plus && c!=u_minus && c!=u_dot) || (c>0x39 && c!=u_e && c!=u_E && c!=0x221e)) {
  975. break;
  976. }
  977. ++index;
  978. }
  979. return index;
  980. }
  981. UBool
  982. MessagePattern::isArgTypeChar(UChar32 c) {
  983. return (u_a<=c && c<=u_z) || (u_A<=c && c<=u_Z);
  984. }
  985. UBool
  986. MessagePattern::isChoice(int32_t index) {
  987. char16_t c;
  988. return
  989. ((c=msg.charAt(index++))==u_c || c==u_C) &&
  990. ((c=msg.charAt(index++))==u_h || c==u_H) &&
  991. ((c=msg.charAt(index++))==u_o || c==u_O) &&
  992. ((c=msg.charAt(index++))==u_i || c==u_I) &&
  993. ((c=msg.charAt(index++))==u_c || c==u_C) &&
  994. ((c=msg.charAt(index))==u_e || c==u_E);
  995. }
  996. UBool
  997. MessagePattern::isPlural(int32_t index) {
  998. char16_t c;
  999. return
  1000. ((c=msg.charAt(index++))==u_p || c==u_P) &&
  1001. ((c=msg.charAt(index++))==u_l || c==u_L) &&
  1002. ((c=msg.charAt(index++))==u_u || c==u_U) &&
  1003. ((c=msg.charAt(index++))==u_r || c==u_R) &&
  1004. ((c=msg.charAt(index++))==u_a || c==u_A) &&
  1005. ((c=msg.charAt(index))==u_l || c==u_L);
  1006. }
  1007. UBool
  1008. MessagePattern::isSelect(int32_t index) {
  1009. char16_t c;
  1010. return
  1011. ((c=msg.charAt(index++))==u_s || c==u_S) &&
  1012. ((c=msg.charAt(index++))==u_e || c==u_E) &&
  1013. ((c=msg.charAt(index++))==u_l || c==u_L) &&
  1014. ((c=msg.charAt(index++))==u_e || c==u_E) &&
  1015. ((c=msg.charAt(index++))==u_c || c==u_C) &&
  1016. ((c=msg.charAt(index))==u_t || c==u_T);
  1017. }
  1018. UBool
  1019. MessagePattern::isOrdinal(int32_t index) {
  1020. char16_t c;
  1021. return
  1022. ((c=msg.charAt(index++))==u_o || c==u_O) &&
  1023. ((c=msg.charAt(index++))==u_r || c==u_R) &&
  1024. ((c=msg.charAt(index++))==u_d || c==u_D) &&
  1025. ((c=msg.charAt(index++))==u_i || c==u_I) &&
  1026. ((c=msg.charAt(index++))==u_n || c==u_N) &&
  1027. ((c=msg.charAt(index++))==u_a || c==u_A) &&
  1028. ((c=msg.charAt(index))==u_l || c==u_L);
  1029. }
  1030. UBool
  1031. MessagePattern::inMessageFormatPattern(int32_t nestingLevel) {
  1032. return nestingLevel>0 || partsList->a[0].type==UMSGPAT_PART_TYPE_MSG_START;
  1033. }
  1034. UBool
  1035. MessagePattern::inTopLevelChoiceMessage(int32_t nestingLevel, UMessagePatternArgType parentType) {
  1036. return
  1037. nestingLevel==1 &&
  1038. parentType==UMSGPAT_ARG_TYPE_CHOICE &&
  1039. partsList->a[0].type!=UMSGPAT_PART_TYPE_MSG_START;
  1040. }
  1041. void
  1042. MessagePattern::addPart(UMessagePatternPartType type, int32_t index, int32_t length,
  1043. int32_t value, UErrorCode &errorCode) {
  1044. if(partsList->ensureCapacityForOneMore(partsLength, errorCode)) {
  1045. Part &part=partsList->a[partsLength++];
  1046. part.type=type;
  1047. part.index=index;
  1048. part.length=(uint16_t)length;
  1049. part.value=(int16_t)value;
  1050. part.limitPartIndex=0;
  1051. }
  1052. }
  1053. void
  1054. MessagePattern::addLimitPart(int32_t start,
  1055. UMessagePatternPartType type, int32_t index, int32_t length,
  1056. int32_t value, UErrorCode &errorCode) {
  1057. partsList->a[start].limitPartIndex=partsLength;
  1058. addPart(type, index, length, value, errorCode);
  1059. }
  1060. void
  1061. MessagePattern::addArgDoublePart(double numericValue, int32_t start, int32_t length,
  1062. UErrorCode &errorCode) {
  1063. if(U_FAILURE(errorCode)) {
  1064. return;
  1065. }
  1066. int32_t numericIndex=numericValuesLength;
  1067. if(numericValuesList==nullptr) {
  1068. numericValuesList=new MessagePatternDoubleList();
  1069. if(numericValuesList==nullptr) {
  1070. errorCode=U_MEMORY_ALLOCATION_ERROR;
  1071. return;
  1072. }
  1073. } else if(!numericValuesList->ensureCapacityForOneMore(numericValuesLength, errorCode)) {
  1074. return;
  1075. } else {
  1076. if(numericIndex>Part::MAX_VALUE) {
  1077. errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
  1078. return;
  1079. }
  1080. }
  1081. numericValuesList->a[numericValuesLength++]=numericValue;
  1082. addPart(UMSGPAT_PART_TYPE_ARG_DOUBLE, start, length, numericIndex, errorCode);
  1083. }
  1084. void
  1085. MessagePattern::setParseError(UParseError *parseError, int32_t index) {
  1086. if(parseError==nullptr) {
  1087. return;
  1088. }
  1089. parseError->offset=index;
  1090. // Set preContext to some of msg before index.
  1091. // Avoid splitting a surrogate pair.
  1092. int32_t length=index;
  1093. if(length>=U_PARSE_CONTEXT_LEN) {
  1094. length=U_PARSE_CONTEXT_LEN-1;
  1095. if(length>0 && U16_IS_TRAIL(msg[index-length])) {
  1096. --length;
  1097. }
  1098. }
  1099. msg.extract(index-length, length, parseError->preContext);
  1100. parseError->preContext[length]=0;
  1101. // Set postContext to some of msg starting at index.
  1102. length=msg.length()-index;
  1103. if(length>=U_PARSE_CONTEXT_LEN) {
  1104. length=U_PARSE_CONTEXT_LEN-1;
  1105. if(length>0 && U16_IS_LEAD(msg[index+length-1])) {
  1106. --length;
  1107. }
  1108. }
  1109. msg.extract(index, length, parseError->postContext);
  1110. parseError->postContext[length]=0;
  1111. }
  1112. // MessageImpl ------------------------------------------------------------- ***
  1113. void
  1114. MessageImpl::appendReducedApostrophes(const UnicodeString &s, int32_t start, int32_t limit,
  1115. UnicodeString &sb) {
  1116. int32_t doubleApos=-1;
  1117. for(;;) {
  1118. int32_t i=s.indexOf(u_apos, start);
  1119. if(i<0 || i>=limit) {
  1120. sb.append(s, start, limit-start);
  1121. break;
  1122. }
  1123. if(i==doubleApos) {
  1124. // Double apostrophe at start-1 and start==i, append one.
  1125. sb.append(u_apos);
  1126. ++start;
  1127. doubleApos=-1;
  1128. } else {
  1129. // Append text between apostrophes and skip this one.
  1130. sb.append(s, start, i-start);
  1131. doubleApos=start=i+1;
  1132. }
  1133. }
  1134. }
  1135. // Ported from second half of ICU4J SelectFormat.format(String).
  1136. UnicodeString &
  1137. MessageImpl::appendSubMessageWithoutSkipSyntax(const MessagePattern &msgPattern,
  1138. int32_t msgStart,
  1139. UnicodeString &result) {
  1140. const UnicodeString &msgString=msgPattern.getPatternString();
  1141. int32_t prevIndex=msgPattern.getPart(msgStart).getLimit();
  1142. for(int32_t i=msgStart;;) {
  1143. const MessagePattern::Part &part=msgPattern.getPart(++i);
  1144. UMessagePatternPartType type=part.getType();
  1145. int32_t index=part.getIndex();
  1146. if(type==UMSGPAT_PART_TYPE_MSG_LIMIT) {
  1147. return result.append(msgString, prevIndex, index-prevIndex);
  1148. } else if(type==UMSGPAT_PART_TYPE_SKIP_SYNTAX) {
  1149. result.append(msgString, prevIndex, index-prevIndex);
  1150. prevIndex=part.getLimit();
  1151. } else if(type==UMSGPAT_PART_TYPE_ARG_START) {
  1152. result.append(msgString, prevIndex, index-prevIndex);
  1153. prevIndex=index;
  1154. i=msgPattern.getLimitPartIndex(i);
  1155. index=msgPattern.getPart(i).getLimit();
  1156. appendReducedApostrophes(msgString, prevIndex, index, result);
  1157. prevIndex=index;
  1158. }
  1159. }
  1160. }
  1161. U_NAMESPACE_END
  1162. #endif // !UCONFIG_NO_FORMATTING