messagepattern.cpp 44 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211121212131214121512161217121812191220122112221223122412251226122712281229123012311232
  1. // © 2016 and later: Unicode, Inc. and others.
  2. // License & terms of use: http://www.unicode.org/copyright.html
  3. /*
  4. *******************************************************************************
  5. * Copyright (C) 2011-2012, International Business Machines
  6. * Corporation and others. All Rights Reserved.
  7. *******************************************************************************
  8. * file name: messagepattern.cpp
  9. * encoding: UTF-8
  10. * tab size: 8 (not used)
  11. * indentation:4
  12. *
  13. * created on: 2011mar14
  14. * created by: Markus W. Scherer
  15. */
  16. #include "unicode/utypes.h"
  17. #if !UCONFIG_NO_FORMATTING
  18. #include "unicode/messagepattern.h"
  19. #include "unicode/unistr.h"
  20. #include "unicode/utf16.h"
  21. #include "cmemory.h"
  22. #include "cstring.h"
  23. #include "messageimpl.h"
  24. #include "patternprops.h"
  25. #include "putilimp.h"
  26. #include "uassert.h"
  27. U_NAMESPACE_BEGIN
  28. // Unicode character/code point constants ---------------------------------- ***
  // UTF-16 code unit constants used by the pattern parser.
  // Punctuation and syntax characters.
  static const char16_t u_pound           = 0x0023;  // '#'
  static const char16_t u_apos            = 0x0027;  // '\''
  static const char16_t u_plus            = 0x002B;  // '+'
  static const char16_t u_comma           = 0x002C;  // ','
  static const char16_t u_minus           = 0x002D;  // '-'
  static const char16_t u_dot             = 0x002E;  // '.'
  static const char16_t u_colon           = 0x003A;  // ':'
  static const char16_t u_lessThan        = 0x003C;  // '<'
  static const char16_t u_equal           = 0x003D;  // '='

  // Uppercase ASCII letters.
  static const char16_t u_A               = 0x0041;
  static const char16_t u_C               = 0x0043;
  static const char16_t u_D               = 0x0044;
  static const char16_t u_E               = 0x0045;
  static const char16_t u_H               = 0x0048;
  static const char16_t u_I               = 0x0049;
  static const char16_t u_L               = 0x004C;
  static const char16_t u_N               = 0x004E;
  static const char16_t u_O               = 0x004F;
  static const char16_t u_P               = 0x0050;
  static const char16_t u_R               = 0x0052;
  static const char16_t u_S               = 0x0053;
  static const char16_t u_T               = 0x0054;
  static const char16_t u_U               = 0x0055;
  static const char16_t u_Z               = 0x005A;

  // Lowercase ASCII letters.
  static const char16_t u_a               = 0x0061;
  static const char16_t u_c               = 0x0063;
  static const char16_t u_d               = 0x0064;
  static const char16_t u_e               = 0x0065;
  static const char16_t u_f               = 0x0066;
  static const char16_t u_h               = 0x0068;
  static const char16_t u_i               = 0x0069;
  static const char16_t u_l               = 0x006C;
  static const char16_t u_n               = 0x006E;
  static const char16_t u_o               = 0x006F;
  static const char16_t u_p               = 0x0070;
  static const char16_t u_r               = 0x0072;
  static const char16_t u_s               = 0x0073;
  static const char16_t u_t               = 0x0074;
  static const char16_t u_u               = 0x0075;
  static const char16_t u_z               = 0x007A;

  // Braces, pipe and the non-ASCII choice separator.
  static const char16_t u_leftCurlyBrace  = 0x007B;  // '{'
  static const char16_t u_pipe            = 0x007C;  // '|'
  static const char16_t u_rightCurlyBrace = 0x007D;  // '}'
  static const char16_t u_lessOrEqual     = 0x2264;  // U+2264 is <=

  // "offset:" (7 code units, not NUL-terminated)
  static const char16_t kOffsetColon[] = {
      u_o, u_f, u_f, u_s, u_e, u_t, u_colon
  };

  // "other" (5 code units, not NUL-terminated)
  static const char16_t kOther[] = {
      u_o, u_t, u_h, u_e, u_r
  };
  79. // MessagePatternList ------------------------------------------------------ ***
  80. template<typename T, int32_t stackCapacity>
  81. class MessagePatternList : public UMemory {
  82. public:
  83. MessagePatternList() {}
  84. void copyFrom(const MessagePatternList<T, stackCapacity> &other,
  85. int32_t length,
  86. UErrorCode &errorCode);
  87. UBool ensureCapacityForOneMore(int32_t oldLength, UErrorCode &errorCode);
  88. UBool equals(const MessagePatternList<T, stackCapacity> &other, int32_t length) const {
  89. for(int32_t i=0; i<length; ++i) {
  90. if(a[i]!=other.a[i]) { return false; }
  91. }
  92. return true;
  93. }
  94. MaybeStackArray<T, stackCapacity> a;
  95. };
  96. template<typename T, int32_t stackCapacity>
  97. void
  98. MessagePatternList<T, stackCapacity>::copyFrom(
  99. const MessagePatternList<T, stackCapacity> &other,
  100. int32_t length,
  101. UErrorCode &errorCode) {
  102. if(U_SUCCESS(errorCode) && length>0) {
  103. if(length>a.getCapacity() && nullptr==a.resize(length)) {
  104. errorCode=U_MEMORY_ALLOCATION_ERROR;
  105. return;
  106. }
  107. uprv_memcpy(a.getAlias(), other.a.getAlias(), (size_t)length*sizeof(T));
  108. }
  109. }
  110. template<typename T, int32_t stackCapacity>
  111. UBool
  112. MessagePatternList<T, stackCapacity>::ensureCapacityForOneMore(int32_t oldLength, UErrorCode &errorCode) {
  113. if(U_FAILURE(errorCode)) {
  114. return false;
  115. }
  116. if(a.getCapacity()>oldLength || a.resize(2*oldLength, oldLength)!=nullptr) {
  117. return true;
  118. }
  119. errorCode=U_MEMORY_ALLOCATION_ERROR;
  120. return false;
  121. }
  122. // MessagePatternList specializations -------------------------------------- ***
  123. class MessagePatternDoubleList : public MessagePatternList<double, 8> {
  124. };
  125. class MessagePatternPartsList : public MessagePatternList<MessagePattern::Part, 32> {
  126. };
  127. // MessagePattern constructors etc. ---------------------------------------- ***
  128. MessagePattern::MessagePattern(UErrorCode &errorCode)
  129. : aposMode(UCONFIG_MSGPAT_DEFAULT_APOSTROPHE_MODE),
  130. partsList(nullptr), parts(nullptr), partsLength(0),
  131. numericValuesList(nullptr), numericValues(nullptr), numericValuesLength(0),
  132. hasArgNames(false), hasArgNumbers(false), needsAutoQuoting(false) {
  133. init(errorCode);
  134. }
  135. MessagePattern::MessagePattern(UMessagePatternApostropheMode mode, UErrorCode &errorCode)
  136. : aposMode(mode),
  137. partsList(nullptr), parts(nullptr), partsLength(0),
  138. numericValuesList(nullptr), numericValues(nullptr), numericValuesLength(0),
  139. hasArgNames(false), hasArgNumbers(false), needsAutoQuoting(false) {
  140. init(errorCode);
  141. }
  142. MessagePattern::MessagePattern(const UnicodeString &pattern, UParseError *parseError, UErrorCode &errorCode)
  143. : aposMode(UCONFIG_MSGPAT_DEFAULT_APOSTROPHE_MODE),
  144. partsList(nullptr), parts(nullptr), partsLength(0),
  145. numericValuesList(nullptr), numericValues(nullptr), numericValuesLength(0),
  146. hasArgNames(false), hasArgNumbers(false), needsAutoQuoting(false) {
  147. if(init(errorCode)) {
  148. parse(pattern, parseError, errorCode);
  149. }
  150. }
  151. UBool
  152. MessagePattern::init(UErrorCode &errorCode) {
  153. if(U_FAILURE(errorCode)) {
  154. return false;
  155. }
  156. partsList=new MessagePatternPartsList();
  157. if(partsList==nullptr) {
  158. errorCode=U_MEMORY_ALLOCATION_ERROR;
  159. return false;
  160. }
  161. parts=partsList->a.getAlias();
  162. return true;
  163. }
  164. MessagePattern::MessagePattern(const MessagePattern &other)
  165. : UObject(other), aposMode(other.aposMode), msg(other.msg),
  166. partsList(nullptr), parts(nullptr), partsLength(0),
  167. numericValuesList(nullptr), numericValues(nullptr), numericValuesLength(0),
  168. hasArgNames(other.hasArgNames), hasArgNumbers(other.hasArgNumbers),
  169. needsAutoQuoting(other.needsAutoQuoting) {
  170. UErrorCode errorCode=U_ZERO_ERROR;
  171. if(!copyStorage(other, errorCode)) {
  172. clear();
  173. }
  174. }
  175. MessagePattern &
  176. MessagePattern::operator=(const MessagePattern &other) {
  177. if(this==&other) {
  178. return *this;
  179. }
  180. aposMode=other.aposMode;
  181. msg=other.msg;
  182. hasArgNames=other.hasArgNames;
  183. hasArgNumbers=other.hasArgNumbers;
  184. needsAutoQuoting=other.needsAutoQuoting;
  185. UErrorCode errorCode=U_ZERO_ERROR;
  186. if(!copyStorage(other, errorCode)) {
  187. clear();
  188. }
  189. return *this;
  190. }
  191. UBool
  192. MessagePattern::copyStorage(const MessagePattern &other, UErrorCode &errorCode) {
  193. if(U_FAILURE(errorCode)) {
  194. return false;
  195. }
  196. parts=nullptr;
  197. partsLength=0;
  198. numericValues=nullptr;
  199. numericValuesLength=0;
  200. if(partsList==nullptr) {
  201. partsList=new MessagePatternPartsList();
  202. if(partsList==nullptr) {
  203. errorCode=U_MEMORY_ALLOCATION_ERROR;
  204. return false;
  205. }
  206. parts=partsList->a.getAlias();
  207. }
  208. if(other.partsLength>0) {
  209. partsList->copyFrom(*other.partsList, other.partsLength, errorCode);
  210. if(U_FAILURE(errorCode)) {
  211. return false;
  212. }
  213. parts=partsList->a.getAlias();
  214. partsLength=other.partsLength;
  215. }
  216. if(other.numericValuesLength>0) {
  217. if(numericValuesList==nullptr) {
  218. numericValuesList=new MessagePatternDoubleList();
  219. if(numericValuesList==nullptr) {
  220. errorCode=U_MEMORY_ALLOCATION_ERROR;
  221. return false;
  222. }
  223. numericValues=numericValuesList->a.getAlias();
  224. }
  225. numericValuesList->copyFrom(
  226. *other.numericValuesList, other.numericValuesLength, errorCode);
  227. if(U_FAILURE(errorCode)) {
  228. return false;
  229. }
  230. numericValues=numericValuesList->a.getAlias();
  231. numericValuesLength=other.numericValuesLength;
  232. }
  233. return true;
  234. }
  235. MessagePattern::~MessagePattern() {
  236. delete partsList;
  237. delete numericValuesList;
  238. }
  239. // MessagePattern API ------------------------------------------------------ ***
  240. MessagePattern &
  241. MessagePattern::parse(const UnicodeString &pattern, UParseError *parseError, UErrorCode &errorCode) {
  242. preParse(pattern, parseError, errorCode);
  243. parseMessage(0, 0, 0, UMSGPAT_ARG_TYPE_NONE, parseError, errorCode);
  244. postParse();
  245. return *this;
  246. }
  247. MessagePattern &
  248. MessagePattern::parseChoiceStyle(const UnicodeString &pattern,
  249. UParseError *parseError, UErrorCode &errorCode) {
  250. preParse(pattern, parseError, errorCode);
  251. parseChoiceStyle(0, 0, parseError, errorCode);
  252. postParse();
  253. return *this;
  254. }
  255. MessagePattern &
  256. MessagePattern::parsePluralStyle(const UnicodeString &pattern,
  257. UParseError *parseError, UErrorCode &errorCode) {
  258. preParse(pattern, parseError, errorCode);
  259. parsePluralOrSelectStyle(UMSGPAT_ARG_TYPE_PLURAL, 0, 0, parseError, errorCode);
  260. postParse();
  261. return *this;
  262. }
  263. MessagePattern &
  264. MessagePattern::parseSelectStyle(const UnicodeString &pattern,
  265. UParseError *parseError, UErrorCode &errorCode) {
  266. preParse(pattern, parseError, errorCode);
  267. parsePluralOrSelectStyle(UMSGPAT_ARG_TYPE_SELECT, 0, 0, parseError, errorCode);
  268. postParse();
  269. return *this;
  270. }
  271. void
  272. MessagePattern::clear() {
  273. // Mostly the same as preParse().
  274. msg.remove();
  275. hasArgNames=hasArgNumbers=false;
  276. needsAutoQuoting=false;
  277. partsLength=0;
  278. numericValuesLength=0;
  279. }
  280. bool
  281. MessagePattern::operator==(const MessagePattern &other) const {
  282. if(this==&other) {
  283. return true;
  284. }
  285. return
  286. aposMode==other.aposMode &&
  287. msg==other.msg &&
  288. // parts.equals(o.parts)
  289. partsLength==other.partsLength &&
  290. (partsLength==0 || partsList->equals(*other.partsList, partsLength));
  291. // No need to compare numericValues if msg and parts are the same.
  292. }
  293. int32_t
  294. MessagePattern::hashCode() const {
  295. int32_t hash=(aposMode*37+msg.hashCode())*37+partsLength;
  296. for(int32_t i=0; i<partsLength; ++i) {
  297. hash=hash*37+parts[i].hashCode();
  298. }
  299. return hash;
  300. }
  301. int32_t
  302. MessagePattern::validateArgumentName(const UnicodeString &name) {
  303. if(!PatternProps::isIdentifier(name.getBuffer(), name.length())) {
  304. return UMSGPAT_ARG_NAME_NOT_VALID;
  305. }
  306. return parseArgNumber(name, 0, name.length());
  307. }
  308. UnicodeString
  309. MessagePattern::autoQuoteApostropheDeep() const {
  310. if(!needsAutoQuoting) {
  311. return msg;
  312. }
  313. UnicodeString modified(msg);
  314. // Iterate backward so that the insertion indexes do not change.
  315. int32_t count=countParts();
  316. for(int32_t i=count; i>0;) {
  317. const Part &part=getPart(--i);
  318. if(part.getType()==UMSGPAT_PART_TYPE_INSERT_CHAR) {
  319. modified.insert(part.index, static_cast<char16_t>(part.value));
  320. }
  321. }
  322. return modified;
  323. }
  324. double
  325. MessagePattern::getNumericValue(const Part &part) const {
  326. UMessagePatternPartType type=part.type;
  327. if(type==UMSGPAT_PART_TYPE_ARG_INT) {
  328. return part.value;
  329. } else if(type==UMSGPAT_PART_TYPE_ARG_DOUBLE) {
  330. return numericValues[part.value];
  331. } else {
  332. return UMSGPAT_NO_NUMERIC_VALUE;
  333. }
  334. }
  335. /**
  336. * Returns the "offset:" value of a PluralFormat argument, or 0 if none is specified.
  337. * @param pluralStart the index of the first PluralFormat argument style part. (0..countParts()-1)
  338. * @return the "offset:" value.
  339. * @draft ICU 4.8
  340. */
  341. double
  342. MessagePattern::getPluralOffset(int32_t pluralStart) const {
  343. const Part &part=getPart(pluralStart);
  344. if(Part::hasNumericValue(part.type)) {
  345. return getNumericValue(part);
  346. } else {
  347. return 0;
  348. }
  349. }
  350. // MessagePattern::Part ---------------------------------------------------- ***
  351. bool
  352. MessagePattern::Part::operator==(const Part &other) const {
  353. if(this==&other) {
  354. return true;
  355. }
  356. return
  357. type==other.type &&
  358. index==other.index &&
  359. length==other.length &&
  360. value==other.value &&
  361. limitPartIndex==other.limitPartIndex;
  362. }
  363. // MessagePattern parser --------------------------------------------------- ***
  364. void
  365. MessagePattern::preParse(const UnicodeString &pattern, UParseError *parseError, UErrorCode &errorCode) {
  366. if(U_FAILURE(errorCode)) {
  367. return;
  368. }
  369. if(parseError!=nullptr) {
  370. parseError->line=0;
  371. parseError->offset=0;
  372. parseError->preContext[0]=0;
  373. parseError->postContext[0]=0;
  374. }
  375. msg=pattern;
  376. hasArgNames=hasArgNumbers=false;
  377. needsAutoQuoting=false;
  378. partsLength=0;
  379. numericValuesLength=0;
  380. }
  381. void
  382. MessagePattern::postParse() {
  383. if(partsList!=nullptr) {
  384. parts=partsList->a.getAlias();
  385. }
  386. if(numericValuesList!=nullptr) {
  387. numericValues=numericValuesList->a.getAlias();
  388. }
  389. }
  390. int32_t
  391. MessagePattern::parseMessage(int32_t index, int32_t msgStartLength,
  392. int32_t nestingLevel, UMessagePatternArgType parentType,
  393. UParseError *parseError, UErrorCode &errorCode) {
  394. if(U_FAILURE(errorCode)) {
  395. return 0;
  396. }
  397. if(nestingLevel>Part::MAX_NESTED_LEVELS) {
  398. errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
  399. return 0;
  400. }
  401. int32_t msgStart=partsLength;
  402. addPart(UMSGPAT_PART_TYPE_MSG_START, index, msgStartLength, nestingLevel, errorCode);
  403. index+=msgStartLength;
  404. for(;;) { // while(index<msg.length()) with U_FAILURE(errorCode) check
  405. if(U_FAILURE(errorCode)) {
  406. return 0;
  407. }
  408. if(index>=msg.length()) {
  409. break;
  410. }
  411. char16_t c=msg.charAt(index++);
  412. if(c==u_apos) {
  413. if(index==msg.length()) {
  414. // The apostrophe is the last character in the pattern.
  415. // Add a Part for auto-quoting.
  416. addPart(UMSGPAT_PART_TYPE_INSERT_CHAR, index, 0,
  417. u_apos, errorCode); // value=char to be inserted
  418. needsAutoQuoting=true;
  419. } else {
  420. c=msg.charAt(index);
  421. if(c==u_apos) {
  422. // double apostrophe, skip the second one
  423. addPart(UMSGPAT_PART_TYPE_SKIP_SYNTAX, index++, 1, 0, errorCode);
  424. } else if(
  425. aposMode==UMSGPAT_APOS_DOUBLE_REQUIRED ||
  426. c==u_leftCurlyBrace || c==u_rightCurlyBrace ||
  427. (parentType==UMSGPAT_ARG_TYPE_CHOICE && c==u_pipe) ||
  428. (UMSGPAT_ARG_TYPE_HAS_PLURAL_STYLE(parentType) && c==u_pound)
  429. ) {
  430. // skip the quote-starting apostrophe
  431. addPart(UMSGPAT_PART_TYPE_SKIP_SYNTAX, index-1, 1, 0, errorCode);
  432. // find the end of the quoted literal text
  433. for(;;) {
  434. index=msg.indexOf(u_apos, index+1);
  435. if(index>=0) {
  436. if(/*(index+1)<msg.length() &&*/ msg.charAt(index+1)==u_apos) {
  437. // double apostrophe inside quoted literal text
  438. // still encodes a single apostrophe, skip the second one
  439. addPart(UMSGPAT_PART_TYPE_SKIP_SYNTAX, ++index, 1, 0, errorCode);
  440. } else {
  441. // skip the quote-ending apostrophe
  442. addPart(UMSGPAT_PART_TYPE_SKIP_SYNTAX, index++, 1, 0, errorCode);
  443. break;
  444. }
  445. } else {
  446. // The quoted text reaches to the end of the of the message.
  447. index=msg.length();
  448. // Add a Part for auto-quoting.
  449. addPart(UMSGPAT_PART_TYPE_INSERT_CHAR, index, 0,
  450. u_apos, errorCode); // value=char to be inserted
  451. needsAutoQuoting=true;
  452. break;
  453. }
  454. }
  455. } else {
  456. // Interpret the apostrophe as literal text.
  457. // Add a Part for auto-quoting.
  458. addPart(UMSGPAT_PART_TYPE_INSERT_CHAR, index, 0,
  459. u_apos, errorCode); // value=char to be inserted
  460. needsAutoQuoting=true;
  461. }
  462. }
  463. } else if(UMSGPAT_ARG_TYPE_HAS_PLURAL_STYLE(parentType) && c==u_pound) {
  464. // The unquoted # in a plural message fragment will be replaced
  465. // with the (number-offset).
  466. addPart(UMSGPAT_PART_TYPE_REPLACE_NUMBER, index-1, 1, 0, errorCode);
  467. } else if(c==u_leftCurlyBrace) {
  468. index=parseArg(index-1, 1, nestingLevel, parseError, errorCode);
  469. } else if((nestingLevel>0 && c==u_rightCurlyBrace) ||
  470. (parentType==UMSGPAT_ARG_TYPE_CHOICE && c==u_pipe)) {
  471. // Finish the message before the terminator.
  472. // In a choice style, report the "}" substring only for the following ARG_LIMIT,
  473. // not for this MSG_LIMIT.
  474. int32_t limitLength=(parentType==UMSGPAT_ARG_TYPE_CHOICE && c==u_rightCurlyBrace) ? 0 : 1;
  475. addLimitPart(msgStart, UMSGPAT_PART_TYPE_MSG_LIMIT, index-1, limitLength,
  476. nestingLevel, errorCode);
  477. if(parentType==UMSGPAT_ARG_TYPE_CHOICE) {
  478. // Let the choice style parser see the '}' or '|'.
  479. return index-1;
  480. } else {
  481. // continue parsing after the '}'
  482. return index;
  483. }
  484. } // else: c is part of literal text
  485. }
  486. if(nestingLevel>0 && !inTopLevelChoiceMessage(nestingLevel, parentType)) {
  487. setParseError(parseError, 0); // Unmatched '{' braces in message.
  488. errorCode=U_UNMATCHED_BRACES;
  489. return 0;
  490. }
  491. addLimitPart(msgStart, UMSGPAT_PART_TYPE_MSG_LIMIT, index, 0, nestingLevel, errorCode);
  492. return index;
  493. }
  494. int32_t
  495. MessagePattern::parseArg(int32_t index, int32_t argStartLength, int32_t nestingLevel,
  496. UParseError *parseError, UErrorCode &errorCode) {
  497. int32_t argStart=partsLength;
  498. UMessagePatternArgType argType=UMSGPAT_ARG_TYPE_NONE;
  499. addPart(UMSGPAT_PART_TYPE_ARG_START, index, argStartLength, argType, errorCode);
  500. if(U_FAILURE(errorCode)) {
  501. return 0;
  502. }
  503. int32_t nameIndex=index=skipWhiteSpace(index+argStartLength);
  504. if(index==msg.length()) {
  505. setParseError(parseError, 0); // Unmatched '{' braces in message.
  506. errorCode=U_UNMATCHED_BRACES;
  507. return 0;
  508. }
  509. // parse argument name or number
  510. index=skipIdentifier(index);
  511. int32_t number=parseArgNumber(nameIndex, index);
  512. if(number>=0) {
  513. int32_t length=index-nameIndex;
  514. if(length>Part::MAX_LENGTH || number>Part::MAX_VALUE) {
  515. setParseError(parseError, nameIndex); // Argument number too large.
  516. errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
  517. return 0;
  518. }
  519. hasArgNumbers=true;
  520. addPart(UMSGPAT_PART_TYPE_ARG_NUMBER, nameIndex, length, number, errorCode);
  521. } else if(number==UMSGPAT_ARG_NAME_NOT_NUMBER) {
  522. int32_t length=index-nameIndex;
  523. if(length>Part::MAX_LENGTH) {
  524. setParseError(parseError, nameIndex); // Argument name too long.
  525. errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
  526. return 0;
  527. }
  528. hasArgNames=true;
  529. addPart(UMSGPAT_PART_TYPE_ARG_NAME, nameIndex, length, 0, errorCode);
  530. } else { // number<-1 (ARG_NAME_NOT_VALID)
  531. setParseError(parseError, nameIndex); // Bad argument syntax.
  532. errorCode=U_PATTERN_SYNTAX_ERROR;
  533. return 0;
  534. }
  535. index=skipWhiteSpace(index);
  536. if(index==msg.length()) {
  537. setParseError(parseError, 0); // Unmatched '{' braces in message.
  538. errorCode=U_UNMATCHED_BRACES;
  539. return 0;
  540. }
  541. char16_t c=msg.charAt(index);
  542. if(c==u_rightCurlyBrace) {
  543. // all done
  544. } else if(c!=u_comma) {
  545. setParseError(parseError, nameIndex); // Bad argument syntax.
  546. errorCode=U_PATTERN_SYNTAX_ERROR;
  547. return 0;
  548. } else /* ',' */ {
  549. // parse argument type: case-sensitive a-zA-Z
  550. int32_t typeIndex=index=skipWhiteSpace(index+1);
  551. while(index<msg.length() && isArgTypeChar(msg.charAt(index))) {
  552. ++index;
  553. }
  554. int32_t length=index-typeIndex;
  555. index=skipWhiteSpace(index);
  556. if(index==msg.length()) {
  557. setParseError(parseError, 0); // Unmatched '{' braces in message.
  558. errorCode=U_UNMATCHED_BRACES;
  559. return 0;
  560. }
  561. if(length==0 || ((c=msg.charAt(index))!=u_comma && c!=u_rightCurlyBrace)) {
  562. setParseError(parseError, nameIndex); // Bad argument syntax.
  563. errorCode=U_PATTERN_SYNTAX_ERROR;
  564. return 0;
  565. }
  566. if(length>Part::MAX_LENGTH) {
  567. setParseError(parseError, nameIndex); // Argument type name too long.
  568. errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
  569. return 0;
  570. }
  571. argType=UMSGPAT_ARG_TYPE_SIMPLE;
  572. if(length==6) {
  573. // case-insensitive comparisons for complex-type names
  574. if(isChoice(typeIndex)) {
  575. argType=UMSGPAT_ARG_TYPE_CHOICE;
  576. } else if(isPlural(typeIndex)) {
  577. argType=UMSGPAT_ARG_TYPE_PLURAL;
  578. } else if(isSelect(typeIndex)) {
  579. argType=UMSGPAT_ARG_TYPE_SELECT;
  580. }
  581. } else if(length==13) {
  582. if(isSelect(typeIndex) && isOrdinal(typeIndex+6)) {
  583. argType=UMSGPAT_ARG_TYPE_SELECTORDINAL;
  584. }
  585. }
  586. // change the ARG_START type from NONE to argType
  587. partsList->a[argStart].value = static_cast<int16_t>(argType);
  588. if(argType==UMSGPAT_ARG_TYPE_SIMPLE) {
  589. addPart(UMSGPAT_PART_TYPE_ARG_TYPE, typeIndex, length, 0, errorCode);
  590. }
  591. // look for an argument style (pattern)
  592. if(c==u_rightCurlyBrace) {
  593. if(argType!=UMSGPAT_ARG_TYPE_SIMPLE) {
  594. setParseError(parseError, nameIndex); // No style field for complex argument.
  595. errorCode=U_PATTERN_SYNTAX_ERROR;
  596. return 0;
  597. }
  598. } else /* ',' */ {
  599. ++index;
  600. if(argType==UMSGPAT_ARG_TYPE_SIMPLE) {
  601. index=parseSimpleStyle(index, parseError, errorCode);
  602. } else if(argType==UMSGPAT_ARG_TYPE_CHOICE) {
  603. index=parseChoiceStyle(index, nestingLevel, parseError, errorCode);
  604. } else {
  605. index=parsePluralOrSelectStyle(argType, index, nestingLevel, parseError, errorCode);
  606. }
  607. }
  608. }
  609. // Argument parsing stopped on the '}'.
  610. addLimitPart(argStart, UMSGPAT_PART_TYPE_ARG_LIMIT, index, 1, argType, errorCode);
  611. return index+1;
  612. }
  613. int32_t
  614. MessagePattern::parseSimpleStyle(int32_t index, UParseError *parseError, UErrorCode &errorCode) {
  615. if(U_FAILURE(errorCode)) {
  616. return 0;
  617. }
  618. int32_t start=index;
  619. int32_t nestedBraces=0;
  620. while(index<msg.length()) {
  621. char16_t c=msg.charAt(index++);
  622. if(c==u_apos) {
  623. // Treat apostrophe as quoting but include it in the style part.
  624. // Find the end of the quoted literal text.
  625. index=msg.indexOf(u_apos, index);
  626. if(index<0) {
  627. // Quoted literal argument style text reaches to the end of the message.
  628. setParseError(parseError, start);
  629. errorCode=U_PATTERN_SYNTAX_ERROR;
  630. return 0;
  631. }
  632. // skip the quote-ending apostrophe
  633. ++index;
  634. } else if(c==u_leftCurlyBrace) {
  635. ++nestedBraces;
  636. } else if(c==u_rightCurlyBrace) {
  637. if(nestedBraces>0) {
  638. --nestedBraces;
  639. } else {
  640. int32_t length=--index-start;
  641. if(length>Part::MAX_LENGTH) {
  642. setParseError(parseError, start); // Argument style text too long.
  643. errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
  644. return 0;
  645. }
  646. addPart(UMSGPAT_PART_TYPE_ARG_STYLE, start, length, 0, errorCode);
  647. return index;
  648. }
  649. } // c is part of literal text
  650. }
  651. setParseError(parseError, 0); // Unmatched '{' braces in message.
  652. errorCode=U_UNMATCHED_BRACES;
  653. return 0;
  654. }
  655. int32_t
  656. MessagePattern::parseChoiceStyle(int32_t index, int32_t nestingLevel,
  657. UParseError *parseError, UErrorCode &errorCode) {
  658. if(U_FAILURE(errorCode)) {
  659. return 0;
  660. }
  661. int32_t start=index;
  662. index=skipWhiteSpace(index);
  663. if(index==msg.length() || msg.charAt(index)==u_rightCurlyBrace) {
  664. setParseError(parseError, 0); // Missing choice argument pattern.
  665. errorCode=U_PATTERN_SYNTAX_ERROR;
  666. return 0;
  667. }
  668. for(;;) {
  669. // The choice argument style contains |-separated (number, separator, message) triples.
  670. // Parse the number.
  671. int32_t numberIndex=index;
  672. index=skipDouble(index);
  673. int32_t length=index-numberIndex;
  674. if(length==0) {
  675. setParseError(parseError, start); // Bad choice pattern syntax.
  676. errorCode=U_PATTERN_SYNTAX_ERROR;
  677. return 0;
  678. }
  679. if(length>Part::MAX_LENGTH) {
  680. setParseError(parseError, numberIndex); // Choice number too long.
  681. errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
  682. return 0;
  683. }
  684. parseDouble(numberIndex, index, true, parseError, errorCode); // adds ARG_INT or ARG_DOUBLE
  685. if(U_FAILURE(errorCode)) {
  686. return 0;
  687. }
  688. // Parse the separator.
  689. index=skipWhiteSpace(index);
  690. if(index==msg.length()) {
  691. setParseError(parseError, start); // Bad choice pattern syntax.
  692. errorCode=U_PATTERN_SYNTAX_ERROR;
  693. return 0;
  694. }
  695. char16_t c=msg.charAt(index);
  696. if(!(c==u_pound || c==u_lessThan || c==u_lessOrEqual)) { // U+2264 is <=
  697. setParseError(parseError, start); // Expected choice separator (#<\u2264) instead of c.
  698. errorCode=U_PATTERN_SYNTAX_ERROR;
  699. return 0;
  700. }
  701. addPart(UMSGPAT_PART_TYPE_ARG_SELECTOR, index, 1, 0, errorCode);
  702. // Parse the message fragment.
  703. index=parseMessage(++index, 0, nestingLevel+1, UMSGPAT_ARG_TYPE_CHOICE, parseError, errorCode);
  704. if(U_FAILURE(errorCode)) {
  705. return 0;
  706. }
  707. // parseMessage(..., CHOICE) returns the index of the terminator, or msg.length().
  708. if(index==msg.length()) {
  709. return index;
  710. }
  711. if(msg.charAt(index)==u_rightCurlyBrace) {
  712. if(!inMessageFormatPattern(nestingLevel)) {
  713. setParseError(parseError, start); // Bad choice pattern syntax.
  714. errorCode=U_PATTERN_SYNTAX_ERROR;
  715. return 0;
  716. }
  717. return index;
  718. } // else the terminator is '|'
  719. index=skipWhiteSpace(index+1);
  720. }
  721. }
  722. int32_t
  723. MessagePattern::parsePluralOrSelectStyle(UMessagePatternArgType argType,
  724. int32_t index, int32_t nestingLevel,
  725. UParseError *parseError, UErrorCode &errorCode) {
  726. if(U_FAILURE(errorCode)) {
  727. return 0;
  728. }
  729. int32_t start=index;
  730. UBool isEmpty=true;
  731. UBool hasOther=false;
  732. for(;;) {
  733. // First, collect the selector looking for a small set of terminators.
  734. // It would be a little faster to consider the syntax of each possible
  735. // token right here, but that makes the code too complicated.
  736. index=skipWhiteSpace(index);
  737. UBool eos=index==msg.length();
  738. if(eos || msg.charAt(index)==u_rightCurlyBrace) {
  739. if(eos==inMessageFormatPattern(nestingLevel)) {
  740. setParseError(parseError, start); // Bad plural/select pattern syntax.
  741. errorCode=U_PATTERN_SYNTAX_ERROR;
  742. return 0;
  743. }
  744. if(!hasOther) {
  745. setParseError(parseError, 0); // Missing 'other' keyword in plural/select pattern.
  746. errorCode=U_DEFAULT_KEYWORD_MISSING;
  747. return 0;
  748. }
  749. return index;
  750. }
  751. int32_t selectorIndex=index;
  752. if(UMSGPAT_ARG_TYPE_HAS_PLURAL_STYLE(argType) && msg.charAt(selectorIndex)==u_equal) {
  753. // explicit-value plural selector: =double
  754. index=skipDouble(index+1);
  755. int32_t length=index-selectorIndex;
  756. if(length==1) {
  757. setParseError(parseError, start); // Bad plural/select pattern syntax.
  758. errorCode=U_PATTERN_SYNTAX_ERROR;
  759. return 0;
  760. }
  761. if(length>Part::MAX_LENGTH) {
  762. setParseError(parseError, selectorIndex); // Argument selector too long.
  763. errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
  764. return 0;
  765. }
  766. addPart(UMSGPAT_PART_TYPE_ARG_SELECTOR, selectorIndex, length, 0, errorCode);
  767. parseDouble(selectorIndex+1, index, false,
  768. parseError, errorCode); // adds ARG_INT or ARG_DOUBLE
  769. } else {
  770. index=skipIdentifier(index);
  771. int32_t length=index-selectorIndex;
  772. if(length==0) {
  773. setParseError(parseError, start); // Bad plural/select pattern syntax.
  774. errorCode=U_PATTERN_SYNTAX_ERROR;
  775. return 0;
  776. }
  777. // Note: The ':' in "offset:" is just beyond the skipIdentifier() range.
  778. if( UMSGPAT_ARG_TYPE_HAS_PLURAL_STYLE(argType) && length==6 && index<msg.length() &&
  779. 0==msg.compare(selectorIndex, 7, kOffsetColon, 0, 7)
  780. ) {
  781. // plural offset, not a selector
  782. if(!isEmpty) {
  783. // Plural argument 'offset:' (if present) must precede key-message pairs.
  784. setParseError(parseError, start);
  785. errorCode=U_PATTERN_SYNTAX_ERROR;
  786. return 0;
  787. }
  788. // allow whitespace between offset: and its value
  789. int32_t valueIndex=skipWhiteSpace(index+1); // The ':' is at index.
  790. index=skipDouble(valueIndex);
  791. if(index==valueIndex) {
  792. setParseError(parseError, start); // Missing value for plural 'offset:'.
  793. errorCode=U_PATTERN_SYNTAX_ERROR;
  794. return 0;
  795. }
  796. if((index-valueIndex)>Part::MAX_LENGTH) {
  797. setParseError(parseError, valueIndex); // Plural offset value too long.
  798. errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
  799. return 0;
  800. }
  801. parseDouble(valueIndex, index, false,
  802. parseError, errorCode); // adds ARG_INT or ARG_DOUBLE
  803. if(U_FAILURE(errorCode)) {
  804. return 0;
  805. }
  806. isEmpty=false;
  807. continue; // no message fragment after the offset
  808. } else {
  809. // normal selector word
  810. if(length>Part::MAX_LENGTH) {
  811. setParseError(parseError, selectorIndex); // Argument selector too long.
  812. errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
  813. return 0;
  814. }
  815. addPart(UMSGPAT_PART_TYPE_ARG_SELECTOR, selectorIndex, length, 0, errorCode);
  816. if(0==msg.compare(selectorIndex, length, kOther, 0, 5)) {
  817. hasOther=true;
  818. }
  819. }
  820. }
  821. if(U_FAILURE(errorCode)) {
  822. return 0;
  823. }
  824. // parse the message fragment following the selector
  825. index=skipWhiteSpace(index);
  826. if(index==msg.length() || msg.charAt(index)!=u_leftCurlyBrace) {
  827. setParseError(parseError, selectorIndex); // No message fragment after plural/select selector.
  828. errorCode=U_PATTERN_SYNTAX_ERROR;
  829. return 0;
  830. }
  831. index=parseMessage(index, 1, nestingLevel+1, argType, parseError, errorCode);
  832. if(U_FAILURE(errorCode)) {
  833. return 0;
  834. }
  835. isEmpty=false;
  836. }
  837. }
  838. int32_t
  839. MessagePattern::parseArgNumber(const UnicodeString &s, int32_t start, int32_t limit) {
  840. // If the identifier contains only ASCII digits, then it is an argument _number_
  841. // and must not have leading zeros (except "0" itself).
  842. // Otherwise it is an argument _name_.
  843. if(start>=limit) {
  844. return UMSGPAT_ARG_NAME_NOT_VALID;
  845. }
  846. int32_t number;
  847. // Defer numeric errors until we know there are only digits.
  848. UBool badNumber;
  849. char16_t c=s.charAt(start++);
  850. if(c==0x30) {
  851. if(start==limit) {
  852. return 0;
  853. } else {
  854. number=0;
  855. badNumber=true; // leading zero
  856. }
  857. } else if(0x31<=c && c<=0x39) {
  858. number=c-0x30;
  859. badNumber=false;
  860. } else {
  861. return UMSGPAT_ARG_NAME_NOT_NUMBER;
  862. }
  863. while(start<limit) {
  864. c=s.charAt(start++);
  865. if(0x30<=c && c<=0x39) {
  866. if(number>=INT32_MAX/10) {
  867. badNumber=true; // overflow
  868. }
  869. number=number*10+(c-0x30);
  870. } else {
  871. return UMSGPAT_ARG_NAME_NOT_NUMBER;
  872. }
  873. }
  874. // There are only ASCII digits.
  875. if(badNumber) {
  876. return UMSGPAT_ARG_NAME_NOT_VALID;
  877. } else {
  878. return number;
  879. }
  880. }
  881. void
  882. MessagePattern::parseDouble(int32_t start, int32_t limit, UBool allowInfinity,
  883. UParseError *parseError, UErrorCode &errorCode) {
  884. if(U_FAILURE(errorCode)) {
  885. return;
  886. }
  887. U_ASSERT(start<limit);
  888. // fake loop for easy exit and single throw statement
  889. for(;;) { /*loop doesn't iterate*/
  890. // fast path for small integers and infinity
  891. int32_t value=0;
  892. int32_t isNegative=0; // not boolean so that we can easily add it to value
  893. int32_t index=start;
  894. char16_t c=msg.charAt(index++);
  895. if(c==u_minus) {
  896. isNegative=1;
  897. if(index==limit) {
  898. break; // no number
  899. }
  900. c=msg.charAt(index++);
  901. } else if(c==u_plus) {
  902. if(index==limit) {
  903. break; // no number
  904. }
  905. c=msg.charAt(index++);
  906. }
  907. if(c==0x221e) { // infinity
  908. if(allowInfinity && index==limit) {
  909. double infinity=uprv_getInfinity();
  910. addArgDoublePart(
  911. isNegative!=0 ? -infinity : infinity,
  912. start, limit-start, errorCode);
  913. return;
  914. } else {
  915. break;
  916. }
  917. }
  918. // try to parse the number as a small integer but fall back to a double
  919. while('0'<=c && c<='9') {
  920. value=value*10+(c-'0');
  921. if(value>(Part::MAX_VALUE+isNegative)) {
  922. break; // not a small-enough integer
  923. }
  924. if(index==limit) {
  925. addPart(UMSGPAT_PART_TYPE_ARG_INT, start, limit-start,
  926. isNegative!=0 ? -value : value, errorCode);
  927. return;
  928. }
  929. c=msg.charAt(index++);
  930. }
  931. // Let Double.parseDouble() throw a NumberFormatException.
  932. char numberChars[128];
  933. int32_t capacity = static_cast<int32_t>(sizeof(numberChars));
  934. int32_t length=limit-start;
  935. if(length>=capacity) {
  936. break; // number too long
  937. }
  938. msg.extract(start, length, numberChars, capacity, US_INV);
  939. if (static_cast<int32_t>(uprv_strlen(numberChars)) < length) {
  940. break; // contains non-invariant character that was turned into NUL
  941. }
  942. char *end;
  943. double numericValue=uprv_strtod(numberChars, &end);
  944. if(end!=(numberChars+length)) {
  945. break; // parsing error
  946. }
  947. addArgDoublePart(numericValue, start, length, errorCode);
  948. return;
  949. }
  950. setParseError(parseError, start /*, limit*/); // Bad syntax for numeric value.
  951. errorCode=U_PATTERN_SYNTAX_ERROR;
  952. }
  953. int32_t
  954. MessagePattern::skipWhiteSpace(int32_t index) {
  955. const char16_t *s=msg.getBuffer();
  956. int32_t msgLength=msg.length();
  957. const char16_t *t=PatternProps::skipWhiteSpace(s+index, msgLength-index);
  958. return static_cast<int32_t>(t - s);
  959. }
  960. int32_t
  961. MessagePattern::skipIdentifier(int32_t index) {
  962. const char16_t *s=msg.getBuffer();
  963. int32_t msgLength=msg.length();
  964. const char16_t *t=PatternProps::skipIdentifier(s+index, msgLength-index);
  965. return static_cast<int32_t>(t - s);
  966. }
  967. int32_t
  968. MessagePattern::skipDouble(int32_t index) {
  969. int32_t msgLength=msg.length();
  970. while(index<msgLength) {
  971. char16_t c=msg.charAt(index);
  972. // U+221E: Allow the infinity symbol, for ChoiceFormat patterns.
  973. if((c<0x30 && c!=u_plus && c!=u_minus && c!=u_dot) || (c>0x39 && c!=u_e && c!=u_E && c!=0x221e)) {
  974. break;
  975. }
  976. ++index;
  977. }
  978. return index;
  979. }
  980. UBool
  981. MessagePattern::isArgTypeChar(UChar32 c) {
  982. return (u_a<=c && c<=u_z) || (u_A<=c && c<=u_Z);
  983. }
  984. UBool
  985. MessagePattern::isChoice(int32_t index) {
  986. char16_t c;
  987. return
  988. ((c=msg.charAt(index++))==u_c || c==u_C) &&
  989. ((c=msg.charAt(index++))==u_h || c==u_H) &&
  990. ((c=msg.charAt(index++))==u_o || c==u_O) &&
  991. ((c=msg.charAt(index++))==u_i || c==u_I) &&
  992. ((c=msg.charAt(index++))==u_c || c==u_C) &&
  993. ((c=msg.charAt(index))==u_e || c==u_E);
  994. }
  995. UBool
  996. MessagePattern::isPlural(int32_t index) {
  997. char16_t c;
  998. return
  999. ((c=msg.charAt(index++))==u_p || c==u_P) &&
  1000. ((c=msg.charAt(index++))==u_l || c==u_L) &&
  1001. ((c=msg.charAt(index++))==u_u || c==u_U) &&
  1002. ((c=msg.charAt(index++))==u_r || c==u_R) &&
  1003. ((c=msg.charAt(index++))==u_a || c==u_A) &&
  1004. ((c=msg.charAt(index))==u_l || c==u_L);
  1005. }
  1006. UBool
  1007. MessagePattern::isSelect(int32_t index) {
  1008. char16_t c;
  1009. return
  1010. ((c=msg.charAt(index++))==u_s || c==u_S) &&
  1011. ((c=msg.charAt(index++))==u_e || c==u_E) &&
  1012. ((c=msg.charAt(index++))==u_l || c==u_L) &&
  1013. ((c=msg.charAt(index++))==u_e || c==u_E) &&
  1014. ((c=msg.charAt(index++))==u_c || c==u_C) &&
  1015. ((c=msg.charAt(index))==u_t || c==u_T);
  1016. }
  1017. UBool
  1018. MessagePattern::isOrdinal(int32_t index) {
  1019. char16_t c;
  1020. return
  1021. ((c=msg.charAt(index++))==u_o || c==u_O) &&
  1022. ((c=msg.charAt(index++))==u_r || c==u_R) &&
  1023. ((c=msg.charAt(index++))==u_d || c==u_D) &&
  1024. ((c=msg.charAt(index++))==u_i || c==u_I) &&
  1025. ((c=msg.charAt(index++))==u_n || c==u_N) &&
  1026. ((c=msg.charAt(index++))==u_a || c==u_A) &&
  1027. ((c=msg.charAt(index))==u_l || c==u_L);
  1028. }
  1029. UBool
  1030. MessagePattern::inMessageFormatPattern(int32_t nestingLevel) {
  1031. return nestingLevel>0 || partsList->a[0].type==UMSGPAT_PART_TYPE_MSG_START;
  1032. }
  1033. UBool
  1034. MessagePattern::inTopLevelChoiceMessage(int32_t nestingLevel, UMessagePatternArgType parentType) {
  1035. return
  1036. nestingLevel==1 &&
  1037. parentType==UMSGPAT_ARG_TYPE_CHOICE &&
  1038. partsList->a[0].type!=UMSGPAT_PART_TYPE_MSG_START;
  1039. }
  1040. void
  1041. MessagePattern::addPart(UMessagePatternPartType type, int32_t index, int32_t length,
  1042. int32_t value, UErrorCode &errorCode) {
  1043. if(partsList->ensureCapacityForOneMore(partsLength, errorCode)) {
  1044. Part &part=partsList->a[partsLength++];
  1045. part.type=type;
  1046. part.index=index;
  1047. part.length = static_cast<uint16_t>(length);
  1048. part.value = static_cast<int16_t>(value);
  1049. part.limitPartIndex=0;
  1050. }
  1051. }
  1052. void
  1053. MessagePattern::addLimitPart(int32_t start,
  1054. UMessagePatternPartType type, int32_t index, int32_t length,
  1055. int32_t value, UErrorCode &errorCode) {
  1056. partsList->a[start].limitPartIndex=partsLength;
  1057. addPart(type, index, length, value, errorCode);
  1058. }
  1059. void
  1060. MessagePattern::addArgDoublePart(double numericValue, int32_t start, int32_t length,
  1061. UErrorCode &errorCode) {
  1062. if(U_FAILURE(errorCode)) {
  1063. return;
  1064. }
  1065. int32_t numericIndex=numericValuesLength;
  1066. if(numericValuesList==nullptr) {
  1067. numericValuesList=new MessagePatternDoubleList();
  1068. if(numericValuesList==nullptr) {
  1069. errorCode=U_MEMORY_ALLOCATION_ERROR;
  1070. return;
  1071. }
  1072. } else if(!numericValuesList->ensureCapacityForOneMore(numericValuesLength, errorCode)) {
  1073. return;
  1074. } else {
  1075. if(numericIndex>Part::MAX_VALUE) {
  1076. errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
  1077. return;
  1078. }
  1079. }
  1080. numericValuesList->a[numericValuesLength++]=numericValue;
  1081. addPart(UMSGPAT_PART_TYPE_ARG_DOUBLE, start, length, numericIndex, errorCode);
  1082. }
  1083. void
  1084. MessagePattern::setParseError(UParseError *parseError, int32_t index) {
  1085. if(parseError==nullptr) {
  1086. return;
  1087. }
  1088. parseError->offset=index;
  1089. // Set preContext to some of msg before index.
  1090. // Avoid splitting a surrogate pair.
  1091. int32_t length=index;
  1092. if(length>=U_PARSE_CONTEXT_LEN) {
  1093. length=U_PARSE_CONTEXT_LEN-1;
  1094. if(length>0 && U16_IS_TRAIL(msg[index-length])) {
  1095. --length;
  1096. }
  1097. }
  1098. msg.extract(index-length, length, parseError->preContext);
  1099. parseError->preContext[length]=0;
  1100. // Set postContext to some of msg starting at index.
  1101. length=msg.length()-index;
  1102. if(length>=U_PARSE_CONTEXT_LEN) {
  1103. length=U_PARSE_CONTEXT_LEN-1;
  1104. if(length>0 && U16_IS_LEAD(msg[index+length-1])) {
  1105. --length;
  1106. }
  1107. }
  1108. msg.extract(index, length, parseError->postContext);
  1109. parseError->postContext[length]=0;
  1110. }
  1111. // MessageImpl ------------------------------------------------------------- ***
  1112. void
  1113. MessageImpl::appendReducedApostrophes(const UnicodeString &s, int32_t start, int32_t limit,
  1114. UnicodeString &sb) {
  1115. int32_t doubleApos=-1;
  1116. for(;;) {
  1117. int32_t i=s.indexOf(u_apos, start);
  1118. if(i<0 || i>=limit) {
  1119. sb.append(s, start, limit-start);
  1120. break;
  1121. }
  1122. if(i==doubleApos) {
  1123. // Double apostrophe at start-1 and start==i, append one.
  1124. sb.append(u_apos);
  1125. ++start;
  1126. doubleApos=-1;
  1127. } else {
  1128. // Append text between apostrophes and skip this one.
  1129. sb.append(s, start, i-start);
  1130. doubleApos=start=i+1;
  1131. }
  1132. }
  1133. }
  1134. // Ported from second half of ICU4J SelectFormat.format(String).
  1135. UnicodeString &
  1136. MessageImpl::appendSubMessageWithoutSkipSyntax(const MessagePattern &msgPattern,
  1137. int32_t msgStart,
  1138. UnicodeString &result) {
  1139. const UnicodeString &msgString=msgPattern.getPatternString();
  1140. int32_t prevIndex=msgPattern.getPart(msgStart).getLimit();
  1141. for(int32_t i=msgStart;;) {
  1142. const MessagePattern::Part &part=msgPattern.getPart(++i);
  1143. UMessagePatternPartType type=part.getType();
  1144. int32_t index=part.getIndex();
  1145. if(type==UMSGPAT_PART_TYPE_MSG_LIMIT) {
  1146. return result.append(msgString, prevIndex, index-prevIndex);
  1147. } else if(type==UMSGPAT_PART_TYPE_SKIP_SYNTAX) {
  1148. result.append(msgString, prevIndex, index-prevIndex);
  1149. prevIndex=part.getLimit();
  1150. } else if(type==UMSGPAT_PART_TYPE_ARG_START) {
  1151. result.append(msgString, prevIndex, index-prevIndex);
  1152. prevIndex=index;
  1153. i=msgPattern.getLimitPartIndex(i);
  1154. index=msgPattern.getPart(i).getLimit();
  1155. appendReducedApostrophes(msgString, prevIndex, index, result);
  1156. prevIndex=index;
  1157. }
  1158. }
  1159. }
  1160. U_NAMESPACE_END
  1161. #endif // !UCONFIG_NO_FORMATTING