locid.cpp 85 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
7227822792280228122822283228422852286228722882289229022912292229322942295229622972298229923002301230223032304230523062307230823092310231123122313231423152316231723182319232023212322232323242325232623272328232923302331233223332334233523362337233823392340234123422343234423452346234723482349235023512352235323542355235623572358235923602361236223632364236523662367236823692370237123722373237423752376237723782379238023812382238323842385238623872388238923902391239223932394239523962397239823992400240124022403240424052406240724082409241024112412241324142415241624172418241924202421242224232424242524262427242824292430243124322433243424352436243724382439244024412442244324442445244624472448244924502451245224532454245524562457245824592460246124622463246424652466246724682469247024712472247324742475247624772478247924802481248224832484248524862487248824892490249124922493249424952496249724982499250025012502250325042505250625072508250925102511251225132514251525162517251825192520252125222523252425252526252725282529253025312532253325342535253625372538253925402541254225432544254525462547254825492550255125522553255425552556255725582559256025612562256325642565256625672568256925702571257225732574257525762577257825792580258125822583258425852586258725882589259025912592259325942595259625972598259926002601260226032604260526062607260826092610261126122613261426152616261726182619262026212622262326242625262626272628262926302631263226332634263526362637263826392640264126422643264426452646264726482649265026512652265326542655265626572658265926602661266226632664266526662667266826692670267126722673267426752676267726782679268026812682268326842685268626872688268926902691269226932694269526962697269826992700270127022703270427052706270727082709271027112712271327142715271627172718271927202721272227232724272527262727272827292730273127322733
  1. // © 2016 and later: Unicode, Inc. and others.
  2. // License & terms of use: http://www.unicode.org/copyright.html
  3. /*
  4. **********************************************************************
  5. * Copyright (C) 1997-2016, International Business Machines
  6. * Corporation and others. All Rights Reserved.
  7. **********************************************************************
  8. *
  9. * File locid.cpp
  10. *
  11. * Created by: Richard Gillam
  12. *
  13. * Modification History:
  14. *
  15. * Date Name Description
  16. * 02/11/97 aliu Changed gLocPath to fgDataDirectory and added
  17. * methods to get and set it.
  18. * 04/02/97 aliu Made operator!= inline; fixed return value
  19. * of getName().
  20. * 04/15/97 aliu Cleanup for AIX/Win32.
  21. * 04/24/97 aliu Numerous changes per code review.
  22. * 08/18/98 stephen Changed getDisplayName()
  23. * Added SIMPLIFIED_CHINESE, TRADITIONAL_CHINESE
  24. * Added getISOCountries(), getISOLanguages(),
  25. * getLanguagesForCountry()
  26. * 03/16/99 bertrand rehaul.
  27. * 07/21/99 stephen Added U_CFUNC setDefault
  28. * 11/09/99 weiv Added const char * getName() const;
  29. * 04/12/00 srl removing unicodestring api's and cached hash code
  30. * 08/10/01 grhoten Change the static Locales to accessor functions
  31. ******************************************************************************
  32. */
  33. #include <optional>
  34. #include <string_view>
  35. #include <utility>
  36. #include "unicode/bytestream.h"
  37. #include "unicode/locid.h"
  38. #include "unicode/localebuilder.h"
  39. #include "unicode/strenum.h"
  40. #include "unicode/stringpiece.h"
  41. #include "unicode/uloc.h"
  42. #include "unicode/ures.h"
  43. #include "bytesinkutil.h"
  44. #include "charstr.h"
  45. #include "charstrmap.h"
  46. #include "cmemory.h"
  47. #include "cstring.h"
  48. #include "mutex.h"
  49. #include "putilimp.h"
  50. #include "uassert.h"
  51. #include "ucln_cmn.h"
  52. #include "uhash.h"
  53. #include "ulocimp.h"
  54. #include "umutex.h"
  55. #include "uniquecharstr.h"
  56. #include "ustr_imp.h"
  57. #include "uvector.h"
  58. U_NAMESPACE_BEGIN
  59. static Locale *gLocaleCache = nullptr;
  60. static UInitOnce gLocaleCacheInitOnce {};
  61. // gDefaultLocaleMutex protects all access to gDefaultLocalesHashT and gDefaultLocale.
  62. static UMutex gDefaultLocaleMutex;
  63. static UHashtable *gDefaultLocalesHashT = nullptr;
  64. static Locale *gDefaultLocale = nullptr;
  65. /**
  66. * \def ULOC_STRING_LIMIT
  67. * strings beyond this value crash in CharString
  68. */
  69. #define ULOC_STRING_LIMIT 357913941
  70. U_NAMESPACE_END
  /**
   * Slots of the predefined-locale cache. The enumerators are used as array
   * indexes, so they must stay dense and start at zero; eMAX_LOCALES is the
   * number of slots.
   */
  enum ELocalePos {
      // Language-only locales.
      eENGLISH,
      eFRENCH,
      eGERMAN,
      eITALIAN,
      eJAPANESE,
      eKOREAN,
      eCHINESE,

      // Language + country locales.
      eFRANCE,
      eGERMANY,
      eITALY,
      eJAPAN,
      eKOREA,
      eCHINA, /* Alias for PRC */
      eTAIWAN,
      eUK,
      eUS,
      eCANADA,
      eCANADA_FRENCH,

      eROOT,
      //eDEFAULT,
      eMAX_LOCALES
  };
  94. namespace {
  95. //
  96. // Deleter function for Locales owned by the default Locale hash table/
  97. //
  98. void U_CALLCONV
  99. deleteLocale(void *obj) {
  100. delete static_cast<icu::Locale*>(obj);
  101. }
  102. UBool U_CALLCONV locale_cleanup()
  103. {
  104. U_NAMESPACE_USE
  105. delete [] gLocaleCache;
  106. gLocaleCache = nullptr;
  107. gLocaleCacheInitOnce.reset();
  108. if (gDefaultLocalesHashT) {
  109. uhash_close(gDefaultLocalesHashT); // Automatically deletes all elements, using deleter func.
  110. gDefaultLocalesHashT = nullptr;
  111. }
  112. gDefaultLocale = nullptr;
  113. return true;
  114. }
  115. void U_CALLCONV locale_init(UErrorCode &status) {
  116. U_NAMESPACE_USE
  117. U_ASSERT(gLocaleCache == nullptr);
  118. gLocaleCache = new Locale[static_cast<int>(eMAX_LOCALES)];
  119. if (gLocaleCache == nullptr) {
  120. status = U_MEMORY_ALLOCATION_ERROR;
  121. return;
  122. }
  123. ucln_common_registerCleanup(UCLN_COMMON_LOCALE, locale_cleanup);
  124. gLocaleCache[eROOT] = Locale("");
  125. gLocaleCache[eENGLISH] = Locale("en");
  126. gLocaleCache[eFRENCH] = Locale("fr");
  127. gLocaleCache[eGERMAN] = Locale("de");
  128. gLocaleCache[eITALIAN] = Locale("it");
  129. gLocaleCache[eJAPANESE] = Locale("ja");
  130. gLocaleCache[eKOREAN] = Locale("ko");
  131. gLocaleCache[eCHINESE] = Locale("zh");
  132. gLocaleCache[eFRANCE] = Locale("fr", "FR");
  133. gLocaleCache[eGERMANY] = Locale("de", "DE");
  134. gLocaleCache[eITALY] = Locale("it", "IT");
  135. gLocaleCache[eJAPAN] = Locale("ja", "JP");
  136. gLocaleCache[eKOREA] = Locale("ko", "KR");
  137. gLocaleCache[eCHINA] = Locale("zh", "CN");
  138. gLocaleCache[eTAIWAN] = Locale("zh", "TW");
  139. gLocaleCache[eUK] = Locale("en", "GB");
  140. gLocaleCache[eUS] = Locale("en", "US");
  141. gLocaleCache[eCANADA] = Locale("en", "CA");
  142. gLocaleCache[eCANADA_FRENCH] = Locale("fr", "CA");
  143. }
  144. } // namespace
  145. U_NAMESPACE_BEGIN
  146. Locale *locale_set_default_internal(const char *id, UErrorCode& status) {
  147. // Synchronize this entire function.
  148. Mutex lock(&gDefaultLocaleMutex);
  149. UBool canonicalize = false;
  150. // If given a nullptr string for the locale id, grab the default
  151. // name from the system.
  152. // (Different from most other locale APIs, where a null name means use
  153. // the current ICU default locale.)
  154. if (id == nullptr) {
  155. id = uprv_getDefaultLocaleID(); // This function not thread safe? TODO: verify.
  156. canonicalize = true; // always canonicalize host ID
  157. }
  158. CharString localeNameBuf =
  159. canonicalize ? ulocimp_canonicalize(id, status) : ulocimp_getName(id, status);
  160. if (U_FAILURE(status)) {
  161. return gDefaultLocale;
  162. }
  163. if (gDefaultLocalesHashT == nullptr) {
  164. gDefaultLocalesHashT = uhash_open(uhash_hashChars, uhash_compareChars, nullptr, &status);
  165. if (U_FAILURE(status)) {
  166. return gDefaultLocale;
  167. }
  168. uhash_setValueDeleter(gDefaultLocalesHashT, deleteLocale);
  169. ucln_common_registerCleanup(UCLN_COMMON_LOCALE, locale_cleanup);
  170. }
  171. Locale* newDefault = static_cast<Locale*>(uhash_get(gDefaultLocalesHashT, localeNameBuf.data()));
  172. if (newDefault == nullptr) {
  173. newDefault = new Locale(Locale::eBOGUS);
  174. if (newDefault == nullptr) {
  175. status = U_MEMORY_ALLOCATION_ERROR;
  176. return gDefaultLocale;
  177. }
  178. newDefault->init(localeNameBuf.data(), false);
  179. uhash_put(gDefaultLocalesHashT, const_cast<char*>(newDefault->getName()), newDefault, &status);
  180. if (U_FAILURE(status)) {
  181. return gDefaultLocale;
  182. }
  183. }
  184. gDefaultLocale = newDefault;
  185. return gDefaultLocale;
  186. }
  187. U_NAMESPACE_END
  188. /* sfb 07/21/99 */
  189. U_CFUNC void
  190. locale_set_default(const char *id)
  191. {
  192. U_NAMESPACE_USE
  193. UErrorCode status = U_ZERO_ERROR;
  194. locale_set_default_internal(id, status);
  195. }
  196. /* end */
  197. U_CFUNC const char *
  198. locale_get_default()
  199. {
  200. U_NAMESPACE_USE
  201. return Locale::getDefault().getName();
  202. }
  203. U_NAMESPACE_BEGIN
  204. UOBJECT_DEFINE_RTTI_IMPLEMENTATION(Locale)
  205. /*Character separating the posix id fields*/
  206. // '_'
  207. // In the platform codepage.
  208. #define SEP_CHAR '_'
  209. #define NULL_CHAR '\0'
  210. Locale::~Locale()
  211. {
  212. if ((baseName != fullName) && (baseName != fullNameBuffer)) {
  213. uprv_free(baseName);
  214. }
  215. baseName = nullptr;
  216. /*if fullName is on the heap, we free it*/
  217. if (fullName != fullNameBuffer)
  218. {
  219. uprv_free(fullName);
  220. fullName = nullptr;
  221. }
  222. }
  223. Locale::Locale()
  224. : UObject(), fullName(fullNameBuffer), baseName(nullptr)
  225. {
  226. init(nullptr, false);
  227. }
  228. /*
  229. * Internal constructor to allow construction of a locale object with
  230. * NO side effects. (Default constructor tries to get
  231. * the default locale.)
  232. */
  233. Locale::Locale(Locale::ELocaleType)
  234. : UObject(), fullName(fullNameBuffer), baseName(nullptr)
  235. {
  236. setToBogus();
  237. }
  238. Locale::Locale( const char * newLanguage,
  239. const char * newCountry,
  240. const char * newVariant,
  241. const char * newKeywords)
  242. : UObject(), fullName(fullNameBuffer), baseName(nullptr)
  243. {
  244. if( (newLanguage==nullptr) && (newCountry == nullptr) && (newVariant == nullptr) )
  245. {
  246. init(nullptr, false); /* shortcut */
  247. }
  248. else
  249. {
  250. UErrorCode status = U_ZERO_ERROR;
  251. int32_t lsize = 0;
  252. int32_t csize = 0;
  253. int32_t vsize = 0;
  254. int32_t ksize = 0;
  255. // Check the sizes of the input strings.
  256. // Language
  257. if ( newLanguage != nullptr )
  258. {
  259. lsize = static_cast<int32_t>(uprv_strlen(newLanguage));
  260. if ( lsize < 0 || lsize > ULOC_STRING_LIMIT ) { // int32 wrap
  261. setToBogus();
  262. return;
  263. }
  264. }
  265. CharString togo(newLanguage, lsize, status); // start with newLanguage
  266. // _Country
  267. if ( newCountry != nullptr )
  268. {
  269. csize = static_cast<int32_t>(uprv_strlen(newCountry));
  270. if ( csize < 0 || csize > ULOC_STRING_LIMIT ) { // int32 wrap
  271. setToBogus();
  272. return;
  273. }
  274. }
  275. // _Variant
  276. if ( newVariant != nullptr )
  277. {
  278. // remove leading _'s
  279. while(newVariant[0] == SEP_CHAR)
  280. {
  281. newVariant++;
  282. }
  283. // remove trailing _'s
  284. vsize = static_cast<int32_t>(uprv_strlen(newVariant));
  285. if ( vsize < 0 || vsize > ULOC_STRING_LIMIT ) { // int32 wrap
  286. setToBogus();
  287. return;
  288. }
  289. while( (vsize>1) && (newVariant[vsize-1] == SEP_CHAR) )
  290. {
  291. vsize--;
  292. }
  293. }
  294. if ( newKeywords != nullptr)
  295. {
  296. ksize = static_cast<int32_t>(uprv_strlen(newKeywords));
  297. if ( ksize < 0 || ksize > ULOC_STRING_LIMIT ) {
  298. setToBogus();
  299. return;
  300. }
  301. }
  302. // We've checked the input sizes, now build up the full locale string..
  303. // newLanguage is already copied
  304. if ( ( vsize != 0 ) || (csize != 0) ) // at least: __v
  305. { // ^
  306. togo.append(SEP_CHAR, status);
  307. }
  308. if ( csize != 0 )
  309. {
  310. togo.append(newCountry, status);
  311. }
  312. if ( vsize != 0)
  313. {
  314. togo.append(SEP_CHAR, status)
  315. .append(newVariant, vsize, status);
  316. }
  317. if ( ksize != 0)
  318. {
  319. if (uprv_strchr(newKeywords, '=')) {
  320. togo.append('@', status); /* keyword parsing */
  321. }
  322. else {
  323. togo.append('_', status); /* Variant parsing with a script */
  324. if ( vsize == 0) {
  325. togo.append('_', status); /* No country found */
  326. }
  327. }
  328. togo.append(newKeywords, status);
  329. }
  330. if (U_FAILURE(status)) {
  331. // Something went wrong with appending, etc.
  332. setToBogus();
  333. return;
  334. }
  335. // Parse it, because for example 'language' might really be a complete
  336. // string.
  337. init(togo.data(), false);
  338. }
  339. }
  340. Locale::Locale(const Locale &other)
  341. : UObject(other), fullName(fullNameBuffer), baseName(nullptr)
  342. {
  343. *this = other;
  344. }
  345. Locale::Locale(Locale&& other) noexcept
  346. : UObject(other), fullName(fullNameBuffer), baseName(fullName) {
  347. *this = std::move(other);
  348. }
  349. Locale& Locale::operator=(const Locale& other) {
  350. if (this == &other) {
  351. return *this;
  352. }
  353. setToBogus();
  354. if (other.fullName == other.fullNameBuffer) {
  355. uprv_strcpy(fullNameBuffer, other.fullNameBuffer);
  356. } else if (other.fullName == nullptr) {
  357. fullName = nullptr;
  358. } else {
  359. fullName = uprv_strdup(other.fullName);
  360. if (fullName == nullptr) return *this;
  361. }
  362. if (other.baseName == other.fullName) {
  363. baseName = fullName;
  364. } else if (other.baseName != nullptr) {
  365. baseName = uprv_strdup(other.baseName);
  366. if (baseName == nullptr) return *this;
  367. }
  368. uprv_strcpy(language, other.language);
  369. uprv_strcpy(script, other.script);
  370. uprv_strcpy(country, other.country);
  371. variantBegin = other.variantBegin;
  372. fIsBogus = other.fIsBogus;
  373. return *this;
  374. }
  375. Locale& Locale::operator=(Locale&& other) noexcept {
  376. if ((baseName != fullName) && (baseName != fullNameBuffer)) uprv_free(baseName);
  377. if (fullName != fullNameBuffer) uprv_free(fullName);
  378. if (other.fullName == other.fullNameBuffer || other.baseName == other.fullNameBuffer) {
  379. uprv_strcpy(fullNameBuffer, other.fullNameBuffer);
  380. }
  381. if (other.fullName == other.fullNameBuffer) {
  382. fullName = fullNameBuffer;
  383. } else {
  384. fullName = other.fullName;
  385. }
  386. if (other.baseName == other.fullNameBuffer) {
  387. baseName = fullNameBuffer;
  388. } else if (other.baseName == other.fullName) {
  389. baseName = fullName;
  390. } else {
  391. baseName = other.baseName;
  392. }
  393. uprv_strcpy(language, other.language);
  394. uprv_strcpy(script, other.script);
  395. uprv_strcpy(country, other.country);
  396. variantBegin = other.variantBegin;
  397. fIsBogus = other.fIsBogus;
  398. other.baseName = other.fullName = other.fullNameBuffer;
  399. return *this;
  400. }
  401. Locale *
  402. Locale::clone() const {
  403. return new Locale(*this);
  404. }
  405. bool
  406. Locale::operator==( const Locale& other) const
  407. {
  408. return (uprv_strcmp(other.fullName, fullName) == 0);
  409. }
  410. namespace {
  411. UInitOnce gKnownCanonicalizedInitOnce {};
  412. UHashtable *gKnownCanonicalized = nullptr;
  // Locale IDs that are known to be in canonical form already: "c" plus a
  // set of commonly used locales. Laid out one initial letter per row.
  constexpr const char* KNOWN_CANONICALIZED[] = {
      "c",
      "af", "af_ZA", "am", "am_ET", "ar", "ar_001", "as", "as_IN", "az", "az_AZ",
      "be", "be_BY", "bg", "bg_BG", "bn", "bn_IN", "bs", "bs_BA",
      "ca", "ca_ES", "cs", "cs_CZ", "cy", "cy_GB",
      "da", "da_DK", "de", "de_DE",
      "el", "el_GR", "en", "en_GB", "en_US", "es", "es_419", "es_ES",
      "et", "et_EE", "eu", "eu_ES",
      "fa", "fa_IR", "fi", "fi_FI", "fil", "fil_PH", "fr", "fr_FR",
      "ga", "ga_IE", "gl", "gl_ES", "gu", "gu_IN",
      "he", "he_IL", "hi", "hi_IN", "hr", "hr_HR", "hu", "hu_HU", "hy", "hy_AM",
      "id", "id_ID", "is", "is_IS", "it", "it_IT",
      "ja", "ja_JP", "jv", "jv_ID",
      "ka", "ka_GE", "kk", "kk_KZ", "km", "km_KH", "kn", "kn_IN",
      "ko", "ko_KR", "ky", "ky_KG",
      "lo", "lo_LA", "lt", "lt_LT", "lv", "lv_LV",
      "mk", "mk_MK", "ml", "ml_IN", "mn", "mn_MN", "mr", "mr_IN",
      "ms", "ms_MY", "my", "my_MM",
      "nb", "nb_NO", "ne", "ne_NP", "nl", "nl_NL", "no",
      "or", "or_IN",
      "pa", "pa_IN", "pl", "pl_PL", "ps", "ps_AF", "pt", "pt_BR", "pt_PT",
      "ro", "ro_RO", "ru", "ru_RU",
      "sd", "sd_IN", "si", "si_LK", "sk", "sk_SK", "sl", "sl_SI",
      "so", "so_SO", "sq", "sq_AL",
      "sr", "sr_Cyrl_RS", "sr_Latn", "sr_RS", "sv", "sv_SE", "sw", "sw_TZ",
      "ta", "ta_IN", "te", "te_IN", "th", "th_TH", "tk", "tk_TM", "tr", "tr_TR",
      "uk", "uk_UA", "ur", "ur_PK", "uz", "uz_UZ",
      "vi", "vi_VN",
      "yue", "yue_Hant", "yue_Hant_HK", "yue_HK",
      "zh", "zh_CN", "zh_Hans", "zh_Hans_CN", "zh_Hant", "zh_Hant_TW", "zh_TW",
      "zu", "zu_ZA"
  };
  436. UBool U_CALLCONV cleanupKnownCanonicalized() {
  437. gKnownCanonicalizedInitOnce.reset();
  438. if (gKnownCanonicalized) { uhash_close(gKnownCanonicalized); }
  439. return true;
  440. }
  441. void U_CALLCONV loadKnownCanonicalized(UErrorCode &status) {
  442. ucln_common_registerCleanup(UCLN_COMMON_LOCALE_KNOWN_CANONICALIZED,
  443. cleanupKnownCanonicalized);
  444. LocalUHashtablePointer newKnownCanonicalizedMap(
  445. uhash_open(uhash_hashChars, uhash_compareChars, nullptr, &status));
  446. for (int32_t i = 0;
  447. U_SUCCESS(status) && i < UPRV_LENGTHOF(KNOWN_CANONICALIZED);
  448. i++) {
  449. uhash_puti(newKnownCanonicalizedMap.getAlias(),
  450. (void*)KNOWN_CANONICALIZED[i],
  451. 1, &status);
  452. }
  453. if (U_FAILURE(status)) {
  454. return;
  455. }
  456. gKnownCanonicalized = newKnownCanonicalizedMap.orphan();
  457. }
  458. class AliasData;
  459. /**
  460. * A Builder class to build the alias data.
  461. */
  462. class AliasDataBuilder {
  463. public:
  464. AliasDataBuilder() {
  465. }
  466. // Build the AliasData from resource.
  467. AliasData* build(UErrorCode &status);
  468. private:
  469. void readAlias(UResourceBundle* alias,
  470. UniqueCharStrings* strings,
  471. LocalMemory<const char*>& types,
  472. LocalMemory<int32_t>& replacementIndexes,
  473. int32_t &length,
  474. void (*checkType)(const char* type),
  475. void (*checkReplacement)(const UChar* replacement),
  476. UErrorCode &status);
  477. // Read the languageAlias data from alias to
  478. // strings+types+replacementIndexes
  479. // The number of record will be stored into length.
  480. // Allocate length items for types, to store the type field.
  481. // Allocate length items for replacementIndexes,
  482. // to store the index in the strings for the replacement script.
  483. void readLanguageAlias(UResourceBundle* alias,
  484. UniqueCharStrings* strings,
  485. LocalMemory<const char*>& types,
  486. LocalMemory<int32_t>& replacementIndexes,
  487. int32_t &length,
  488. UErrorCode &status);
  489. // Read the scriptAlias data from alias to
  490. // strings+types+replacementIndexes
  491. // Allocate length items for types, to store the type field.
  492. // Allocate length items for replacementIndexes,
  493. // to store the index in the strings for the replacement script.
  494. void readScriptAlias(UResourceBundle* alias,
  495. UniqueCharStrings* strings,
  496. LocalMemory<const char*>& types,
  497. LocalMemory<int32_t>& replacementIndexes,
  498. int32_t &length, UErrorCode &status);
  499. // Read the territoryAlias data from alias to
  500. // strings+types+replacementIndexes
  501. // Allocate length items for types, to store the type field.
  502. // Allocate length items for replacementIndexes,
  503. // to store the index in the strings for the replacement script.
  504. void readTerritoryAlias(UResourceBundle* alias,
  505. UniqueCharStrings* strings,
  506. LocalMemory<const char*>& types,
  507. LocalMemory<int32_t>& replacementIndexes,
  508. int32_t &length, UErrorCode &status);
  509. // Read the variantAlias data from alias to
  510. // strings+types+replacementIndexes
  511. // Allocate length items for types, to store the type field.
  512. // Allocate length items for replacementIndexes,
  513. // to store the index in the strings for the replacement variant.
  514. void readVariantAlias(UResourceBundle* alias,
  515. UniqueCharStrings* strings,
  516. LocalMemory<const char*>& types,
  517. LocalMemory<int32_t>& replacementIndexes,
  518. int32_t &length, UErrorCode &status);
  519. // Read the subdivisionAlias data from alias to
  520. // strings+types+replacementIndexes
  521. // Allocate length items for types, to store the type field.
  522. // Allocate length items for replacementIndexes,
  523. // to store the index in the strings for the replacement variant.
  524. void readSubdivisionAlias(UResourceBundle* alias,
  525. UniqueCharStrings* strings,
  526. LocalMemory<const char*>& types,
  527. LocalMemory<int32_t>& replacementIndexes,
  528. int32_t &length, UErrorCode &status);
  529. };
  530. /**
  531. * A class to hold the Alias Data.
  532. */
  533. class AliasData : public UMemory {
  534. public:
  535. static const AliasData* singleton(UErrorCode& status) {
  536. if (U_FAILURE(status)) {
  537. // Do not get into loadData if the status already has error.
  538. return nullptr;
  539. }
  540. umtx_initOnce(AliasData::gInitOnce, &AliasData::loadData, status);
  541. return gSingleton;
  542. }
  543. const CharStringMap& languageMap() const { return language; }
  544. const CharStringMap& scriptMap() const { return script; }
  545. const CharStringMap& territoryMap() const { return territory; }
  546. const CharStringMap& variantMap() const { return variant; }
  547. const CharStringMap& subdivisionMap() const { return subdivision; }
  548. static void U_CALLCONV loadData(UErrorCode &status);
  549. static UBool U_CALLCONV cleanup();
  550. static UInitOnce gInitOnce;
  551. private:
  552. AliasData(CharStringMap languageMap,
  553. CharStringMap scriptMap,
  554. CharStringMap territoryMap,
  555. CharStringMap variantMap,
  556. CharStringMap subdivisionMap,
  557. CharString* strings)
  558. : language(std::move(languageMap)),
  559. script(std::move(scriptMap)),
  560. territory(std::move(territoryMap)),
  561. variant(std::move(variantMap)),
  562. subdivision(std::move(subdivisionMap)),
  563. strings(strings) {
  564. }
  565. ~AliasData() {
  566. delete strings;
  567. }
  568. static const AliasData* gSingleton;
  569. CharStringMap language;
  570. CharStringMap script;
  571. CharStringMap territory;
  572. CharStringMap variant;
  573. CharStringMap subdivision;
  574. CharString* strings;
  575. friend class AliasDataBuilder;
  576. };
  577. const AliasData* AliasData::gSingleton = nullptr;
  578. UInitOnce AliasData::gInitOnce {};
  579. UBool U_CALLCONV
  580. AliasData::cleanup()
  581. {
  582. gInitOnce.reset();
  583. delete gSingleton;
  584. return true;
  585. }
  586. void
  587. AliasDataBuilder::readAlias(
  588. UResourceBundle* alias,
  589. UniqueCharStrings* strings,
  590. LocalMemory<const char*>& types,
  591. LocalMemory<int32_t>& replacementIndexes,
  592. int32_t &length,
  593. void (*checkType)(const char* type),
  594. void (*checkReplacement)(const UChar* replacement),
  595. UErrorCode &status) {
  596. if (U_FAILURE(status)) {
  597. return;
  598. }
  599. length = ures_getSize(alias);
  600. const char** rawTypes = types.allocateInsteadAndCopy(length);
  601. if (rawTypes == nullptr) {
  602. status = U_MEMORY_ALLOCATION_ERROR;
  603. return;
  604. }
  605. int32_t* rawIndexes = replacementIndexes.allocateInsteadAndCopy(length);
  606. if (rawIndexes == nullptr) {
  607. status = U_MEMORY_ALLOCATION_ERROR;
  608. return;
  609. }
  610. for (int i = 0; U_SUCCESS(status) && ures_hasNext(alias); i++) {
  611. LocalUResourceBundlePointer res(
  612. ures_getNextResource(alias, nullptr, &status));
  613. const char* aliasFrom = ures_getKey(res.getAlias());
  614. const UChar* aliasTo =
  615. ures_getStringByKey(res.getAlias(), "replacement", nullptr, &status);
  616. if (U_FAILURE(status)) return;
  617. checkType(aliasFrom);
  618. checkReplacement(aliasTo);
  619. rawTypes[i] = aliasFrom;
  620. rawIndexes[i] = strings->add(aliasTo, status);
  621. }
  622. }
  623. /**
  624. * Read the languageAlias data from alias to strings+types+replacementIndexes.
  625. * Allocate length items for types, to store the type field. Allocate length
  626. * items for replacementIndexes, to store the index in the strings for the
  627. * replacement language.
  628. */
  629. void
  630. AliasDataBuilder::readLanguageAlias(
  631. UResourceBundle* alias,
  632. UniqueCharStrings* strings,
  633. LocalMemory<const char*>& types,
  634. LocalMemory<int32_t>& replacementIndexes,
  635. int32_t &length,
  636. UErrorCode &status)
  637. {
  638. return readAlias(
  639. alias, strings, types, replacementIndexes, length,
  640. #if U_DEBUG
  641. [](const char* type) {
  642. // Assert the aliasFrom only contains the following possibilities
  643. // language_REGION_variant
  644. // language_REGION
  645. // language_variant
  646. // language
  647. // und_variant
  648. Locale test(type);
  649. // Assert no script in aliasFrom
  650. U_ASSERT(test.getScript()[0] == '\0');
  651. // Assert when language is und, no REGION in aliasFrom.
  652. U_ASSERT(test.getLanguage()[0] != '\0' || test.getCountry()[0] == '\0');
  653. },
  654. #else
  655. [](const char*) {},
  656. #endif
  657. [](const UChar*) {}, status);
  658. }
  659. /**
  660. * Read the scriptAlias data from alias to strings+types+replacementIndexes.
  661. * Allocate length items for types, to store the type field. Allocate length
  662. * items for replacementIndexes, to store the index in the strings for the
  663. * replacement script.
  664. */
  665. void
  666. AliasDataBuilder::readScriptAlias(
  667. UResourceBundle* alias,
  668. UniqueCharStrings* strings,
  669. LocalMemory<const char*>& types,
  670. LocalMemory<int32_t>& replacementIndexes,
  671. int32_t &length,
  672. UErrorCode &status)
  673. {
  674. return readAlias(
  675. alias, strings, types, replacementIndexes, length,
  676. #if U_DEBUG
  677. [](const char* type) {
  678. U_ASSERT(uprv_strlen(type) == 4);
  679. },
  680. [](const UChar* replacement) {
  681. U_ASSERT(u_strlen(replacement) == 4);
  682. },
  683. #else
  684. [](const char*) {},
  685. [](const UChar*) { },
  686. #endif
  687. status);
  688. }
  689. /**
  690. * Read the territoryAlias data from alias to strings+types+replacementIndexes.
  691. * Allocate length items for types, to store the type field. Allocate length
  692. * items for replacementIndexes, to store the index in the strings for the
  693. * replacement regions.
  694. */
  695. void
  696. AliasDataBuilder::readTerritoryAlias(
  697. UResourceBundle* alias,
  698. UniqueCharStrings* strings,
  699. LocalMemory<const char*>& types,
  700. LocalMemory<int32_t>& replacementIndexes,
  701. int32_t &length,
  702. UErrorCode &status)
  703. {
  704. return readAlias(
  705. alias, strings, types, replacementIndexes, length,
  706. #if U_DEBUG
  707. [](const char* type) {
  708. U_ASSERT(uprv_strlen(type) == 2 || uprv_strlen(type) == 3);
  709. },
  710. #else
  711. [](const char*) {},
  712. #endif
  713. [](const UChar*) { },
  714. status);
  715. }
  716. /**
  717. * Read the variantAlias data from alias to strings+types+replacementIndexes.
  718. * Allocate length items for types, to store the type field. Allocate length
  719. * items for replacementIndexes, to store the index in the strings for the
  720. * replacement variant.
  721. */
  722. void
  723. AliasDataBuilder::readVariantAlias(
  724. UResourceBundle* alias,
  725. UniqueCharStrings* strings,
  726. LocalMemory<const char*>& types,
  727. LocalMemory<int32_t>& replacementIndexes,
  728. int32_t &length,
  729. UErrorCode &status)
  730. {
  731. return readAlias(
  732. alias, strings, types, replacementIndexes, length,
  733. #if U_DEBUG
  734. [](const char* type) {
  735. U_ASSERT(uprv_strlen(type) >= 4 && uprv_strlen(type) <= 8);
  736. U_ASSERT(uprv_strlen(type) != 4 ||
  737. (type[0] >= '0' && type[0] <= '9'));
  738. },
  739. [](const UChar* replacement) {
  740. int32_t len = u_strlen(replacement);
  741. U_ASSERT(len >= 4 && len <= 8);
  742. U_ASSERT(len != 4 ||
  743. (*replacement >= u'0' &&
  744. *replacement <= u'9'));
  745. },
  746. #else
  747. [](const char*) {},
  748. [](const UChar*) { },
  749. #endif
  750. status);
  751. }
  752. /**
  753. * Read the subdivisionAlias data from alias to strings+types+replacementIndexes.
  754. * Allocate length items for types, to store the type field. Allocate length
  755. * items for replacementIndexes, to store the index in the strings for the
  756. * replacement regions.
  757. */
  758. void
  759. AliasDataBuilder::readSubdivisionAlias(
  760. UResourceBundle* alias,
  761. UniqueCharStrings* strings,
  762. LocalMemory<const char*>& types,
  763. LocalMemory<int32_t>& replacementIndexes,
  764. int32_t &length,
  765. UErrorCode &status)
  766. {
  767. return readAlias(
  768. alias, strings, types, replacementIndexes, length,
  769. #if U_DEBUG
  770. [](const char* type) {
  771. U_ASSERT(uprv_strlen(type) >= 3 && uprv_strlen(type) <= 8);
  772. },
  773. #else
  774. [](const char*) {},
  775. #endif
  776. [](const UChar*) { },
  777. status);
  778. }
  779. /**
  780. * Initializes the alias data from the ICU resource bundles. The alias data
  781. * contains alias of language, country, script and variants.
  782. *
  783. * If the alias data has already loaded, then this method simply returns without
  784. * doing anything meaningful.
  785. */
  786. void U_CALLCONV
  787. AliasData::loadData(UErrorCode &status)
  788. {
  789. #ifdef LOCALE_CANONICALIZATION_DEBUG
  790. UDate start = uprv_getRawUTCtime();
  791. #endif // LOCALE_CANONICALIZATION_DEBUG
  792. ucln_common_registerCleanup(UCLN_COMMON_LOCALE_ALIAS, cleanup);
  793. AliasDataBuilder builder;
  794. gSingleton = builder.build(status);
  795. #ifdef LOCALE_CANONICALIZATION_DEBUG
  796. UDate end = uprv_getRawUTCtime();
  797. printf("AliasData::loadData took total %f ms\n", end - start);
  798. #endif // LOCALE_CANONICALIZATION_DEBUG
  799. }
  800. /**
  801. * Build the alias data from resources.
  802. */
  803. AliasData*
  804. AliasDataBuilder::build(UErrorCode &status) {
  805. if (U_FAILURE(status)) { return nullptr; }
  806. LocalUResourceBundlePointer metadata(
  807. ures_openDirect(nullptr, "metadata", &status));
  808. LocalUResourceBundlePointer metadataAlias(
  809. ures_getByKey(metadata.getAlias(), "alias", nullptr, &status));
  810. LocalUResourceBundlePointer languageAlias(
  811. ures_getByKey(metadataAlias.getAlias(), "language", nullptr, &status));
  812. LocalUResourceBundlePointer scriptAlias(
  813. ures_getByKey(metadataAlias.getAlias(), "script", nullptr, &status));
  814. LocalUResourceBundlePointer territoryAlias(
  815. ures_getByKey(metadataAlias.getAlias(), "territory", nullptr, &status));
  816. LocalUResourceBundlePointer variantAlias(
  817. ures_getByKey(metadataAlias.getAlias(), "variant", nullptr, &status));
  818. LocalUResourceBundlePointer subdivisionAlias(
  819. ures_getByKey(metadataAlias.getAlias(), "subdivision", nullptr, &status));
  820. if (U_FAILURE(status)) {
  821. return nullptr;
  822. }
  823. int32_t languagesLength = 0, scriptLength = 0, territoryLength = 0,
  824. variantLength = 0, subdivisionLength = 0;
  825. // Read the languageAlias into languageTypes, languageReplacementIndexes
  826. // and strings
  827. UniqueCharStrings strings(status);
  828. LocalMemory<const char*> languageTypes;
  829. LocalMemory<int32_t> languageReplacementIndexes;
  830. readLanguageAlias(languageAlias.getAlias(),
  831. &strings,
  832. languageTypes,
  833. languageReplacementIndexes,
  834. languagesLength,
  835. status);
  836. // Read the scriptAlias into scriptTypes, scriptReplacementIndexes
  837. // and strings
  838. LocalMemory<const char*> scriptTypes;
  839. LocalMemory<int32_t> scriptReplacementIndexes;
  840. readScriptAlias(scriptAlias.getAlias(),
  841. &strings,
  842. scriptTypes,
  843. scriptReplacementIndexes,
  844. scriptLength,
  845. status);
  846. // Read the territoryAlias into territoryTypes, territoryReplacementIndexes
  847. // and strings
  848. LocalMemory<const char*> territoryTypes;
  849. LocalMemory<int32_t> territoryReplacementIndexes;
  850. readTerritoryAlias(territoryAlias.getAlias(),
  851. &strings,
  852. territoryTypes,
  853. territoryReplacementIndexes,
  854. territoryLength, status);
  855. // Read the variantAlias into variantTypes, variantReplacementIndexes
  856. // and strings
  857. LocalMemory<const char*> variantTypes;
  858. LocalMemory<int32_t> variantReplacementIndexes;
  859. readVariantAlias(variantAlias.getAlias(),
  860. &strings,
  861. variantTypes,
  862. variantReplacementIndexes,
  863. variantLength, status);
  864. // Read the subdivisionAlias into subdivisionTypes, subdivisionReplacementIndexes
  865. // and strings
  866. LocalMemory<const char*> subdivisionTypes;
  867. LocalMemory<int32_t> subdivisionReplacementIndexes;
  868. readSubdivisionAlias(subdivisionAlias.getAlias(),
  869. &strings,
  870. subdivisionTypes,
  871. subdivisionReplacementIndexes,
  872. subdivisionLength, status);
  873. if (U_FAILURE(status)) {
  874. return nullptr;
  875. }
  876. // We can only use strings after freeze it.
  877. strings.freeze();
  878. // Build the languageMap from languageTypes & languageReplacementIndexes
  879. CharStringMap languageMap(490, status);
  880. for (int32_t i = 0; U_SUCCESS(status) && i < languagesLength; i++) {
  881. languageMap.put(languageTypes[i],
  882. strings.get(languageReplacementIndexes[i]),
  883. status);
  884. }
  885. // Build the scriptMap from scriptTypes & scriptReplacementIndexes
  886. CharStringMap scriptMap(1, status);
  887. for (int32_t i = 0; U_SUCCESS(status) && i < scriptLength; i++) {
  888. scriptMap.put(scriptTypes[i],
  889. strings.get(scriptReplacementIndexes[i]),
  890. status);
  891. }
  892. // Build the territoryMap from territoryTypes & territoryReplacementIndexes
  893. CharStringMap territoryMap(650, status);
  894. for (int32_t i = 0; U_SUCCESS(status) && i < territoryLength; i++) {
  895. territoryMap.put(territoryTypes[i],
  896. strings.get(territoryReplacementIndexes[i]),
  897. status);
  898. }
  899. // Build the variantMap from variantTypes & variantReplacementIndexes.
  900. CharStringMap variantMap(2, status);
  901. for (int32_t i = 0; U_SUCCESS(status) && i < variantLength; i++) {
  902. variantMap.put(variantTypes[i],
  903. strings.get(variantReplacementIndexes[i]),
  904. status);
  905. }
  906. // Build the subdivisionMap from subdivisionTypes & subdivisionReplacementIndexes.
  907. CharStringMap subdivisionMap(2, status);
  908. for (int32_t i = 0; U_SUCCESS(status) && i < subdivisionLength; i++) {
  909. subdivisionMap.put(subdivisionTypes[i],
  910. strings.get(subdivisionReplacementIndexes[i]),
  911. status);
  912. }
  913. if (U_FAILURE(status)) {
  914. return nullptr;
  915. }
  916. // copy hashtables
  917. auto *data = new AliasData(
  918. std::move(languageMap),
  919. std::move(scriptMap),
  920. std::move(territoryMap),
  921. std::move(variantMap),
  922. std::move(subdivisionMap),
  923. strings.orphanCharStrings());
  924. if (data == nullptr) {
  925. status = U_MEMORY_ALLOCATION_ERROR;
  926. }
  927. return data;
  928. }
  929. /**
  930. * A class that find the replacement values of locale fields by using AliasData.
  931. */
  932. class AliasReplacer {
  933. public:
  934. AliasReplacer(UErrorCode& status) :
  935. language(nullptr), script(nullptr), region(nullptr),
  936. extensions(nullptr),
  937. // store value in variants only once
  938. variants(nullptr,
  939. ([](UElement e1, UElement e2) -> UBool {
  940. return 0==uprv_strcmp((const char*)e1.pointer,
  941. (const char*)e2.pointer);}),
  942. status),
  943. data(nullptr) {
  944. }
  945. ~AliasReplacer() {
  946. }
  947. // Check the fields inside locale, if need to replace fields,
  948. // place the the replaced locale ID in out and return true.
  949. // Otherwise return false for no replacement or error.
  950. bool replace(
  951. const Locale& locale, CharString& out, UErrorCode& status);
  952. private:
  953. const char* language;
  954. const char* script;
  955. const char* region;
  956. const char* extensions;
  957. UVector variants;
  958. const AliasData* data;
  959. inline bool notEmpty(const char* str) {
  960. return str && str[0] != NULL_CHAR;
  961. }
  962. /**
  963. * If replacement is neither null nor empty and input is either null or empty,
  964. * return replacement.
  965. * If replacement is neither null nor empty but input is not empty, return input.
  966. * If replacement is either null or empty and type is either null or empty,
  967. * return input.
  968. * Otherwise return null.
  969. * replacement input type return
  970. * AAA nullptr * AAA
  971. * AAA BBB * BBB
  972. * nullptr || "" CCC nullptr CCC
  973. * nullptr || "" * DDD nullptr
  974. */
  975. inline const char* deleteOrReplace(
  976. const char* input, const char* type, const char* replacement) {
  977. return notEmpty(replacement) ?
  978. ((input == nullptr) ? replacement : input) :
  979. ((type == nullptr) ? input : nullptr);
  980. }
  981. inline bool same(const char* a, const char* b) {
  982. if (a == nullptr && b == nullptr) {
  983. return true;
  984. }
  985. if ((a == nullptr && b != nullptr) ||
  986. (a != nullptr && b == nullptr)) {
  987. return false;
  988. }
  989. return uprv_strcmp(a, b) == 0;
  990. }
  991. // Gather fields and generate locale ID into out.
  992. CharString& outputToString(CharString& out, UErrorCode& status);
  993. // Generate the lookup key.
  994. CharString& generateKey(const char* language, const char* region,
  995. const char* variant, CharString& out,
  996. UErrorCode& status);
  997. void parseLanguageReplacement(const char* replacement,
  998. const char*& replaceLanguage,
  999. const char*& replaceScript,
  1000. const char*& replaceRegion,
  1001. const char*& replaceVariant,
  1002. const char*& replaceExtensions,
  1003. UVector& toBeFreed,
  1004. UErrorCode& status);
  1005. // Replace by using languageAlias.
  1006. bool replaceLanguage(bool checkLanguage, bool checkRegion,
  1007. bool checkVariants, UVector& toBeFreed,
  1008. UErrorCode& status);
  1009. // Replace by using territoryAlias.
  1010. bool replaceTerritory(UVector& toBeFreed, UErrorCode& status);
  1011. // Replace by using scriptAlias.
  1012. bool replaceScript(UErrorCode& status);
  1013. // Replace by using variantAlias.
  1014. bool replaceVariant(UErrorCode& status);
  1015. // Replace by using subdivisionAlias.
  1016. bool replaceSubdivision(StringPiece subdivision,
  1017. CharString& output, UErrorCode& status);
  1018. // Replace transformed extensions.
  1019. bool replaceTransformedExtensions(
  1020. CharString& transformedExtensions, CharString& output, UErrorCode& status);
  1021. };
  1022. CharString&
  1023. AliasReplacer::generateKey(
  1024. const char* language, const char* region, const char* variant,
  1025. CharString& out, UErrorCode& status)
  1026. {
  1027. if (U_FAILURE(status)) { return out; }
  1028. out.append(language, status);
  1029. if (notEmpty(region)) {
  1030. out.append(SEP_CHAR, status)
  1031. .append(region, status);
  1032. }
  1033. if (notEmpty(variant)) {
  1034. out.append(SEP_CHAR, status)
  1035. .append(variant, status);
  1036. }
  1037. return out;
  1038. }
  1039. void
  1040. AliasReplacer::parseLanguageReplacement(
  1041. const char* replacement,
  1042. const char*& replacedLanguage,
  1043. const char*& replacedScript,
  1044. const char*& replacedRegion,
  1045. const char*& replacedVariant,
  1046. const char*& replacedExtensions,
  1047. UVector& toBeFreed,
  1048. UErrorCode& status)
  1049. {
  1050. if (U_FAILURE(status)) {
  1051. return;
  1052. }
  1053. replacedScript = replacedRegion = replacedVariant
  1054. = replacedExtensions = nullptr;
  1055. if (uprv_strchr(replacement, '_') == nullptr) {
  1056. replacedLanguage = replacement;
  1057. // reach the end, just return it.
  1058. return;
  1059. }
  1060. // We have multiple field so we have to allocate and parse
  1061. CharString* str =
  1062. new CharString(replacement, static_cast<int32_t>(uprv_strlen(replacement)), status);
  1063. LocalPointer<CharString> lpStr(str, status);
  1064. toBeFreed.adoptElement(lpStr.orphan(), status);
  1065. if (U_FAILURE(status)) {
  1066. return;
  1067. }
  1068. char* data = str->data();
  1069. replacedLanguage = (const char*) data;
  1070. char* endOfField = uprv_strchr(data, '_');
  1071. *endOfField = '\0'; // null terminiate it.
  1072. endOfField++;
  1073. const char* start = endOfField;
  1074. endOfField = const_cast<char*>(uprv_strchr(start, '_'));
  1075. size_t len = 0;
  1076. if (endOfField == nullptr) {
  1077. len = uprv_strlen(start);
  1078. } else {
  1079. len = endOfField - start;
  1080. *endOfField = '\0'; // null terminiate it.
  1081. }
  1082. if (len == 4 && uprv_isASCIILetter(*start)) {
  1083. // Got a script
  1084. replacedScript = start;
  1085. if (endOfField == nullptr) {
  1086. return;
  1087. }
  1088. start = endOfField++;
  1089. endOfField = const_cast<char*>(uprv_strchr(start, '_'));
  1090. if (endOfField == nullptr) {
  1091. len = uprv_strlen(start);
  1092. } else {
  1093. len = endOfField - start;
  1094. *endOfField = '\0'; // null terminiate it.
  1095. }
  1096. }
  1097. if (len >= 2 && len <= 3) {
  1098. // Got a region
  1099. replacedRegion = start;
  1100. if (endOfField == nullptr) {
  1101. return;
  1102. }
  1103. start = endOfField++;
  1104. endOfField = const_cast<char*>(uprv_strchr(start, '_'));
  1105. if (endOfField == nullptr) {
  1106. len = uprv_strlen(start);
  1107. } else {
  1108. len = endOfField - start;
  1109. *endOfField = '\0'; // null terminiate it.
  1110. }
  1111. }
  1112. if (len >= 4) {
  1113. // Got a variant
  1114. replacedVariant = start;
  1115. if (endOfField == nullptr) {
  1116. return;
  1117. }
  1118. start = endOfField++;
  1119. }
  1120. replacedExtensions = start;
  1121. }
  1122. bool
  1123. AliasReplacer::replaceLanguage(
  1124. bool checkLanguage, bool checkRegion,
  1125. bool checkVariants, UVector& toBeFreed, UErrorCode& status)
  1126. {
  1127. if (U_FAILURE(status)) {
  1128. return false;
  1129. }
  1130. if ( (checkRegion && region == nullptr) ||
  1131. (checkVariants && variants.size() == 0)) {
  1132. // Nothing to search.
  1133. return false;
  1134. }
  1135. int32_t variant_size = checkVariants ? variants.size() : 1;
  1136. // Since we may have more than one variant, we need to loop through them.
  1137. const char* searchLanguage = checkLanguage ? language : "und";
  1138. const char* searchRegion = checkRegion ? region : nullptr;
  1139. const char* searchVariant = nullptr;
  1140. for (int32_t variant_index = 0;
  1141. variant_index < variant_size;
  1142. variant_index++) {
  1143. if (checkVariants) {
  1144. U_ASSERT(variant_index < variant_size);
  1145. searchVariant = static_cast<const char*>(variants.elementAt(variant_index));
  1146. }
  1147. if (searchVariant != nullptr && uprv_strlen(searchVariant) < 4) {
  1148. // Do not consider ill-formed variant subtag.
  1149. searchVariant = nullptr;
  1150. }
  1151. CharString typeKey;
  1152. generateKey(searchLanguage, searchRegion, searchVariant, typeKey,
  1153. status);
  1154. if (U_FAILURE(status)) {
  1155. return false;
  1156. }
  1157. const char *replacement = data->languageMap().get(typeKey.data());
  1158. if (replacement == nullptr) {
  1159. // Found no replacement data.
  1160. continue;
  1161. }
  1162. const char* replacedLanguage = nullptr;
  1163. const char* replacedScript = nullptr;
  1164. const char* replacedRegion = nullptr;
  1165. const char* replacedVariant = nullptr;
  1166. const char* replacedExtensions = nullptr;
  1167. parseLanguageReplacement(replacement,
  1168. replacedLanguage,
  1169. replacedScript,
  1170. replacedRegion,
  1171. replacedVariant,
  1172. replacedExtensions,
  1173. toBeFreed,
  1174. status);
  1175. replacedLanguage =
  1176. (replacedLanguage != nullptr && uprv_strcmp(replacedLanguage, "und") == 0) ?
  1177. language : replacedLanguage;
  1178. replacedScript = deleteOrReplace(script, nullptr, replacedScript);
  1179. replacedRegion = deleteOrReplace(region, searchRegion, replacedRegion);
  1180. replacedVariant = deleteOrReplace(
  1181. searchVariant, searchVariant, replacedVariant);
  1182. if ( same(language, replacedLanguage) &&
  1183. same(script, replacedScript) &&
  1184. same(region, replacedRegion) &&
  1185. same(searchVariant, replacedVariant) &&
  1186. replacedExtensions == nullptr) {
  1187. // Replacement produce no changes.
  1188. continue;
  1189. }
  1190. language = replacedLanguage;
  1191. region = replacedRegion;
  1192. script = replacedScript;
  1193. if (searchVariant != nullptr) {
  1194. if (notEmpty(replacedVariant)) {
  1195. variants.setElementAt((void*)replacedVariant, variant_index);
  1196. } else {
  1197. variants.removeElementAt(variant_index);
  1198. }
  1199. }
  1200. if (replacedExtensions != nullptr) {
  1201. // DO NOTHING
  1202. // UTS35 does not specify what should we do if we have extensions in the
  1203. // replacement. Currently we know only the following 4 "BCP47 LegacyRules" have
  1204. // extensions in them languageAlias:
  1205. // i_default => en_x_i_default
  1206. // i_enochian => und_x_i_enochian
  1207. // i_mingo => see_x_i_mingo
  1208. // zh_min => nan_x_zh_min
  1209. // But all of them are already changed by code inside ultag_parse() before
  1210. // hitting this code.
  1211. }
  1212. // Something changed by language alias data.
  1213. return true;
  1214. }
  1215. // Nothing changed by language alias data.
  1216. return false;
  1217. }
  1218. bool
  1219. AliasReplacer::replaceTerritory(UVector& toBeFreed, UErrorCode& status)
  1220. {
  1221. if (U_FAILURE(status)) {
  1222. return false;
  1223. }
  1224. if (region == nullptr) {
  1225. // No region to search.
  1226. return false;
  1227. }
  1228. const char *replacement = data->territoryMap().get(region);
  1229. if (replacement == nullptr) {
  1230. // Found no replacement data for this region.
  1231. return false;
  1232. }
  1233. const char* replacedRegion = replacement;
  1234. const char* firstSpace = uprv_strchr(replacement, ' ');
  1235. if (firstSpace != nullptr) {
  1236. // If there are are more than one region in the replacement.
  1237. // We need to check which one match based on the language.
  1238. // Cannot use nullptr for language because that will construct
  1239. // the default locale, in that case, use "und" to get the correct
  1240. // locale.
  1241. Locale l = LocaleBuilder()
  1242. .setLanguage(language == nullptr ? "und" : language)
  1243. .setScript(script)
  1244. .build(status);
  1245. l.addLikelySubtags(status);
  1246. const char* likelyRegion = l.getCountry();
  1247. LocalPointer<CharString> item;
  1248. if (likelyRegion != nullptr && uprv_strlen(likelyRegion) > 0) {
  1249. size_t len = uprv_strlen(likelyRegion);
  1250. const char* foundInReplacement = uprv_strstr(replacement,
  1251. likelyRegion);
  1252. if (foundInReplacement != nullptr) {
  1253. // Assuming the case there are no three letter region code in
  1254. // the replacement of territoryAlias
  1255. U_ASSERT(foundInReplacement == replacement ||
  1256. *(foundInReplacement-1) == ' ');
  1257. U_ASSERT(foundInReplacement[len] == ' ' ||
  1258. foundInReplacement[len] == '\0');
  1259. item.adoptInsteadAndCheckErrorCode(
  1260. new CharString(foundInReplacement, static_cast<int32_t>(len), status), status);
  1261. }
  1262. }
  1263. if (item.isNull() && U_SUCCESS(status)) {
  1264. item.adoptInsteadAndCheckErrorCode(
  1265. new CharString(replacement,
  1266. static_cast<int32_t>(firstSpace - replacement), status), status);
  1267. }
  1268. if (U_FAILURE(status)) { return false; }
  1269. replacedRegion = item->data();
  1270. toBeFreed.adoptElement(item.orphan(), status);
  1271. if (U_FAILURE(status)) { return false; }
  1272. }
  1273. U_ASSERT(!same(region, replacedRegion));
  1274. region = replacedRegion;
  1275. // The region is changed by data in territory alias.
  1276. return true;
  1277. }
  1278. bool
  1279. AliasReplacer::replaceScript(UErrorCode& status)
  1280. {
  1281. if (U_FAILURE(status)) {
  1282. return false;
  1283. }
  1284. if (script == nullptr) {
  1285. // No script to search.
  1286. return false;
  1287. }
  1288. const char *replacement = data->scriptMap().get(script);
  1289. if (replacement == nullptr) {
  1290. // Found no replacement data for this script.
  1291. return false;
  1292. }
  1293. U_ASSERT(!same(script, replacement));
  1294. script = replacement;
  1295. // The script is changed by data in script alias.
  1296. return true;
  1297. }
  1298. bool
  1299. AliasReplacer::replaceVariant(UErrorCode& status)
  1300. {
  1301. if (U_FAILURE(status)) {
  1302. return false;
  1303. }
  1304. // Since we may have more than one variant, we need to loop through them.
  1305. for (int32_t i = 0; i < variants.size(); i++) {
  1306. const char* variant = static_cast<const char*>(variants.elementAt(i));
  1307. const char *replacement = data->variantMap().get(variant);
  1308. if (replacement == nullptr) {
  1309. // Found no replacement data for this variant.
  1310. continue;
  1311. }
  1312. U_ASSERT((uprv_strlen(replacement) >= 5 &&
  1313. uprv_strlen(replacement) <= 8) ||
  1314. (uprv_strlen(replacement) == 4 &&
  1315. replacement[0] >= '0' &&
  1316. replacement[0] <= '9'));
  1317. if (!same(variant, replacement)) {
  1318. variants.setElementAt((void*)replacement, i);
  1319. // Special hack to handle hepburn-heploc => alalc97
  1320. if (uprv_strcmp(variant, "heploc") == 0) {
  1321. for (int32_t j = 0; j < variants.size(); j++) {
  1322. if (uprv_strcmp((const char*)(variants.elementAt(j)),
  1323. "hepburn") == 0) {
  1324. variants.removeElementAt(j);
  1325. }
  1326. }
  1327. }
  1328. return true;
  1329. }
  1330. }
  1331. return false;
  1332. }
  1333. bool
  1334. AliasReplacer::replaceSubdivision(
  1335. StringPiece subdivision, CharString& output, UErrorCode& status)
  1336. {
  1337. if (U_FAILURE(status)) {
  1338. return false;
  1339. }
  1340. const char *replacement = data->subdivisionMap().get(subdivision.data());
  1341. if (replacement != nullptr) {
  1342. const char* firstSpace = uprv_strchr(replacement, ' ');
  1343. // Found replacement data for this subdivision.
  1344. size_t len = (firstSpace != nullptr) ?
  1345. (firstSpace - replacement) : uprv_strlen(replacement);
  1346. if (2 <= len && len <= 8) {
  1347. output.append(replacement, static_cast<int32_t>(len), status);
  1348. if (2 == len) {
  1349. // Add 'zzzz' based on changes to UTS #35 for CLDR-14312.
  1350. output.append("zzzz", 4, status);
  1351. }
  1352. }
  1353. return true;
  1354. }
  1355. return false;
  1356. }
  1357. bool
  1358. AliasReplacer::replaceTransformedExtensions(
  1359. CharString& transformedExtensions, CharString& output, UErrorCode& status)
  1360. {
  1361. // The content of the transformedExtensions will be modified in this
  1362. // function to NUL-terminating (tkey-tvalue) pairs.
  1363. if (U_FAILURE(status)) {
  1364. return false;
  1365. }
  1366. int32_t len = transformedExtensions.length();
  1367. const char* str = transformedExtensions.data();
  1368. const char* tkey = ultag_getTKeyStart(str);
  1369. int32_t tlangLen = (tkey == str) ? 0 :
  1370. ((tkey == nullptr) ? len : static_cast<int32_t>((tkey - str - 1)));
  1371. if (tlangLen > 0) {
  1372. Locale tlang = LocaleBuilder()
  1373. .setLanguageTag(StringPiece(str, tlangLen))
  1374. .build(status);
  1375. tlang.canonicalize(status);
  1376. output = tlang.toLanguageTag<CharString>(status);
  1377. if (U_FAILURE(status)) {
  1378. return false;
  1379. }
  1380. T_CString_toLowerCase(output.data());
  1381. }
  1382. if (tkey != nullptr) {
  1383. // We need to sort the tfields by tkey
  1384. UVector tfields(status);
  1385. if (U_FAILURE(status)) {
  1386. return false;
  1387. }
  1388. do {
  1389. const char* tvalue = uprv_strchr(tkey, '-');
  1390. if (tvalue == nullptr) {
  1391. status = U_ILLEGAL_ARGUMENT_ERROR;
  1392. return false;
  1393. }
  1394. const char* nextTKey = ultag_getTKeyStart(tvalue);
  1395. if (nextTKey != nullptr) {
  1396. *const_cast<char*>(nextTKey - 1) = '\0'; // NUL terminate tvalue
  1397. }
  1398. tfields.insertElementAt((void*)tkey, tfields.size(), status);
  1399. if (U_FAILURE(status)) {
  1400. return false;
  1401. }
  1402. tkey = nextTKey;
  1403. } while (tkey != nullptr);
  1404. tfields.sort([](UElement e1, UElement e2) -> int32_t {
  1405. return uprv_strcmp((const char*)e1.pointer, (const char*)e2.pointer);
  1406. }, status);
  1407. for (int32_t i = 0; i < tfields.size(); i++) {
  1408. if (output.length() > 0) {
  1409. output.append('-', status);
  1410. }
  1411. const char* tfield = static_cast<const char*>(tfields.elementAt(i));
  1412. const char* tvalue = uprv_strchr(tfield, '-');
  1413. if (tvalue == nullptr) {
  1414. status = U_ILLEGAL_ARGUMENT_ERROR;
  1415. return false;
  1416. }
  1417. // Split the "tkey-tvalue" pair string so that we can canonicalize the tvalue.
  1418. *const_cast<char*>(tvalue++) = '\0'; // NUL terminate tkey
  1419. output.append(tfield, status).append('-', status);
  1420. std::optional<std::string_view> bcpTValue = ulocimp_toBcpType(tfield, tvalue);
  1421. output.append(bcpTValue.has_value() ? *bcpTValue : tvalue, status);
  1422. }
  1423. }
  1424. if (U_FAILURE(status)) {
  1425. return false;
  1426. }
  1427. return true;
  1428. }
  1429. CharString&
  1430. AliasReplacer::outputToString(
  1431. CharString& out, UErrorCode& status)
  1432. {
  1433. if (U_FAILURE(status)) { return out; }
  1434. out.append(language, status);
  1435. if (notEmpty(script)) {
  1436. out.append(SEP_CHAR, status)
  1437. .append(script, status);
  1438. }
  1439. if (notEmpty(region)) {
  1440. out.append(SEP_CHAR, status)
  1441. .append(region, status);
  1442. }
  1443. if (variants.size() > 0) {
  1444. if (!notEmpty(script) && !notEmpty(region)) {
  1445. out.append(SEP_CHAR, status);
  1446. }
  1447. variants.sort([](UElement e1, UElement e2) -> int32_t {
  1448. return uprv_strcmp((const char*)e1.pointer, (const char*)e2.pointer);
  1449. }, status);
  1450. int32_t variantsStart = out.length();
  1451. for (int32_t i = 0; i < variants.size(); i++) {
  1452. out.append(SEP_CHAR, status)
  1453. .append(static_cast<const char*>(variants.elementAt(i)),
  1454. status);
  1455. }
  1456. T_CString_toUpperCase(out.data() + variantsStart);
  1457. }
  1458. if (notEmpty(extensions)) {
  1459. CharString tmp("und_", status);
  1460. tmp.append(extensions, status);
  1461. Locale tmpLocale(tmp.data());
  1462. // only support x extension inside CLDR for now.
  1463. U_ASSERT(extensions[0] == 'x');
  1464. out.append(tmpLocale.getName() + 1, status);
  1465. }
  1466. return out;
  1467. }
  1468. bool
  1469. AliasReplacer::replace(const Locale& locale, CharString& out, UErrorCode& status)
  1470. {
  1471. data = AliasData::singleton(status);
  1472. if (U_FAILURE(status)) {
  1473. return false;
  1474. }
  1475. U_ASSERT(data != nullptr);
  1476. out.clear();
  1477. language = locale.getLanguage();
  1478. if (!notEmpty(language)) {
  1479. language = nullptr;
  1480. }
  1481. script = locale.getScript();
  1482. if (!notEmpty(script)) {
  1483. script = nullptr;
  1484. }
  1485. region = locale.getCountry();
  1486. if (!notEmpty(region)) {
  1487. region = nullptr;
  1488. }
  1489. const char* variantsStr = locale.getVariant();
  1490. CharString variantsBuff(variantsStr, -1, status);
  1491. if (!variantsBuff.isEmpty()) {
  1492. if (U_FAILURE(status)) { return false; }
  1493. char* start = variantsBuff.data();
  1494. T_CString_toLowerCase(start);
  1495. char* end;
  1496. while ((end = uprv_strchr(start, SEP_CHAR)) != nullptr &&
  1497. U_SUCCESS(status)) {
  1498. *end = NULL_CHAR; // null terminate inside variantsBuff
  1499. // do not add "" or duplicate data to variants
  1500. if (*start && !variants.contains(start)) {
  1501. variants.addElement(start, status);
  1502. }
  1503. start = end + 1;
  1504. }
  1505. // do not add "" or duplicate data to variants
  1506. if (*start && !variants.contains(start)) {
  1507. variants.addElement(start, status);
  1508. }
  1509. }
  1510. if (U_FAILURE(status)) { return false; }
  1511. // Sort the variants
  1512. variants.sort([](UElement e1, UElement e2) -> int32_t {
  1513. return uprv_strcmp((const char*)e1.pointer, (const char*)e2.pointer);
  1514. }, status);
  1515. // A changed count to assert when loop too many times.
  1516. int changed = 0;
  1517. // A UVector to to hold CharString allocated by the replace* method
  1518. // and freed when out of scope from his function.
  1519. UVector stringsToBeFreed([](void *obj) { delete static_cast<CharString*>(obj); },
  1520. nullptr, 10, status);
  1521. while (U_SUCCESS(status)) {
  1522. // Something wrong with the data cause looping here more than 10 times
  1523. // already.
  1524. U_ASSERT(changed < 5);
  1525. // From observation of key in data/misc/metadata.txt
  1526. // we know currently we only need to search in the following combination
  1527. // of fields for type in languageAlias:
  1528. // * lang_region_variant
  1529. // * lang_region
  1530. // * lang_variant
  1531. // * lang
  1532. // * und_variant
  1533. // This assumption is ensured by the U_ASSERT in readLanguageAlias
  1534. //
  1535. // lang REGION variant
  1536. if ( replaceLanguage(true, true, true, stringsToBeFreed, status) ||
  1537. replaceLanguage(true, true, false, stringsToBeFreed, status) ||
  1538. replaceLanguage(true, false, true, stringsToBeFreed, status) ||
  1539. replaceLanguage(true, false, false, stringsToBeFreed, status) ||
  1540. replaceLanguage(false,false, true, stringsToBeFreed, status) ||
  1541. replaceTerritory(stringsToBeFreed, status) ||
  1542. replaceScript(status) ||
  1543. replaceVariant(status)) {
  1544. // Some values in data is changed, try to match from the beginning
  1545. // again.
  1546. changed++;
  1547. continue;
  1548. }
  1549. // Nothing changed. Break out.
  1550. break;
  1551. } // while(1)
  1552. if (U_FAILURE(status)) { return false; }
  1553. // Nothing changed and we know the order of the variants are not change
  1554. // because we have no variant or only one.
  1555. const char* extensionsStr = locale_getKeywordsStart(locale.getName());
  1556. if (changed == 0 && variants.size() <= 1 && extensionsStr == nullptr) {
  1557. return false;
  1558. }
  1559. outputToString(out, status);
  1560. if (U_FAILURE(status)) {
  1561. return false;
  1562. }
  1563. if (extensionsStr != nullptr) {
  1564. changed = 0;
  1565. Locale temp(locale);
  1566. LocalPointer<icu::StringEnumeration> iter(locale.createKeywords(status));
  1567. if (U_SUCCESS(status) && !iter.isNull()) {
  1568. const char* key;
  1569. while ((key = iter->next(nullptr, status)) != nullptr) {
  1570. if (uprv_strcmp("sd", key) == 0 || uprv_strcmp("rg", key) == 0 ||
  1571. uprv_strcmp("t", key) == 0) {
  1572. auto value = locale.getKeywordValue<CharString>(key, status);
  1573. if (U_FAILURE(status)) {
  1574. status = U_ZERO_ERROR;
  1575. continue;
  1576. }
  1577. CharString replacement;
  1578. if (uprv_strlen(key) == 2) {
  1579. if (replaceSubdivision(value.toStringPiece(), replacement, status)) {
  1580. changed++;
  1581. temp.setKeywordValue(key, replacement.data(), status);
  1582. }
  1583. } else {
  1584. U_ASSERT(uprv_strcmp(key, "t") == 0);
  1585. if (replaceTransformedExtensions(value, replacement, status)) {
  1586. changed++;
  1587. temp.setKeywordValue(key, replacement.data(), status);
  1588. }
  1589. }
  1590. if (U_FAILURE(status)) {
  1591. return false;
  1592. }
  1593. }
  1594. }
  1595. }
  1596. if (changed != 0) {
  1597. extensionsStr = locale_getKeywordsStart(temp.getName());
  1598. }
  1599. out.append(extensionsStr, status);
  1600. }
  1601. if (U_FAILURE(status)) {
  1602. return false;
  1603. }
  1604. // If the tag is not changed, return.
  1605. if (uprv_strcmp(out.data(), locale.getName()) == 0) {
  1606. out.clear();
  1607. return false;
  1608. }
  1609. return true;
  1610. }
  1611. // Return true if the locale is changed during canonicalization.
  1612. // The replaced value then will be put into out.
  1613. bool
  1614. canonicalizeLocale(const Locale& locale, CharString& out, UErrorCode& status)
  1615. {
  1616. if (U_FAILURE(status)) { return false; }
  1617. AliasReplacer replacer(status);
  1618. return replacer.replace(locale, out, status);
  1619. }
  1620. // Function to optimize for known cases without so we can skip the loading
  1621. // of resources in the startup time until we really need it.
  1622. bool
  1623. isKnownCanonicalizedLocale(const char* locale, UErrorCode& status)
  1624. {
  1625. if (U_FAILURE(status)) { return false; }
  1626. if ( uprv_strcmp(locale, "c") == 0 ||
  1627. uprv_strcmp(locale, "en") == 0 ||
  1628. uprv_strcmp(locale, "en_US") == 0) {
  1629. return true;
  1630. }
  1631. // common well-known Canonicalized.
  1632. umtx_initOnce(gKnownCanonicalizedInitOnce,
  1633. &loadKnownCanonicalized, status);
  1634. if (U_FAILURE(status)) {
  1635. return false;
  1636. }
  1637. U_ASSERT(gKnownCanonicalized != nullptr);
  1638. return uhash_geti(gKnownCanonicalized, locale) != 0;
  1639. }
  1640. } // namespace
  1641. U_NAMESPACE_END
  1642. // Function for testing.
  1643. U_EXPORT const char* const*
  1644. ulocimp_getKnownCanonicalizedLocaleForTest(int32_t& length)
  1645. {
  1646. U_NAMESPACE_USE
  1647. length = UPRV_LENGTHOF(KNOWN_CANONICALIZED);
  1648. return KNOWN_CANONICALIZED;
  1649. }
  1650. // Function for testing.
  1651. U_EXPORT bool
  1652. ulocimp_isCanonicalizedLocaleForTest(const char* localeName)
  1653. {
  1654. U_NAMESPACE_USE
  1655. Locale l(localeName);
  1656. UErrorCode status = U_ZERO_ERROR;
  1657. CharString temp;
  1658. return !canonicalizeLocale(l, temp, status) && U_SUCCESS(status);
  1659. }
  1660. U_NAMESPACE_BEGIN
  1661. /*This function initializes a Locale from a C locale ID*/
  1662. Locale& Locale::init(const char* localeID, UBool canonicalize)
  1663. {
  1664. fIsBogus = false;
  1665. /* Free our current storage */
  1666. if ((baseName != fullName) && (baseName != fullNameBuffer)) {
  1667. uprv_free(baseName);
  1668. }
  1669. baseName = nullptr;
  1670. if(fullName != fullNameBuffer) {
  1671. uprv_free(fullName);
  1672. fullName = fullNameBuffer;
  1673. }
  1674. // not a loop:
  1675. // just an easy way to have a common error-exit
  1676. // without goto and without another function
  1677. do {
  1678. char *separator;
  1679. char *field[5] = {nullptr};
  1680. int32_t fieldLen[5] = {0};
  1681. int32_t fieldIdx;
  1682. int32_t variantField;
  1683. int32_t length;
  1684. UErrorCode err;
  1685. if(localeID == nullptr) {
  1686. // not an error, just set the default locale
  1687. return *this = getDefault();
  1688. }
  1689. /* preset all fields to empty */
  1690. language[0] = script[0] = country[0] = 0;
  1691. // "canonicalize" the locale ID to ICU/Java format
  1692. err = U_ZERO_ERROR;
  1693. length = canonicalize ?
  1694. uloc_canonicalize(localeID, fullName, sizeof(fullNameBuffer), &err) :
  1695. uloc_getName(localeID, fullName, sizeof(fullNameBuffer), &err);
  1696. if (err == U_BUFFER_OVERFLOW_ERROR || length >= static_cast<int32_t>(sizeof(fullNameBuffer))) {
  1697. U_ASSERT(baseName == nullptr);
  1698. /*Go to heap for the fullName if necessary*/
  1699. char* newFullName = static_cast<char*>(uprv_malloc(sizeof(char) * (length + 1)));
  1700. if (newFullName == nullptr) {
  1701. break; // error: out of memory
  1702. }
  1703. fullName = newFullName;
  1704. err = U_ZERO_ERROR;
  1705. length = canonicalize ?
  1706. uloc_canonicalize(localeID, fullName, length+1, &err) :
  1707. uloc_getName(localeID, fullName, length+1, &err);
  1708. }
  1709. if(U_FAILURE(err) || err == U_STRING_NOT_TERMINATED_WARNING) {
  1710. /* should never occur */
  1711. break;
  1712. }
  1713. variantBegin = length;
  1714. /* after uloc_getName/canonicalize() we know that only '_' are separators */
  1715. /* But _ could also appeared in timezone such as "en@timezone=America/Los_Angeles" */
  1716. separator = field[0] = fullName;
  1717. fieldIdx = 1;
  1718. char* at = uprv_strchr(fullName, '@');
  1719. while ((separator = uprv_strchr(field[fieldIdx-1], SEP_CHAR)) != nullptr &&
  1720. fieldIdx < UPRV_LENGTHOF(field)-1 &&
  1721. (at == nullptr || separator < at)) {
  1722. field[fieldIdx] = separator + 1;
  1723. fieldLen[fieldIdx - 1] = static_cast<int32_t>(separator - field[fieldIdx - 1]);
  1724. fieldIdx++;
  1725. }
  1726. // variant may contain @foo or .foo POSIX cruft; remove it
  1727. separator = uprv_strchr(field[fieldIdx-1], '@');
  1728. char* sep2 = uprv_strchr(field[fieldIdx-1], '.');
  1729. if (separator!=nullptr || sep2!=nullptr) {
  1730. if (separator==nullptr || (sep2!=nullptr && separator > sep2)) {
  1731. separator = sep2;
  1732. }
  1733. fieldLen[fieldIdx - 1] = static_cast<int32_t>(separator - field[fieldIdx - 1]);
  1734. } else {
  1735. fieldLen[fieldIdx - 1] = length - static_cast<int32_t>(field[fieldIdx - 1] - fullName);
  1736. }
  1737. if (fieldLen[0] >= static_cast<int32_t>(sizeof(language)))
  1738. {
  1739. break; // error: the language field is too long
  1740. }
  1741. variantField = 1; /* Usually the 2nd one, except when a script or country is also used. */
  1742. if (fieldLen[0] > 0) {
  1743. /* We have a language */
  1744. uprv_memcpy(language, fullName, fieldLen[0]);
  1745. language[fieldLen[0]] = 0;
  1746. }
  1747. if (fieldLen[1] == 4 && uprv_isASCIILetter(field[1][0]) &&
  1748. uprv_isASCIILetter(field[1][1]) && uprv_isASCIILetter(field[1][2]) &&
  1749. uprv_isASCIILetter(field[1][3])) {
  1750. /* We have at least a script */
  1751. uprv_memcpy(script, field[1], fieldLen[1]);
  1752. script[fieldLen[1]] = 0;
  1753. variantField++;
  1754. }
  1755. if (fieldLen[variantField] == 2 || fieldLen[variantField] == 3) {
  1756. /* We have a country */
  1757. uprv_memcpy(country, field[variantField], fieldLen[variantField]);
  1758. country[fieldLen[variantField]] = 0;
  1759. variantField++;
  1760. } else if (fieldLen[variantField] == 0) {
  1761. variantField++; /* script or country empty but variant in next field (i.e. en__POSIX) */
  1762. }
  1763. if (fieldLen[variantField] > 0) {
  1764. /* We have a variant */
  1765. variantBegin = static_cast<int32_t>(field[variantField] - fullName);
  1766. }
  1767. err = U_ZERO_ERROR;
  1768. initBaseName(err);
  1769. if (U_FAILURE(err)) {
  1770. break;
  1771. }
  1772. if (canonicalize) {
  1773. if (!isKnownCanonicalizedLocale(fullName, err)) {
  1774. CharString replaced;
  1775. // Not sure it is already canonicalized
  1776. if (canonicalizeLocale(*this, replaced, err)) {
  1777. U_ASSERT(U_SUCCESS(err));
  1778. // If need replacement, call init again.
  1779. init(replaced.data(), false);
  1780. }
  1781. if (U_FAILURE(err)) {
  1782. break;
  1783. }
  1784. }
  1785. } // if (canonicalize) {
  1786. // successful end of init()
  1787. return *this;
  1788. } while(0); /*loop doesn't iterate*/
  1789. // when an error occurs, then set this object to "bogus" (there is no UErrorCode here)
  1790. setToBogus();
  1791. return *this;
  1792. }
  1793. /*
  1794. * Set up the base name.
  1795. * If there are no key words, it's exactly the full name.
  1796. * If key words exist, it's the full name truncated at the '@' character.
  1797. * Need to set up both at init() and after setting a keyword.
  1798. */
  1799. void
  1800. Locale::initBaseName(UErrorCode &status) {
  1801. if (U_FAILURE(status)) {
  1802. return;
  1803. }
  1804. U_ASSERT(baseName==nullptr || baseName==fullName);
  1805. const char *atPtr = uprv_strchr(fullName, '@');
  1806. const char *eqPtr = uprv_strchr(fullName, '=');
  1807. if (atPtr && eqPtr && atPtr < eqPtr) {
  1808. // Key words exist.
  1809. int32_t baseNameLength = static_cast<int32_t>(atPtr - fullName);
  1810. char* newBaseName = static_cast<char*>(uprv_malloc(baseNameLength + 1));
  1811. if (newBaseName == nullptr) {
  1812. status = U_MEMORY_ALLOCATION_ERROR;
  1813. return;
  1814. }
  1815. baseName = newBaseName;
  1816. uprv_strncpy(baseName, fullName, baseNameLength);
  1817. baseName[baseNameLength] = 0;
  1818. // The original computation of variantBegin leaves it equal to the length
  1819. // of fullName if there is no variant. It should instead be
  1820. // the length of the baseName.
  1821. if (variantBegin > baseNameLength) {
  1822. variantBegin = baseNameLength;
  1823. }
  1824. } else {
  1825. baseName = fullName;
  1826. }
  1827. }
  1828. int32_t
  1829. Locale::hashCode() const
  1830. {
  1831. return ustr_hashCharsN(fullName, static_cast<int32_t>(uprv_strlen(fullName)));
  1832. }
  1833. void
  1834. Locale::setToBogus() {
  1835. /* Free our current storage */
  1836. if((baseName != fullName) && (baseName != fullNameBuffer)) {
  1837. uprv_free(baseName);
  1838. }
  1839. baseName = nullptr;
  1840. if(fullName != fullNameBuffer) {
  1841. uprv_free(fullName);
  1842. fullName = fullNameBuffer;
  1843. }
  1844. *fullNameBuffer = 0;
  1845. *language = 0;
  1846. *script = 0;
  1847. *country = 0;
  1848. fIsBogus = true;
  1849. variantBegin = 0;
  1850. }
  1851. const Locale& U_EXPORT2
  1852. Locale::getDefault()
  1853. {
  1854. {
  1855. Mutex lock(&gDefaultLocaleMutex);
  1856. if (gDefaultLocale != nullptr) {
  1857. return *gDefaultLocale;
  1858. }
  1859. }
  1860. UErrorCode status = U_ZERO_ERROR;
  1861. return *locale_set_default_internal(nullptr, status);
  1862. }
  1863. void U_EXPORT2
  1864. Locale::setDefault( const Locale& newLocale,
  1865. UErrorCode& status)
  1866. {
  1867. if (U_FAILURE(status)) {
  1868. return;
  1869. }
  1870. /* Set the default from the full name string of the supplied locale.
  1871. * This is a convenient way to access the default locale caching mechanisms.
  1872. */
  1873. const char *localeID = newLocale.getName();
  1874. locale_set_default_internal(localeID, status);
  1875. }
  1876. void
  1877. Locale::addLikelySubtags(UErrorCode& status) {
  1878. if (U_FAILURE(status)) {
  1879. return;
  1880. }
  1881. CharString maximizedLocaleID = ulocimp_addLikelySubtags(fullName, status);
  1882. if (U_FAILURE(status)) {
  1883. return;
  1884. }
  1885. init(maximizedLocaleID.data(), /*canonicalize=*/false);
  1886. if (isBogus()) {
  1887. status = U_ILLEGAL_ARGUMENT_ERROR;
  1888. }
  1889. }
  1890. void
  1891. Locale::minimizeSubtags(UErrorCode& status) {
  1892. Locale::minimizeSubtags(false, status);
  1893. }
  1894. void
  1895. Locale::minimizeSubtags(bool favorScript, UErrorCode& status) {
  1896. if (U_FAILURE(status)) {
  1897. return;
  1898. }
  1899. CharString minimizedLocaleID = ulocimp_minimizeSubtags(fullName, favorScript, status);
  1900. if (U_FAILURE(status)) {
  1901. return;
  1902. }
  1903. init(minimizedLocaleID.data(), /*canonicalize=*/false);
  1904. if (isBogus()) {
  1905. status = U_ILLEGAL_ARGUMENT_ERROR;
  1906. }
  1907. }
  1908. void
  1909. Locale::canonicalize(UErrorCode& status) {
  1910. if (U_FAILURE(status)) {
  1911. return;
  1912. }
  1913. if (isBogus()) {
  1914. status = U_ILLEGAL_ARGUMENT_ERROR;
  1915. return;
  1916. }
  1917. CharString uncanonicalized(fullName, status);
  1918. if (U_FAILURE(status)) {
  1919. return;
  1920. }
  1921. init(uncanonicalized.data(), /*canonicalize=*/true);
  1922. if (isBogus()) {
  1923. status = U_ILLEGAL_ARGUMENT_ERROR;
  1924. }
  1925. }
  1926. Locale U_EXPORT2
  1927. Locale::forLanguageTag(StringPiece tag, UErrorCode& status)
  1928. {
  1929. Locale result(Locale::eBOGUS);
  1930. if (U_FAILURE(status)) {
  1931. return result;
  1932. }
  1933. // If a BCP 47 language tag is passed as the language parameter to the
  1934. // normal Locale constructor, it will actually fall back to invoking
  1935. // uloc_forLanguageTag() to parse it if it somehow is able to detect that
  1936. // the string actually is BCP 47. This works well for things like strings
  1937. // using BCP 47 extensions, but it does not at all work for things like
  1938. // legacy language tags (marked as “Type: grandfathered” in BCP 47,
  1939. // e.g., "en-GB-oed") which are possible to also
  1940. // interpret as ICU locale IDs and because of that won't trigger the BCP 47
  1941. // parsing. Therefore the code here explicitly calls uloc_forLanguageTag()
  1942. // and then Locale::init(), instead of just calling the normal constructor.
  1943. int32_t parsedLength;
  1944. CharString localeID = ulocimp_forLanguageTag(
  1945. tag.data(),
  1946. tag.length(),
  1947. &parsedLength,
  1948. status);
  1949. if (U_FAILURE(status)) {
  1950. return result;
  1951. }
  1952. if (parsedLength != tag.size()) {
  1953. status = U_ILLEGAL_ARGUMENT_ERROR;
  1954. return result;
  1955. }
  1956. result.init(localeID.data(), /*canonicalize=*/false);
  1957. if (result.isBogus()) {
  1958. status = U_ILLEGAL_ARGUMENT_ERROR;
  1959. }
  1960. return result;
  1961. }
  1962. void
  1963. Locale::toLanguageTag(ByteSink& sink, UErrorCode& status) const
  1964. {
  1965. if (U_FAILURE(status)) {
  1966. return;
  1967. }
  1968. if (fIsBogus) {
  1969. status = U_ILLEGAL_ARGUMENT_ERROR;
  1970. return;
  1971. }
  1972. ulocimp_toLanguageTag(fullName, sink, /*strict=*/false, status);
  1973. }
  1974. Locale U_EXPORT2
  1975. Locale::createFromName (const char *name)
  1976. {
  1977. if (name) {
  1978. Locale l("");
  1979. l.init(name, false);
  1980. return l;
  1981. }
  1982. else {
  1983. return getDefault();
  1984. }
  1985. }
  1986. Locale U_EXPORT2
  1987. Locale::createCanonical(const char* name) {
  1988. Locale loc("");
  1989. loc.init(name, true);
  1990. return loc;
  1991. }
  1992. const char *
  1993. Locale::getISO3Language() const
  1994. {
  1995. return uloc_getISO3Language(fullName);
  1996. }
  1997. const char *
  1998. Locale::getISO3Country() const
  1999. {
  2000. return uloc_getISO3Country(fullName);
  2001. }
  2002. /**
  2003. * Return the LCID value as specified in the "LocaleID" resource for this
  2004. * locale. The LocaleID must be expressed as a hexadecimal number, from
  2005. * one to four digits. If the LocaleID resource is not present, or is
  2006. * in an incorrect format, 0 is returned. The LocaleID is for use in
  2007. * Windows (it is an LCID), but is available on all platforms.
  2008. */
  2009. uint32_t
  2010. Locale::getLCID() const
  2011. {
  2012. return uloc_getLCID(fullName);
  2013. }
  2014. const char* const* U_EXPORT2 Locale::getISOCountries()
  2015. {
  2016. return uloc_getISOCountries();
  2017. }
  2018. const char* const* U_EXPORT2 Locale::getISOLanguages()
  2019. {
  2020. return uloc_getISOLanguages();
  2021. }
  2022. // Set the locale's data based on a posix id.
  2023. void Locale::setFromPOSIXID(const char *posixID)
  2024. {
  2025. init(posixID, true);
  2026. }
  2027. const Locale & U_EXPORT2
  2028. Locale::getRoot()
  2029. {
  2030. return getLocale(eROOT);
  2031. }
  2032. const Locale & U_EXPORT2
  2033. Locale::getEnglish()
  2034. {
  2035. return getLocale(eENGLISH);
  2036. }
  2037. const Locale & U_EXPORT2
  2038. Locale::getFrench()
  2039. {
  2040. return getLocale(eFRENCH);
  2041. }
  2042. const Locale & U_EXPORT2
  2043. Locale::getGerman()
  2044. {
  2045. return getLocale(eGERMAN);
  2046. }
  2047. const Locale & U_EXPORT2
  2048. Locale::getItalian()
  2049. {
  2050. return getLocale(eITALIAN);
  2051. }
  2052. const Locale & U_EXPORT2
  2053. Locale::getJapanese()
  2054. {
  2055. return getLocale(eJAPANESE);
  2056. }
  2057. const Locale & U_EXPORT2
  2058. Locale::getKorean()
  2059. {
  2060. return getLocale(eKOREAN);
  2061. }
  2062. const Locale & U_EXPORT2
  2063. Locale::getChinese()
  2064. {
  2065. return getLocale(eCHINESE);
  2066. }
  2067. const Locale & U_EXPORT2
  2068. Locale::getSimplifiedChinese()
  2069. {
  2070. return getLocale(eCHINA);
  2071. }
  2072. const Locale & U_EXPORT2
  2073. Locale::getTraditionalChinese()
  2074. {
  2075. return getLocale(eTAIWAN);
  2076. }
  2077. const Locale & U_EXPORT2
  2078. Locale::getFrance()
  2079. {
  2080. return getLocale(eFRANCE);
  2081. }
  2082. const Locale & U_EXPORT2
  2083. Locale::getGermany()
  2084. {
  2085. return getLocale(eGERMANY);
  2086. }
  2087. const Locale & U_EXPORT2
  2088. Locale::getItaly()
  2089. {
  2090. return getLocale(eITALY);
  2091. }
  2092. const Locale & U_EXPORT2
  2093. Locale::getJapan()
  2094. {
  2095. return getLocale(eJAPAN);
  2096. }
  2097. const Locale & U_EXPORT2
  2098. Locale::getKorea()
  2099. {
  2100. return getLocale(eKOREA);
  2101. }
  2102. const Locale & U_EXPORT2
  2103. Locale::getChina()
  2104. {
  2105. return getLocale(eCHINA);
  2106. }
  2107. const Locale & U_EXPORT2
  2108. Locale::getPRC()
  2109. {
  2110. return getLocale(eCHINA);
  2111. }
  2112. const Locale & U_EXPORT2
  2113. Locale::getTaiwan()
  2114. {
  2115. return getLocale(eTAIWAN);
  2116. }
  2117. const Locale & U_EXPORT2
  2118. Locale::getUK()
  2119. {
  2120. return getLocale(eUK);
  2121. }
  2122. const Locale & U_EXPORT2
  2123. Locale::getUS()
  2124. {
  2125. return getLocale(eUS);
  2126. }
  2127. const Locale & U_EXPORT2
  2128. Locale::getCanada()
  2129. {
  2130. return getLocale(eCANADA);
  2131. }
  2132. const Locale & U_EXPORT2
  2133. Locale::getCanadaFrench()
  2134. {
  2135. return getLocale(eCANADA_FRENCH);
  2136. }
  2137. const Locale &
  2138. Locale::getLocale(int locid)
  2139. {
  2140. Locale *localeCache = getLocaleCache();
  2141. U_ASSERT((locid < eMAX_LOCALES)&&(locid>=0));
  2142. if (localeCache == nullptr) {
  2143. // Failure allocating the locale cache.
  2144. // The best we can do is return a nullptr reference.
  2145. locid = 0;
  2146. }
  2147. return localeCache[locid]; /*operating on nullptr*/
  2148. }
  2149. /*
  2150. This function is defined this way in order to get around static
  2151. initialization and static destruction.
  2152. */
  2153. Locale *
  2154. Locale::getLocaleCache()
  2155. {
  2156. UErrorCode status = U_ZERO_ERROR;
  2157. umtx_initOnce(gLocaleCacheInitOnce, locale_init, status);
  2158. return gLocaleCache;
  2159. }
  2160. class KeywordEnumeration : public StringEnumeration {
  2161. protected:
  2162. CharString keywords;
  2163. private:
  2164. const char *current;
  2165. static const char fgClassID;
  2166. public:
  2167. static UClassID U_EXPORT2 getStaticClassID() { return (UClassID)&fgClassID; }
  2168. virtual UClassID getDynamicClassID() const override { return getStaticClassID(); }
  2169. public:
  2170. KeywordEnumeration(const char *keys, int32_t keywordLen, int32_t currentIndex, UErrorCode &status)
  2171. : keywords(), current(keywords.data()) {
  2172. if(U_SUCCESS(status) && keywordLen != 0) {
  2173. if(keys == nullptr || keywordLen < 0) {
  2174. status = U_ILLEGAL_ARGUMENT_ERROR;
  2175. } else {
  2176. keywords.append(keys, keywordLen, status);
  2177. current = keywords.data() + currentIndex;
  2178. }
  2179. }
  2180. }
  2181. virtual ~KeywordEnumeration();
  2182. virtual StringEnumeration * clone() const override
  2183. {
  2184. UErrorCode status = U_ZERO_ERROR;
  2185. return new KeywordEnumeration(
  2186. keywords.data(), keywords.length(),
  2187. static_cast<int32_t>(current - keywords.data()), status);
  2188. }
  2189. virtual int32_t count(UErrorCode& status) const override {
  2190. if (U_FAILURE(status)) { return 0; }
  2191. const char *kw = keywords.data();
  2192. int32_t result = 0;
  2193. while(*kw) {
  2194. result++;
  2195. kw += uprv_strlen(kw)+1;
  2196. }
  2197. return result;
  2198. }
  2199. virtual const char* next(int32_t* resultLength, UErrorCode& status) override {
  2200. const char* result;
  2201. int32_t len;
  2202. if(U_SUCCESS(status) && *current != 0) {
  2203. result = current;
  2204. len = static_cast<int32_t>(uprv_strlen(current));
  2205. current += len+1;
  2206. if(resultLength != nullptr) {
  2207. *resultLength = len;
  2208. }
  2209. } else {
  2210. if(resultLength != nullptr) {
  2211. *resultLength = 0;
  2212. }
  2213. result = nullptr;
  2214. }
  2215. return result;
  2216. }
  2217. virtual const UnicodeString* snext(UErrorCode& status) override {
  2218. if (U_FAILURE(status)) { return nullptr; }
  2219. int32_t resultLength = 0;
  2220. const char *s = next(&resultLength, status);
  2221. return setChars(s, resultLength, status);
  2222. }
  2223. virtual void reset(UErrorCode& status) override {
  2224. if (U_FAILURE(status)) { return; }
  2225. current = keywords.data();
  2226. }
  2227. };
  2228. const char KeywordEnumeration::fgClassID = '\0';
  2229. // Out-of-line virtual destructor to serve as the "key function".
  2230. KeywordEnumeration::~KeywordEnumeration() = default;
  2231. // A wrapper around KeywordEnumeration that calls uloc_toUnicodeLocaleKey() in
  2232. // the next() method for each keyword before returning it.
  2233. class UnicodeKeywordEnumeration : public KeywordEnumeration {
  2234. public:
  2235. using KeywordEnumeration::KeywordEnumeration;
  2236. virtual ~UnicodeKeywordEnumeration();
  2237. virtual const char* next(int32_t* resultLength, UErrorCode& status) override {
  2238. const char* legacy_key = KeywordEnumeration::next(nullptr, status);
  2239. while (U_SUCCESS(status) && legacy_key != nullptr) {
  2240. const char* key = uloc_toUnicodeLocaleKey(legacy_key);
  2241. if (key != nullptr) {
  2242. if (resultLength != nullptr) {
  2243. *resultLength = static_cast<int32_t>(uprv_strlen(key));
  2244. }
  2245. return key;
  2246. }
  2247. // Not a Unicode keyword, could be a t, x or other, continue to look at the next one.
  2248. legacy_key = KeywordEnumeration::next(nullptr, status);
  2249. }
  2250. if (resultLength != nullptr) *resultLength = 0;
  2251. return nullptr;
  2252. }
  2253. virtual int32_t count(UErrorCode& status) const override {
  2254. if (U_FAILURE(status)) { return 0; }
  2255. const char *kw = keywords.data();
  2256. int32_t result = 0;
  2257. while(*kw) {
  2258. if (uloc_toUnicodeLocaleKey(kw) != nullptr) {
  2259. result++;
  2260. }
  2261. kw += uprv_strlen(kw)+1;
  2262. }
  2263. return result;
  2264. }
  2265. };
  2266. // Out-of-line virtual destructor to serve as the "key function".
  2267. UnicodeKeywordEnumeration::~UnicodeKeywordEnumeration() = default;
  2268. StringEnumeration *
  2269. Locale::createKeywords(UErrorCode &status) const
  2270. {
  2271. StringEnumeration *result = nullptr;
  2272. if (U_FAILURE(status)) {
  2273. return result;
  2274. }
  2275. const char* variantStart = uprv_strchr(fullName, '@');
  2276. const char* assignment = uprv_strchr(fullName, '=');
  2277. if(variantStart) {
  2278. if(assignment > variantStart) {
  2279. CharString keywords = ulocimp_getKeywords(variantStart + 1, '@', false, status);
  2280. if (U_SUCCESS(status) && !keywords.isEmpty()) {
  2281. result = new KeywordEnumeration(keywords.data(), keywords.length(), 0, status);
  2282. if (!result) {
  2283. status = U_MEMORY_ALLOCATION_ERROR;
  2284. }
  2285. }
  2286. } else {
  2287. status = U_INVALID_FORMAT_ERROR;
  2288. }
  2289. }
  2290. return result;
  2291. }
  2292. StringEnumeration *
  2293. Locale::createUnicodeKeywords(UErrorCode &status) const
  2294. {
  2295. StringEnumeration *result = nullptr;
  2296. if (U_FAILURE(status)) {
  2297. return result;
  2298. }
  2299. const char* variantStart = uprv_strchr(fullName, '@');
  2300. const char* assignment = uprv_strchr(fullName, '=');
  2301. if(variantStart) {
  2302. if(assignment > variantStart) {
  2303. CharString keywords = ulocimp_getKeywords(variantStart + 1, '@', false, status);
  2304. if (U_SUCCESS(status) && !keywords.isEmpty()) {
  2305. result = new UnicodeKeywordEnumeration(keywords.data(), keywords.length(), 0, status);
  2306. if (!result) {
  2307. status = U_MEMORY_ALLOCATION_ERROR;
  2308. }
  2309. }
  2310. } else {
  2311. status = U_INVALID_FORMAT_ERROR;
  2312. }
  2313. }
  2314. return result;
  2315. }
  2316. int32_t
  2317. Locale::getKeywordValue(const char* keywordName, char *buffer, int32_t bufLen, UErrorCode &status) const
  2318. {
  2319. return uloc_getKeywordValue(fullName, keywordName, buffer, bufLen, &status);
  2320. }
  2321. void
  2322. Locale::getKeywordValue(StringPiece keywordName, ByteSink& sink, UErrorCode& status) const {
  2323. if (U_FAILURE(status)) {
  2324. return;
  2325. }
  2326. if (fIsBogus) {
  2327. status = U_ILLEGAL_ARGUMENT_ERROR;
  2328. return;
  2329. }
  2330. ulocimp_getKeywordValue(fullName, keywordName, sink, status);
  2331. }
  2332. void
  2333. Locale::getUnicodeKeywordValue(StringPiece keywordName,
  2334. ByteSink& sink,
  2335. UErrorCode& status) const {
  2336. if (U_FAILURE(status)) {
  2337. return;
  2338. }
  2339. std::optional<std::string_view> legacy_key = ulocimp_toLegacyKeyWithFallback(keywordName);
  2340. if (!legacy_key.has_value()) {
  2341. status = U_ILLEGAL_ARGUMENT_ERROR;
  2342. return;
  2343. }
  2344. auto legacy_value = getKeywordValue<CharString>(*legacy_key, status);
  2345. if (U_FAILURE(status)) {
  2346. return;
  2347. }
  2348. std::optional<std::string_view> unicode_value =
  2349. ulocimp_toBcpTypeWithFallback(keywordName, legacy_value.toStringPiece());
  2350. if (!unicode_value.has_value()) {
  2351. status = U_ILLEGAL_ARGUMENT_ERROR;
  2352. return;
  2353. }
  2354. sink.Append(unicode_value->data(), static_cast<int32_t>(unicode_value->size()));
  2355. }
  2356. void
  2357. Locale::setKeywordValue(StringPiece keywordName,
  2358. StringPiece keywordValue,
  2359. UErrorCode& status) {
  2360. if (U_FAILURE(status)) { return; }
  2361. if (keywordName.empty()) {
  2362. status = U_ILLEGAL_ARGUMENT_ERROR;
  2363. return;
  2364. }
  2365. if (status == U_STRING_NOT_TERMINATED_WARNING) {
  2366. status = U_ZERO_ERROR;
  2367. }
  2368. int32_t length = static_cast<int32_t>(uprv_strlen(fullName));
  2369. int32_t capacity = fullName == fullNameBuffer ? ULOC_FULLNAME_CAPACITY : length + 1;
  2370. const char* start = locale_getKeywordsStart(fullName);
  2371. int32_t offset = start == nullptr ? length : start - fullName;
  2372. for (;;) {
  2373. // Remove -1 from the capacity so that this function can guarantee NUL termination.
  2374. CheckedArrayByteSink sink(fullName + offset, capacity - offset - 1);
  2375. int32_t reslen = ulocimp_setKeywordValue(
  2376. {fullName + offset, static_cast<std::string_view::size_type>(length - offset)},
  2377. keywordName,
  2378. keywordValue,
  2379. sink,
  2380. status);
  2381. if (status == U_BUFFER_OVERFLOW_ERROR) {
  2382. capacity = reslen + offset + 1;
  2383. char* newFullName = static_cast<char*>(uprv_malloc(capacity));
  2384. if (newFullName == nullptr) {
  2385. status = U_MEMORY_ALLOCATION_ERROR;
  2386. return;
  2387. }
  2388. uprv_memcpy(newFullName, fullName, length + 1);
  2389. if (fullName != fullNameBuffer) {
  2390. if (baseName == fullName) {
  2391. baseName = newFullName; // baseName should not point to freed memory.
  2392. }
  2393. // if fullName is already on the heap, need to free it.
  2394. uprv_free(fullName);
  2395. }
  2396. fullName = newFullName;
  2397. status = U_ZERO_ERROR;
  2398. continue;
  2399. }
  2400. if (U_FAILURE(status)) { return; }
  2401. u_terminateChars(fullName, capacity, reslen + offset, &status);
  2402. break;
  2403. }
  2404. if (baseName == fullName) {
  2405. // May have added the first keyword, meaning that the fullName is no longer also the baseName.
  2406. initBaseName(status);
  2407. }
  2408. }
  2409. void
  2410. Locale::setUnicodeKeywordValue(StringPiece keywordName,
  2411. StringPiece keywordValue,
  2412. UErrorCode& status) {
  2413. if (U_FAILURE(status)) {
  2414. return;
  2415. }
  2416. std::optional<std::string_view> legacy_key = ulocimp_toLegacyKeyWithFallback(keywordName);
  2417. if (!legacy_key.has_value()) {
  2418. status = U_ILLEGAL_ARGUMENT_ERROR;
  2419. return;
  2420. }
  2421. std::string_view value;
  2422. if (!keywordValue.empty()) {
  2423. std::optional<std::string_view> legacy_value =
  2424. ulocimp_toLegacyTypeWithFallback(keywordName, keywordValue);
  2425. if (!legacy_value.has_value()) {
  2426. status = U_ILLEGAL_ARGUMENT_ERROR;
  2427. return;
  2428. }
  2429. value = *legacy_value;
  2430. }
  2431. setKeywordValue(*legacy_key, value, status);
  2432. }
  2433. const char *
  2434. Locale::getBaseName() const {
  2435. return baseName;
  2436. }
  2437. Locale::Iterator::~Iterator() = default;
  2438. //eof
  2439. U_NAMESPACE_END