ulocimp.h 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443
  1. // © 2016 and later: Unicode, Inc. and others.
  2. // License & terms of use: http://www.unicode.org/copyright.html
  3. /*
  4. **********************************************************************
  5. * Copyright (C) 2004-2016, International Business Machines
  6. * Corporation and others. All Rights Reserved.
  7. **********************************************************************
  8. */
  9. #ifndef ULOCIMP_H
  10. #define ULOCIMP_H
  11. #include <cstddef>
  12. #include <optional>
  13. #include <string_view>
  14. #include "unicode/bytestream.h"
  15. #include "unicode/uloc.h"
  16. #include "charstr.h"
  17. /**
  18. * Create an iterator over the specified keywords list
  19. * @param keywordList double-null terminated list. Will be copied.
  20. * @param keywordListSize size in bytes of keywordList
  21. * @param status err code
  22. * @return enumeration (owned by caller) of the keyword list.
  23. * @internal ICU 3.0
  24. */
  25. U_CAPI UEnumeration* U_EXPORT2
  26. uloc_openKeywordList(const char *keywordList, int32_t keywordListSize, UErrorCode* status);
  27. /**
  28. * Look up a resource bundle table item with fallback on the table level.
  29. * This is accessible so it can be called by C++ code.
  30. */
  31. U_CAPI const UChar * U_EXPORT2
  32. uloc_getTableStringWithFallback(
  33. const char *path,
  34. const char *locale,
  35. const char *tableKey,
  36. const char *subTableKey,
  37. const char *itemKey,
  38. int32_t *pLength,
  39. UErrorCode *pErrorCode);
  40. namespace {
  41. /*returns true if a is an ID separator false otherwise*/
  42. inline bool _isIDSeparator(char a) { return a == '_' || a == '-'; }
  43. } // namespace
  44. U_CFUNC const char*
  45. uloc_getCurrentCountryID(const char* oldID);
  46. U_CFUNC const char*
  47. uloc_getCurrentLanguageID(const char* oldID);
  48. U_EXPORT std::optional<std::string_view>
  49. ulocimp_toBcpKeyWithFallback(std::string_view keyword);
  50. U_EXPORT std::optional<std::string_view>
  51. ulocimp_toBcpTypeWithFallback(std::string_view keyword, std::string_view value);
  52. U_EXPORT std::optional<std::string_view>
  53. ulocimp_toLegacyKeyWithFallback(std::string_view keyword);
  54. U_EXPORT std::optional<std::string_view>
  55. ulocimp_toLegacyTypeWithFallback(std::string_view keyword, std::string_view value);
  56. U_EXPORT icu::CharString
  57. ulocimp_getKeywords(const char* localeID,
  58. char prev,
  59. bool valuesToo,
  60. UErrorCode& status);
  61. U_EXPORT void
  62. ulocimp_getKeywords(const char* localeID,
  63. char prev,
  64. icu::ByteSink& sink,
  65. bool valuesToo,
  66. UErrorCode& status);
  67. U_EXPORT icu::CharString
  68. ulocimp_getName(const char* localeID,
  69. UErrorCode& err);
  70. U_EXPORT void
  71. ulocimp_getName(const char* localeID,
  72. icu::ByteSink& sink,
  73. UErrorCode& err);
  74. U_EXPORT icu::CharString
  75. ulocimp_getBaseName(const char* localeID,
  76. UErrorCode& err);
  77. U_EXPORT void
  78. ulocimp_getBaseName(const char* localeID,
  79. icu::ByteSink& sink,
  80. UErrorCode& err);
  81. U_EXPORT icu::CharString
  82. ulocimp_canonicalize(const char* localeID,
  83. UErrorCode& err);
  84. U_EXPORT void
  85. ulocimp_canonicalize(const char* localeID,
  86. icu::ByteSink& sink,
  87. UErrorCode& err);
  88. U_EXPORT icu::CharString
  89. ulocimp_getKeywordValue(const char* localeID,
  90. std::string_view keywordName,
  91. UErrorCode& status);
  92. U_EXPORT void
  93. ulocimp_getKeywordValue(const char* localeID,
  94. std::string_view keywordName,
  95. icu::ByteSink& sink,
  96. UErrorCode& status);
  97. U_EXPORT icu::CharString
  98. ulocimp_getLanguage(const char* localeID, UErrorCode& status);
  99. U_EXPORT icu::CharString
  100. ulocimp_getScript(const char* localeID, UErrorCode& status);
  101. U_EXPORT icu::CharString
  102. ulocimp_getRegion(const char* localeID, UErrorCode& status);
  103. U_EXPORT icu::CharString
  104. ulocimp_getVariant(const char* localeID, UErrorCode& status);
  105. U_EXPORT void
  106. ulocimp_setKeywordValue(std::string_view keywordName,
  107. std::string_view keywordValue,
  108. icu::CharString& localeID,
  109. UErrorCode& status);
  110. U_EXPORT int32_t
  111. ulocimp_setKeywordValue(std::string_view keywords,
  112. std::string_view keywordName,
  113. std::string_view keywordValue,
  114. icu::ByteSink& sink,
  115. UErrorCode& status);
  116. U_EXPORT void
  117. ulocimp_getSubtags(
  118. const char* localeID,
  119. icu::CharString* language,
  120. icu::CharString* script,
  121. icu::CharString* region,
  122. icu::CharString* variant,
  123. const char** pEnd,
  124. UErrorCode& status);
  125. U_EXPORT void
  126. ulocimp_getSubtags(
  127. const char* localeID,
  128. icu::ByteSink* language,
  129. icu::ByteSink* script,
  130. icu::ByteSink* region,
  131. icu::ByteSink* variant,
  132. const char** pEnd,
  133. UErrorCode& status);
  134. inline void
  135. ulocimp_getSubtags(
  136. const char* localeID,
  137. std::nullptr_t,
  138. std::nullptr_t,
  139. std::nullptr_t,
  140. std::nullptr_t,
  141. const char** pEnd,
  142. UErrorCode& status) {
  143. ulocimp_getSubtags(
  144. localeID,
  145. static_cast<icu::ByteSink*>(nullptr),
  146. static_cast<icu::ByteSink*>(nullptr),
  147. static_cast<icu::ByteSink*>(nullptr),
  148. static_cast<icu::ByteSink*>(nullptr),
  149. pEnd,
  150. status);
  151. }
  152. U_EXPORT icu::CharString
  153. ulocimp_getParent(const char* localeID,
  154. UErrorCode& err);
  155. U_EXPORT void
  156. ulocimp_getParent(const char* localeID,
  157. icu::ByteSink& sink,
  158. UErrorCode& err);
  159. U_EXPORT icu::CharString
  160. ulocimp_toLanguageTag(const char* localeID,
  161. bool strict,
  162. UErrorCode& status);
  163. /**
  164. * Writes a well-formed language tag for this locale ID.
  165. *
  166. * **Note**: When `strict` is false, any locale fields which do not satisfy the
  167. * BCP47 syntax requirement will be omitted from the result. When `strict` is
  168. * true, this function sets `err` to U_ILLEGAL_ARGUMENT_ERROR if any locale
  169. * fields do not satisfy the BCP47 syntax requirement.
  170. *
  171. * @param localeID the input locale ID
  172. * @param sink the output sink receiving the BCP47 language
  173. * tag for this Locale.
  174. * @param strict boolean value indicating if the function returns
  175. * an error for an ill-formed input locale ID.
  176. * @param err error information if receiving the language
  177. * tag failed.
  178. * (No return value: the BCP47 language tag is written to `sink`.)
  179. *
  180. * @internal ICU 64
  181. */
  182. U_EXPORT void
  183. ulocimp_toLanguageTag(const char* localeID,
  184. icu::ByteSink& sink,
  185. bool strict,
  186. UErrorCode& err);
  187. U_EXPORT icu::CharString
  188. ulocimp_forLanguageTag(const char* langtag,
  189. int32_t tagLen,
  190. int32_t* parsedLength,
  191. UErrorCode& status);
  192. /**
  193. * Returns a locale ID for the specified BCP47 language tag string.
  194. * If the specified language tag contains any ill-formed subtags,
  195. * the first such subtag and all following subtags are ignored.
  196. * <p>
  197. * This implements the 'Language-Tag' production of BCP 47, and so
  198. * supports legacy language tags (marked as “Type: grandfathered” in BCP 47)
  199. * (regular and irregular) as well as private use language tags.
  200. *
  201. * Private use tags are represented as 'x-whatever',
  202. * and legacy tags are converted to their canonical replacements where they exist.
  203. *
  204. * Note that a few legacy tags have no modern replacement;
  205. * these will be converted using the fallback described in
  206. * the first paragraph, so some information might be lost.
  207. *
  208. * @param langtag the input BCP47 language tag.
  209. * @param tagLen the length of langtag, or -1 to call uprv_strlen().
  210. * @param sink the output sink receiving a locale ID for the
  211. * specified BCP47 language tag.
  212. * @param parsedLength if not NULL, successfully parsed length
  213. * for the input language tag is set.
  214. * @param err error information if receiving the locale ID
  215. * failed.
  216. * @internal ICU 63
  217. */
  218. U_EXPORT void
  219. ulocimp_forLanguageTag(const char* langtag,
  220. int32_t tagLen,
  221. icu::ByteSink& sink,
  222. int32_t* parsedLength,
  223. UErrorCode& err);
  224. /**
  225. * Get the region to use for supplemental data lookup. Uses
  226. * (1) any region specified by locale tag "rg"; if none then
  227. * (2) any unicode_region_tag in the locale ID; if none then
  228. * (3) if inferRegion is true, the region suggested by
  229. * getLikelySubtags on the localeID.
  230. * If no region is found, returns an empty string.
  231. *
  232. * @param localeID
  233. * The complete locale ID (with keywords) from which
  234. * to get the region to use for supplemental data.
  235. * @param inferRegion
  236. * If true, will try to infer region from localeID if
  237. * no other region is found.
  238. * @param status
  239. * Reference to in/out UErrorCode value for latest status.
  240. * @return
  241. * The region code found, empty if none found.
  242. * @internal ICU 57
  243. */
  244. U_EXPORT icu::CharString
  245. ulocimp_getRegionForSupplementalData(const char *localeID, bool inferRegion,
  246. UErrorCode& status);
  247. U_EXPORT icu::CharString
  248. ulocimp_addLikelySubtags(const char* localeID,
  249. UErrorCode& status);
  250. /**
  251. * Add the likely subtags for a provided locale ID, per the algorithm described
  252. * in the following CLDR technical report:
  253. *
  254. * http://www.unicode.org/reports/tr35/#Likely_Subtags
  255. *
  256. * If localeID is already in the maximal form, or there is no data available
  257. * for maximization, it will be copied to the output sink. For example,
  258. * "und-Zzzz" cannot be maximized, since there is no reasonable maximization.
  259. *
  260. * Examples:
  261. *
  262. * "en" maximizes to "en_Latn_US"
  263. *
  264. * "de" maximizes to "de_Latn_DE"
  265. *
  266. * "sr" maximizes to "sr_Cyrl_RS"
  267. *
  268. * "sh" maximizes to "sr_Latn_RS" (Note this will not reverse.)
  269. *
  270. * "zh_Hani" maximizes to "zh_Hans_CN" (Note this will not reverse.)
  271. *
  272. * @param localeID The locale to maximize
  273. * @param sink The output sink receiving the maximized locale
  274. * @param err Error information if maximizing the locale failed. If the length
  275. * of the localeID and the null-terminator is greater than the maximum allowed size,
  276. * or the localeId is not well-formed, the error code is U_ILLEGAL_ARGUMENT_ERROR.
  277. * @internal ICU 64
  278. */
  279. U_EXPORT void
  280. ulocimp_addLikelySubtags(const char* localeID,
  281. icu::ByteSink& sink,
  282. UErrorCode& err);
  283. U_EXPORT icu::CharString
  284. ulocimp_minimizeSubtags(const char* localeID,
  285. bool favorScript,
  286. UErrorCode& status);
  287. /**
  288. * Minimize the subtags for a provided locale ID, per the algorithm described
  289. * in the following CLDR technical report:
  290. *
  291. * http://www.unicode.org/reports/tr35/#Likely_Subtags
  292. *
  293. * If localeID is already in the minimal form, or there is no data available
  294. * for minimization, it will be copied to the output sink. Since the
  295. * minimization algorithm relies on proper maximization, see the comments
  296. * for ulocimp_addLikelySubtags for reasons why there might not be any data.
  297. *
  298. * Examples:
  299. *
  300. * "en_Latn_US" minimizes to "en"
  301. *
  302. * "de_Latn_DE" minimizes to "de"
  303. *
  304. * "sr_Cyrl_RS" minimizes to "sr"
  305. *
  306. * "zh_Hant_TW" minimizes to "zh_TW" (The region is preferred to the
  307. * script, and minimizing to "zh" would imply "zh_Hans_CN".)
  308. *
  309. * @param localeID The locale to minimize
  310. * @param sink The output sink receiving the minimized locale
  311. * @param favorScript favor to keep script if true, region if false.
  312. * @param err Error information if minimizing the locale failed. If the length
  313. * of the localeID and the null-terminator is greater than the maximum allowed size,
  314. * or the localeId is not well-formed, the error code is U_ILLEGAL_ARGUMENT_ERROR.
  315. * @internal ICU 64
  316. */
  317. U_EXPORT void
  318. ulocimp_minimizeSubtags(const char* localeID,
  319. icu::ByteSink& sink,
  320. bool favorScript,
  321. UErrorCode& err);
  322. U_CAPI const char * U_EXPORT2
  323. locale_getKeywordsStart(const char *localeID);
  324. bool
  325. ultag_isExtensionSubtags(const char* s, int32_t len);
  326. bool
  327. ultag_isLanguageSubtag(const char* s, int32_t len);
  328. bool
  329. ultag_isPrivateuseValueSubtags(const char* s, int32_t len);
  330. bool
  331. ultag_isRegionSubtag(const char* s, int32_t len);
  332. bool
  333. ultag_isScriptSubtag(const char* s, int32_t len);
  334. bool
  335. ultag_isTransformedExtensionSubtags(const char* s, int32_t len);
  336. bool
  337. ultag_isUnicodeExtensionSubtags(const char* s, int32_t len);
  338. bool
  339. ultag_isUnicodeLocaleAttribute(const char* s, int32_t len);
  340. bool
  341. ultag_isUnicodeLocaleAttributes(const char* s, int32_t len);
  342. bool
  343. ultag_isUnicodeLocaleKey(const char* s, int32_t len);
  344. bool
  345. ultag_isUnicodeLocaleType(const char* s, int32_t len);
  346. bool
  347. ultag_isVariantSubtags(const char* s, int32_t len);
  348. const char*
  349. ultag_getTKeyStart(const char* localeID);
  350. U_EXPORT std::optional<std::string_view>
  351. ulocimp_toBcpKey(std::string_view key);
  352. U_EXPORT std::optional<std::string_view>
  353. ulocimp_toLegacyKey(std::string_view key);
  354. U_EXPORT std::optional<std::string_view>
  355. ulocimp_toBcpType(std::string_view key, std::string_view type);
  356. U_EXPORT std::optional<std::string_view>
  357. ulocimp_toLegacyType(std::string_view key, std::string_view type);
  358. /* Function for testing purpose */
  359. U_EXPORT const char* const*
  360. ulocimp_getKnownCanonicalizedLocaleForTest(int32_t& length);
  361. // Return true if the value is already canonicalized.
  362. U_EXPORT bool
  363. ulocimp_isCanonicalizedLocaleForTest(const char* localeName);
  364. #ifdef __cplusplus
  365. U_NAMESPACE_BEGIN
/**
 * A UObject subclass holding a fixed array of 22 32-bit words that can be
 * queried per region string and compared against another instance.
 *
 * NOTE(review): the "26x26/32" sizing comment on `map` suggests one bit per
 * two-letter (A-Z x A-Z) region code -- confirm against the implementation.
 */
class U_COMMON_API RegionValidateMap : public UObject {
public:
    RegionValidateMap();
    virtual ~RegionValidateMap();
    // Query for a region string; presumably reports whether that region's
    // entry is present in `map` -- confirm against the implementation.
    bool isSet(const char* region) const;
    // Comparison with another map; presumably true when both hold the same
    // contents -- confirm against the implementation.
    bool equals(const RegionValidateMap& that) const;
protected:
    // Maps a region string to an int32_t; presumably the index used to
    // address `map` -- confirm against the implementation.
    int32_t value(const char* region) const;
    // 26 * 26 = 676 entries at 32 per word need 22 words (21.125 rounded up).
    uint32_t map[22]; // 26x26/32 = 22;
};
  376. U_NAMESPACE_END
  377. #endif /* __cplusplus */
  378. #endif