uscript.h 29 KB


  1. // © 2016 and later: Unicode, Inc. and others.
  2. // License & terms of use: http://www.unicode.org/copyright.html
  3. /*
  4. **********************************************************************
  5. * Copyright (C) 1997-2016, International Business Machines
  6. * Corporation and others. All Rights Reserved.
  7. **********************************************************************
  8. *
  9. * File USCRIPT.H
  10. *
  11. * Modification History:
  12. *
  13. * Date Name Description
  14. * 07/06/2001 Ram Creation.
  15. ******************************************************************************
  16. */
  17. #ifndef USCRIPT_H
  18. #define USCRIPT_H
  19. #include "unicode/utypes.h"
  20. /**
  21. * \file
  22. * \brief C API: Unicode Script Information
  23. */
  24. /**
  25. * Constants for ISO 15924 script codes.
  26. *
  27. * The current set of script code constants supports at least all scripts
  28. * that are encoded in the version of Unicode which ICU currently supports.
  29. * The names of the constants are usually derived from the
  30. * Unicode script property value aliases.
  31. * See UAX #24 Unicode Script Property (http://www.unicode.org/reports/tr24/)
  32. * and http://www.unicode.org/Public/UCD/latest/ucd/PropertyValueAliases.txt .
  33. *
  34. * In addition, constants for many ISO 15924 script codes
  35. * are included, for use with language tags, CLDR data, and similar.
  36. * Some of those codes are not used in the Unicode Character Database (UCD).
  37. * For example, there are no characters that have a UCD script property value of
  38. * Hans or Hant. All Han ideographs have the Hani script property value in Unicode.
  39. *
  40. * Private-use codes Qaaa..Qabx are not included, except as used in the UCD or in CLDR.
  41. *
  42. * Starting with ICU 55, script codes are only added when their scripts
  43. * have been or will certainly be encoded in Unicode,
  44. * and have been assigned Unicode script property value aliases,
  45. * to ensure that their script names are stable and match the names of the constants.
  46. * Script codes like Latf and Aran that are not subject to separate encoding
  47. * may be added at any time.
  48. *
  49. * @stable ICU 2.2
  50. */
  51. typedef enum UScriptCode {
  52. /*
  53. * Note: UScriptCode constants and their ISO script code comments
  54. * are parsed by preparseucd.py.
  55. * It matches lines like
  56. * USCRIPT_<Unicode Script value name> = <integer>, / * <ISO script code> * /
  57. */
  58. /** Marks an invalid script code; the only negative enumerator. @stable ICU 2.2 */
  59. USCRIPT_INVALID_CODE = -1,
  60. /** @stable ICU 2.2 */
  61. USCRIPT_COMMON = 0, /* Zyyy */
  62. /** @stable ICU 2.2 */
  63. USCRIPT_INHERITED = 1, /* Zinh */ /* "Code for inherited script", for non-spacing combining marks; also Qaai */
  64. /** @stable ICU 2.2 */
  65. USCRIPT_ARABIC = 2, /* Arab */
  66. /** @stable ICU 2.2 */
  67. USCRIPT_ARMENIAN = 3, /* Armn */
  68. /** @stable ICU 2.2 */
  69. USCRIPT_BENGALI = 4, /* Beng */
  70. /** @stable ICU 2.2 */
  71. USCRIPT_BOPOMOFO = 5, /* Bopo */
  72. /** @stable ICU 2.2 */
  73. USCRIPT_CHEROKEE = 6, /* Cher */
  74. /** @stable ICU 2.2 */
  75. USCRIPT_COPTIC = 7, /* Copt */
  76. /** @stable ICU 2.2 */
  77. USCRIPT_CYRILLIC = 8, /* Cyrl */
  78. /** @stable ICU 2.2 */
  79. USCRIPT_DESERET = 9, /* Dsrt */
  80. /** @stable ICU 2.2 */
  81. USCRIPT_DEVANAGARI = 10, /* Deva */
  82. /** @stable ICU 2.2 */
  83. USCRIPT_ETHIOPIC = 11, /* Ethi */
  84. /** @stable ICU 2.2 */
  85. USCRIPT_GEORGIAN = 12, /* Geor */
  86. /** @stable ICU 2.2 */
  87. USCRIPT_GOTHIC = 13, /* Goth */
  88. /** @stable ICU 2.2 */
  89. USCRIPT_GREEK = 14, /* Grek */
  90. /** @stable ICU 2.2 */
  91. USCRIPT_GUJARATI = 15, /* Gujr */
  92. /** @stable ICU 2.2 */
  93. USCRIPT_GURMUKHI = 16, /* Guru */
  94. /** @stable ICU 2.2 */
  95. USCRIPT_HAN = 17, /* Hani */
  96. /** @stable ICU 2.2 */
  97. USCRIPT_HANGUL = 18, /* Hang */
  98. /** @stable ICU 2.2 */
  99. USCRIPT_HEBREW = 19, /* Hebr */
  100. /** @stable ICU 2.2 */
  101. USCRIPT_HIRAGANA = 20, /* Hira */
  102. /** @stable ICU 2.2 */
  103. USCRIPT_KANNADA = 21, /* Knda */
  104. /** @stable ICU 2.2 */
  105. USCRIPT_KATAKANA = 22, /* Kana */
  106. /** @stable ICU 2.2 */
  107. USCRIPT_KHMER = 23, /* Khmr */
  108. /** @stable ICU 2.2 */
  109. USCRIPT_LAO = 24, /* Laoo */
  110. /** @stable ICU 2.2 */
  111. USCRIPT_LATIN = 25, /* Latn */
  112. /** @stable ICU 2.2 */
  113. USCRIPT_MALAYALAM = 26, /* Mlym */
  114. /** @stable ICU 2.2 */
  115. USCRIPT_MONGOLIAN = 27, /* Mong */
  116. /** @stable ICU 2.2 */
  117. USCRIPT_MYANMAR = 28, /* Mymr */
  118. /** @stable ICU 2.2 */
  119. USCRIPT_OGHAM = 29, /* Ogam */
  120. /** @stable ICU 2.2 */
  121. USCRIPT_OLD_ITALIC = 30, /* Ital */
  122. /** @stable ICU 2.2 */
  123. USCRIPT_ORIYA = 31, /* Orya */
  124. /** @stable ICU 2.2 */
  125. USCRIPT_RUNIC = 32, /* Runr */
  126. /** @stable ICU 2.2 */
  127. USCRIPT_SINHALA = 33, /* Sinh */
  128. /** @stable ICU 2.2 */
  129. USCRIPT_SYRIAC = 34, /* Syrc */
  130. /** @stable ICU 2.2 */
  131. USCRIPT_TAMIL = 35, /* Taml */
  132. /** @stable ICU 2.2 */
  133. USCRIPT_TELUGU = 36, /* Telu */
  134. /** @stable ICU 2.2 */
  135. USCRIPT_THAANA = 37, /* Thaa */
  136. /** @stable ICU 2.2 */
  137. USCRIPT_THAI = 38, /* Thai */
  138. /** @stable ICU 2.2 */
  139. USCRIPT_TIBETAN = 39, /* Tibt */
  140. /** Canadian_Aboriginal script. @stable ICU 2.6 */
  141. USCRIPT_CANADIAN_ABORIGINAL = 40, /* Cans */
  142. /** Canadian_Aboriginal script (alias). @stable ICU 2.2 */
  143. USCRIPT_UCAS = USCRIPT_CANADIAN_ABORIGINAL,
  144. /** @stable ICU 2.2 */
  145. USCRIPT_YI = 41, /* Yiii */
  146. /* New scripts in Unicode 3.2 */
  147. /** @stable ICU 2.2 */
  148. USCRIPT_TAGALOG = 42, /* Tglg */
  149. /** @stable ICU 2.2 */
  150. USCRIPT_HANUNOO = 43, /* Hano */
  151. /** @stable ICU 2.2 */
  152. USCRIPT_BUHID = 44, /* Buhd */
  153. /** @stable ICU 2.2 */
  154. USCRIPT_TAGBANWA = 45, /* Tagb */
  155. /* New scripts in Unicode 4 */
  156. /** @stable ICU 2.6 */
  157. USCRIPT_BRAILLE = 46, /* Brai */
  158. /** @stable ICU 2.6 */
  159. USCRIPT_CYPRIOT = 47, /* Cprt */
  160. /** @stable ICU 2.6 */
  161. USCRIPT_LIMBU = 48, /* Limb */
  162. /** @stable ICU 2.6 */
  163. USCRIPT_LINEAR_B = 49, /* Linb */
  164. /** @stable ICU 2.6 */
  165. USCRIPT_OSMANYA = 50, /* Osma */
  166. /** @stable ICU 2.6 */
  167. USCRIPT_SHAVIAN = 51, /* Shaw */
  168. /** @stable ICU 2.6 */
  169. USCRIPT_TAI_LE = 52, /* Tale */
  170. /** @stable ICU 2.6 */
  171. USCRIPT_UGARITIC = 53, /* Ugar */
  172. /** New script code in Unicode 4.0.1 @stable ICU 3.0 */
  173. USCRIPT_KATAKANA_OR_HIRAGANA = 54,/*Hrkt */
  174. /* New scripts in Unicode 4.1 */
  175. /** @stable ICU 3.4 */
  176. USCRIPT_BUGINESE = 55, /* Bugi */
  177. /** @stable ICU 3.4 */
  178. USCRIPT_GLAGOLITIC = 56, /* Glag */
  179. /** @stable ICU 3.4 */
  180. USCRIPT_KHAROSHTHI = 57, /* Khar */
  181. /** @stable ICU 3.4 */
  182. USCRIPT_SYLOTI_NAGRI = 58, /* Sylo */
  183. /** @stable ICU 3.4 */
  184. USCRIPT_NEW_TAI_LUE = 59, /* Talu */
  185. /** @stable ICU 3.4 */
  186. USCRIPT_TIFINAGH = 60, /* Tfng */
  187. /** @stable ICU 3.4 */
  188. USCRIPT_OLD_PERSIAN = 61, /* Xpeo */
  189. /* New script codes from Unicode and ISO 15924 */
  190. /** @stable ICU 3.6 */
  191. USCRIPT_BALINESE = 62, /* Bali */
  192. /** @stable ICU 3.6 */
  193. USCRIPT_BATAK = 63, /* Batk */
  194. /** @stable ICU 3.6 */
  195. USCRIPT_BLISSYMBOLS = 64, /* Blis */
  196. /** @stable ICU 3.6 */
  197. USCRIPT_BRAHMI = 65, /* Brah */
  198. /** @stable ICU 3.6 */
  199. USCRIPT_CHAM = 66, /* Cham */
  200. /** @stable ICU 3.6 */
  201. USCRIPT_CIRTH = 67, /* Cirt */
  202. /** @stable ICU 3.6 */
  203. USCRIPT_OLD_CHURCH_SLAVONIC_CYRILLIC = 68, /* Cyrs */
  204. /** @stable ICU 3.6 */
  205. USCRIPT_DEMOTIC_EGYPTIAN = 69, /* Egyd */
  206. /** @stable ICU 3.6 */
  207. USCRIPT_HIERATIC_EGYPTIAN = 70, /* Egyh */
  208. /** @stable ICU 3.6 */
  209. USCRIPT_EGYPTIAN_HIEROGLYPHS = 71, /* Egyp */
  210. /** @stable ICU 3.6 */
  211. USCRIPT_KHUTSURI = 72, /* Geok */
  212. /** @stable ICU 3.6 */
  213. USCRIPT_SIMPLIFIED_HAN = 73, /* Hans */
  214. /** @stable ICU 3.6 */
  215. USCRIPT_TRADITIONAL_HAN = 74, /* Hant */
  216. /** @stable ICU 3.6 */
  217. USCRIPT_PAHAWH_HMONG = 75, /* Hmng */
  218. /** @stable ICU 3.6 */
  219. USCRIPT_OLD_HUNGARIAN = 76, /* Hung */
  220. /** @stable ICU 3.6 */
  221. USCRIPT_HARAPPAN_INDUS = 77, /* Inds */
  222. /** @stable ICU 3.6 */
  223. USCRIPT_JAVANESE = 78, /* Java */
  224. /** @stable ICU 3.6 */
  225. USCRIPT_KAYAH_LI = 79, /* Kali */
  226. /** @stable ICU 3.6 */
  227. USCRIPT_LATIN_FRAKTUR = 80, /* Latf */
  228. /** @stable ICU 3.6 */
  229. USCRIPT_LATIN_GAELIC = 81, /* Latg */
  230. /** @stable ICU 3.6 */
  231. USCRIPT_LEPCHA = 82, /* Lepc */
  232. /** @stable ICU 3.6 */
  233. USCRIPT_LINEAR_A = 83, /* Lina */
  234. /** @stable ICU 4.6 */
  235. USCRIPT_MANDAIC = 84, /* Mand */
  236. /** Alias for USCRIPT_MANDAIC (same numeric value). @stable ICU 3.6 */
  237. USCRIPT_MANDAEAN = USCRIPT_MANDAIC,
  238. /** @stable ICU 3.6 */
  239. USCRIPT_MAYAN_HIEROGLYPHS = 85, /* Maya */
  240. /** @stable ICU 4.6 */
  241. USCRIPT_MEROITIC_HIEROGLYPHS = 86, /* Mero */
  242. /** Alias for USCRIPT_MEROITIC_HIEROGLYPHS (same numeric value). @stable ICU 3.6 */
  243. USCRIPT_MEROITIC = USCRIPT_MEROITIC_HIEROGLYPHS,
  244. /** @stable ICU 3.6 */
  245. USCRIPT_NKO = 87, /* Nkoo */
  246. /** @stable ICU 3.6 */
  247. USCRIPT_ORKHON = 88, /* Orkh */
  248. /** @stable ICU 3.6 */
  249. USCRIPT_OLD_PERMIC = 89, /* Perm */
  250. /** @stable ICU 3.6 */
  251. USCRIPT_PHAGS_PA = 90, /* Phag */
  252. /** @stable ICU 3.6 */
  253. USCRIPT_PHOENICIAN = 91, /* Phnx */
  254. /** @stable ICU 52 */
  255. USCRIPT_MIAO = 92, /* Plrd */
  256. /** Alias for USCRIPT_MIAO (same numeric value). @stable ICU 3.6 */
  257. USCRIPT_PHONETIC_POLLARD = USCRIPT_MIAO,
  258. /** @stable ICU 3.6 */
  259. USCRIPT_RONGORONGO = 93, /* Roro */
  260. /** @stable ICU 3.6 */
  261. USCRIPT_SARATI = 94, /* Sara */
  262. /** @stable ICU 3.6 */
  263. USCRIPT_ESTRANGELO_SYRIAC = 95, /* Syre */
  264. /** @stable ICU 3.6 */
  265. USCRIPT_WESTERN_SYRIAC = 96, /* Syrj */
  266. /** @stable ICU 3.6 */
  267. USCRIPT_EASTERN_SYRIAC = 97, /* Syrn */
  268. /** @stable ICU 3.6 */
  269. USCRIPT_TENGWAR = 98, /* Teng */
  270. /** @stable ICU 3.6 */
  271. USCRIPT_VAI = 99, /* Vaii */
  272. /** @stable ICU 3.6 */
  273. USCRIPT_VISIBLE_SPEECH = 100,/* Visp */
  274. /** @stable ICU 3.6 */
  275. USCRIPT_CUNEIFORM = 101,/* Xsux */
  276. /** @stable ICU 3.6 */
  277. USCRIPT_UNWRITTEN_LANGUAGES = 102,/* Zxxx */
  278. /** @stable ICU 3.6 */
  279. USCRIPT_UNKNOWN = 103,/* Zzzz */ /* Unknown="Code for uncoded script", for unassigned code points */
  280. /** @stable ICU 3.8 */
  281. USCRIPT_CARIAN = 104,/* Cari */
  282. /** @stable ICU 3.8 */
  283. USCRIPT_JAPANESE = 105,/* Jpan */
  284. /** @stable ICU 3.8 */
  285. USCRIPT_LANNA = 106,/* Lana */
  286. /** @stable ICU 3.8 */
  287. USCRIPT_LYCIAN = 107,/* Lyci */
  288. /** @stable ICU 3.8 */
  289. USCRIPT_LYDIAN = 108,/* Lydi */
  290. /** @stable ICU 3.8 */
  291. USCRIPT_OL_CHIKI = 109,/* Olck */
  292. /** @stable ICU 3.8 */
  293. USCRIPT_REJANG = 110,/* Rjng */
  294. /** @stable ICU 3.8 */
  295. USCRIPT_SAURASHTRA = 111,/* Saur */
  296. /** Sutton SignWriting @stable ICU 3.8 */
  297. USCRIPT_SIGN_WRITING = 112,/* Sgnw */
  298. /** @stable ICU 3.8 */
  299. USCRIPT_SUNDANESE = 113,/* Sund */
  300. /** @stable ICU 3.8 */
  301. USCRIPT_MOON = 114,/* Moon */
  302. /** @stable ICU 3.8 */
  303. USCRIPT_MEITEI_MAYEK = 115,/* Mtei */
  304. /** @stable ICU 4.0 */
  305. USCRIPT_IMPERIAL_ARAMAIC = 116,/* Armi */
  306. /** @stable ICU 4.0 */
  307. USCRIPT_AVESTAN = 117,/* Avst */
  308. /** @stable ICU 4.0 */
  309. USCRIPT_CHAKMA = 118,/* Cakm */
  310. /** @stable ICU 4.0 */
  311. USCRIPT_KOREAN = 119,/* Kore */
  312. /** @stable ICU 4.0 */
  313. USCRIPT_KAITHI = 120,/* Kthi */
  314. /** @stable ICU 4.0 */
  315. USCRIPT_MANICHAEAN = 121,/* Mani */
  316. /** @stable ICU 4.0 */
  317. USCRIPT_INSCRIPTIONAL_PAHLAVI = 122,/* Phli */
  318. /** @stable ICU 4.0 */
  319. USCRIPT_PSALTER_PAHLAVI = 123,/* Phlp */
  320. /** @stable ICU 4.0 */
  321. USCRIPT_BOOK_PAHLAVI = 124,/* Phlv */
  322. /** @stable ICU 4.0 */
  323. USCRIPT_INSCRIPTIONAL_PARTHIAN = 125,/* Prti */
  324. /** @stable ICU 4.0 */
  325. USCRIPT_SAMARITAN = 126,/* Samr */
  326. /** @stable ICU 4.0 */
  327. USCRIPT_TAI_VIET = 127,/* Tavt */
  328. /** @stable ICU 4.0 */
  329. USCRIPT_MATHEMATICAL_NOTATION = 128,/* Zmth */
  330. /** @stable ICU 4.0 */
  331. USCRIPT_SYMBOLS = 129,/* Zsym */
  332. /** @stable ICU 4.4 */
  333. USCRIPT_BAMUM = 130,/* Bamu */
  334. /** @stable ICU 4.4 */
  335. USCRIPT_LISU = 131,/* Lisu */
  336. /** @stable ICU 4.4 */
  337. USCRIPT_NAKHI_GEBA = 132,/* Nkgb */
  338. /** @stable ICU 4.4 */
  339. USCRIPT_OLD_SOUTH_ARABIAN = 133,/* Sarb */
  340. /** @stable ICU 4.6 */
  341. USCRIPT_BASSA_VAH = 134,/* Bass */
  342. /** @stable ICU 54 */
  343. USCRIPT_DUPLOYAN = 135,/* Dupl */
  344. #ifndef U_HIDE_DEPRECATED_API
  345. /** @deprecated ICU 54 Typo, use USCRIPT_DUPLOYAN */
  346. USCRIPT_DUPLOYAN_SHORTAND = USCRIPT_DUPLOYAN,
  347. #endif /* U_HIDE_DEPRECATED_API */
  348. /** @stable ICU 4.6 */
  349. USCRIPT_ELBASAN = 136,/* Elba */
  350. /** @stable ICU 4.6 */
  351. USCRIPT_GRANTHA = 137,/* Gran */
  352. /** @stable ICU 4.6 */
  353. USCRIPT_KPELLE = 138,/* Kpel */
  354. /** @stable ICU 4.6 */
  355. USCRIPT_LOMA = 139,/* Loma */
  356. /** Mende Kikakui @stable ICU 4.6 */
  357. USCRIPT_MENDE = 140,/* Mend */
  358. /** @stable ICU 4.6 */
  359. USCRIPT_MEROITIC_CURSIVE = 141,/* Merc */
  360. /** @stable ICU 4.6 */
  361. USCRIPT_OLD_NORTH_ARABIAN = 142,/* Narb */
  362. /** @stable ICU 4.6 */
  363. USCRIPT_NABATAEAN = 143,/* Nbat */
  364. /** @stable ICU 4.6 */
  365. USCRIPT_PALMYRENE = 144,/* Palm */
  366. /** @stable ICU 54 */
  367. USCRIPT_KHUDAWADI = 145,/* Sind */
  368. /** Alias for USCRIPT_KHUDAWADI (same numeric value). @stable ICU 4.6 */
  369. USCRIPT_SINDHI = USCRIPT_KHUDAWADI,
  370. /** @stable ICU 4.6 */
  371. USCRIPT_WARANG_CITI = 146,/* Wara */
  372. /** @stable ICU 4.8 */
  373. USCRIPT_AFAKA = 147,/* Afak */
  374. /** @stable ICU 4.8 */
  375. USCRIPT_JURCHEN = 148,/* Jurc */
  376. /** @stable ICU 4.8 */
  377. USCRIPT_MRO = 149,/* Mroo */
  378. /** @stable ICU 4.8 */
  379. USCRIPT_NUSHU = 150,/* Nshu */
  380. /** @stable ICU 4.8 */
  381. USCRIPT_SHARADA = 151,/* Shrd */
  382. /** @stable ICU 4.8 */
  383. USCRIPT_SORA_SOMPENG = 152,/* Sora */
  384. /** @stable ICU 4.8 */
  385. USCRIPT_TAKRI = 153,/* Takr */
  386. /** @stable ICU 4.8 */
  387. USCRIPT_TANGUT = 154,/* Tang */
  388. /** @stable ICU 4.8 */
  389. USCRIPT_WOLEAI = 155,/* Wole */
  390. /** @stable ICU 49 */
  391. USCRIPT_ANATOLIAN_HIEROGLYPHS = 156,/* Hluw */
  392. /** @stable ICU 49 */
  393. USCRIPT_KHOJKI = 157,/* Khoj */
  394. /** @stable ICU 49 */
  395. USCRIPT_TIRHUTA = 158,/* Tirh */
  396. /** @stable ICU 52 */
  397. USCRIPT_CAUCASIAN_ALBANIAN = 159,/* Aghb */
  398. /** @stable ICU 52 */
  399. USCRIPT_MAHAJANI = 160,/* Mahj */
  400. /** @stable ICU 54 */
  401. USCRIPT_AHOM = 161,/* Ahom */
  402. /** @stable ICU 54 */
  403. USCRIPT_HATRAN = 162,/* Hatr */
  404. /** @stable ICU 54 */
  405. USCRIPT_MODI = 163,/* Modi */
  406. /** @stable ICU 54 */
  407. USCRIPT_MULTANI = 164,/* Mult */
  408. /** @stable ICU 54 */
  409. USCRIPT_PAU_CIN_HAU = 165,/* Pauc */
  410. /** @stable ICU 54 */
  411. USCRIPT_SIDDHAM = 166,/* Sidd */
  412. /** @stable ICU 58 */
  413. USCRIPT_ADLAM = 167,/* Adlm */
  414. /** @stable ICU 58 */
  415. USCRIPT_BHAIKSUKI = 168,/* Bhks */
  416. /** @stable ICU 58 */
  417. USCRIPT_MARCHEN = 169,/* Marc */
  418. /** @stable ICU 58 */
  419. USCRIPT_NEWA = 170,/* Newa */
  420. /** @stable ICU 58 */
  421. USCRIPT_OSAGE = 171,/* Osge */
  422. /** @stable ICU 58 */
  423. USCRIPT_HAN_WITH_BOPOMOFO = 172,/* Hanb */
  424. /** @stable ICU 58 */
  425. USCRIPT_JAMO = 173,/* Jamo */
  426. /** @stable ICU 58 */
  427. USCRIPT_SYMBOLS_EMOJI = 174,/* Zsye */
  428. /** @stable ICU 60 */
  429. USCRIPT_MASARAM_GONDI = 175,/* Gonm */
  430. /** @stable ICU 60 */
  431. USCRIPT_SOYOMBO = 176,/* Soyo */
  432. /** @stable ICU 60 */
  433. USCRIPT_ZANABAZAR_SQUARE = 177,/* Zanb */
  434. /** @stable ICU 62 */
  435. USCRIPT_DOGRA = 178,/* Dogr */
  436. /** @stable ICU 62 */
  437. USCRIPT_GUNJALA_GONDI = 179,/* Gong */
  438. /** @stable ICU 62 */
  439. USCRIPT_MAKASAR = 180,/* Maka */
  440. /** @stable ICU 62 */
  441. USCRIPT_MEDEFAIDRIN = 181,/* Medf */
  442. /** @stable ICU 62 */
  443. USCRIPT_HANIFI_ROHINGYA = 182,/* Rohg */
  444. /** @stable ICU 62 */
  445. USCRIPT_SOGDIAN = 183,/* Sogd */
  446. /** @stable ICU 62 */
  447. USCRIPT_OLD_SOGDIAN = 184,/* Sogo */
  448. /** @stable ICU 64 */
  449. USCRIPT_ELYMAIC = 185,/* Elym */
  450. /** @stable ICU 64 */
  451. USCRIPT_NYIAKENG_PUACHUE_HMONG = 186,/* Hmnp */
  452. /** @stable ICU 64 */
  453. USCRIPT_NANDINAGARI = 187,/* Nand */
  454. /** @stable ICU 64 */
  455. USCRIPT_WANCHO = 188,/* Wcho */
  456. /** @stable ICU 66 */
  457. USCRIPT_CHORASMIAN = 189,/* Chrs */
  458. /** @stable ICU 66 */
  459. USCRIPT_DIVES_AKURU = 190,/* Diak */
  460. /** @stable ICU 66 */
  461. USCRIPT_KHITAN_SMALL_SCRIPT = 191,/* Kits */
  462. /** @stable ICU 66 */
  463. USCRIPT_YEZIDI = 192,/* Yezi */
  464. /** @stable ICU 70 */
  465. USCRIPT_CYPRO_MINOAN = 193,/* Cpmn */
  466. /** @stable ICU 70 */
  467. USCRIPT_OLD_UYGHUR = 194,/* Ougr */
  468. /** @stable ICU 70 */
  469. USCRIPT_TANGSA = 195,/* Tnsa */
  470. /** @stable ICU 70 */
  471. USCRIPT_TOTO = 196,/* Toto */
  472. /** @stable ICU 70 */
  473. USCRIPT_VITHKUQI = 197,/* Vith */
  474. /** @stable ICU 72 */
  475. USCRIPT_KAWI = 198,/* Kawi */
  476. /** @stable ICU 72 */
  477. USCRIPT_NAG_MUNDARI = 199,/* Nagm */
  478. /** @stable ICU 75 */
  479. USCRIPT_ARABIC_NASTALIQ = 200, /* Aran */
  480. /** @stable ICU 76 */
  481. USCRIPT_GARAY = 201, /* Gara */
  482. /** @stable ICU 76 */
  483. USCRIPT_GURUNG_KHEMA = 202, /* Gukh */
  484. /** @stable ICU 76 */
  485. USCRIPT_KIRAT_RAI = 203, /* Krai */
  486. /** @stable ICU 76 */
  487. USCRIPT_OL_ONAL = 204, /* Onao */
  488. /** @stable ICU 76 */
  489. USCRIPT_SUNUWAR = 205, /* Sunu */
  490. /** @stable ICU 76 */
  491. USCRIPT_TODHRI = 206, /* Todr */
  492. /** @stable ICU 76 */
  493. USCRIPT_TULU_TIGALARI = 207, /* Tutg */
  494. #ifndef U_HIDE_DEPRECATED_API
  495. /**
  496. * One more than the highest normal UScriptCode value.
  497. * The highest value is available via u_getIntPropertyMaxValue(UCHAR_SCRIPT).
  498. *
  499. * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
  500. */
  501. USCRIPT_CODE_LIMIT = 208
  502. #endif /* U_HIDE_DEPRECATED_API */
  503. } UScriptCode;
  504. /**
  505. * Gets the script codes associated with the given locale or ISO 15924 abbreviation or name.
  506. * Fills in USCRIPT_MALAYALAM given "Malayalam" OR "Mlym".
  507. * Fills in USCRIPT_LATIN given "en" OR "en_US".
  508. * If the required capacity is greater than the capacity of the destination buffer,
  509. * then the error code is set to U_BUFFER_OVERFLOW_ERROR and the required capacity is returned.
  510. *
  511. * <p>Note: To search by short or long script alias only, use
  512. * u_getPropertyValueEnum(UCHAR_SCRIPT, alias) instead. That does
  513. * a fast lookup with no access of the locale data.
  514. *
  515. * @param nameOrAbbrOrLocale name of the script, as given in
  516. * PropertyValueAliases.txt, or ISO 15924 code or locale
  517. * @param fillIn the UScriptCode buffer to fill in the script code
  518. * @param capacity the capacity (size) of UScriptCode buffer passed in.
  519. * @param err the error status code.
  520. * @return The number of script codes filled in the buffer passed in
  521. * @stable ICU 2.4
  522. */
  523. U_CAPI int32_t U_EXPORT2
  524. uscript_getCode(const char* nameOrAbbrOrLocale,UScriptCode* fillIn,int32_t capacity,UErrorCode *err);
  525. /**
  526. * Returns the long Unicode script name, if there is one.
  527. * Otherwise returns the 4-letter ISO 15924 script code.
  528. * Returns "Malayalam" given USCRIPT_MALAYALAM.
  529. *
  530. * @param scriptCode UScriptCode enum
  531. * @return long script name as given in PropertyValueAliases.txt, or the 4-letter code,
  532. * or NULL if scriptCode is invalid
  533. * @stable ICU 2.4
  534. */
  535. U_CAPI const char* U_EXPORT2
  536. uscript_getName(UScriptCode scriptCode);
  537. /**
  538. * Returns the 4-letter ISO 15924 script code,
  539. * which is the same as the short Unicode script name if Unicode has names for the script.
  540. * Returns "Mlym" given USCRIPT_MALAYALAM.
  541. *
  542. * @param scriptCode UScriptCode enum
  543. * @return short script name (4-letter code), or NULL if scriptCode is invalid
  544. * @stable ICU 2.4
  545. */
  546. U_CAPI const char* U_EXPORT2
  547. uscript_getShortName(UScriptCode scriptCode);
  548. /**
  549. * Gets the script code associated with the given codepoint.
  550. * Returns USCRIPT_MALAYALAM given 0x0D02
  551. * @param codepoint UChar32 codepoint
  552. * @param err the error status code.
  553. * @return The UScriptCode, or 0 if codepoint is invalid
  554. * @stable ICU 2.4
  555. */
  556. U_CAPI UScriptCode U_EXPORT2
  557. uscript_getScript(UChar32 codepoint, UErrorCode *err);
  558. /**
  559. * Do the Script_Extensions of code point c contain script sc?
  560. * If c does not have explicit Script_Extensions, then this tests whether
  561. * c has the Script property value sc.
  562. *
  563. * Some characters are commonly used in multiple scripts.
  564. * For more information, see UAX #24: http://www.unicode.org/reports/tr24/.
  565. * @param c code point
  566. * @param sc script code
  567. * @return true if sc is in Script_Extensions(c)
  568. * @stable ICU 49
  569. */
  570. U_CAPI UBool U_EXPORT2
  571. uscript_hasScript(UChar32 c, UScriptCode sc);
  572. /**
  573. * Writes code point c's Script_Extensions as a list of UScriptCode values
  574. * to the output scripts array and returns the number of script codes.
  575. * - If c does have Script_Extensions, then the Script property value
  576. * (normally Common or Inherited) is not included.
  577. * - If c does not have Script_Extensions, then the one Script code is written to the output array.
  578. * - If c is not a valid code point, then the one USCRIPT_UNKNOWN code is written.
  579. * In other words, if the return value is 1,
  580. * then the output array contains exactly c's single Script code.
  581. * If the return value is n>=2, then the output array contains c's n Script_Extensions script codes.
  582. *
  583. * Some characters are commonly used in multiple scripts.
  584. * For more information, see UAX #24: http://www.unicode.org/reports/tr24/.
  585. *
  586. * If there are more than capacity script codes to be written, then
  587. * U_BUFFER_OVERFLOW_ERROR is set and the number of Script_Extensions is returned.
  588. * (Usual ICU buffer handling behavior.)
  589. *
  590. * @param c code point
  591. * @param scripts output script code array
  592. * @param capacity capacity of the scripts array
  593. * @param errorCode Standard ICU error code. Its input value must
  594. * pass the U_SUCCESS() test, or else the function returns
  595. * immediately. Check for U_FAILURE() on output or use with
  596. * function chaining. (See User Guide for details.)
  597. * @return number of script codes in c's Script_Extensions, or 1 for the single Script value,
  598. * written to scripts unless U_BUFFER_OVERFLOW_ERROR indicates insufficient capacity
  599. * @stable ICU 49
  600. */
  601. U_CAPI int32_t U_EXPORT2
  602. uscript_getScriptExtensions(UChar32 c,
  603. UScriptCode *scripts, int32_t capacity,
  604. UErrorCode *errorCode);
  605. /**
  606. * Script usage constants.
  607. * See UAX #31 Unicode Identifier and Pattern Syntax.
  608. * http://www.unicode.org/reports/tr31/#Table_Candidate_Characters_for_Exclusion_from_Identifiers
  609. * No explicit values are assigned, so the enumerators are numbered 0 through 5 in declaration order.
  610. * @stable ICU 51
  611. */
  612. typedef enum UScriptUsage {
  613. /** Not encoded in Unicode. @stable ICU 51 */
  614. USCRIPT_USAGE_NOT_ENCODED,
  615. /** Unknown script usage. @stable ICU 51 */
  616. USCRIPT_USAGE_UNKNOWN,
  617. /** Candidate for Exclusion from Identifiers. @stable ICU 51 */
  618. USCRIPT_USAGE_EXCLUDED,
  619. /** Limited Use script. @stable ICU 51 */
  620. USCRIPT_USAGE_LIMITED_USE,
  621. /** Aspirational Use script. @stable ICU 51 */
  622. USCRIPT_USAGE_ASPIRATIONAL,
  623. /** Recommended script. @stable ICU 51 */
  624. USCRIPT_USAGE_RECOMMENDED
  625. } UScriptUsage;
  626. /**
  627. * Writes the script sample character string.
  628. * This string normally consists of one code point but might be longer.
  629. * The string is empty if the script is not encoded.
  630. *
  631. * @param script script code
  632. * @param dest output string array
  633. * @param capacity number of UChars in the dest array
  634. * @param pErrorCode standard ICU in/out error code, must pass U_SUCCESS() on input
  635. * @return the string length, even if U_BUFFER_OVERFLOW_ERROR
  636. * @stable ICU 51
  637. */
  638. U_CAPI int32_t U_EXPORT2
  639. uscript_getSampleString(UScriptCode script, UChar *dest, int32_t capacity, UErrorCode *pErrorCode);
  640. #if U_SHOW_CPLUSPLUS_API
  641. U_NAMESPACE_BEGIN
  642. class UnicodeString;
  643. U_NAMESPACE_END
  644. /**
  645. * Returns the script sample character string.
  646. * This string normally consists of one code point but might be longer.
  647. * The string is empty if the script is not encoded.
  648. *
  649. * @param script script code
  650. * @return the sample character string
  651. * @stable ICU 51
  652. */
  653. U_COMMON_API icu::UnicodeString U_EXPORT2
  654. uscript_getSampleUnicodeString(UScriptCode script);
  655. #endif
  656. /**
  657. * Returns the script usage according to UAX #31 Unicode Identifier and Pattern Syntax.
  658. * Returns USCRIPT_USAGE_NOT_ENCODED if the script is not encoded in Unicode.
  659. *
  660. * @param script script code
  661. * @return script usage
  662. * @see UScriptUsage
  663. * @stable ICU 51
  664. */
  665. U_CAPI UScriptUsage U_EXPORT2
  666. uscript_getUsage(UScriptCode script);
  667. /**
  668. * Returns true if the script is written right-to-left.
  669. * For example, Arab and Hebr.
  670. *
  671. * @param script script code
  672. * @return true if the script is right-to-left
  673. * @stable ICU 51
  674. */
  675. U_CAPI UBool U_EXPORT2
  676. uscript_isRightToLeft(UScriptCode script);
  677. /**
  678. * Returns true if the script allows line breaks between letters (excluding hyphenation).
  679. * Such a script typically requires dictionary-based line breaking.
  680. * For example, Hani and Thai.
  681. *
  682. * @param script script code
  683. * @return true if the script allows line breaks between letters
  684. * @stable ICU 51
  685. */
  686. U_CAPI UBool U_EXPORT2
  687. uscript_breaksBetweenLetters(UScriptCode script);
  688. /**
  689. * Returns true if in modern (or most recent) usage of the script case distinctions are customary.
  690. * For example, Latn and Cyrl.
  691. *
  692. * @param script script code
  693. * @return true if the script is cased
  694. * @stable ICU 51
  695. */
  696. U_CAPI UBool U_EXPORT2
  697. uscript_isCased(UScriptCode script);
  698. #endif