locale.py 77 KB


  1. """Locale support module.
  2. The module provides low-level access to the C lib's locale APIs and adds high
  3. level number formatting APIs as well as a locale aliasing engine to complement
  4. these.
  5. The aliasing engine includes support for many commonly used locale names and
  6. maps them to values suitable for passing to the C lib's setlocale() function. It
  7. also includes default encodings for all supported locale names.
  8. """
  9. import sys
  10. import encodings
  11. import encodings.aliases
  12. import re
  13. import _collections_abc
  14. from builtins import str as _builtin_str
  15. import functools
  16. # Try importing the _locale module.
  17. #
  18. # If this fails, fall back on a basic 'C' locale emulation.
  19. # Yuck: LC_MESSAGES is non-standard: can't tell whether it exists before
  20. # trying the import. So __all__ is also fiddled at the end of the file.
# Names exported by ``from locale import *``.  LC_MESSAGES is not listed
# here: it is non-standard, so whether it exists is only known once the
# _locale import has been attempted.
__all__ = ["getlocale", "getdefaultlocale", "getpreferredencoding", "Error",
           "setlocale", "resetlocale", "localeconv", "strcoll", "strxfrm",
           "str", "atof", "atoi", "format_string", "currency",
           "normalize", "LC_CTYPE", "LC_COLLATE", "LC_TIME", "LC_MONETARY",
           "LC_NUMERIC", "LC_ALL", "CHAR_MAX", "getencoding"]
  26. def _strcoll(a,b):
  27. """ strcoll(string,string) -> int.
  28. Compares two strings according to the locale.
  29. """
  30. return (a > b) - (a < b)
  31. def _strxfrm(s):
  32. """ strxfrm(string) -> string.
  33. Returns a string that behaves for cmp locale-aware.
  34. """
  35. return s
  36. try:
  37. from _locale import *
  38. except ImportError:
  39. # Locale emulation
  40. CHAR_MAX = 127
  41. LC_ALL = 6
  42. LC_COLLATE = 3
  43. LC_CTYPE = 0
  44. LC_MESSAGES = 5
  45. LC_MONETARY = 4
  46. LC_NUMERIC = 1
  47. LC_TIME = 2
  48. Error = ValueError
  49. def localeconv():
  50. """ localeconv() -> dict.
  51. Returns numeric and monetary locale-specific parameters.
  52. """
  53. # 'C' locale default values
  54. return {'grouping': [127],
  55. 'currency_symbol': '',
  56. 'n_sign_posn': 127,
  57. 'p_cs_precedes': 127,
  58. 'n_cs_precedes': 127,
  59. 'mon_grouping': [],
  60. 'n_sep_by_space': 127,
  61. 'decimal_point': '.',
  62. 'negative_sign': '',
  63. 'positive_sign': '',
  64. 'p_sep_by_space': 127,
  65. 'int_curr_symbol': '',
  66. 'p_sign_posn': 127,
  67. 'thousands_sep': '',
  68. 'mon_thousands_sep': '',
  69. 'frac_digits': 127,
  70. 'mon_decimal_point': '',
  71. 'int_frac_digits': 127}
  72. def setlocale(category, value=None):
  73. """ setlocale(integer,string=None) -> string.
  74. Activates/queries locale processing.
  75. """
  76. if value not in (None, '', 'C'):
  77. raise Error('_locale emulation only supports "C" locale')
  78. return 'C'
# These may or may not exist in _locale, so be sure to set them.
# When a name is not already bound at module level, fall back on the
# pure-Python _strxfrm / _strcoll defined above.
if 'strxfrm' not in globals():
    strxfrm = _strxfrm
if 'strcoll' not in globals():
    strcoll = _strcoll
  84. _localeconv = localeconv
  85. # With this dict, you can override some items of localeconv's return value.
  86. # This is useful for testing purposes.
  87. _override_localeconv = {}
  88. @functools.wraps(_localeconv)
  89. def localeconv():
  90. d = _localeconv()
  91. if _override_localeconv:
  92. d.update(_override_localeconv)
  93. return d
  94. ### Number formatting APIs
  95. # Author: Martin von Loewis
  96. # improved by Georg Brandl
  97. # Iterate over grouping intervals
  98. def _grouping_intervals(grouping):
  99. last_interval = None
  100. for interval in grouping:
  101. # if grouping is -1, we are done
  102. if interval == CHAR_MAX:
  103. return
  104. # 0: re-use last group ad infinitum
  105. if interval == 0:
  106. if last_interval is None:
  107. raise ValueError("invalid grouping")
  108. while True:
  109. yield last_interval
  110. yield interval
  111. last_interval = interval
  112. #perform the grouping from right to left
  113. def _group(s, monetary=False):
  114. conv = localeconv()
  115. thousands_sep = conv[monetary and 'mon_thousands_sep' or 'thousands_sep']
  116. grouping = conv[monetary and 'mon_grouping' or 'grouping']
  117. if not grouping:
  118. return (s, 0)
  119. if s[-1] == ' ':
  120. stripped = s.rstrip()
  121. right_spaces = s[len(stripped):]
  122. s = stripped
  123. else:
  124. right_spaces = ''
  125. left_spaces = ''
  126. groups = []
  127. for interval in _grouping_intervals(grouping):
  128. if not s or s[-1] not in "0123456789":
  129. # only non-digit characters remain (sign, spaces)
  130. left_spaces = s
  131. s = ''
  132. break
  133. groups.append(s[-interval:])
  134. s = s[:-interval]
  135. if s:
  136. groups.append(s)
  137. groups.reverse()
  138. return (
  139. left_spaces + thousands_sep.join(groups) + right_spaces,
  140. len(thousands_sep) * (len(groups) - 1)
  141. )
  142. # Strip a given amount of excess padding from the given string
  143. def _strip_padding(s, amount):
  144. lpos = 0
  145. while amount and s[lpos] == ' ':
  146. lpos += 1
  147. amount -= 1
  148. rpos = len(s) - 1
  149. while amount and s[rpos] == ' ':
  150. rpos -= 1
  151. amount -= 1
  152. return s[lpos:rpos+1]
# Matches one %-style conversion specifier: "%", an optional "(key)" part
# (group "key"), a run of flag/width/precision/length characters (group
# "modifiers") and the final conversion character, which may itself be "%".
_percent_re = re.compile(r'%(?:\((?P<key>.*?)\))?'
                         r'(?P<modifiers>[-#0-9 +*.hlL]*?)[eEfFgGdiouxXcrs%]')
  155. def _format(percent, value, grouping=False, monetary=False, *additional):
  156. if additional:
  157. formatted = percent % ((value,) + additional)
  158. else:
  159. formatted = percent % value
  160. if percent[-1] in 'eEfFgGdiu':
  161. formatted = _localize(formatted, grouping, monetary)
  162. return formatted
  163. # Transform formatted as locale number according to the locale settings
  164. def _localize(formatted, grouping=False, monetary=False):
  165. # floats and decimal ints need special action!
  166. if '.' in formatted:
  167. seps = 0
  168. parts = formatted.split('.')
  169. if grouping:
  170. parts[0], seps = _group(parts[0], monetary=monetary)
  171. decimal_point = localeconv()[monetary and 'mon_decimal_point'
  172. or 'decimal_point']
  173. formatted = decimal_point.join(parts)
  174. if seps:
  175. formatted = _strip_padding(formatted, seps)
  176. else:
  177. seps = 0
  178. if grouping:
  179. formatted, seps = _group(formatted, monetary=monetary)
  180. if seps:
  181. formatted = _strip_padding(formatted, seps)
  182. return formatted
  183. def format_string(f, val, grouping=False, monetary=False):
  184. """Formats a string in the same way that the % formatting would use,
  185. but takes the current locale into account.
  186. Grouping is applied if the third parameter is true.
  187. Conversion uses monetary thousands separator and grouping strings if
  188. forth parameter monetary is true."""
  189. percents = list(_percent_re.finditer(f))
  190. new_f = _percent_re.sub('%s', f)
  191. if isinstance(val, _collections_abc.Mapping):
  192. new_val = []
  193. for perc in percents:
  194. if perc.group()[-1]=='%':
  195. new_val.append('%')
  196. else:
  197. new_val.append(_format(perc.group(), val, grouping, monetary))
  198. else:
  199. if not isinstance(val, tuple):
  200. val = (val,)
  201. new_val = []
  202. i = 0
  203. for perc in percents:
  204. if perc.group()[-1]=='%':
  205. new_val.append('%')
  206. else:
  207. starcount = perc.group('modifiers').count('*')
  208. new_val.append(_format(perc.group(),
  209. val[i],
  210. grouping,
  211. monetary,
  212. *val[i+1:i+1+starcount]))
  213. i += (1 + starcount)
  214. val = tuple(new_val)
  215. return new_f % val
  216. def currency(val, symbol=True, grouping=False, international=False):
  217. """Formats val according to the currency settings
  218. in the current locale."""
  219. conv = localeconv()
  220. # check for illegal values
  221. digits = conv[international and 'int_frac_digits' or 'frac_digits']
  222. if digits == 127:
  223. raise ValueError("Currency formatting is not possible using "
  224. "the 'C' locale.")
  225. s = _localize(f'{abs(val):.{digits}f}', grouping, monetary=True)
  226. # '<' and '>' are markers if the sign must be inserted between symbol and value
  227. s = '<' + s + '>'
  228. if symbol:
  229. smb = conv[international and 'int_curr_symbol' or 'currency_symbol']
  230. precedes = conv[val<0 and 'n_cs_precedes' or 'p_cs_precedes']
  231. separated = conv[val<0 and 'n_sep_by_space' or 'p_sep_by_space']
  232. if precedes:
  233. s = smb + (separated and ' ' or '') + s
  234. else:
  235. if international and smb[-1] == ' ':
  236. smb = smb[:-1]
  237. s = s + (separated and ' ' or '') + smb
  238. sign_pos = conv[val<0 and 'n_sign_posn' or 'p_sign_posn']
  239. sign = conv[val<0 and 'negative_sign' or 'positive_sign']
  240. if sign_pos == 0:
  241. s = '(' + s + ')'
  242. elif sign_pos == 1:
  243. s = sign + s
  244. elif sign_pos == 2:
  245. s = s + sign
  246. elif sign_pos == 3:
  247. s = s.replace('<', sign)
  248. elif sign_pos == 4:
  249. s = s.replace('>', sign)
  250. else:
  251. # the default if nothing specified;
  252. # this should be the most fitting sign position
  253. s = sign + s
  254. return s.replace('<', '').replace('>', '')
  255. def str(val):
  256. """Convert float to string, taking the locale into account."""
  257. return _format("%.12g", val)
  258. def delocalize(string):
  259. "Parses a string as a normalized number according to the locale settings."
  260. conv = localeconv()
  261. #First, get rid of the grouping
  262. ts = conv['thousands_sep']
  263. if ts:
  264. string = string.replace(ts, '')
  265. #next, replace the decimal point with a dot
  266. dd = conv['decimal_point']
  267. if dd:
  268. string = string.replace(dd, '.')
  269. return string
  270. def localize(string, grouping=False, monetary=False):
  271. """Parses a string as locale number according to the locale settings."""
  272. return _localize(string, grouping, monetary)
  273. def atof(string, func=float):
  274. "Parses a string as a float according to the locale settings."
  275. return func(delocalize(string))
  276. def atoi(string):
  277. "Converts a string to an integer according to the locale settings."
  278. return int(delocalize(string))
  279. def _test():
  280. setlocale(LC_ALL, "")
  281. #do grouping
  282. s1 = format_string("%d", 123456789,1)
  283. print(s1, "is", atoi(s1))
  284. #standard formatting
  285. s1 = str(3.14)
  286. print(s1, "is", atof(s1))
  287. ### Locale name aliasing engine
  288. # Author: Marc-Andre Lemburg, mal@lemburg.com
  289. # Various tweaks by Fredrik Lundh <fredrik@pythonware.com>
# Store away the low-level version of setlocale: the public name is
# rebound further down to a wrapper that also accepts
# (language code, encoding) iterables.
_setlocale = setlocale
  293. def _replace_encoding(code, encoding):
  294. if '.' in code:
  295. langname = code[:code.index('.')]
  296. else:
  297. langname = code
  298. # Convert the encoding to a C lib compatible encoding string
  299. norm_encoding = encodings.normalize_encoding(encoding)
  300. #print('norm encoding: %r' % norm_encoding)
  301. norm_encoding = encodings.aliases.aliases.get(norm_encoding.lower(),
  302. norm_encoding)
  303. #print('aliased encoding: %r' % norm_encoding)
  304. encoding = norm_encoding
  305. norm_encoding = norm_encoding.lower()
  306. if norm_encoding in locale_encoding_alias:
  307. encoding = locale_encoding_alias[norm_encoding]
  308. else:
  309. norm_encoding = norm_encoding.replace('_', '')
  310. norm_encoding = norm_encoding.replace('-', '')
  311. if norm_encoding in locale_encoding_alias:
  312. encoding = locale_encoding_alias[norm_encoding]
  313. #print('found encoding %r' % encoding)
  314. return langname + '.' + encoding
  315. def _append_modifier(code, modifier):
  316. if modifier == 'euro':
  317. if '.' not in code:
  318. return code + '.ISO8859-15'
  319. _, _, encoding = code.partition('.')
  320. if encoding in ('ISO8859-15', 'UTF-8'):
  321. return code
  322. if encoding == 'ISO8859-1':
  323. return _replace_encoding(code, 'ISO8859-15')
  324. return code + '@' + modifier
  325. def normalize(localename):
  326. """ Returns a normalized locale code for the given locale
  327. name.
  328. The returned locale code is formatted for use with
  329. setlocale().
  330. If normalization fails, the original name is returned
  331. unchanged.
  332. If the given encoding is not known, the function defaults to
  333. the default encoding for the locale code just like setlocale()
  334. does.
  335. """
  336. # Normalize the locale name and extract the encoding and modifier
  337. code = localename.lower()
  338. if ':' in code:
  339. # ':' is sometimes used as encoding delimiter.
  340. code = code.replace(':', '.')
  341. if '@' in code:
  342. code, modifier = code.split('@', 1)
  343. else:
  344. modifier = ''
  345. if '.' in code:
  346. langname, encoding = code.split('.')[:2]
  347. else:
  348. langname = code
  349. encoding = ''
  350. # First lookup: fullname (possibly with encoding and modifier)
  351. lang_enc = langname
  352. if encoding:
  353. norm_encoding = encoding.replace('-', '')
  354. norm_encoding = norm_encoding.replace('_', '')
  355. lang_enc += '.' + norm_encoding
  356. lookup_name = lang_enc
  357. if modifier:
  358. lookup_name += '@' + modifier
  359. code = locale_alias.get(lookup_name, None)
  360. if code is not None:
  361. return code
  362. #print('first lookup failed')
  363. if modifier:
  364. # Second try: fullname without modifier (possibly with encoding)
  365. code = locale_alias.get(lang_enc, None)
  366. if code is not None:
  367. #print('lookup without modifier succeeded')
  368. if '@' not in code:
  369. return _append_modifier(code, modifier)
  370. if code.split('@', 1)[1].lower() == modifier:
  371. return code
  372. #print('second lookup failed')
  373. if encoding:
  374. # Third try: langname (without encoding, possibly with modifier)
  375. lookup_name = langname
  376. if modifier:
  377. lookup_name += '@' + modifier
  378. code = locale_alias.get(lookup_name, None)
  379. if code is not None:
  380. #print('lookup without encoding succeeded')
  381. if '@' not in code:
  382. return _replace_encoding(code, encoding)
  383. code, modifier = code.split('@', 1)
  384. return _replace_encoding(code, encoding) + '@' + modifier
  385. if modifier:
  386. # Fourth try: langname (without encoding and modifier)
  387. code = locale_alias.get(langname, None)
  388. if code is not None:
  389. #print('lookup without modifier and encoding succeeded')
  390. if '@' not in code:
  391. code = _replace_encoding(code, encoding)
  392. return _append_modifier(code, modifier)
  393. code, defmod = code.split('@', 1)
  394. if defmod.lower() == modifier:
  395. return _replace_encoding(code, encoding) + '@' + defmod
  396. return localename
  397. def _parse_localename(localename):
  398. """ Parses the locale code for localename and returns the
  399. result as tuple (language code, encoding).
  400. The localename is normalized and passed through the locale
  401. alias engine. A ValueError is raised in case the locale name
  402. cannot be parsed.
  403. The language code corresponds to RFC 1766. code and encoding
  404. can be None in case the values cannot be determined or are
  405. unknown to this implementation.
  406. """
  407. code = normalize(localename)
  408. if '@' in code:
  409. # Deal with locale modifiers
  410. code, modifier = code.split('@', 1)
  411. if modifier == 'euro' and '.' not in code:
  412. # Assume Latin-9 for @euro locales. This is bogus,
  413. # since some systems may use other encodings for these
  414. # locales. Also, we ignore other modifiers.
  415. return code, 'iso-8859-15'
  416. if '.' in code:
  417. return tuple(code.split('.')[:2])
  418. elif code == 'C':
  419. return None, None
  420. elif code == 'UTF-8':
  421. # On macOS "LC_CTYPE=UTF-8" is a valid locale setting
  422. # for getting UTF-8 handling for text.
  423. return None, 'UTF-8'
  424. raise ValueError('unknown locale: %s' % localename)
  425. def _build_localename(localetuple):
  426. """ Builds a locale code from the given tuple (language code,
  427. encoding).
  428. No aliasing or normalizing takes place.
  429. """
  430. try:
  431. language, encoding = localetuple
  432. if language is None:
  433. language = 'C'
  434. if encoding is None:
  435. return language
  436. else:
  437. return language + '.' + encoding
  438. except (TypeError, ValueError):
  439. raise TypeError('Locale must be None, a string, or an iterable of '
  440. 'two strings -- language code, encoding.') from None
  441. def getdefaultlocale(envvars=('LC_ALL', 'LC_CTYPE', 'LANG', 'LANGUAGE')):
  442. """ Tries to determine the default locale settings and returns
  443. them as tuple (language code, encoding).
  444. According to POSIX, a program which has not called
  445. setlocale(LC_ALL, "") runs using the portable 'C' locale.
  446. Calling setlocale(LC_ALL, "") lets it use the default locale as
  447. defined by the LANG variable. Since we don't want to interfere
  448. with the current locale setting we thus emulate the behavior
  449. in the way described above.
  450. To maintain compatibility with other platforms, not only the
  451. LANG variable is tested, but a list of variables given as
  452. envvars parameter. The first found to be defined will be
  453. used. envvars defaults to the search path used in GNU gettext;
  454. it must always contain the variable name 'LANG'.
  455. Except for the code 'C', the language code corresponds to RFC
  456. 1766. code and encoding can be None in case the values cannot
  457. be determined.
  458. """
  459. import warnings
  460. warnings._deprecated(
  461. "locale.getdefaultlocale",
  462. "{name!r} is deprecated and slated for removal in Python {remove}. "
  463. "Use setlocale(), getencoding() and getlocale() instead.",
  464. remove=(3, 15))
  465. return _getdefaultlocale(envvars)
  466. def _getdefaultlocale(envvars=('LC_ALL', 'LC_CTYPE', 'LANG', 'LANGUAGE')):
  467. try:
  468. # check if it's supported by the _locale module
  469. import _locale
  470. code, encoding = _locale._getdefaultlocale()
  471. except (ImportError, AttributeError):
  472. pass
  473. else:
  474. # make sure the code/encoding values are valid
  475. if sys.platform == "win32" and code and code[:2] == "0x":
  476. # map windows language identifier to language name
  477. code = windows_locale.get(int(code, 0))
  478. # ...add other platform-specific processing here, if
  479. # necessary...
  480. return code, encoding
  481. # fall back on POSIX behaviour
  482. import os
  483. lookup = os.environ.get
  484. for variable in envvars:
  485. localename = lookup(variable,None)
  486. if localename:
  487. if variable == 'LANGUAGE':
  488. localename = localename.split(':')[0]
  489. break
  490. else:
  491. localename = 'C'
  492. return _parse_localename(localename)
  493. def getlocale(category=LC_CTYPE):
  494. """ Returns the current setting for the given locale category as
  495. tuple (language code, encoding).
  496. category may be one of the LC_* value except LC_ALL. It
  497. defaults to LC_CTYPE.
  498. Except for the code 'C', the language code corresponds to RFC
  499. 1766. code and encoding can be None in case the values cannot
  500. be determined.
  501. """
  502. localename = _setlocale(category)
  503. if category == LC_ALL and ';' in localename:
  504. raise TypeError('category LC_ALL is not supported')
  505. return _parse_localename(localename)
  506. def setlocale(category, locale=None):
  507. """ Set the locale for the given category. The locale can be
  508. a string, an iterable of two strings (language code and encoding),
  509. or None.
  510. Iterables are converted to strings using the locale aliasing
  511. engine. Locale strings are passed directly to the C lib.
  512. category may be given as one of the LC_* values.
  513. """
  514. if locale and not isinstance(locale, _builtin_str):
  515. # convert to string
  516. locale = normalize(_build_localename(locale))
  517. return _setlocale(category, locale)
  518. def resetlocale(category=LC_ALL):
  519. """ Sets the locale for category to the default setting.
  520. The default setting is determined by calling
  521. getdefaultlocale(). category defaults to LC_ALL.
  522. """
  523. import warnings
  524. warnings.warn(
  525. 'Use locale.setlocale(locale.LC_ALL, "") instead',
  526. DeprecationWarning, stacklevel=2
  527. )
  528. with warnings.catch_warnings():
  529. warnings.simplefilter('ignore', category=DeprecationWarning)
  530. loc = getdefaultlocale()
  531. _setlocale(category, _build_localename(loc))
  532. try:
  533. from _locale import getencoding
  534. except ImportError:
  535. def getencoding():
  536. if hasattr(sys, 'getandroidapilevel'):
  537. # On Android langinfo.h and CODESET are missing, and UTF-8 is
  538. # always used in mbstowcs() and wcstombs().
  539. return 'utf-8'
  540. encoding = _getdefaultlocale()[1]
  541. if encoding is None:
  542. # LANG not set, default to UTF-8
  543. encoding = 'utf-8'
  544. return encoding
  545. try:
  546. CODESET
  547. except NameError:
  548. def getpreferredencoding(do_setlocale=True):
  549. """Return the charset that the user is likely using."""
  550. if sys.flags.warn_default_encoding:
  551. import warnings
  552. warnings.warn(
  553. "UTF-8 Mode affects locale.getpreferredencoding(). Consider locale.getencoding() instead.",
  554. EncodingWarning, 2)
  555. if sys.flags.utf8_mode:
  556. return 'utf-8'
  557. return getencoding()
  558. else:
  559. # On Unix, if CODESET is available, use that.
  560. def getpreferredencoding(do_setlocale=True):
  561. """Return the charset that the user is likely using,
  562. according to the system configuration."""
  563. if sys.flags.warn_default_encoding:
  564. import warnings
  565. warnings.warn(
  566. "UTF-8 Mode affects locale.getpreferredencoding(). Consider locale.getencoding() instead.",
  567. EncodingWarning, 2)
  568. if sys.flags.utf8_mode:
  569. return 'utf-8'
  570. if not do_setlocale:
  571. return getencoding()
  572. old_loc = setlocale(LC_CTYPE)
  573. try:
  574. try:
  575. setlocale(LC_CTYPE, "")
  576. except Error:
  577. pass
  578. return getencoding()
  579. finally:
  580. setlocale(LC_CTYPE, old_loc)
  581. ### Database
  582. #
  583. # The following data was extracted from the locale.alias file which
  584. # comes with X11 and then hand edited removing the explicit encoding
  585. # definitions and adding some more aliases. The file is usually
  586. # available as /usr/lib/X11/locale/locale.alias.
  587. #
  588. #
  589. # The locale_encoding_alias table maps lowercase encoding alias names
  590. # to C locale encoding names (case-sensitive). Note that normalize()
  591. # first looks up the encoding in the encodings.aliases dictionary and
  592. # then applies this mapping to find the correct C lib name for the
  593. # encoding.
  594. #
# Maps lowercase encoding alias names to C lib encoding names
# (case-sensitive values).
locale_encoding_alias = {

    # Mappings for non-standard encoding names used in locale names
    '437': 'C',
    'c': 'C',
    'en': 'ISO8859-1',
    'jis': 'JIS7',
    'jis7': 'JIS7',
    'ajec': 'eucJP',
    'koi8c': 'KOI8-C',
    'microsoftcp1251': 'CP1251',
    'microsoftcp1255': 'CP1255',
    'microsoftcp1256': 'CP1256',
    '88591': 'ISO8859-1',
    '88592': 'ISO8859-2',
    '88595': 'ISO8859-5',
    '885915': 'ISO8859-15',

    # Mappings from Python codec names to C lib encoding names
    'ascii': 'ISO8859-1',
    'latin_1': 'ISO8859-1',
    'iso8859_1': 'ISO8859-1',
    'iso8859_10': 'ISO8859-10',
    'iso8859_11': 'ISO8859-11',
    'iso8859_13': 'ISO8859-13',
    'iso8859_14': 'ISO8859-14',
    'iso8859_15': 'ISO8859-15',
    'iso8859_16': 'ISO8859-16',
    'iso8859_2': 'ISO8859-2',
    'iso8859_3': 'ISO8859-3',
    'iso8859_4': 'ISO8859-4',
    'iso8859_5': 'ISO8859-5',
    'iso8859_6': 'ISO8859-6',
    'iso8859_7': 'ISO8859-7',
    'iso8859_8': 'ISO8859-8',
    'iso8859_9': 'ISO8859-9',
    'iso2022_jp': 'JIS7',
    'shift_jis': 'SJIS',
    'tactis': 'TACTIS',
    'euc_jp': 'eucJP',
    'euc_kr': 'eucKR',
    'utf_8': 'UTF-8',
    'koi8_r': 'KOI8-R',
    'koi8_t': 'KOI8-T',
    'koi8_u': 'KOI8-U',
    'kz1048': 'RK1048',
    'cp1251': 'CP1251',
    'cp1255': 'CP1255',
    'cp1256': 'CP1256',

    # XXX This list is still incomplete. If you know more
    # mappings, please file a bug report. Thanks.
}

# Also register every key with its underscores removed.  setdefault()
# keeps any entry that already exists under the shortened name, and the
# loop runs over a sorted snapshot of the items.
for k, v in sorted(locale_encoding_alias.items()):
    k = k.replace('_', '')
    locale_encoding_alias.setdefault(k, v)
# Do not leave the loop variables behind in the module namespace.
del k, v
  649. #
  650. # The locale_alias table maps lowercase alias names to C locale names
  651. # (case-sensitive). Encodings are always separated from the locale
  652. # name using a dot ('.'); they should only be given in case the
  653. # language name is needed to interpret the given encoding alias
  654. # correctly (CJK codes often have this need).
  655. #
  656. # Note that the normalize() function, which uses this table,
  657. # removes '_' and '-' characters from the encoding part of the
  658. # locale name before doing the lookup. This saves a lot of
  659. # space in the table.
  660. #
  661. # MAL 2004-12-10:
  662. # Updated alias mapping to most recent locale.alias file
  663. # from X.org distribution using makelocalealias.py.
  664. #
  665. # These are the differences compared to the old mapping (Python 2.4
  666. # and older):
  667. #
  668. # updated 'bg' -> 'bg_BG.ISO8859-5' to 'bg_BG.CP1251'
  669. # updated 'bg_bg' -> 'bg_BG.ISO8859-5' to 'bg_BG.CP1251'
  670. # updated 'bulgarian' -> 'bg_BG.ISO8859-5' to 'bg_BG.CP1251'
  671. # updated 'cz' -> 'cz_CZ.ISO8859-2' to 'cs_CZ.ISO8859-2'
  672. # updated 'cz_cz' -> 'cz_CZ.ISO8859-2' to 'cs_CZ.ISO8859-2'
  673. # updated 'czech' -> 'cs_CS.ISO8859-2' to 'cs_CZ.ISO8859-2'
  674. # updated 'dutch' -> 'nl_BE.ISO8859-1' to 'nl_NL.ISO8859-1'
  675. # updated 'et' -> 'et_EE.ISO8859-4' to 'et_EE.ISO8859-15'
  676. # updated 'et_ee' -> 'et_EE.ISO8859-4' to 'et_EE.ISO8859-15'
  677. # updated 'fi' -> 'fi_FI.ISO8859-1' to 'fi_FI.ISO8859-15'
  678. # updated 'fi_fi' -> 'fi_FI.ISO8859-1' to 'fi_FI.ISO8859-15'
  679. # updated 'iw' -> 'iw_IL.ISO8859-8' to 'he_IL.ISO8859-8'
  680. # updated 'iw_il' -> 'iw_IL.ISO8859-8' to 'he_IL.ISO8859-8'
  681. # updated 'japanese' -> 'ja_JP.SJIS' to 'ja_JP.eucJP'
  682. # updated 'lt' -> 'lt_LT.ISO8859-4' to 'lt_LT.ISO8859-13'
  683. # updated 'lv' -> 'lv_LV.ISO8859-4' to 'lv_LV.ISO8859-13'
  684. # updated 'sl' -> 'sl_CS.ISO8859-2' to 'sl_SI.ISO8859-2'
  685. # updated 'slovene' -> 'sl_CS.ISO8859-2' to 'sl_SI.ISO8859-2'
  686. # updated 'th_th' -> 'th_TH.TACTIS' to 'th_TH.ISO8859-11'
  687. # updated 'zh_cn' -> 'zh_CN.eucCN' to 'zh_CN.gb2312'
  688. # updated 'zh_cn.big5' -> 'zh_TW.eucTW' to 'zh_TW.big5'
  689. # updated 'zh_tw' -> 'zh_TW.eucTW' to 'zh_TW.big5'
  690. #
  691. # MAL 2008-05-30:
  692. # Updated alias mapping to most recent locale.alias file
  693. # from X.org distribution using makelocalealias.py.
  694. #
  695. # These are the differences compared to the old mapping (Python 2.5
  696. # and older):
  697. #
  698. # updated 'cs_cs.iso88592' -> 'cs_CZ.ISO8859-2' to 'cs_CS.ISO8859-2'
  699. # updated 'serbocroatian' -> 'sh_YU.ISO8859-2' to 'sr_CS.ISO8859-2'
  700. # updated 'sh' -> 'sh_YU.ISO8859-2' to 'sr_CS.ISO8859-2'
  701. # updated 'sh_hr.iso88592' -> 'sh_HR.ISO8859-2' to 'hr_HR.ISO8859-2'
  702. # updated 'sh_sp' -> 'sh_YU.ISO8859-2' to 'sr_CS.ISO8859-2'
  703. # updated 'sh_yu' -> 'sh_YU.ISO8859-2' to 'sr_CS.ISO8859-2'
  704. # updated 'sp' -> 'sp_YU.ISO8859-5' to 'sr_CS.ISO8859-5'
  705. # updated 'sp_yu' -> 'sp_YU.ISO8859-5' to 'sr_CS.ISO8859-5'
  706. # updated 'sr' -> 'sr_YU.ISO8859-5' to 'sr_CS.ISO8859-5'
  707. # updated 'sr@cyrillic' -> 'sr_YU.ISO8859-5' to 'sr_CS.ISO8859-5'
  708. # updated 'sr_sp' -> 'sr_SP.ISO8859-2' to 'sr_CS.ISO8859-2'
  709. # updated 'sr_yu' -> 'sr_YU.ISO8859-5' to 'sr_CS.ISO8859-5'
  710. # updated 'sr_yu.cp1251@cyrillic' -> 'sr_YU.CP1251' to 'sr_CS.CP1251'
  711. # updated 'sr_yu.iso88592' -> 'sr_YU.ISO8859-2' to 'sr_CS.ISO8859-2'
  712. # updated 'sr_yu.iso88595' -> 'sr_YU.ISO8859-5' to 'sr_CS.ISO8859-5'
  713. # updated 'sr_yu.iso88595@cyrillic' -> 'sr_YU.ISO8859-5' to 'sr_CS.ISO8859-5'
  714. # updated 'sr_yu.microsoftcp1251@cyrillic' -> 'sr_YU.CP1251' to 'sr_CS.CP1251'
  715. # updated 'sr_yu.utf8@cyrillic' -> 'sr_YU.UTF-8' to 'sr_CS.UTF-8'
  716. # updated 'sr_yu@cyrillic' -> 'sr_YU.ISO8859-5' to 'sr_CS.ISO8859-5'
  717. #
  718. # AP 2010-04-12:
  719. # Updated alias mapping to most recent locale.alias file
  720. # from X.org distribution using makelocalealias.py.
  721. #
  722. # These are the differences compared to the old mapping (Python 2.6.5
  723. # and older):
  724. #
  725. # updated 'ru' -> 'ru_RU.ISO8859-5' to 'ru_RU.UTF-8'
  726. # updated 'ru_ru' -> 'ru_RU.ISO8859-5' to 'ru_RU.UTF-8'
  727. # updated 'serbocroatian' -> 'sr_CS.ISO8859-2' to 'sr_RS.UTF-8@latin'
  728. # updated 'sh' -> 'sr_CS.ISO8859-2' to 'sr_RS.UTF-8@latin'
  729. # updated 'sh_yu' -> 'sr_CS.ISO8859-2' to 'sr_RS.UTF-8@latin'
  730. # updated 'sr' -> 'sr_CS.ISO8859-5' to 'sr_RS.UTF-8'
  731. # updated 'sr@cyrillic' -> 'sr_CS.ISO8859-5' to 'sr_RS.UTF-8'
  732. # updated 'sr@latn' -> 'sr_CS.ISO8859-2' to 'sr_RS.UTF-8@latin'
  733. # updated 'sr_cs.utf8@latn' -> 'sr_CS.UTF-8' to 'sr_RS.UTF-8@latin'
  734. # updated 'sr_cs@latn' -> 'sr_CS.ISO8859-2' to 'sr_RS.UTF-8@latin'
  735. # updated 'sr_yu' -> 'sr_CS.ISO8859-5' to 'sr_RS.UTF-8@latin'
  736. # updated 'sr_yu.utf8@cyrillic' -> 'sr_CS.UTF-8' to 'sr_RS.UTF-8'
  737. # updated 'sr_yu@cyrillic' -> 'sr_CS.ISO8859-5' to 'sr_RS.UTF-8'
  738. #
  739. # SS 2013-12-20:
  740. # Updated alias mapping to most recent locale.alias file
  741. # from X.org distribution using makelocalealias.py.
  742. #
  743. # These are the differences compared to the old mapping (Python 3.3.3
  744. # and older):
  745. #
  746. # updated 'a3' -> 'a3_AZ.KOI8-C' to 'az_AZ.KOI8-C'
  747. # updated 'a3_az' -> 'a3_AZ.KOI8-C' to 'az_AZ.KOI8-C'
  748. # updated 'a3_az.koi8c' -> 'a3_AZ.KOI8-C' to 'az_AZ.KOI8-C'
  749. # updated 'cs_cs.iso88592' -> 'cs_CS.ISO8859-2' to 'cs_CZ.ISO8859-2'
  750. # updated 'hebrew' -> 'iw_IL.ISO8859-8' to 'he_IL.ISO8859-8'
  751. # updated 'hebrew.iso88598' -> 'iw_IL.ISO8859-8' to 'he_IL.ISO8859-8'
  752. # updated 'sd' -> 'sd_IN@devanagari.UTF-8' to 'sd_IN.UTF-8'
  753. # updated 'sr@latn' -> 'sr_RS.UTF-8@latin' to 'sr_CS.UTF-8@latin'
  754. # updated 'sr_cs' -> 'sr_RS.UTF-8' to 'sr_CS.UTF-8'
  755. # updated 'sr_cs.utf8@latn' -> 'sr_RS.UTF-8@latin' to 'sr_CS.UTF-8@latin'
  756. # updated 'sr_cs@latn' -> 'sr_RS.UTF-8@latin' to 'sr_CS.UTF-8@latin'
  757. #
  758. # SS 2014-10-01:
  759. # Updated alias mapping with glibc 2.19 supported locales.
  760. #
  761. # SS 2018-05-05:
  762. # Updated alias mapping with glibc 2.27 supported locales.
  763. #
  764. # These are the differences compared to the old mapping (Python 3.6.5
  765. # and older):
  766. #
  767. # updated 'ca_es@valencia' -> 'ca_ES.ISO8859-15@valencia' to 'ca_ES.UTF-8@valencia'
  768. # updated 'kk_kz' -> 'kk_KZ.RK1048' to 'kk_KZ.ptcp154'
  769. # updated 'russian' -> 'ru_RU.ISO8859-5' to 'ru_RU.KOI8-R'
# Locale alias table used by this module's aliasing engine.
#
# Keys are lowercase locale names; some carry a '.encoding' suffix and/or an
# '@modifier'.  Values are the full locale strings they stand for, normally
# of the form 'language_TERRITORY.ENCODING' with an optional trailing
# '@modifier' (a few values, such as 'C' or 'ia.UTF-8', have no territory).
#
# NOTE(review): the comments preceding this table indicate it is derived
# from the X.org distribution and glibc's supported locales using
# makelocalealias.py, so individual entries are presumably meant to be
# regenerated rather than edited by hand -- confirm before changing any.
locale_alias = {
    'a3': 'az_AZ.KOI8-C',
    'a3_az': 'az_AZ.KOI8-C',
    'a3_az.koic': 'az_AZ.KOI8-C',
    'aa_dj': 'aa_DJ.ISO8859-1',
    'aa_er': 'aa_ER.UTF-8',
    'aa_et': 'aa_ET.UTF-8',
    'af': 'af_ZA.ISO8859-1',
    'af_za': 'af_ZA.ISO8859-1',
    'agr_pe': 'agr_PE.UTF-8',
    'ak_gh': 'ak_GH.UTF-8',
    'am': 'am_ET.UTF-8',
    'am_et': 'am_ET.UTF-8',
    'american': 'en_US.ISO8859-1',
    'an_es': 'an_ES.ISO8859-15',
    'anp_in': 'anp_IN.UTF-8',
    'ar': 'ar_AA.ISO8859-6',
    'ar_aa': 'ar_AA.ISO8859-6',
    'ar_ae': 'ar_AE.ISO8859-6',
    'ar_bh': 'ar_BH.ISO8859-6',
    'ar_dz': 'ar_DZ.ISO8859-6',
    'ar_eg': 'ar_EG.ISO8859-6',
    'ar_in': 'ar_IN.UTF-8',
    'ar_iq': 'ar_IQ.ISO8859-6',
    'ar_jo': 'ar_JO.ISO8859-6',
    'ar_kw': 'ar_KW.ISO8859-6',
    'ar_lb': 'ar_LB.ISO8859-6',
    'ar_ly': 'ar_LY.ISO8859-6',
    'ar_ma': 'ar_MA.ISO8859-6',
    'ar_om': 'ar_OM.ISO8859-6',
    'ar_qa': 'ar_QA.ISO8859-6',
    'ar_sa': 'ar_SA.ISO8859-6',
    'ar_sd': 'ar_SD.ISO8859-6',
    'ar_ss': 'ar_SS.UTF-8',
    'ar_sy': 'ar_SY.ISO8859-6',
    'ar_tn': 'ar_TN.ISO8859-6',
    'ar_ye': 'ar_YE.ISO8859-6',
    'arabic': 'ar_AA.ISO8859-6',
    'as': 'as_IN.UTF-8',
    'as_in': 'as_IN.UTF-8',
    'ast_es': 'ast_ES.ISO8859-15',
    'ayc_pe': 'ayc_PE.UTF-8',
    'az': 'az_AZ.ISO8859-9E',
    'az_az': 'az_AZ.ISO8859-9E',
    'az_az.iso88599e': 'az_AZ.ISO8859-9E',
    'az_ir': 'az_IR.UTF-8',
    'be': 'be_BY.CP1251',
    'be@latin': 'be_BY.UTF-8@latin',
    'be_bg.utf8': 'bg_BG.UTF-8',
    'be_by': 'be_BY.CP1251',
    'be_by@latin': 'be_BY.UTF-8@latin',
    'bem_zm': 'bem_ZM.UTF-8',
    'ber_dz': 'ber_DZ.UTF-8',
    'ber_ma': 'ber_MA.UTF-8',
    'bg': 'bg_BG.CP1251',
    'bg_bg': 'bg_BG.CP1251',
    'bhb_in.utf8': 'bhb_IN.UTF-8',
    'bho_in': 'bho_IN.UTF-8',
    'bho_np': 'bho_NP.UTF-8',
    'bi_vu': 'bi_VU.UTF-8',
    'bn_bd': 'bn_BD.UTF-8',
    'bn_in': 'bn_IN.UTF-8',
    'bo_cn': 'bo_CN.UTF-8',
    'bo_in': 'bo_IN.UTF-8',
    'bokmal': 'nb_NO.ISO8859-1',
    'bokm\xe5l': 'nb_NO.ISO8859-1',
    'br': 'br_FR.ISO8859-1',
    'br_fr': 'br_FR.ISO8859-1',
    'brx_in': 'brx_IN.UTF-8',
    'bs': 'bs_BA.ISO8859-2',
    'bs_ba': 'bs_BA.ISO8859-2',
    'bulgarian': 'bg_BG.CP1251',
    'byn_er': 'byn_ER.UTF-8',
    'c': 'C',
    'c-french': 'fr_CA.ISO8859-1',
    'c.ascii': 'C',
    'c.en': 'C',
    'c.iso88591': 'en_US.ISO8859-1',
    'c.utf8': 'C.UTF-8',
    'c_c': 'C',
    'c_c.c': 'C',
    'ca': 'ca_ES.ISO8859-1',
    'ca_ad': 'ca_AD.ISO8859-1',
    'ca_es': 'ca_ES.ISO8859-1',
    'ca_es@valencia': 'ca_ES.UTF-8@valencia',
    'ca_fr': 'ca_FR.ISO8859-1',
    'ca_it': 'ca_IT.ISO8859-1',
    'catalan': 'ca_ES.ISO8859-1',
    'ce_ru': 'ce_RU.UTF-8',
    'cextend': 'en_US.ISO8859-1',
    'chinese-s': 'zh_CN.eucCN',
    'chinese-t': 'zh_TW.eucTW',
    'chr_us': 'chr_US.UTF-8',
    'ckb_iq': 'ckb_IQ.UTF-8',
    'cmn_tw': 'cmn_TW.UTF-8',
    'crh_ua': 'crh_UA.UTF-8',
    'croatian': 'hr_HR.ISO8859-2',
    'cs': 'cs_CZ.ISO8859-2',
    'cs_cs': 'cs_CZ.ISO8859-2',
    'cs_cz': 'cs_CZ.ISO8859-2',
    'csb_pl': 'csb_PL.UTF-8',
    'cv_ru': 'cv_RU.UTF-8',
    'cy': 'cy_GB.ISO8859-1',
    'cy_gb': 'cy_GB.ISO8859-1',
    'cz': 'cs_CZ.ISO8859-2',
    'cz_cz': 'cs_CZ.ISO8859-2',
    'czech': 'cs_CZ.ISO8859-2',
    'da': 'da_DK.ISO8859-1',
    'da_dk': 'da_DK.ISO8859-1',
    'danish': 'da_DK.ISO8859-1',
    'dansk': 'da_DK.ISO8859-1',
    'de': 'de_DE.ISO8859-1',
    'de_at': 'de_AT.ISO8859-1',
    'de_be': 'de_BE.ISO8859-1',
    'de_ch': 'de_CH.ISO8859-1',
    'de_de': 'de_DE.ISO8859-1',
    'de_it': 'de_IT.ISO8859-1',
    'de_li.utf8': 'de_LI.UTF-8',
    'de_lu': 'de_LU.ISO8859-1',
    'deutsch': 'de_DE.ISO8859-1',
    'doi_in': 'doi_IN.UTF-8',
    'dutch': 'nl_NL.ISO8859-1',
    'dutch.iso88591': 'nl_BE.ISO8859-1',
    'dv_mv': 'dv_MV.UTF-8',
    'dz_bt': 'dz_BT.UTF-8',
    'ee': 'ee_EE.ISO8859-4',
    'ee_ee': 'ee_EE.ISO8859-4',
    'eesti': 'et_EE.ISO8859-1',
    'el': 'el_GR.ISO8859-7',
    'el_cy': 'el_CY.ISO8859-7',
    'el_gr': 'el_GR.ISO8859-7',
    'el_gr@euro': 'el_GR.ISO8859-15',
    'en': 'en_US.ISO8859-1',
    'en_ag': 'en_AG.UTF-8',
    'en_au': 'en_AU.ISO8859-1',
    'en_be': 'en_BE.ISO8859-1',
    'en_bw': 'en_BW.ISO8859-1',
    'en_ca': 'en_CA.ISO8859-1',
    'en_dk': 'en_DK.ISO8859-1',
    'en_dl.utf8': 'en_DL.UTF-8',
    'en_gb': 'en_GB.ISO8859-1',
    'en_hk': 'en_HK.ISO8859-1',
    'en_ie': 'en_IE.ISO8859-1',
    'en_il': 'en_IL.UTF-8',
    'en_in': 'en_IN.ISO8859-1',
    'en_ng': 'en_NG.UTF-8',
    'en_nz': 'en_NZ.ISO8859-1',
    'en_ph': 'en_PH.ISO8859-1',
    'en_sc.utf8': 'en_SC.UTF-8',
    'en_sg': 'en_SG.ISO8859-1',
    'en_uk': 'en_GB.ISO8859-1',
    'en_us': 'en_US.ISO8859-1',
    'en_us@euro@euro': 'en_US.ISO8859-15',
    'en_za': 'en_ZA.ISO8859-1',
    'en_zm': 'en_ZM.UTF-8',
    'en_zw': 'en_ZW.ISO8859-1',
    'en_zw.utf8': 'en_ZS.UTF-8',
    'eng_gb': 'en_GB.ISO8859-1',
    'english': 'en_EN.ISO8859-1',
    'english.iso88591': 'en_US.ISO8859-1',
    'english_uk': 'en_GB.ISO8859-1',
    'english_united-states': 'en_US.ISO8859-1',
    'english_united-states.437': 'C',
    'english_us': 'en_US.ISO8859-1',
    'eo': 'eo_XX.ISO8859-3',
    'eo.utf8': 'eo.UTF-8',
    'eo_eo': 'eo_EO.ISO8859-3',
    'eo_us.utf8': 'eo_US.UTF-8',
    'eo_xx': 'eo_XX.ISO8859-3',
    'es': 'es_ES.ISO8859-1',
    'es_ar': 'es_AR.ISO8859-1',
    'es_bo': 'es_BO.ISO8859-1',
    'es_cl': 'es_CL.ISO8859-1',
    'es_co': 'es_CO.ISO8859-1',
    'es_cr': 'es_CR.ISO8859-1',
    'es_cu': 'es_CU.UTF-8',
    'es_do': 'es_DO.ISO8859-1',
    'es_ec': 'es_EC.ISO8859-1',
    'es_es': 'es_ES.ISO8859-1',
    'es_gt': 'es_GT.ISO8859-1',
    'es_hn': 'es_HN.ISO8859-1',
    'es_mx': 'es_MX.ISO8859-1',
    'es_ni': 'es_NI.ISO8859-1',
    'es_pa': 'es_PA.ISO8859-1',
    'es_pe': 'es_PE.ISO8859-1',
    'es_pr': 'es_PR.ISO8859-1',
    'es_py': 'es_PY.ISO8859-1',
    'es_sv': 'es_SV.ISO8859-1',
    'es_us': 'es_US.ISO8859-1',
    'es_uy': 'es_UY.ISO8859-1',
    'es_ve': 'es_VE.ISO8859-1',
    'estonian': 'et_EE.ISO8859-1',
    'et': 'et_EE.ISO8859-15',
    'et_ee': 'et_EE.ISO8859-15',
    'eu': 'eu_ES.ISO8859-1',
    'eu_es': 'eu_ES.ISO8859-1',
    'eu_fr': 'eu_FR.ISO8859-1',
    'fa': 'fa_IR.UTF-8',
    'fa_ir': 'fa_IR.UTF-8',
    'fa_ir.isiri3342': 'fa_IR.ISIRI-3342',
    'ff_sn': 'ff_SN.UTF-8',
    'fi': 'fi_FI.ISO8859-15',
    'fi_fi': 'fi_FI.ISO8859-15',
    'fil_ph': 'fil_PH.UTF-8',
    'finnish': 'fi_FI.ISO8859-1',
    'fo': 'fo_FO.ISO8859-1',
    'fo_fo': 'fo_FO.ISO8859-1',
    'fr': 'fr_FR.ISO8859-1',
    'fr_be': 'fr_BE.ISO8859-1',
    'fr_ca': 'fr_CA.ISO8859-1',
    'fr_ch': 'fr_CH.ISO8859-1',
    'fr_fr': 'fr_FR.ISO8859-1',
    'fr_lu': 'fr_LU.ISO8859-1',
    'fran\xe7ais': 'fr_FR.ISO8859-1',
    'fre_fr': 'fr_FR.ISO8859-1',
    'french': 'fr_FR.ISO8859-1',
    'french.iso88591': 'fr_CH.ISO8859-1',
    'french_france': 'fr_FR.ISO8859-1',
    'fur_it': 'fur_IT.UTF-8',
    'fy_de': 'fy_DE.UTF-8',
    'fy_nl': 'fy_NL.UTF-8',
    'ga': 'ga_IE.ISO8859-1',
    'ga_ie': 'ga_IE.ISO8859-1',
    'galego': 'gl_ES.ISO8859-1',
    'galician': 'gl_ES.ISO8859-1',
    'gd': 'gd_GB.ISO8859-1',
    'gd_gb': 'gd_GB.ISO8859-1',
    'ger_de': 'de_DE.ISO8859-1',
    'german': 'de_DE.ISO8859-1',
    'german.iso88591': 'de_CH.ISO8859-1',
    'german_germany': 'de_DE.ISO8859-1',
    'gez_er': 'gez_ER.UTF-8',
    'gez_et': 'gez_ET.UTF-8',
    'gl': 'gl_ES.ISO8859-1',
    'gl_es': 'gl_ES.ISO8859-1',
    'greek': 'el_GR.ISO8859-7',
    'gu_in': 'gu_IN.UTF-8',
    'gv': 'gv_GB.ISO8859-1',
    'gv_gb': 'gv_GB.ISO8859-1',
    'ha_ng': 'ha_NG.UTF-8',
    'hak_tw': 'hak_TW.UTF-8',
    'he': 'he_IL.ISO8859-8',
    'he_il': 'he_IL.ISO8859-8',
    'hebrew': 'he_IL.ISO8859-8',
    'hi': 'hi_IN.ISCII-DEV',
    'hi_in': 'hi_IN.ISCII-DEV',
    'hi_in.isciidev': 'hi_IN.ISCII-DEV',
    'hif_fj': 'hif_FJ.UTF-8',
    'hne': 'hne_IN.UTF-8',
    'hne_in': 'hne_IN.UTF-8',
    'hr': 'hr_HR.ISO8859-2',
    'hr_hr': 'hr_HR.ISO8859-2',
    'hrvatski': 'hr_HR.ISO8859-2',
    'hsb_de': 'hsb_DE.ISO8859-2',
    'ht_ht': 'ht_HT.UTF-8',
    'hu': 'hu_HU.ISO8859-2',
    'hu_hu': 'hu_HU.ISO8859-2',
    'hungarian': 'hu_HU.ISO8859-2',
    'hy_am': 'hy_AM.UTF-8',
    'hy_am.armscii8': 'hy_AM.ARMSCII_8',
    'ia': 'ia.UTF-8',
    'ia_fr': 'ia_FR.UTF-8',
    'icelandic': 'is_IS.ISO8859-1',
    'id': 'id_ID.ISO8859-1',
    'id_id': 'id_ID.ISO8859-1',
    'ig_ng': 'ig_NG.UTF-8',
    'ik_ca': 'ik_CA.UTF-8',
    'in': 'id_ID.ISO8859-1',
    'in_id': 'id_ID.ISO8859-1',
    'is': 'is_IS.ISO8859-1',
    'is_is': 'is_IS.ISO8859-1',
    'iso-8859-1': 'en_US.ISO8859-1',
    'iso-8859-15': 'en_US.ISO8859-15',
    'iso8859-1': 'en_US.ISO8859-1',
    'iso8859-15': 'en_US.ISO8859-15',
    'iso_8859_1': 'en_US.ISO8859-1',
    'iso_8859_15': 'en_US.ISO8859-15',
    'it': 'it_IT.ISO8859-1',
    'it_ch': 'it_CH.ISO8859-1',
    'it_it': 'it_IT.ISO8859-1',
    'italian': 'it_IT.ISO8859-1',
    'iu': 'iu_CA.NUNACOM-8',
    'iu_ca': 'iu_CA.NUNACOM-8',
    'iu_ca.nunacom8': 'iu_CA.NUNACOM-8',
    'iw': 'he_IL.ISO8859-8',
    'iw_il': 'he_IL.ISO8859-8',
    'iw_il.utf8': 'iw_IL.UTF-8',
    'ja': 'ja_JP.eucJP',
    'ja_jp': 'ja_JP.eucJP',
    'ja_jp.euc': 'ja_JP.eucJP',
    'ja_jp.mscode': 'ja_JP.SJIS',
    'ja_jp.pck': 'ja_JP.SJIS',
    'japan': 'ja_JP.eucJP',
    'japanese': 'ja_JP.eucJP',
    'japanese-euc': 'ja_JP.eucJP',
    'japanese.euc': 'ja_JP.eucJP',
    'jp_jp': 'ja_JP.eucJP',
    'ka': 'ka_GE.GEORGIAN-ACADEMY',
    'ka_ge': 'ka_GE.GEORGIAN-ACADEMY',
    'ka_ge.georgianacademy': 'ka_GE.GEORGIAN-ACADEMY',
    'ka_ge.georgianps': 'ka_GE.GEORGIAN-PS',
    'ka_ge.georgianrs': 'ka_GE.GEORGIAN-ACADEMY',
    'kab_dz': 'kab_DZ.UTF-8',
    'kk_kz': 'kk_KZ.ptcp154',
    'kl': 'kl_GL.ISO8859-1',
    'kl_gl': 'kl_GL.ISO8859-1',
    'km_kh': 'km_KH.UTF-8',
    'kn': 'kn_IN.UTF-8',
    'kn_in': 'kn_IN.UTF-8',
    'ko': 'ko_KR.eucKR',
    'ko_kr': 'ko_KR.eucKR',
    'ko_kr.euc': 'ko_KR.eucKR',
    'kok_in': 'kok_IN.UTF-8',
    'korean': 'ko_KR.eucKR',
    'korean.euc': 'ko_KR.eucKR',
    'ks': 'ks_IN.UTF-8',
    'ks_in': 'ks_IN.UTF-8',
    'ks_in@devanagari.utf8': 'ks_IN.UTF-8@devanagari',
    'ku_tr': 'ku_TR.ISO8859-9',
    'kw': 'kw_GB.ISO8859-1',
    'kw_gb': 'kw_GB.ISO8859-1',
    'ky': 'ky_KG.UTF-8',
    'ky_kg': 'ky_KG.UTF-8',
    'lb_lu': 'lb_LU.UTF-8',
    'lg_ug': 'lg_UG.ISO8859-10',
    'li_be': 'li_BE.UTF-8',
    'li_nl': 'li_NL.UTF-8',
    'lij_it': 'lij_IT.UTF-8',
    'lithuanian': 'lt_LT.ISO8859-13',
    'ln_cd': 'ln_CD.UTF-8',
    'lo': 'lo_LA.MULELAO-1',
    'lo_la': 'lo_LA.MULELAO-1',
    'lo_la.cp1133': 'lo_LA.IBM-CP1133',
    'lo_la.ibmcp1133': 'lo_LA.IBM-CP1133',
    'lo_la.mulelao1': 'lo_LA.MULELAO-1',
    'lt': 'lt_LT.ISO8859-13',
    'lt_lt': 'lt_LT.ISO8859-13',
    'lv': 'lv_LV.ISO8859-13',
    'lv_lv': 'lv_LV.ISO8859-13',
    'lzh_tw': 'lzh_TW.UTF-8',
    'mag_in': 'mag_IN.UTF-8',
    'mai': 'mai_IN.UTF-8',
    'mai_in': 'mai_IN.UTF-8',
    'mai_np': 'mai_NP.UTF-8',
    'mfe_mu': 'mfe_MU.UTF-8',
    'mg_mg': 'mg_MG.ISO8859-15',
    'mhr_ru': 'mhr_RU.UTF-8',
    'mi': 'mi_NZ.ISO8859-1',
    'mi_nz': 'mi_NZ.ISO8859-1',
    'miq_ni': 'miq_NI.UTF-8',
    'mjw_in': 'mjw_IN.UTF-8',
    'mk': 'mk_MK.ISO8859-5',
    'mk_mk': 'mk_MK.ISO8859-5',
    'ml': 'ml_IN.UTF-8',
    'ml_in': 'ml_IN.UTF-8',
    'mn_mn': 'mn_MN.UTF-8',
    'mni_in': 'mni_IN.UTF-8',
    'mr': 'mr_IN.UTF-8',
    'mr_in': 'mr_IN.UTF-8',
    'ms': 'ms_MY.ISO8859-1',
    'ms_my': 'ms_MY.ISO8859-1',
    'mt': 'mt_MT.ISO8859-3',
    'mt_mt': 'mt_MT.ISO8859-3',
    'my_mm': 'my_MM.UTF-8',
    'nan_tw': 'nan_TW.UTF-8',
    'nb': 'nb_NO.ISO8859-1',
    'nb_no': 'nb_NO.ISO8859-1',
    'nds_de': 'nds_DE.UTF-8',
    'nds_nl': 'nds_NL.UTF-8',
    'ne_np': 'ne_NP.UTF-8',
    'nhn_mx': 'nhn_MX.UTF-8',
    'niu_nu': 'niu_NU.UTF-8',
    'niu_nz': 'niu_NZ.UTF-8',
    'nl': 'nl_NL.ISO8859-1',
    'nl_aw': 'nl_AW.UTF-8',
    'nl_be': 'nl_BE.ISO8859-1',
    'nl_nl': 'nl_NL.ISO8859-1',
    'nn': 'nn_NO.ISO8859-1',
    'nn_no': 'nn_NO.ISO8859-1',
    'no': 'no_NO.ISO8859-1',
    'no@nynorsk': 'ny_NO.ISO8859-1',
    'no_no': 'no_NO.ISO8859-1',
    'no_no.iso88591@bokmal': 'no_NO.ISO8859-1',
    'no_no.iso88591@nynorsk': 'no_NO.ISO8859-1',
    'norwegian': 'no_NO.ISO8859-1',
    'nr': 'nr_ZA.ISO8859-1',
    'nr_za': 'nr_ZA.ISO8859-1',
    'nso': 'nso_ZA.ISO8859-15',
    'nso_za': 'nso_ZA.ISO8859-15',
    'ny': 'ny_NO.ISO8859-1',
    'ny_no': 'ny_NO.ISO8859-1',
    'nynorsk': 'nn_NO.ISO8859-1',
    'oc': 'oc_FR.ISO8859-1',
    'oc_fr': 'oc_FR.ISO8859-1',
    'om_et': 'om_ET.UTF-8',
    'om_ke': 'om_KE.ISO8859-1',
    'or': 'or_IN.UTF-8',
    'or_in': 'or_IN.UTF-8',
    'os_ru': 'os_RU.UTF-8',
    'pa': 'pa_IN.UTF-8',
    'pa_in': 'pa_IN.UTF-8',
    'pa_pk': 'pa_PK.UTF-8',
    'pap_an': 'pap_AN.UTF-8',
    'pap_aw': 'pap_AW.UTF-8',
    'pap_cw': 'pap_CW.UTF-8',
    'pd': 'pd_US.ISO8859-1',
    'pd_de': 'pd_DE.ISO8859-1',
    'pd_us': 'pd_US.ISO8859-1',
    'ph': 'ph_PH.ISO8859-1',
    'ph_ph': 'ph_PH.ISO8859-1',
    'pl': 'pl_PL.ISO8859-2',
    'pl_pl': 'pl_PL.ISO8859-2',
    'polish': 'pl_PL.ISO8859-2',
    'portuguese': 'pt_PT.ISO8859-1',
    'portuguese_brazil': 'pt_BR.ISO8859-1',
    'posix': 'C',
    'posix-utf2': 'C',
    'pp': 'pp_AN.ISO8859-1',
    'pp_an': 'pp_AN.ISO8859-1',
    'ps_af': 'ps_AF.UTF-8',
    'pt': 'pt_PT.ISO8859-1',
    'pt_br': 'pt_BR.ISO8859-1',
    'pt_pt': 'pt_PT.ISO8859-1',
    'quz_pe': 'quz_PE.UTF-8',
    'raj_in': 'raj_IN.UTF-8',
    'ro': 'ro_RO.ISO8859-2',
    'ro_ro': 'ro_RO.ISO8859-2',
    'romanian': 'ro_RO.ISO8859-2',
    'ru': 'ru_RU.UTF-8',
    'ru_ru': 'ru_RU.UTF-8',
    'ru_ua': 'ru_UA.KOI8-U',
    'rumanian': 'ro_RO.ISO8859-2',
    'russian': 'ru_RU.KOI8-R',
    'rw': 'rw_RW.ISO8859-1',
    'rw_rw': 'rw_RW.ISO8859-1',
    'sa_in': 'sa_IN.UTF-8',
    'sat_in': 'sat_IN.UTF-8',
    'sc_it': 'sc_IT.UTF-8',
    'sd': 'sd_IN.UTF-8',
    'sd_in': 'sd_IN.UTF-8',
    'sd_in@devanagari.utf8': 'sd_IN.UTF-8@devanagari',
    'sd_pk': 'sd_PK.UTF-8',
    'se_no': 'se_NO.UTF-8',
    'serbocroatian': 'sr_RS.UTF-8@latin',
    'sgs_lt': 'sgs_LT.UTF-8',
    'sh': 'sr_RS.UTF-8@latin',
    'sh_ba.iso88592@bosnia': 'sr_CS.ISO8859-2',
    'sh_hr': 'sh_HR.ISO8859-2',
    'sh_hr.iso88592': 'hr_HR.ISO8859-2',
    'sh_sp': 'sr_CS.ISO8859-2',
    'sh_yu': 'sr_RS.UTF-8@latin',
    'shn_mm': 'shn_MM.UTF-8',
    'shs_ca': 'shs_CA.UTF-8',
    'si': 'si_LK.UTF-8',
    'si_lk': 'si_LK.UTF-8',
    'sid_et': 'sid_ET.UTF-8',
    'sinhala': 'si_LK.UTF-8',
    'sk': 'sk_SK.ISO8859-2',
    'sk_sk': 'sk_SK.ISO8859-2',
    'sl': 'sl_SI.ISO8859-2',
    'sl_cs': 'sl_CS.ISO8859-2',
    'sl_si': 'sl_SI.ISO8859-2',
    'slovak': 'sk_SK.ISO8859-2',
    'slovene': 'sl_SI.ISO8859-2',
    'slovenian': 'sl_SI.ISO8859-2',
    'sm_ws': 'sm_WS.UTF-8',
    'so_dj': 'so_DJ.ISO8859-1',
    'so_et': 'so_ET.UTF-8',
    'so_ke': 'so_KE.ISO8859-1',
    'so_so': 'so_SO.ISO8859-1',
    'sp': 'sr_CS.ISO8859-5',
    'sp_yu': 'sr_CS.ISO8859-5',
    'spanish': 'es_ES.ISO8859-1',
    'spanish_spain': 'es_ES.ISO8859-1',
    'sq': 'sq_AL.ISO8859-2',
    'sq_al': 'sq_AL.ISO8859-2',
    'sq_mk': 'sq_MK.UTF-8',
    'sr': 'sr_RS.UTF-8',
    'sr@cyrillic': 'sr_RS.UTF-8',
    'sr@latn': 'sr_CS.UTF-8@latin',
    'sr_cs': 'sr_CS.UTF-8',
    'sr_cs.iso88592@latn': 'sr_CS.ISO8859-2',
    'sr_cs@latn': 'sr_CS.UTF-8@latin',
    'sr_me': 'sr_ME.UTF-8',
    'sr_rs': 'sr_RS.UTF-8',
    'sr_rs@latn': 'sr_RS.UTF-8@latin',
    'sr_sp': 'sr_CS.ISO8859-2',
    'sr_yu': 'sr_RS.UTF-8@latin',
    'sr_yu.cp1251@cyrillic': 'sr_CS.CP1251',
    'sr_yu.iso88592': 'sr_CS.ISO8859-2',
    'sr_yu.iso88595': 'sr_CS.ISO8859-5',
    'sr_yu.iso88595@cyrillic': 'sr_CS.ISO8859-5',
    'sr_yu.microsoftcp1251@cyrillic': 'sr_CS.CP1251',
    'sr_yu.utf8': 'sr_RS.UTF-8',
    'sr_yu.utf8@cyrillic': 'sr_RS.UTF-8',
    'sr_yu@cyrillic': 'sr_RS.UTF-8',
    'ss': 'ss_ZA.ISO8859-1',
    'ss_za': 'ss_ZA.ISO8859-1',
    'st': 'st_ZA.ISO8859-1',
    'st_za': 'st_ZA.ISO8859-1',
    'sv': 'sv_SE.ISO8859-1',
    'sv_fi': 'sv_FI.ISO8859-1',
    'sv_se': 'sv_SE.ISO8859-1',
    'sw_ke': 'sw_KE.UTF-8',
    'sw_tz': 'sw_TZ.UTF-8',
    'swedish': 'sv_SE.ISO8859-1',
    'szl_pl': 'szl_PL.UTF-8',
    'ta': 'ta_IN.TSCII-0',
    'ta_in': 'ta_IN.TSCII-0',
    'ta_in.tscii': 'ta_IN.TSCII-0',
    'ta_in.tscii0': 'ta_IN.TSCII-0',
    'ta_lk': 'ta_LK.UTF-8',
    'tcy_in.utf8': 'tcy_IN.UTF-8',
    'te': 'te_IN.UTF-8',
    'te_in': 'te_IN.UTF-8',
    'tg': 'tg_TJ.KOI8-C',
    'tg_tj': 'tg_TJ.KOI8-C',
    'th': 'th_TH.ISO8859-11',
    'th_th': 'th_TH.ISO8859-11',
    'th_th.tactis': 'th_TH.TIS620',
    'th_th.tis620': 'th_TH.TIS620',
    'thai': 'th_TH.ISO8859-11',
    'the_np': 'the_NP.UTF-8',
    'ti_er': 'ti_ER.UTF-8',
    'ti_et': 'ti_ET.UTF-8',
    'tig_er': 'tig_ER.UTF-8',
    'tk_tm': 'tk_TM.UTF-8',
    'tl': 'tl_PH.ISO8859-1',
    'tl_ph': 'tl_PH.ISO8859-1',
    'tn': 'tn_ZA.ISO8859-15',
    'tn_za': 'tn_ZA.ISO8859-15',
    'to_to': 'to_TO.UTF-8',
    'tpi_pg': 'tpi_PG.UTF-8',
    'tr': 'tr_TR.ISO8859-9',
    'tr_cy': 'tr_CY.ISO8859-9',
    'tr_tr': 'tr_TR.ISO8859-9',
    'ts': 'ts_ZA.ISO8859-1',
    'ts_za': 'ts_ZA.ISO8859-1',
    'tt': 'tt_RU.TATAR-CYR',
    'tt_ru': 'tt_RU.TATAR-CYR',
    'tt_ru.tatarcyr': 'tt_RU.TATAR-CYR',
    'tt_ru@iqtelif': 'tt_RU.UTF-8@iqtelif',
    'turkish': 'tr_TR.ISO8859-9',
    'ug_cn': 'ug_CN.UTF-8',
    'uk': 'uk_UA.KOI8-U',
    'uk_ua': 'uk_UA.KOI8-U',
    'univ': 'en_US.utf',
    'universal': 'en_US.utf',
    'universal.utf8@ucs4': 'en_US.UTF-8',
    'unm_us': 'unm_US.UTF-8',
    'ur': 'ur_PK.CP1256',
    'ur_in': 'ur_IN.UTF-8',
    'ur_pk': 'ur_PK.CP1256',
    'uz': 'uz_UZ.UTF-8',
    'uz_uz': 'uz_UZ.UTF-8',
    'uz_uz@cyrillic': 'uz_UZ.UTF-8',
    've': 've_ZA.UTF-8',
    've_za': 've_ZA.UTF-8',
    'vi': 'vi_VN.TCVN',
    'vi_vn': 'vi_VN.TCVN',
    'vi_vn.tcvn': 'vi_VN.TCVN',
    'vi_vn.tcvn5712': 'vi_VN.TCVN',
    'vi_vn.viscii': 'vi_VN.VISCII',
    'vi_vn.viscii111': 'vi_VN.VISCII',
    'wa': 'wa_BE.ISO8859-1',
    'wa_be': 'wa_BE.ISO8859-1',
    'wae_ch': 'wae_CH.UTF-8',
    'wal_et': 'wal_ET.UTF-8',
    'wo_sn': 'wo_SN.UTF-8',
    'xh': 'xh_ZA.ISO8859-1',
    'xh_za': 'xh_ZA.ISO8859-1',
    'yi': 'yi_US.CP1255',
    'yi_us': 'yi_US.CP1255',
    'yo_ng': 'yo_NG.UTF-8',
    'yue_hk': 'yue_HK.UTF-8',
    'yuw_pg': 'yuw_PG.UTF-8',
    'zh': 'zh_CN.eucCN',
    'zh_cn': 'zh_CN.gb2312',
    'zh_cn.big5': 'zh_TW.big5',
    'zh_cn.euc': 'zh_CN.eucCN',
    'zh_hk': 'zh_HK.big5hkscs',
    'zh_hk.big5hk': 'zh_HK.big5hkscs',
    'zh_sg': 'zh_SG.GB2312',
    'zh_sg.gbk': 'zh_SG.GBK',
    'zh_tw': 'zh_TW.big5',
    'zh_tw.euc': 'zh_TW.eucTW',
    'zh_tw.euctw': 'zh_TW.eucTW',
    'zu': 'zu_ZA.ISO8859-1',
    'zu_za': 'zu_ZA.ISO8859-1',
}
  1360. #
  1361. # This maps Windows language identifiers to locale strings.
  1362. #
  1363. # This list has been updated from
  1364. # http://msdn.microsoft.com/library/default.asp?url=/library/en-us/intl/nls_238z.asp
  1365. # to include every locale up to Windows Vista.
  1366. #
  1367. # NOTE: this mapping is incomplete. If your language is missing, please
  1368. # submit a bug report as detailed in the Python devguide at:
  1369. # https://devguide.python.org/triage/issue-tracker/
  1370. # Make sure you include the missing language identifier and the suggested
  1371. # locale code.
  1372. #
  1373. windows_locale = {
  1374. 0x0436: "af_ZA", # Afrikaans
  1375. 0x041c: "sq_AL", # Albanian
  1376. 0x0484: "gsw_FR",# Alsatian - France
  1377. 0x045e: "am_ET", # Amharic - Ethiopia
  1378. 0x0401: "ar_SA", # Arabic - Saudi Arabia
  1379. 0x0801: "ar_IQ", # Arabic - Iraq
  1380. 0x0c01: "ar_EG", # Arabic - Egypt
  1381. 0x1001: "ar_LY", # Arabic - Libya
  1382. 0x1401: "ar_DZ", # Arabic - Algeria
  1383. 0x1801: "ar_MA", # Arabic - Morocco
  1384. 0x1c01: "ar_TN", # Arabic - Tunisia
  1385. 0x2001: "ar_OM", # Arabic - Oman
  1386. 0x2401: "ar_YE", # Arabic - Yemen
  1387. 0x2801: "ar_SY", # Arabic - Syria
  1388. 0x2c01: "ar_JO", # Arabic - Jordan
  1389. 0x3001: "ar_LB", # Arabic - Lebanon
  1390. 0x3401: "ar_KW", # Arabic - Kuwait
  1391. 0x3801: "ar_AE", # Arabic - United Arab Emirates
  1392. 0x3c01: "ar_BH", # Arabic - Bahrain
  1393. 0x4001: "ar_QA", # Arabic - Qatar
  1394. 0x042b: "hy_AM", # Armenian
  1395. 0x044d: "as_IN", # Assamese - India
  1396. 0x042c: "az_AZ", # Azeri - Latin
  1397. 0x082c: "az_AZ", # Azeri - Cyrillic
  1398. 0x046d: "ba_RU", # Bashkir
  1399. 0x042d: "eu_ES", # Basque - Russia
  1400. 0x0423: "be_BY", # Belarusian
  1401. 0x0445: "bn_IN", # Begali
  1402. 0x201a: "bs_BA", # Bosnian - Cyrillic
  1403. 0x141a: "bs_BA", # Bosnian - Latin
  1404. 0x047e: "br_FR", # Breton - France
  1405. 0x0402: "bg_BG", # Bulgarian
  1406. # 0x0455: "my_MM", # Burmese - Not supported
  1407. 0x0403: "ca_ES", # Catalan
  1408. 0x0004: "zh_CHS",# Chinese - Simplified
  1409. 0x0404: "zh_TW", # Chinese - Taiwan
  1410. 0x0804: "zh_CN", # Chinese - PRC
  1411. 0x0c04: "zh_HK", # Chinese - Hong Kong S.A.R.
  1412. 0x1004: "zh_SG", # Chinese - Singapore
  1413. 0x1404: "zh_MO", # Chinese - Macao S.A.R.
  1414. 0x7c04: "zh_CHT",# Chinese - Traditional
  1415. 0x0483: "co_FR", # Corsican - France
  1416. 0x041a: "hr_HR", # Croatian
  1417. 0x101a: "hr_BA", # Croatian - Bosnia
  1418. 0x0405: "cs_CZ", # Czech
  1419. 0x0406: "da_DK", # Danish
  1420. 0x048c: "gbz_AF",# Dari - Afghanistan
  1421. 0x0465: "div_MV",# Divehi - Maldives
  1422. 0x0413: "nl_NL", # Dutch - The Netherlands
  1423. 0x0813: "nl_BE", # Dutch - Belgium
  1424. 0x0409: "en_US", # English - United States
  1425. 0x0809: "en_GB", # English - United Kingdom
  1426. 0x0c09: "en_AU", # English - Australia
  1427. 0x1009: "en_CA", # English - Canada
  1428. 0x1409: "en_NZ", # English - New Zealand
  1429. 0x1809: "en_IE", # English - Ireland
  1430. 0x1c09: "en_ZA", # English - South Africa
  1431. 0x2009: "en_JA", # English - Jamaica
  1432. 0x2409: "en_CB", # English - Caribbean
  1433. 0x2809: "en_BZ", # English - Belize
  1434. 0x2c09: "en_TT", # English - Trinidad
  1435. 0x3009: "en_ZW", # English - Zimbabwe
  1436. 0x3409: "en_PH", # English - Philippines
  1437. 0x4009: "en_IN", # English - India
  1438. 0x4409: "en_MY", # English - Malaysia
  1439. 0x4809: "en_IN", # English - Singapore
  1440. 0x0425: "et_EE", # Estonian
  1441. 0x0438: "fo_FO", # Faroese
  1442. 0x0464: "fil_PH",# Filipino
  1443. 0x040b: "fi_FI", # Finnish
  1444. 0x040c: "fr_FR", # French - France
  1445. 0x080c: "fr_BE", # French - Belgium
  1446. 0x0c0c: "fr_CA", # French - Canada
  1447. 0x100c: "fr_CH", # French - Switzerland
  1448. 0x140c: "fr_LU", # French - Luxembourg
  1449. 0x180c: "fr_MC", # French - Monaco
  1450. 0x0462: "fy_NL", # Frisian - Netherlands
  1451. 0x0456: "gl_ES", # Galician
  1452. 0x0437: "ka_GE", # Georgian
  1453. 0x0407: "de_DE", # German - Germany
  1454. 0x0807: "de_CH", # German - Switzerland
  1455. 0x0c07: "de_AT", # German - Austria
  1456. 0x1007: "de_LU", # German - Luxembourg
  1457. 0x1407: "de_LI", # German - Liechtenstein
  1458. 0x0408: "el_GR", # Greek
  1459. 0x046f: "kl_GL", # Greenlandic - Greenland
  1460. 0x0447: "gu_IN", # Gujarati
  1461. 0x0468: "ha_NG", # Hausa - Latin
  1462. 0x040d: "he_IL", # Hebrew
  1463. 0x0439: "hi_IN", # Hindi
  1464. 0x040e: "hu_HU", # Hungarian
  1465. 0x040f: "is_IS", # Icelandic
  1466. 0x0421: "id_ID", # Indonesian
  1467. 0x045d: "iu_CA", # Inuktitut - Syllabics
  1468. 0x085d: "iu_CA", # Inuktitut - Latin
  1469. 0x083c: "ga_IE", # Irish - Ireland
  1470. 0x0410: "it_IT", # Italian - Italy
  1471. 0x0810: "it_CH", # Italian - Switzerland
  1472. 0x0411: "ja_JP", # Japanese
  1473. 0x044b: "kn_IN", # Kannada - India
  1474. 0x043f: "kk_KZ", # Kazakh
  1475. 0x0453: "kh_KH", # Khmer - Cambodia
  1476. 0x0486: "qut_GT",# K'iche - Guatemala
  1477. 0x0487: "rw_RW", # Kinyarwanda - Rwanda
  1478. 0x0457: "kok_IN",# Konkani
  1479. 0x0412: "ko_KR", # Korean
  1480. 0x0440: "ky_KG", # Kyrgyz
  1481. 0x0454: "lo_LA", # Lao - Lao PDR
  1482. 0x0426: "lv_LV", # Latvian
  1483. 0x0427: "lt_LT", # Lithuanian
  1484. 0x082e: "dsb_DE",# Lower Sorbian - Germany
  1485. 0x046e: "lb_LU", # Luxembourgish
  1486. 0x042f: "mk_MK", # FYROM Macedonian
  1487. 0x043e: "ms_MY", # Malay - Malaysia
  1488. 0x083e: "ms_BN", # Malay - Brunei Darussalam
  1489. 0x044c: "ml_IN", # Malayalam - India
  1490. 0x043a: "mt_MT", # Maltese
  1491. 0x0481: "mi_NZ", # Maori
  1492. 0x047a: "arn_CL",# Mapudungun
  1493. 0x044e: "mr_IN", # Marathi
  1494. 0x047c: "moh_CA",# Mohawk - Canada
  1495. 0x0450: "mn_MN", # Mongolian - Cyrillic
  1496. 0x0850: "mn_CN", # Mongolian - PRC
  1497. 0x0461: "ne_NP", # Nepali
  1498. 0x0414: "nb_NO", # Norwegian - Bokmal
  1499. 0x0814: "nn_NO", # Norwegian - Nynorsk
  1500. 0x0482: "oc_FR", # Occitan - France
  1501. 0x0448: "or_IN", # Oriya - India
  1502. 0x0463: "ps_AF", # Pashto - Afghanistan
  1503. 0x0429: "fa_IR", # Persian
  1504. 0x0415: "pl_PL", # Polish
  1505. 0x0416: "pt_BR", # Portuguese - Brazil
  1506. 0x0816: "pt_PT", # Portuguese - Portugal
  1507. 0x0446: "pa_IN", # Punjabi
  1508. 0x046b: "quz_BO",# Quechua (Bolivia)
  1509. 0x086b: "quz_EC",# Quechua (Ecuador)
  1510. 0x0c6b: "quz_PE",# Quechua (Peru)
  1511. 0x0418: "ro_RO", # Romanian - Romania
  1512. 0x0417: "rm_CH", # Romansh
  1513. 0x0419: "ru_RU", # Russian
  1514. 0x243b: "smn_FI",# Sami Finland
  1515. 0x103b: "smj_NO",# Sami Norway
  1516. 0x143b: "smj_SE",# Sami Sweden
  1517. 0x043b: "se_NO", # Sami Northern Norway
  1518. 0x083b: "se_SE", # Sami Northern Sweden
  1519. 0x0c3b: "se_FI", # Sami Northern Finland
  1520. 0x203b: "sms_FI",# Sami Skolt
  1521. 0x183b: "sma_NO",# Sami Southern Norway
  1522. 0x1c3b: "sma_SE",# Sami Southern Sweden
  1523. 0x044f: "sa_IN", # Sanskrit
  1524. 0x0c1a: "sr_SP", # Serbian - Cyrillic
  1525. 0x1c1a: "sr_BA", # Serbian - Bosnia Cyrillic
  1526. 0x081a: "sr_SP", # Serbian - Latin
  1527. 0x181a: "sr_BA", # Serbian - Bosnia Latin
  1528. 0x045b: "si_LK", # Sinhala - Sri Lanka
  1529. 0x046c: "ns_ZA", # Northern Sotho
  1530. 0x0432: "tn_ZA", # Setswana - Southern Africa
  1531. 0x041b: "sk_SK", # Slovak
  1532. 0x0424: "sl_SI", # Slovenian
  1533. 0x040a: "es_ES", # Spanish - Spain
  1534. 0x080a: "es_MX", # Spanish - Mexico
  1535. 0x0c0a: "es_ES", # Spanish - Spain (Modern)
  1536. 0x100a: "es_GT", # Spanish - Guatemala
  1537. 0x140a: "es_CR", # Spanish - Costa Rica
  1538. 0x180a: "es_PA", # Spanish - Panama
  1539. 0x1c0a: "es_DO", # Spanish - Dominican Republic
  1540. 0x200a: "es_VE", # Spanish - Venezuela
  1541. 0x240a: "es_CO", # Spanish - Colombia
  1542. 0x280a: "es_PE", # Spanish - Peru
  1543. 0x2c0a: "es_AR", # Spanish - Argentina
  1544. 0x300a: "es_EC", # Spanish - Ecuador
  1545. 0x340a: "es_CL", # Spanish - Chile
  1546. 0x380a: "es_UR", # Spanish - Uruguay
  1547. 0x3c0a: "es_PY", # Spanish - Paraguay
  1548. 0x400a: "es_BO", # Spanish - Bolivia
  1549. 0x440a: "es_SV", # Spanish - El Salvador
  1550. 0x480a: "es_HN", # Spanish - Honduras
  1551. 0x4c0a: "es_NI", # Spanish - Nicaragua
  1552. 0x500a: "es_PR", # Spanish - Puerto Rico
  1553. 0x540a: "es_US", # Spanish - United States
  1554. # 0x0430: "", # Sutu - Not supported
  1555. 0x0441: "sw_KE", # Swahili
  1556. 0x041d: "sv_SE", # Swedish - Sweden
  1557. 0x081d: "sv_FI", # Swedish - Finland
  1558. 0x045a: "syr_SY",# Syriac
  1559. 0x0428: "tg_TJ", # Tajik - Cyrillic
  1560. 0x085f: "tmz_DZ",# Tamazight - Latin
  1561. 0x0449: "ta_IN", # Tamil
  1562. 0x0444: "tt_RU", # Tatar
  1563. 0x044a: "te_IN", # Telugu
  1564. 0x041e: "th_TH", # Thai
  1565. 0x0851: "bo_BT", # Tibetan - Bhutan
  1566. 0x0451: "bo_CN", # Tibetan - PRC
  1567. 0x041f: "tr_TR", # Turkish
  1568. 0x0442: "tk_TM", # Turkmen - Cyrillic
  1569. 0x0480: "ug_CN", # Uighur - Arabic
  1570. 0x0422: "uk_UA", # Ukrainian
  1571. 0x042e: "wen_DE",# Upper Sorbian - Germany
  1572. 0x0420: "ur_PK", # Urdu
  1573. 0x0820: "ur_IN", # Urdu - India
  1574. 0x0443: "uz_UZ", # Uzbek - Latin
  1575. 0x0843: "uz_UZ", # Uzbek - Cyrillic
  1576. 0x042a: "vi_VN", # Vietnamese
  1577. 0x0452: "cy_GB", # Welsh
  1578. 0x0488: "wo_SN", # Wolof - Senegal
  1579. 0x0434: "xh_ZA", # Xhosa - South Africa
  1580. 0x0485: "sah_RU",# Yakut - Cyrillic
  1581. 0x0478: "ii_CN", # Yi - PRC
  1582. 0x046a: "yo_NG", # Yoruba - Nigeria
  1583. 0x0435: "zu_ZA", # Zulu
  1584. }
  1585. def _print_locale():
  1586. """ Test function.
  1587. """
  1588. categories = {}
  1589. def _init_categories(categories=categories):
  1590. for k,v in globals().items():
  1591. if k[:3] == 'LC_':
  1592. categories[k] = v
  1593. _init_categories()
  1594. del categories['LC_ALL']
  1595. print('Locale defaults as determined by getdefaultlocale():')
  1596. print('-'*72)
  1597. lang, enc = getdefaultlocale()
  1598. print('Language: ', lang or '(undefined)')
  1599. print('Encoding: ', enc or '(undefined)')
  1600. print()
  1601. print('Locale settings on startup:')
  1602. print('-'*72)
  1603. for name,category in categories.items():
  1604. print(name, '...')
  1605. lang, enc = getlocale(category)
  1606. print(' Language: ', lang or '(undefined)')
  1607. print(' Encoding: ', enc or '(undefined)')
  1608. print()
  1609. print()
  1610. print('Locale settings after calling resetlocale():')
  1611. print('-'*72)
  1612. resetlocale()
  1613. for name,category in categories.items():
  1614. print(name, '...')
  1615. lang, enc = getlocale(category)
  1616. print(' Language: ', lang or '(undefined)')
  1617. print(' Encoding: ', enc or '(undefined)')
  1618. print()
  1619. try:
  1620. setlocale(LC_ALL, "")
  1621. except:
  1622. print('NOTE:')
  1623. print('setlocale(LC_ALL, "") does not support the default locale')
  1624. print('given in the OS environment variables.')
  1625. else:
  1626. print()
  1627. print('Locale settings after calling setlocale(LC_ALL, ""):')
  1628. print('-'*72)
  1629. for name,category in categories.items():
  1630. print(name, '...')
  1631. lang, enc = getlocale(category)
  1632. print(' Language: ', lang or '(undefined)')
  1633. print(' Encoding: ', enc or '(undefined)')
  1634. print()
  1635. ###
  1636. try:
  1637. LC_MESSAGES
  1638. except NameError:
  1639. pass
  1640. else:
  1641. __all__.append("LC_MESSAGES")
  1642. if __name__=='__main__':
  1643. print('Locale aliasing:')
  1644. print()
  1645. _print_locale()
  1646. print()
  1647. print('Number formatting:')
  1648. print()
  1649. _test()