aliases.py 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551
  1. """ Encoding Aliases Support
  2. This module is used by the encodings package search function to
  3. map encoding names to module names.
  4. Note that the search function normalizes the encoding names before
  5. doing the lookup, so the mapping will have to map normalized
  6. encoding names to module names.
  7. Contents:
  8. The following aliases dictionary contains mappings of all IANA
  9. character set names for which the Python core library provides
  10. codecs. In addition to these, a few Python specific codec
  11. aliases have also been added.
  12. """
  13. aliases = {
  14. # Please keep this list sorted alphabetically by value !
  15. # ascii codec
  16. '646' : 'ascii',
  17. 'ansi_x3.4_1968' : 'ascii',
  18. 'ansi_x3_4_1968' : 'ascii', # some email headers use this non-standard name
  19. 'ansi_x3.4_1986' : 'ascii',
  20. 'cp367' : 'ascii',
  21. 'csascii' : 'ascii',
  22. 'ibm367' : 'ascii',
  23. 'iso646_us' : 'ascii',
  24. 'iso_646.irv_1991' : 'ascii',
  25. 'iso_ir_6' : 'ascii',
  26. 'us' : 'ascii',
  27. 'us_ascii' : 'ascii',
  28. # base64_codec codec
  29. 'base64' : 'base64_codec',
  30. 'base_64' : 'base64_codec',
  31. # big5 codec
  32. 'big5_tw' : 'big5',
  33. 'csbig5' : 'big5',
  34. # big5hkscs codec
  35. 'big5_hkscs' : 'big5hkscs',
  36. 'hkscs' : 'big5hkscs',
  37. # bz2_codec codec
  38. 'bz2' : 'bz2_codec',
  39. # cp037 codec
  40. '037' : 'cp037',
  41. 'csibm037' : 'cp037',
  42. 'ebcdic_cp_ca' : 'cp037',
  43. 'ebcdic_cp_nl' : 'cp037',
  44. 'ebcdic_cp_us' : 'cp037',
  45. 'ebcdic_cp_wt' : 'cp037',
  46. 'ibm037' : 'cp037',
  47. 'ibm039' : 'cp037',
  48. # cp1026 codec
  49. '1026' : 'cp1026',
  50. 'csibm1026' : 'cp1026',
  51. 'ibm1026' : 'cp1026',
  52. # cp1125 codec
  53. '1125' : 'cp1125',
  54. 'ibm1125' : 'cp1125',
  55. 'cp866u' : 'cp1125',
  56. 'ruscii' : 'cp1125',
  57. # cp1140 codec
  58. '1140' : 'cp1140',
  59. 'ibm1140' : 'cp1140',
  60. # cp1250 codec
  61. '1250' : 'cp1250',
  62. 'windows_1250' : 'cp1250',
  63. # cp1251 codec
  64. '1251' : 'cp1251',
  65. 'windows_1251' : 'cp1251',
  66. # cp1252 codec
  67. '1252' : 'cp1252',
  68. 'windows_1252' : 'cp1252',
  69. # cp1253 codec
  70. '1253' : 'cp1253',
  71. 'windows_1253' : 'cp1253',
  72. # cp1254 codec
  73. '1254' : 'cp1254',
  74. 'windows_1254' : 'cp1254',
  75. # cp1255 codec
  76. '1255' : 'cp1255',
  77. 'windows_1255' : 'cp1255',
  78. # cp1256 codec
  79. '1256' : 'cp1256',
  80. 'windows_1256' : 'cp1256',
  81. # cp1257 codec
  82. '1257' : 'cp1257',
  83. 'windows_1257' : 'cp1257',
  84. # cp1258 codec
  85. '1258' : 'cp1258',
  86. 'windows_1258' : 'cp1258',
  87. # cp273 codec
  88. '273' : 'cp273',
  89. 'ibm273' : 'cp273',
  90. 'csibm273' : 'cp273',
  91. # cp424 codec
  92. '424' : 'cp424',
  93. 'csibm424' : 'cp424',
  94. 'ebcdic_cp_he' : 'cp424',
  95. 'ibm424' : 'cp424',
  96. # cp437 codec
  97. '437' : 'cp437',
  98. 'cspc8codepage437' : 'cp437',
  99. 'ibm437' : 'cp437',
  100. # cp500 codec
  101. '500' : 'cp500',
  102. 'csibm500' : 'cp500',
  103. 'ebcdic_cp_be' : 'cp500',
  104. 'ebcdic_cp_ch' : 'cp500',
  105. 'ibm500' : 'cp500',
  106. # cp775 codec
  107. '775' : 'cp775',
  108. 'cspc775baltic' : 'cp775',
  109. 'ibm775' : 'cp775',
  110. # cp850 codec
  111. '850' : 'cp850',
  112. 'cspc850multilingual' : 'cp850',
  113. 'ibm850' : 'cp850',
  114. # cp852 codec
  115. '852' : 'cp852',
  116. 'cspcp852' : 'cp852',
  117. 'ibm852' : 'cp852',
  118. # cp855 codec
  119. '855' : 'cp855',
  120. 'csibm855' : 'cp855',
  121. 'ibm855' : 'cp855',
  122. # cp857 codec
  123. '857' : 'cp857',
  124. 'csibm857' : 'cp857',
  125. 'ibm857' : 'cp857',
  126. # cp858 codec
  127. '858' : 'cp858',
  128. 'csibm858' : 'cp858',
  129. 'ibm858' : 'cp858',
  130. # cp860 codec
  131. '860' : 'cp860',
  132. 'csibm860' : 'cp860',
  133. 'ibm860' : 'cp860',
  134. # cp861 codec
  135. '861' : 'cp861',
  136. 'cp_is' : 'cp861',
  137. 'csibm861' : 'cp861',
  138. 'ibm861' : 'cp861',
  139. # cp862 codec
  140. '862' : 'cp862',
  141. 'cspc862latinhebrew' : 'cp862',
  142. 'ibm862' : 'cp862',
  143. # cp863 codec
  144. '863' : 'cp863',
  145. 'csibm863' : 'cp863',
  146. 'ibm863' : 'cp863',
  147. # cp864 codec
  148. '864' : 'cp864',
  149. 'csibm864' : 'cp864',
  150. 'ibm864' : 'cp864',
  151. # cp865 codec
  152. '865' : 'cp865',
  153. 'csibm865' : 'cp865',
  154. 'ibm865' : 'cp865',
  155. # cp866 codec
  156. '866' : 'cp866',
  157. 'csibm866' : 'cp866',
  158. 'ibm866' : 'cp866',
  159. # cp869 codec
  160. '869' : 'cp869',
  161. 'cp_gr' : 'cp869',
  162. 'csibm869' : 'cp869',
  163. 'ibm869' : 'cp869',
  164. # cp932 codec
  165. '932' : 'cp932',
  166. 'ms932' : 'cp932',
  167. 'mskanji' : 'cp932',
  168. 'ms_kanji' : 'cp932',
  169. # cp949 codec
  170. '949' : 'cp949',
  171. 'ms949' : 'cp949',
  172. 'uhc' : 'cp949',
  173. # cp950 codec
  174. '950' : 'cp950',
  175. 'ms950' : 'cp950',
  176. # euc_jis_2004 codec
  177. 'jisx0213' : 'euc_jis_2004',
  178. 'eucjis2004' : 'euc_jis_2004',
  179. 'euc_jis2004' : 'euc_jis_2004',
  180. # euc_jisx0213 codec
  181. 'eucjisx0213' : 'euc_jisx0213',
  182. # euc_jp codec
  183. 'eucjp' : 'euc_jp',
  184. 'ujis' : 'euc_jp',
  185. 'u_jis' : 'euc_jp',
  186. # euc_kr codec
  187. 'euckr' : 'euc_kr',
  188. 'korean' : 'euc_kr',
  189. 'ksc5601' : 'euc_kr',
  190. 'ks_c_5601' : 'euc_kr',
  191. 'ks_c_5601_1987' : 'euc_kr',
  192. 'ksx1001' : 'euc_kr',
  193. 'ks_x_1001' : 'euc_kr',
  194. # gb18030 codec
  195. 'gb18030_2000' : 'gb18030',
  196. # gb2312 codec
  197. 'chinese' : 'gb2312',
  198. 'csiso58gb231280' : 'gb2312',
  199. 'euc_cn' : 'gb2312',
  200. 'euccn' : 'gb2312',
  201. 'eucgb2312_cn' : 'gb2312',
  202. 'gb2312_1980' : 'gb2312',
  203. 'gb2312_80' : 'gb2312',
  204. 'iso_ir_58' : 'gb2312',
  205. # gbk codec
  206. '936' : 'gbk',
  207. 'cp936' : 'gbk',
  208. 'ms936' : 'gbk',
  209. # hex_codec codec
  210. 'hex' : 'hex_codec',
  211. # hp_roman8 codec
  212. 'roman8' : 'hp_roman8',
  213. 'r8' : 'hp_roman8',
  214. 'csHPRoman8' : 'hp_roman8',
  215. 'cp1051' : 'hp_roman8',
  216. 'ibm1051' : 'hp_roman8',
  217. # hz codec
  218. 'hzgb' : 'hz',
  219. 'hz_gb' : 'hz',
  220. 'hz_gb_2312' : 'hz',
  221. # iso2022_jp codec
  222. 'csiso2022jp' : 'iso2022_jp',
  223. 'iso2022jp' : 'iso2022_jp',
  224. 'iso_2022_jp' : 'iso2022_jp',
  225. # iso2022_jp_1 codec
  226. 'iso2022jp_1' : 'iso2022_jp_1',
  227. 'iso_2022_jp_1' : 'iso2022_jp_1',
  228. # iso2022_jp_2 codec
  229. 'iso2022jp_2' : 'iso2022_jp_2',
  230. 'iso_2022_jp_2' : 'iso2022_jp_2',
  231. # iso2022_jp_2004 codec
  232. 'iso_2022_jp_2004' : 'iso2022_jp_2004',
  233. 'iso2022jp_2004' : 'iso2022_jp_2004',
  234. # iso2022_jp_3 codec
  235. 'iso2022jp_3' : 'iso2022_jp_3',
  236. 'iso_2022_jp_3' : 'iso2022_jp_3',
  237. # iso2022_jp_ext codec
  238. 'iso2022jp_ext' : 'iso2022_jp_ext',
  239. 'iso_2022_jp_ext' : 'iso2022_jp_ext',
  240. # iso2022_kr codec
  241. 'csiso2022kr' : 'iso2022_kr',
  242. 'iso2022kr' : 'iso2022_kr',
  243. 'iso_2022_kr' : 'iso2022_kr',
  244. # iso8859_10 codec
  245. 'csisolatin6' : 'iso8859_10',
  246. 'iso_8859_10' : 'iso8859_10',
  247. 'iso_8859_10_1992' : 'iso8859_10',
  248. 'iso_ir_157' : 'iso8859_10',
  249. 'l6' : 'iso8859_10',
  250. 'latin6' : 'iso8859_10',
  251. # iso8859_11 codec
  252. 'thai' : 'iso8859_11',
  253. 'iso_8859_11' : 'iso8859_11',
  254. 'iso_8859_11_2001' : 'iso8859_11',
  255. # iso8859_13 codec
  256. 'iso_8859_13' : 'iso8859_13',
  257. 'l7' : 'iso8859_13',
  258. 'latin7' : 'iso8859_13',
  259. # iso8859_14 codec
  260. 'iso_8859_14' : 'iso8859_14',
  261. 'iso_8859_14_1998' : 'iso8859_14',
  262. 'iso_celtic' : 'iso8859_14',
  263. 'iso_ir_199' : 'iso8859_14',
  264. 'l8' : 'iso8859_14',
  265. 'latin8' : 'iso8859_14',
  266. # iso8859_15 codec
  267. 'iso_8859_15' : 'iso8859_15',
  268. 'l9' : 'iso8859_15',
  269. 'latin9' : 'iso8859_15',
  270. # iso8859_16 codec
  271. 'iso_8859_16' : 'iso8859_16',
  272. 'iso_8859_16_2001' : 'iso8859_16',
  273. 'iso_ir_226' : 'iso8859_16',
  274. 'l10' : 'iso8859_16',
  275. 'latin10' : 'iso8859_16',
  276. # iso8859_2 codec
  277. 'csisolatin2' : 'iso8859_2',
  278. 'iso_8859_2' : 'iso8859_2',
  279. 'iso_8859_2_1987' : 'iso8859_2',
  280. 'iso_ir_101' : 'iso8859_2',
  281. 'l2' : 'iso8859_2',
  282. 'latin2' : 'iso8859_2',
  283. # iso8859_3 codec
  284. 'csisolatin3' : 'iso8859_3',
  285. 'iso_8859_3' : 'iso8859_3',
  286. 'iso_8859_3_1988' : 'iso8859_3',
  287. 'iso_ir_109' : 'iso8859_3',
  288. 'l3' : 'iso8859_3',
  289. 'latin3' : 'iso8859_3',
  290. # iso8859_4 codec
  291. 'csisolatin4' : 'iso8859_4',
  292. 'iso_8859_4' : 'iso8859_4',
  293. 'iso_8859_4_1988' : 'iso8859_4',
  294. 'iso_ir_110' : 'iso8859_4',
  295. 'l4' : 'iso8859_4',
  296. 'latin4' : 'iso8859_4',
  297. # iso8859_5 codec
  298. 'csisolatincyrillic' : 'iso8859_5',
  299. 'cyrillic' : 'iso8859_5',
  300. 'iso_8859_5' : 'iso8859_5',
  301. 'iso_8859_5_1988' : 'iso8859_5',
  302. 'iso_ir_144' : 'iso8859_5',
  303. # iso8859_6 codec
  304. 'arabic' : 'iso8859_6',
  305. 'asmo_708' : 'iso8859_6',
  306. 'csisolatinarabic' : 'iso8859_6',
  307. 'ecma_114' : 'iso8859_6',
  308. 'iso_8859_6' : 'iso8859_6',
  309. 'iso_8859_6_1987' : 'iso8859_6',
  310. 'iso_ir_127' : 'iso8859_6',
  311. # iso8859_7 codec
  312. 'csisolatingreek' : 'iso8859_7',
  313. 'ecma_118' : 'iso8859_7',
  314. 'elot_928' : 'iso8859_7',
  315. 'greek' : 'iso8859_7',
  316. 'greek8' : 'iso8859_7',
  317. 'iso_8859_7' : 'iso8859_7',
  318. 'iso_8859_7_1987' : 'iso8859_7',
  319. 'iso_ir_126' : 'iso8859_7',
  320. # iso8859_8 codec
  321. 'csisolatinhebrew' : 'iso8859_8',
  322. 'hebrew' : 'iso8859_8',
  323. 'iso_8859_8' : 'iso8859_8',
  324. 'iso_8859_8_1988' : 'iso8859_8',
  325. 'iso_ir_138' : 'iso8859_8',
  326. # iso8859_9 codec
  327. 'csisolatin5' : 'iso8859_9',
  328. 'iso_8859_9' : 'iso8859_9',
  329. 'iso_8859_9_1989' : 'iso8859_9',
  330. 'iso_ir_148' : 'iso8859_9',
  331. 'l5' : 'iso8859_9',
  332. 'latin5' : 'iso8859_9',
  333. # johab codec
  334. 'cp1361' : 'johab',
  335. 'ms1361' : 'johab',
  336. # koi8_r codec
  337. 'cskoi8r' : 'koi8_r',
  338. # kz1048 codec
  339. 'kz_1048' : 'kz1048',
  340. 'rk1048' : 'kz1048',
  341. 'strk1048_2002' : 'kz1048',
  342. # latin_1 codec
  343. #
  344. # Note that the latin_1 codec is implemented internally in C and a
  345. # lot faster than the charmap codec iso8859_1 which uses the same
  346. # encoding. This is why we discourage the use of the iso8859_1
  347. # codec and alias it to latin_1 instead.
  348. #
  349. '8859' : 'latin_1',
  350. 'cp819' : 'latin_1',
  351. 'csisolatin1' : 'latin_1',
  352. 'ibm819' : 'latin_1',
  353. 'iso8859' : 'latin_1',
  354. 'iso8859_1' : 'latin_1',
  355. 'iso_8859_1' : 'latin_1',
  356. 'iso_8859_1_1987' : 'latin_1',
  357. 'iso_ir_100' : 'latin_1',
  358. 'l1' : 'latin_1',
  359. 'latin' : 'latin_1',
  360. 'latin1' : 'latin_1',
  361. # mac_cyrillic codec
  362. 'maccyrillic' : 'mac_cyrillic',
  363. # mac_greek codec
  364. 'macgreek' : 'mac_greek',
  365. # mac_iceland codec
  366. 'maciceland' : 'mac_iceland',
  367. # mac_latin2 codec
  368. 'maccentraleurope' : 'mac_latin2',
  369. 'mac_centeuro' : 'mac_latin2',
  370. 'maclatin2' : 'mac_latin2',
  371. # mac_roman codec
  372. 'macintosh' : 'mac_roman',
  373. 'macroman' : 'mac_roman',
  374. # mac_turkish codec
  375. 'macturkish' : 'mac_turkish',
  376. # mbcs codec
  377. 'ansi' : 'mbcs',
  378. 'dbcs' : 'mbcs',
  379. # ptcp154 codec
  380. 'csptcp154' : 'ptcp154',
  381. 'pt154' : 'ptcp154',
  382. 'cp154' : 'ptcp154',
  383. 'cyrillic_asian' : 'ptcp154',
  384. # quopri_codec codec
  385. 'quopri' : 'quopri_codec',
  386. 'quoted_printable' : 'quopri_codec',
  387. 'quotedprintable' : 'quopri_codec',
  388. # rot_13 codec
  389. 'rot13' : 'rot_13',
  390. # shift_jis codec
  391. 'csshiftjis' : 'shift_jis',
  392. 'shiftjis' : 'shift_jis',
  393. 'sjis' : 'shift_jis',
  394. 's_jis' : 'shift_jis',
  395. # shift_jis_2004 codec
  396. 'shiftjis2004' : 'shift_jis_2004',
  397. 'sjis_2004' : 'shift_jis_2004',
  398. 's_jis_2004' : 'shift_jis_2004',
  399. # shift_jisx0213 codec
  400. 'shiftjisx0213' : 'shift_jisx0213',
  401. 'sjisx0213' : 'shift_jisx0213',
  402. 's_jisx0213' : 'shift_jisx0213',
  403. # tis_620 codec
  404. 'tis620' : 'tis_620',
  405. 'tis_620_0' : 'tis_620',
  406. 'tis_620_2529_0' : 'tis_620',
  407. 'tis_620_2529_1' : 'tis_620',
  408. 'iso_ir_166' : 'tis_620',
  409. # utf_16 codec
  410. 'u16' : 'utf_16',
  411. 'utf16' : 'utf_16',
  412. # utf_16_be codec
  413. 'unicodebigunmarked' : 'utf_16_be',
  414. 'utf_16be' : 'utf_16_be',
  415. # utf_16_le codec
  416. 'unicodelittleunmarked' : 'utf_16_le',
  417. 'utf_16le' : 'utf_16_le',
  418. # utf_32 codec
  419. 'u32' : 'utf_32',
  420. 'utf32' : 'utf_32',
  421. # utf_32_be codec
  422. 'utf_32be' : 'utf_32_be',
  423. # utf_32_le codec
  424. 'utf_32le' : 'utf_32_le',
  425. # utf_7 codec
  426. 'u7' : 'utf_7',
  427. 'utf7' : 'utf_7',
  428. 'unicode_1_1_utf_7' : 'utf_7',
  429. # utf_8 codec
  430. 'u8' : 'utf_8',
  431. 'utf' : 'utf_8',
  432. 'utf8' : 'utf_8',
  433. 'utf8_ucs2' : 'utf_8',
  434. 'utf8_ucs4' : 'utf_8',
  435. 'cp65001' : 'utf_8',
  436. # uu_codec codec
  437. 'uu' : 'uu_codec',
  438. # zlib_codec codec
  439. 'zip' : 'zlib_codec',
  440. 'zlib' : 'zlib_codec',
  441. # temporary mac CJK aliases, will be replaced by proper codecs in 3.1
  442. 'x_mac_japanese' : 'shift_jis',
  443. 'x_mac_korean' : 'euc_kr',
  444. 'x_mac_simp_chinese' : 'gb2312',
  445. 'x_mac_trad_chinese' : 'big5',
  446. }