test_idna.py 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293
  1. #!/usr/bin/env python
  2. import unittest
  3. import idna
  4. class IDNATests(unittest.TestCase):
  5. def setUp(self):
  6. self.tld_strings = [
  7. ["\u6d4b\u8bd5", b"xn--0zwm56d"],
  8. ["\u092a\u0930\u0940\u0915\u094d\u0937\u093e", b"xn--11b5bs3a9aj6g"],
  9. ["\ud55c\uad6d", b"xn--3e0b707e"],
  10. ["\u09ad\u09be\u09b0\u09a4", b"xn--45brj9c"],
  11. ["\u09ac\u09be\u0982\u09b2\u09be", b"xn--54b7fta0cc"],
  12. [
  13. "\u0438\u0441\u043f\u044b\u0442\u0430\u043d\u0438\u0435",
  14. b"xn--80akhbyknj4f",
  15. ],
  16. ["\u0441\u0440\u0431", b"xn--90a3ac"],
  17. ["\ud14c\uc2a4\ud2b8", b"xn--9t4b11yi5a"],
  18. [
  19. "\u0b9a\u0bbf\u0b99\u0bcd\u0b95\u0baa\u0bcd\u0baa\u0bc2\u0bb0\u0bcd",
  20. b"xn--clchc0ea0b2g2a9gcd",
  21. ],
  22. ["\u05d8\u05e2\u05e1\u05d8", b"xn--deba0ad"],
  23. ["\u4e2d\u56fd", b"xn--fiqs8s"],
  24. ["\u4e2d\u570b", b"xn--fiqz9s"],
  25. ["\u0c2d\u0c3e\u0c30\u0c24\u0c4d", b"xn--fpcrj9c3d"],
  26. ["\u0dbd\u0d82\u0d9a\u0dcf", b"xn--fzc2c9e2c"],
  27. ["\u6e2c\u8a66", b"xn--g6w251d"],
  28. ["\u0aad\u0abe\u0ab0\u0aa4", b"xn--gecrj9c"],
  29. ["\u092d\u093e\u0930\u0924", b"xn--h2brj9c"],
  30. ["\u0622\u0632\u0645\u0627\u06cc\u0634\u06cc", b"xn--hgbk6aj7f53bba"],
  31. ["\u0baa\u0bb0\u0bbf\u0b9f\u0bcd\u0b9a\u0bc8", b"xn--hlcj6aya9esc7a"],
  32. ["\u0443\u043a\u0440", b"xn--j1amh"],
  33. ["\u9999\u6e2f", b"xn--j6w193g"],
  34. ["\u03b4\u03bf\u03ba\u03b9\u03bc\u03ae", b"xn--jxalpdlp"],
  35. ["\u0625\u062e\u062a\u0628\u0627\u0631", b"xn--kgbechtv"],
  36. ["\u53f0\u6e7e", b"xn--kprw13d"],
  37. ["\u53f0\u7063", b"xn--kpry57d"],
  38. ["\u0627\u0644\u062c\u0632\u0627\u0626\u0631", b"xn--lgbbat1ad8j"],
  39. ["\u0639\u0645\u0627\u0646", b"xn--mgb9awbf"],
  40. ["\u0627\u06cc\u0631\u0627\u0646", b"xn--mgba3a4f16a"],
  41. ["\u0627\u0645\u0627\u0631\u0627\u062a", b"xn--mgbaam7a8h"],
  42. ["\u067e\u0627\u06a9\u0633\u062a\u0627\u0646", b"xn--mgbai9azgqp6j"],
  43. ["\u0627\u0644\u0627\u0631\u062f\u0646", b"xn--mgbayh7gpa"],
  44. ["\u0628\u06be\u0627\u0631\u062a", b"xn--mgbbh1a71e"],
  45. ["\u0627\u0644\u0645\u063a\u0631\u0628", b"xn--mgbc0a9azcg"],
  46. ["\u0627\u0644\u0633\u0639\u0648\u062f\u064a\u0629", b"xn--mgberp4a5d4ar"],
  47. ["\u10d2\u10d4", b"xn--node"],
  48. ["\u0e44\u0e17\u0e22", b"xn--o3cw4h"],
  49. ["\u0633\u0648\u0631\u064a\u0629", b"xn--ogbpf8fl"],
  50. ["\u0440\u0444", b"xn--p1ai"],
  51. ["\u062a\u0648\u0646\u0633", b"xn--pgbs0dh"],
  52. ["\u0a2d\u0a3e\u0a30\u0a24", b"xn--s9brj9c"],
  53. ["\u0645\u0635\u0631", b"xn--wgbh1c"],
  54. ["\u0642\u0637\u0631", b"xn--wgbl6a"],
  55. ["\u0b87\u0bb2\u0b99\u0bcd\u0b95\u0bc8", b"xn--xkc2al3hye2a"],
  56. ["\u0b87\u0ba8\u0bcd\u0ba4\u0bbf\u0baf\u0bbe", b"xn--xkc2dl3a5ee0h"],
  57. ["\u65b0\u52a0\u5761", b"xn--yfro4i67o"],
  58. ["\u0641\u0644\u0633\u0637\u064a\u0646", b"xn--ygbi2ammx"],
  59. ["\u30c6\u30b9\u30c8", b"xn--zckzah"],
  60. ["\u049b\u0430\u0437", b"xn--80ao21a"],
  61. ["\u0645\u0644\u064a\u0633\u064a\u0627", b"xn--mgbx4cd0ab"],
  62. ["\u043c\u043e\u043d", b"xn--l1acc"],
  63. ["\u0633\u0648\u062f\u0627\u0646", b"xn--mgbpl2fh"],
  64. ]
  65. def testIDNTLDALabels(self):
  66. for ulabel, alabel in self.tld_strings:
  67. self.assertEqual(alabel, idna.alabel(ulabel))
  68. def testIDNTLDULabels(self):
  69. for ulabel, alabel in self.tld_strings:
  70. self.assertEqual(ulabel, idna.ulabel(alabel))
  71. def test_valid_label_length(self):
  72. self.assertTrue(idna.valid_label_length("a" * 63))
  73. self.assertFalse(idna.valid_label_length("a" * 64))
  74. self.assertRaises(idna.IDNAError, idna.encode, "a" * 64)
  75. def test_check_bidi(self):
  76. la = "\u0061"
  77. r = "\u05d0"
  78. al = "\u0627"
  79. an = "\u0660"
  80. en = "\u0030"
  81. es = "\u002d"
  82. cs = "\u002c"
  83. et = "\u0024"
  84. on = "\u0021"
  85. bn = "\u200c"
  86. nsm = "\u0610"
  87. ws = "\u0020"
  88. # RFC 5893 Rule 1
  89. self.assertTrue(idna.check_bidi(la))
  90. self.assertTrue(idna.check_bidi(r))
  91. self.assertTrue(idna.check_bidi(al))
  92. self.assertRaises(idna.IDNABidiError, idna.check_bidi, an)
  93. # RFC 5893 Rule 2
  94. self.assertTrue(idna.check_bidi(r + al))
  95. self.assertTrue(idna.check_bidi(r + al))
  96. self.assertTrue(idna.check_bidi(r + an))
  97. self.assertTrue(idna.check_bidi(r + en))
  98. self.assertTrue(idna.check_bidi(r + es + al))
  99. self.assertTrue(idna.check_bidi(r + cs + al))
  100. self.assertTrue(idna.check_bidi(r + et + al))
  101. self.assertTrue(idna.check_bidi(r + on + al))
  102. self.assertTrue(idna.check_bidi(r + bn + al))
  103. self.assertTrue(idna.check_bidi(r + nsm))
  104. self.assertRaises(idna.IDNABidiError, idna.check_bidi, r + la)
  105. self.assertRaises(idna.IDNABidiError, idna.check_bidi, r + ws)
  106. # RFC 5893 Rule 3
  107. self.assertTrue(idna.check_bidi(r + al))
  108. self.assertTrue(idna.check_bidi(r + en))
  109. self.assertTrue(idna.check_bidi(r + an))
  110. self.assertTrue(idna.check_bidi(r + nsm))
  111. self.assertTrue(idna.check_bidi(r + nsm + nsm))
  112. self.assertRaises(idna.IDNABidiError, idna.check_bidi, r + on)
  113. # RFC 5893 Rule 4
  114. self.assertTrue(idna.check_bidi(r + en))
  115. self.assertTrue(idna.check_bidi(r + an))
  116. self.assertRaises(idna.IDNABidiError, idna.check_bidi, r + en + an)
  117. self.assertRaises(idna.IDNABidiError, idna.check_bidi, r + an + en)
  118. # RFC 5893 Rule 5
  119. self.assertTrue(idna.check_bidi(la + en, check_ltr=True))
  120. self.assertTrue(idna.check_bidi(la + es + la, check_ltr=True))
  121. self.assertTrue(idna.check_bidi(la + cs + la, check_ltr=True))
  122. self.assertTrue(idna.check_bidi(la + et + la, check_ltr=True))
  123. self.assertTrue(idna.check_bidi(la + on + la, check_ltr=True))
  124. self.assertTrue(idna.check_bidi(la + bn + la, check_ltr=True))
  125. self.assertTrue(idna.check_bidi(la + nsm, check_ltr=True))
  126. # RFC 5893 Rule 6
  127. self.assertTrue(idna.check_bidi(la + la, check_ltr=True))
  128. self.assertTrue(idna.check_bidi(la + en, check_ltr=True))
  129. self.assertTrue(idna.check_bidi(la + en + nsm, check_ltr=True))
  130. self.assertTrue(idna.check_bidi(la + en + nsm + nsm, check_ltr=True))
  131. self.assertRaises(idna.IDNABidiError, idna.check_bidi, la + cs, check_ltr=True)
  132. def test_check_initial_combiner(self):
  133. m = "\u0300"
  134. a = "\u0061"
  135. self.assertTrue(idna.check_initial_combiner(a))
  136. self.assertTrue(idna.check_initial_combiner(a + m))
  137. self.assertRaises(idna.IDNAError, idna.check_initial_combiner, m + a)
  138. def test_check_hyphen_ok(self):
  139. self.assertTrue(idna.check_hyphen_ok("abc"))
  140. self.assertTrue(idna.check_hyphen_ok("a--b"))
  141. self.assertRaises(idna.IDNAError, idna.check_hyphen_ok, "aa--")
  142. self.assertRaises(idna.IDNAError, idna.check_hyphen_ok, "a-")
  143. self.assertRaises(idna.IDNAError, idna.check_hyphen_ok, "-a")
  144. def test_valid_contextj(self):
  145. zwnj = "\u200c"
  146. zwj = "\u200d"
  147. virama = "\u094d"
  148. latin = "\u0061"
  149. # RFC 5892 Appendix A.1 (Zero Width Non-Joiner)
  150. self.assertFalse(idna.valid_contextj(zwnj, 0))
  151. self.assertFalse(idna.valid_contextj(latin + zwnj, 1)) # No preceding Virama
  152. self.assertTrue(idna.valid_contextj(virama + zwnj, 1)) # Preceding Virama
  153. # RFC 5892 Appendix A.2 (Zero Width Joiner)
  154. self.assertFalse(idna.valid_contextj(zwj, 0))
  155. self.assertFalse(idna.valid_contextj(latin + zwj, 1)) # No preceding Virama
  156. self.assertTrue(idna.valid_contextj(virama + zwj, 1)) # Preceding Virama
  157. def test_valid_contexto(self):
  158. latin = "\u0061"
  159. latin_l = "\u006c"
  160. greek = "\u03b1"
  161. hebrew = "\u05d0"
  162. katakana = "\u30a1"
  163. hiragana = "\u3041"
  164. han = "\u6f22"
  165. arabic_digit = "\u0660"
  166. ext_arabic_digit = "\u06f0"
  167. # RFC 5892 Rule A.3 (Middle Dot)
  168. latin_middle_dot = "\u00b7"
  169. self.assertTrue(idna.valid_contexto(latin_l + latin_middle_dot + latin_l, 1))
  170. self.assertFalse(idna.valid_contexto(latin_middle_dot + latin_l, 1))
  171. self.assertFalse(idna.valid_contexto(latin_l + latin_middle_dot, 0))
  172. self.assertFalse(idna.valid_contexto(latin_middle_dot, 0))
  173. self.assertFalse(idna.valid_contexto(latin_l + latin_middle_dot + latin, 1))
  174. # RFC 5892 Rule A.4 (Greek Lower Numeral Sign)
  175. glns = "\u0375"
  176. self.assertTrue(idna.valid_contexto(glns + greek, 0))
  177. self.assertFalse(idna.valid_contexto(glns + latin, 0))
  178. self.assertFalse(idna.valid_contexto(glns, 0))
  179. self.assertFalse(idna.valid_contexto(greek + glns, 1))
  180. # RFC 5892 Rule A.5 (Hebrew Punctuation Geresh)
  181. geresh = "\u05f3"
  182. self.assertTrue(idna.valid_contexto(hebrew + geresh, 1))
  183. self.assertFalse(idna.valid_contexto(latin + geresh, 1))
  184. # RFC 5892 Rule A.6 (Hebrew Punctuation Gershayim)
  185. gershayim = "\u05f4"
  186. self.assertTrue(idna.valid_contexto(hebrew + gershayim, 1))
  187. self.assertFalse(idna.valid_contexto(latin + gershayim, 1))
  188. # RFC 5892 Rule A.7 (Katakana Middle Dot)
  189. ja_middle_dot = "\u30fb"
  190. self.assertTrue(idna.valid_contexto(katakana + ja_middle_dot + katakana, 1))
  191. self.assertTrue(idna.valid_contexto(hiragana + ja_middle_dot + hiragana, 1))
  192. self.assertTrue(idna.valid_contexto(han + ja_middle_dot + han, 1))
  193. self.assertTrue(idna.valid_contexto(han + ja_middle_dot + latin, 1))
  194. self.assertTrue(idna.valid_contexto("\u6f22\u30fb\u5b57", 1))
  195. self.assertFalse(idna.valid_contexto("\u0061\u30fb\u0061", 1))
  196. # RFC 5892 Rule A.8 (Arabic-Indic Digits)
  197. self.assertTrue(idna.valid_contexto(arabic_digit + arabic_digit, 0))
  198. self.assertFalse(idna.valid_contexto(arabic_digit + ext_arabic_digit, 0))
  199. # RFC 5892 Rule A.9 (Extended Arabic-Indic Digits)
  200. self.assertTrue(idna.valid_contexto(ext_arabic_digit + ext_arabic_digit, 0))
  201. self.assertFalse(idna.valid_contexto(ext_arabic_digit + arabic_digit, 0))
  202. def test_encode(self, encode=None, skip_bytes=False):
  203. if encode is None:
  204. encode = idna.encode
  205. self.assertEqual(encode("xn--zckzah.xn--zckzah"), b"xn--zckzah.xn--zckzah")
  206. self.assertEqual(encode("\u30c6\u30b9\u30c8.xn--zckzah"), b"xn--zckzah.xn--zckzah")
  207. self.assertEqual(encode("\u30c6\u30b9\u30c8.\u30c6\u30b9\u30c8"), b"xn--zckzah.xn--zckzah")
  208. self.assertEqual(encode("abc.abc"), b"abc.abc")
  209. self.assertEqual(encode("xn--zckzah.abc"), b"xn--zckzah.abc")
  210. self.assertEqual(encode("\u30c6\u30b9\u30c8.abc"), b"xn--zckzah.abc")
  211. self.assertEqual(
  212. encode("\u0521\u0525\u0523-\u0523\u0523-----\u0521\u0523\u0523\u0523.aa"),
  213. b"xn---------90gglbagaar.aa",
  214. )
  215. if encode is idna.encode:
  216. self.assertRaises(
  217. idna.IDNAError,
  218. encode,
  219. "\u0521\u0524\u0523-\u0523\u0523-----\u0521\u0523\u0523\u0523.aa",
  220. uts46=False,
  221. )
  222. self.assertEqual(encode("a" * 63), b"a" * 63)
  223. self.assertRaises(idna.IDNAError, encode, "a" * 64)
  224. self.assertRaises(idna.core.InvalidCodepoint, encode, "*")
  225. if not skip_bytes:
  226. self.assertRaises(idna.IDNAError, encode, b"\x0a\x33\x81")
  227. def test_decode(self, decode=None, skip_str=False):
  228. if decode is None:
  229. decode = idna.decode
  230. self.assertEqual(decode(b"xn--zckzah.xn--zckzah"), "\u30c6\u30b9\u30c8.\u30c6\u30b9\u30c8")
  231. self.assertEqual(
  232. decode(b"xn--d1acufc.xn--80akhbyknj4f"),
  233. "\u0434\u043e\u043c\u0435\u043d.\u0438\u0441\u043f\u044b\u0442\u0430\u043d\u0438\u0435",
  234. )
  235. if not skip_str:
  236. self.assertEqual(
  237. decode("\u30c6\u30b9\u30c8.xn--zckzah"),
  238. "\u30c6\u30b9\u30c8.\u30c6\u30b9\u30c8",
  239. )
  240. self.assertEqual(
  241. decode("\u30c6\u30b9\u30c8.\u30c6\u30b9\u30c8"),
  242. "\u30c6\u30b9\u30c8.\u30c6\u30b9\u30c8",
  243. )
  244. self.assertEqual(decode("abc.abc"), "abc.abc")
  245. self.assertEqual(
  246. decode(b"xn---------90gglbagaar.aa"),
  247. "\u0521\u0525\u0523-\u0523\u0523-----\u0521\u0523\u0523\u0523.aa",
  248. )
  249. self.assertRaises(idna.IDNAError, decode, b"XN---------90GGLBAGAAC.AA")
  250. self.assertRaises(idna.IDNAError, decode, b"xn---------90gglbagaac.aa")
  251. self.assertRaises(idna.IDNAError, decode, b"xn--")
  252. self.assertRaises(idna.IDNAError, decode, b"\x8d\xd2")
  253. self.assertRaises(
  254. idna.IDNAError,
  255. decode,
  256. b"A.A.0.a.a.A.0.a.A.A.0.a.A.0A.2.a.A.A.0.a.A.0.A.a.A0.a.a.A.0.a.fB.A.A.a.A.A.B.A.A.a.A.A.B.A.A.a.A.A.0.a.A.a.a.A.A.0.a.A.0.A.a.A0.a.a.A.0.a.fB.A.A.a.A.A.B.0A.A.a.A.A.B.A.A.a.A.A.a.A.A.B.A.A.a.A.0.a.B.A.A.a.A.B.A.a.A.A.5.a.A.0.a.Ba.A.B.A.A.a.A.0.a.Xn--B.A.A.A.a",
  257. )
  258. self.assertRaises(idna.IDNAError, decode, b"xn--ukba655qaaaa14431eeaaba.c")
  259. if __name__ == "__main__":
  260. unittest.main()