test_idna.py 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275
  1. #!/usr/bin/env python
  2. import unittest
  3. import idna
  class IDNATests(unittest.TestCase):
      """Unit tests for the label and domain routines exposed by ``idna``.

      Covers U-label/A-label conversion, the individual validity checks
      (label length, Bidi, initial combiner, hyphens, CONTEXTJ, CONTEXTO)
      and whole-domain ``encode``/``decode``.
      """

      def setUp(self):
          """Build the ``[ulabel, alabel]`` fixture used by the TLD tests.

          Each entry pairs a Unicode ``str`` label with the ``bytes`` value
          the conversion tests compare it against.
          """
          self.tld_strings = [
              ['\u6d4b\u8bd5', b'xn--0zwm56d'],
              ['\u092a\u0930\u0940\u0915\u094d\u0937\u093e', b'xn--11b5bs3a9aj6g'],
              ['\ud55c\uad6d', b'xn--3e0b707e'],
              ['\u09ad\u09be\u09b0\u09a4', b'xn--45brj9c'],
              ['\u09ac\u09be\u0982\u09b2\u09be', b'xn--54b7fta0cc'],
              ['\u0438\u0441\u043f\u044b\u0442\u0430\u043d\u0438\u0435', b'xn--80akhbyknj4f'],
              ['\u0441\u0440\u0431', b'xn--90a3ac'],
              ['\ud14c\uc2a4\ud2b8', b'xn--9t4b11yi5a'],
              ['\u0b9a\u0bbf\u0b99\u0bcd\u0b95\u0baa\u0bcd\u0baa\u0bc2\u0bb0\u0bcd', b'xn--clchc0ea0b2g2a9gcd'],
              ['\u05d8\u05e2\u05e1\u05d8', b'xn--deba0ad'],
              ['\u4e2d\u56fd', b'xn--fiqs8s'],
              ['\u4e2d\u570b', b'xn--fiqz9s'],
              ['\u0c2d\u0c3e\u0c30\u0c24\u0c4d', b'xn--fpcrj9c3d'],
              ['\u0dbd\u0d82\u0d9a\u0dcf', b'xn--fzc2c9e2c'],
              ['\u6e2c\u8a66', b'xn--g6w251d'],
              ['\u0aad\u0abe\u0ab0\u0aa4', b'xn--gecrj9c'],
              ['\u092d\u093e\u0930\u0924', b'xn--h2brj9c'],
              ['\u0622\u0632\u0645\u0627\u06cc\u0634\u06cc', b'xn--hgbk6aj7f53bba'],
              ['\u0baa\u0bb0\u0bbf\u0b9f\u0bcd\u0b9a\u0bc8', b'xn--hlcj6aya9esc7a'],
              ['\u0443\u043a\u0440', b'xn--j1amh'],
              ['\u9999\u6e2f', b'xn--j6w193g'],
              ['\u03b4\u03bf\u03ba\u03b9\u03bc\u03ae', b'xn--jxalpdlp'],
              ['\u0625\u062e\u062a\u0628\u0627\u0631', b'xn--kgbechtv'],
              ['\u53f0\u6e7e', b'xn--kprw13d'],
              ['\u53f0\u7063', b'xn--kpry57d'],
              ['\u0627\u0644\u062c\u0632\u0627\u0626\u0631', b'xn--lgbbat1ad8j'],
              ['\u0639\u0645\u0627\u0646', b'xn--mgb9awbf'],
              ['\u0627\u06cc\u0631\u0627\u0646', b'xn--mgba3a4f16a'],
              ['\u0627\u0645\u0627\u0631\u0627\u062a', b'xn--mgbaam7a8h'],
              ['\u067e\u0627\u06a9\u0633\u062a\u0627\u0646', b'xn--mgbai9azgqp6j'],
              ['\u0627\u0644\u0627\u0631\u062f\u0646', b'xn--mgbayh7gpa'],
              ['\u0628\u06be\u0627\u0631\u062a', b'xn--mgbbh1a71e'],
              ['\u0627\u0644\u0645\u063a\u0631\u0628', b'xn--mgbc0a9azcg'],
              ['\u0627\u0644\u0633\u0639\u0648\u062f\u064a\u0629', b'xn--mgberp4a5d4ar'],
              ['\u10d2\u10d4', b'xn--node'],
              ['\u0e44\u0e17\u0e22', b'xn--o3cw4h'],
              ['\u0633\u0648\u0631\u064a\u0629', b'xn--ogbpf8fl'],
              ['\u0440\u0444', b'xn--p1ai'],
              ['\u062a\u0648\u0646\u0633', b'xn--pgbs0dh'],
              ['\u0a2d\u0a3e\u0a30\u0a24', b'xn--s9brj9c'],
              ['\u0645\u0635\u0631', b'xn--wgbh1c'],
              ['\u0642\u0637\u0631', b'xn--wgbl6a'],
              ['\u0b87\u0bb2\u0b99\u0bcd\u0b95\u0bc8', b'xn--xkc2al3hye2a'],
              ['\u0b87\u0ba8\u0bcd\u0ba4\u0bbf\u0baf\u0bbe', b'xn--xkc2dl3a5ee0h'],
              ['\u65b0\u52a0\u5761', b'xn--yfro4i67o'],
              ['\u0641\u0644\u0633\u0637\u064a\u0646', b'xn--ygbi2ammx'],
              ['\u30c6\u30b9\u30c8', b'xn--zckzah'],
              ['\u049b\u0430\u0437', b'xn--80ao21a'],
              ['\u0645\u0644\u064a\u0633\u064a\u0627', b'xn--mgbx4cd0ab'],
              ['\u043c\u043e\u043d', b'xn--l1acc'],
              ['\u0633\u0648\u062f\u0627\u0646', b'xn--mgbpl2fh'],
          ]

      def testIDNTLDALabels(self):
          """Check that every fixture U-label converts to its A-label.

          Each pair runs inside its own ``subTest`` so that a failure names
          the offending label and the remaining pairs are still checked,
          instead of the loop stopping at the first mismatch.
          """
          for ulabel, alabel in self.tld_strings:
              with self.subTest(ulabel=ulabel, alabel=alabel):
                  self.assertEqual(alabel, idna.alabel(ulabel))

      def testIDNTLDULabels(self):
          """Check that every fixture A-label converts back to its U-label.

          Uses ``subTest`` per pair for the same reason as the A-label test:
          one bad entry must not hide the results for the others.
          """
          for ulabel, alabel in self.tld_strings:
              with self.subTest(ulabel=ulabel, alabel=alabel):
                  self.assertEqual(ulabel, idna.ulabel(alabel))

      def test_valid_label_length(self):
          """A 63-character label is valid; a 64-character one is rejected."""
          self.assertTrue(idna.valid_label_length('a' * 63))
          self.assertFalse(idna.valid_label_length('a' * 64))
          # The over-long label must also be refused by the full encoder.
          self.assertRaises(idna.IDNAError, idna.encode, 'a' * 64)

      def test_check_bidi(self):
          """Exercise ``idna.check_bidi`` rule by rule (RFC 5893).

          Each local below holds one code point and is named after the
          Unicode Bidi class it is meant to represent; labels are built by
          concatenating them. Valid labels must return a true value, invalid
          ones must raise ``idna.IDNABidiError``.
          """
          # ``ltr`` rather than ``l``: a bare ``l`` is easily misread as ``1``.
          ltr = '\u0061'
          r = '\u05d0'
          al = '\u0627'
          an = '\u0660'
          en = '\u0030'
          es = '\u002d'
          cs = '\u002c'
          et = '\u0024'
          on = '\u0021'
          bn = '\u200c'
          nsm = '\u0610'
          ws = '\u0020'

          # RFC 5893 Rule 1
          self.assertTrue(idna.check_bidi(ltr))
          self.assertTrue(idna.check_bidi(r))
          self.assertTrue(idna.check_bidi(al))
          self.assertRaises(idna.IDNABidiError, idna.check_bidi, an)

          # RFC 5893 Rule 2
          self.assertTrue(idna.check_bidi(r + al))
          # R following R, so that every class listed by Rule 2 is covered.
          self.assertTrue(idna.check_bidi(r + r))
          self.assertTrue(idna.check_bidi(r + an))
          self.assertTrue(idna.check_bidi(r + en))
          self.assertTrue(idna.check_bidi(r + es + al))
          self.assertTrue(idna.check_bidi(r + cs + al))
          self.assertTrue(idna.check_bidi(r + et + al))
          self.assertTrue(idna.check_bidi(r + on + al))
          self.assertTrue(idna.check_bidi(r + bn + al))
          self.assertTrue(idna.check_bidi(r + nsm))
          self.assertRaises(idna.IDNABidiError, idna.check_bidi, r + ltr)
          self.assertRaises(idna.IDNABidiError, idna.check_bidi, r + ws)

          # RFC 5893 Rule 3
          self.assertTrue(idna.check_bidi(r + al))
          self.assertTrue(idna.check_bidi(r + en))
          self.assertTrue(idna.check_bidi(r + an))
          self.assertTrue(idna.check_bidi(r + nsm))
          self.assertTrue(idna.check_bidi(r + nsm + nsm))
          self.assertRaises(idna.IDNABidiError, idna.check_bidi, r + on)

          # RFC 5893 Rule 4
          self.assertTrue(idna.check_bidi(r + en))
          self.assertTrue(idna.check_bidi(r + an))
          self.assertRaises(idna.IDNABidiError, idna.check_bidi, r + en + an)
          self.assertRaises(idna.IDNABidiError, idna.check_bidi, r + an + en)

          # RFC 5893 Rule 5
          # ``check_ltr=True`` is passed for every left-to-right label below.
          self.assertTrue(idna.check_bidi(ltr + en, check_ltr=True))
          self.assertTrue(idna.check_bidi(ltr + es + ltr, check_ltr=True))
          self.assertTrue(idna.check_bidi(ltr + cs + ltr, check_ltr=True))
          self.assertTrue(idna.check_bidi(ltr + et + ltr, check_ltr=True))
          self.assertTrue(idna.check_bidi(ltr + on + ltr, check_ltr=True))
          self.assertTrue(idna.check_bidi(ltr + bn + ltr, check_ltr=True))
          self.assertTrue(idna.check_bidi(ltr + nsm, check_ltr=True))

          # RFC 5893 Rule 6
          self.assertTrue(idna.check_bidi(ltr + ltr, check_ltr=True))
          self.assertTrue(idna.check_bidi(ltr + en, check_ltr=True))
          self.assertTrue(idna.check_bidi(ltr + en + nsm, check_ltr=True))
          self.assertTrue(idna.check_bidi(ltr + en + nsm + nsm, check_ltr=True))
          self.assertRaises(idna.IDNABidiError, idna.check_bidi, ltr + cs, check_ltr=True)

      def test_check_initial_combiner(self):
          """U+0300 may follow a letter but must not be the first character."""
          m = '\u0300'
          a = '\u0061'
          self.assertTrue(idna.check_initial_combiner(a))
          self.assertTrue(idna.check_initial_combiner(a + m))
          self.assertRaises(idna.IDNAError, idna.check_initial_combiner, m + a)

      def test_check_hyphen_ok(self):
          """Hyphens are accepted mid-label; several placements must raise."""
          self.assertTrue(idna.check_hyphen_ok('abc'))
          self.assertTrue(idna.check_hyphen_ok('a--b'))
          # Hyphens in positions 3-4, a trailing hyphen and a leading hyphen.
          self.assertRaises(idna.IDNAError, idna.check_hyphen_ok, 'aa--')
          self.assertRaises(idna.IDNAError, idna.check_hyphen_ok, 'a-')
          self.assertRaises(idna.IDNAError, idna.check_hyphen_ok, '-a')

      def test_valid_contextj(self):
          """Check the CONTEXTJ rules for ZWNJ and ZWJ (RFC 5892 Appendix A).

          The second argument of ``idna.valid_contextj`` is the index of the
          joiner inside the label.
          """
          zwnj = '\u200c'
          zwj = '\u200d'
          virama = '\u094d'
          latin = '\u0061'

          # RFC 5892 Appendix A.1 (Zero Width Non-Joiner)
          self.assertFalse(idna.valid_contextj(zwnj, 0))
          self.assertFalse(idna.valid_contextj(latin + zwnj, 1))  # No preceding Virama
          self.assertTrue(idna.valid_contextj(virama + zwnj, 1))  # Preceding Virama

          # RFC 5892 Appendix A.2 (Zero Width Joiner)
          self.assertFalse(idna.valid_contextj(zwj, 0))
          self.assertFalse(idna.valid_contextj(latin + zwj, 1))  # No preceding Virama
          self.assertTrue(idna.valid_contextj(virama + zwj, 1))  # Preceding Virama

      def test_valid_contexto(self):
          """Check the CONTEXTO rules A.3 to A.9 of RFC 5892.

          The second argument of ``idna.valid_contexto`` is the index within
          the label of the character being validated.
          """
          latin = '\u0061'
          latin_l = '\u006c'
          greek = '\u03b1'
          hebrew = '\u05d0'
          katakana = '\u30a1'
          hiragana = '\u3041'
          han = '\u6f22'
          arabic_digit = '\u0660'
          ext_arabic_digit = '\u06f0'

          # RFC 5892 Rule A.3 (Middle Dot)
          latin_middle_dot = '\u00b7'
          self.assertTrue(idna.valid_contexto(latin_l + latin_middle_dot + latin_l, 1))
          # NOTE(review): in the next two cases the index points at the letter
          # ``l`` rather than at the middle dot - confirm this is intended.
          self.assertFalse(idna.valid_contexto(latin_middle_dot + latin_l, 1))
          self.assertFalse(idna.valid_contexto(latin_l + latin_middle_dot, 0))
          self.assertFalse(idna.valid_contexto(latin_middle_dot, 0))
          self.assertFalse(idna.valid_contexto(latin_l + latin_middle_dot + latin, 1))

          # RFC 5892 Rule A.4 (Greek Lower Numeral Sign)
          glns = '\u0375'
          self.assertTrue(idna.valid_contexto(glns + greek, 0))
          self.assertFalse(idna.valid_contexto(glns + latin, 0))
          self.assertFalse(idna.valid_contexto(glns, 0))
          self.assertFalse(idna.valid_contexto(greek + glns, 1))

          # RFC 5892 Rule A.5 (Hebrew Punctuation Geresh)
          geresh = '\u05f3'
          self.assertTrue(idna.valid_contexto(hebrew + geresh, 1))
          self.assertFalse(idna.valid_contexto(latin + geresh, 1))

          # RFC 5892 Rule A.6 (Hebrew Punctuation Gershayim)
          gershayim = '\u05f4'
          self.assertTrue(idna.valid_contexto(hebrew + gershayim, 1))
          self.assertFalse(idna.valid_contexto(latin + gershayim, 1))

          # RFC 5892 Rule A.7 (Katakana Middle Dot)
          ja_middle_dot = '\u30fb'
          self.assertTrue(idna.valid_contexto(katakana + ja_middle_dot + katakana, 1))
          self.assertTrue(idna.valid_contexto(hiragana + ja_middle_dot + hiragana, 1))
          self.assertTrue(idna.valid_contexto(han + ja_middle_dot + han, 1))
          self.assertTrue(idna.valid_contexto(han + ja_middle_dot + latin, 1))
          self.assertTrue(idna.valid_contexto('\u6f22\u30fb\u5b57', 1))
          self.assertFalse(idna.valid_contexto('\u0061\u30fb\u0061', 1))

          # RFC 5892 Rule A.8 (Arabic-Indic Digits)
          self.assertTrue(idna.valid_contexto(arabic_digit + arabic_digit, 0))
          self.assertFalse(idna.valid_contexto(arabic_digit + ext_arabic_digit, 0))

          # RFC 5892 Rule A.9 (Extended Arabic-Indic Digits)
          self.assertTrue(idna.valid_contexto(ext_arabic_digit + ext_arabic_digit, 0))
          self.assertFalse(idna.valid_contexto(ext_arabic_digit + arabic_digit, 0))

      def test_encode(self, encode=None, skip_bytes=False):
          """Check encoding of whole domain names to A-label ``bytes``.

          Args:
              encode: Callable under test; ``idna.encode`` is used when None.
                  Presumably other test modules pass their own encoder here
                  to reuse these assertions - callers are not visible in
                  this file.
              skip_bytes: When true, the case that feeds ``bytes`` input to
                  the encoder is skipped.
          """
          if encode is None:
              encode = idna.encode

          self.assertEqual(encode('xn--zckzah.xn--zckzah'), b'xn--zckzah.xn--zckzah')
          self.assertEqual(encode('\u30c6\u30b9\u30c8.xn--zckzah'), b'xn--zckzah.xn--zckzah')
          self.assertEqual(encode('\u30c6\u30b9\u30c8.\u30c6\u30b9\u30c8'), b'xn--zckzah.xn--zckzah')
          self.assertEqual(encode('abc.abc'), b'abc.abc')
          self.assertEqual(encode('xn--zckzah.abc'), b'xn--zckzah.abc')
          self.assertEqual(encode('\u30c6\u30b9\u30c8.abc'), b'xn--zckzah.abc')
          self.assertEqual(encode('\u0521\u0525\u0523-\u0523\u0523-----\u0521\u0523\u0523\u0523.aa'),
                           b'xn---------90gglbagaar.aa')
          # Only ``idna.encode`` itself is called with the ``uts46`` keyword.
          if encode is idna.encode:
              self.assertRaises(idna.IDNAError, encode,
                                '\u0521\u0524\u0523-\u0523\u0523-----\u0521\u0523\u0523\u0523.aa', uts46=False)
          self.assertEqual(encode('a'*63), b'a'*63)
          self.assertRaises(idna.IDNAError, encode, 'a'*64)
          self.assertRaises(idna.core.InvalidCodepoint, encode, '*')
          if not skip_bytes:
              self.assertRaises(idna.IDNAError, encode, b'\x0a\x33\x81')

      def test_decode(self, decode=None, skip_str=False):
          """Check decoding of whole domain names back to Unicode ``str``.

          Args:
              decode: Callable under test; ``idna.decode`` is used when None.
                  Presumably other test modules pass their own decoder here
                  to reuse these assertions - callers are not visible in
                  this file.
              skip_str: When true, the cases that feed ``str`` input to the
                  decoder are skipped.
          """
          if decode is None:
              decode = idna.decode

          self.assertEqual(decode(b'xn--zckzah.xn--zckzah'), '\u30c6\u30b9\u30c8.\u30c6\u30b9\u30c8')
          self.assertEqual(decode(b'xn--d1acufc.xn--80akhbyknj4f'),
                           '\u0434\u043e\u043c\u0435\u043d.\u0438\u0441\u043f\u044b\u0442\u0430\u043d\u0438\u0435')
          if not skip_str:
              self.assertEqual(decode('\u30c6\u30b9\u30c8.xn--zckzah'), '\u30c6\u30b9\u30c8.\u30c6\u30b9\u30c8')
              self.assertEqual(decode('\u30c6\u30b9\u30c8.\u30c6\u30b9\u30c8'),
                               '\u30c6\u30b9\u30c8.\u30c6\u30b9\u30c8')
              self.assertEqual(decode('abc.abc'), 'abc.abc')
          self.assertEqual(decode(b'xn---------90gglbagaar.aa'),
                           '\u0521\u0525\u0523-\u0523\u0523-----\u0521\u0523\u0523\u0523.aa')
          # Inputs that must be rejected with IDNAError.
          self.assertRaises(idna.IDNAError, decode, b'XN---------90GGLBAGAAC.AA')
          self.assertRaises(idna.IDNAError, decode, b'xn---------90gglbagaac.aa')
          self.assertRaises(idna.IDNAError, decode, b'xn--')
          self.assertRaises(idna.IDNAError, decode, b'\x8d\xd2')
          self.assertRaises(idna.IDNAError, decode, b'A.A.0.a.a.A.0.a.A.A.0.a.A.0A.2.a.A.A.0.a.A.0.A.a.A0.a.a.A.0.a.fB.A.A.a.A.A.B.A.A.a.A.A.B.A.A.a.A.A.0.a.A.a.a.A.A.0.a.A.0.A.a.A0.a.a.A.0.a.fB.A.A.a.A.A.B.0A.A.a.A.A.B.A.A.a.A.A.a.A.A.B.A.A.a.A.0.a.B.A.A.a.A.B.A.a.A.A.5.a.A.0.a.Ba.A.B.A.A.a.A.0.a.Xn--B.A.A.A.a')
  # Script entry point: run the tests in this module with unittest's runner.
  if __name__ == '__main__':
      unittest.main()