# test_idna_uts46.py
  1. """Tests for TR46 code."""
  2. import os.path
  3. import re
  4. import unittest
  5. import idna
  6. _RE_UNICODE = re.compile("\\\\u([0-9a-fA-F]{4})")
  7. _RE_SURROGATE = re.compile("[\uD800-\uDBFF][\uDC00-\uDFFF]")
  8. _SKIP_TESTS = [
  9. # These are strings that are illegal in IDNA 2008. Older versions of the UTS-46 test suite
  10. # had these denoted with the 'NV8' marker but this has been removed, so we need to manually
  11. # review exceptions and add them here to skip them as text vectors if they are invalid.
  12. '\U000102F7\u3002\u200D',
  13. '\U0001D7F5\u9681\u2BEE\uFF0E\u180D\u200C',
  14. '9\u9681\u2BEE.\u180D\u200C',
  15. '\u00DF\u200C\uAAF6\u18A5.\u22B6\u2D21\u2D16',
  16. 'ss\u200C\uAAF6\u18A5.\u22B6\u2D21\u2D16',
  17. '\u00DF\u200C\uAAF6\u18A5\uFF0E\u22B6\u2D21\u2D16',
  18. 'ss\u200C\uAAF6\u18A5\uFF0E\u22B6\u2D21\u2D16',
  19. '\U00010A57\u200D\u3002\u2D09\u2D15',
  20. '\U00010A57\u200D\uFF61\u2D09\u2D15',
  21. '\U0001D7CF\U0001DA19\u2E16.\u200D',
  22. '1\U0001DA19\u2E16.\u200D',
  23. '\U0001D7E04\U000E01D7\U0001D23B\uFF0E\u200D\U000102F5\u26E7\u200D',
  24. '84\U000E01D7\U0001D23B.\u200D\U000102F5\u26E7\u200D',
  25. '\u00A1', 'xn--7a', '\u19DA', 'xn--pkf', '\u2615', 'xn--53h',
  26. '\U0001E937.\U00010B90\U0001E881\U00010E60\u0624',
  27. '\U0001E937.\U00010B90\U0001E881\U00010E60\u0648\u0654',
  28. '\U0001E915.\U00010B90\U0001E881\U00010E60\u0648\u0654',
  29. '\U0001E915.\U00010B90\U0001E881\U00010E60\u0624',
  30. 'xn--ve6h.xn--jgb1694kz0b2176a',
  31. '\u00DF\u3002\U000102F3\u2D0C\u0FB8',
  32. 'ss\u3002\U000102F3\u2D0C\u0FB8',
  33. 'ss.xn--lgd921mvv0m',
  34. 'ss.\U000102F3\u2D0C\u0FB8',
  35. 'xn--zca.xn--lgd921mvv0m',
  36. '\u00DF.\U000102F3\u2D0C\u0FB8',
  37. '\u00DF\uFF61\U000102F3\u2D0C\u0FB8',
  38. 'ss\uFF61\U000102F3\u2D0C\u0FB8',
  39. '\u16AD\uFF61\U0001D320\u00DF\U00016AF1',
  40. '\u16AD\u3002\U0001D320\u00DF\U00016AF1',
  41. '\u16AD\u3002\U0001D320SS\U00016AF1',
  42. '\u16AD\u3002\U0001D320ss\U00016AF1',
  43. '\u16AD\u3002\U0001D320Ss\U00016AF1',
  44. 'xn--hwe.xn--ss-ci1ub261a',
  45. '\u16AD.\U0001D320ss\U00016AF1',
  46. '\u16AD.\U0001D320SS\U00016AF1',
  47. '\u16AD.\U0001D320Ss\U00016AF1',
  48. 'xn--hwe.xn--zca4946pblnc',
  49. '\u16AD.\U0001D320\u00DF\U00016AF1',
  50. '\u16AD\uFF61\U0001D320SS\U00016AF1',
  51. '\u16AD\uFF61\U0001D320ss\U00016AF1',
  52. '\u16AD\uFF61\U0001D320Ss\U00016AF1',
  53. '\u2D1A\U000102F8\U000E0104\u30025\uD7F6\u103A',
  54. 'xn--ilj2659d.xn--5-dug9054m',
  55. '\u2D1A\U000102F8.5\uD7F6\u103A',
  56. '\u2D1A\U000102F8\U000E0104\u3002\U0001D7DD\uD7F6\u103A',
  57. 'xn--9-mfs8024b.',
  58. '9\u9681\u2BEE.',
  59. 'xn--ss-4epx629f.xn--ifh802b6a',
  60. 'ss\uAAF6\u18A5.\u22B6\u2D21\u2D16',
  61. 'xn--pt9c.xn--0kjya',
  62. '\U00010A57.\u2D09\u2D15',
  63. '\uA5F7\U00011180.\u075D\U00010A52',
  64. 'xn--ju8a625r.xn--hpb0073k',
  65. '\u03C2.\u0641\u0645\u064A\U0001F79B1.',
  66. '\u03A3.\u0641\u0645\u064A\U0001F79B1.',
  67. '\u03C3.\u0641\u0645\u064A\U0001F79B1.',
  68. 'xn--4xa.xn--1-gocmu97674d.',
  69. 'xn--3xa.xn--1-gocmu97674d.',
  70. 'xn--1-5bt6845n.',
  71. '1\U0001DA19\u2E16.',
  72. 'xn--84-s850a.xn--59h6326e',
  73. '84\U0001D23B.\U000102F5\u26E7',
  74. 'xn--r97c.',
  75. '\U000102F7.',
  76. # These appear to be errors in the test vectors. All relate to incorrectly applying
  77. # bidi rules across label boundaries. Appears independently confirmed
  78. # at http://www.alvestrand.no/pipermail/idna-update/2017-January/007946.html
  79. '0\u00E0.\u05D0', '0a\u0300.\u05D0', '0A\u0300.\u05D0', '0\u00C0.\u05D0', 'xn--0-sfa.xn--4db',
  80. '\u00E0\u02c7.\u05D0', 'a\u0300\u02c7.\u05D0', 'A\u0300\u02c7.\u05D0', '\u00C0\u02c7.\u05D0',
  81. 'xn--0ca88g.xn--4db', '0A.\u05D0', '0a.\u05D0', '0a.xn--4db', 'c.xn--0-eha.xn--4db',
  82. 'c.0\u00FC.\u05D0', 'c.0u\u0308.\u05D0', 'C.0U\u0308.\u05D0', 'C.0\u00DC.\u05D0',
  83. 'C.0\u00FC.\u05D0', 'C.0\u0075\u0308.\u05D0', '\u06B6\u06DF\u3002\u2087\uA806', '\u06B6\u06DF\u30027\uA806',
  84. 'xn--pkb6f.xn--7-x93e', '\u06B6\u06DF.7\uA806', '1.\uAC7E6.\U00010C41\u06D0',
  85. '1.\u1100\u1165\u11B56.\U00010C41\u06D0', '1.xn--6-945e.xn--glb1794k',
  86. ]
  87. def unicode_fixup(string):
  88. """Replace backslash-u-XXXX with appropriate unicode characters."""
  89. return _RE_SURROGATE.sub(lambda match: chr(
  90. (ord(match.group(0)[0]) - 0xd800) * 0x400 +
  91. ord(match.group(0)[1]) - 0xdc00 + 0x10000),
  92. _RE_UNICODE.sub(lambda match: chr(int(match.group(1), 16)), string))
  93. def parse_idna_test_table(inputstream):
  94. """Parse IdnaTestV2.txt and return a list of tuples."""
  95. for lineno, line in enumerate(inputstream):
  96. line = line.decode('utf-8').strip()
  97. if '#' in line:
  98. line = line.split('#', 1)[0]
  99. if not line:
  100. continue
  101. yield((lineno + 1, tuple(field.strip() for field in line.split(';'))))
  102. class TestIdnaTest(unittest.TestCase):
  103. """Run one of the IdnaTestV2.txt test lines."""
  104. def __init__(self, lineno=None, fields=None):
  105. super().__init__()
  106. self.lineno = lineno
  107. self.fields = fields
  108. def id(self):
  109. return '{}.{}'.format(super().id(), self.lineno)
  110. def shortDescription(self):
  111. if not self.fields:
  112. return ''
  113. return 'IdnaTestV2.txt line {}: {}'.format(self.lineno, '; '.join(self.fields))
  114. def runTest(self):
  115. if not self.fields:
  116. return ''
  117. source, to_unicode, to_unicode_status, to_ascii, to_ascii_status, to_ascii_t, to_ascii_t_status = self.fields
  118. if source in _SKIP_TESTS:
  119. return
  120. if not to_unicode:
  121. to_unicode = source
  122. if not to_unicode_status:
  123. to_unicode_status = '[]'
  124. if not to_ascii:
  125. to_ascii = to_unicode
  126. if not to_ascii_status:
  127. to_ascii_status = to_unicode_status
  128. if not to_ascii_t:
  129. to_ascii_t = to_ascii
  130. if not to_ascii_t_status:
  131. to_ascii_t_status = to_ascii_status
  132. try:
  133. output = idna.decode(source, uts46=True, strict=True)
  134. if to_unicode_status != '[]':
  135. self.fail('decode() did not emit required error {} for {}'.format(to_unicode, repr(source)))
  136. self.assertEqual(output, to_unicode, 'unexpected decode() output')
  137. except (idna.IDNAError, UnicodeError, ValueError) as exc:
  138. if str(exc).startswith("Unknown"):
  139. raise unittest.SkipTest("Test requires support for a newer"
  140. " version of Unicode than this Python supports")
  141. if to_unicode_status == '[]':
  142. raise
  143. try:
  144. output = idna.encode(source, uts46=True, strict=True).decode('ascii')
  145. if to_ascii_status != '[]':
  146. self.fail('encode() did not emit required error {} for {}'.
  147. format(to_ascii_status, repr(source)))
  148. self.assertEqual(output, to_ascii, 'unexpected encode() output')
  149. except (idna.IDNAError, UnicodeError, ValueError) as exc:
  150. if str(exc).startswith("Unknown"):
  151. raise unittest.SkipTest("Test requires support for a newer"
  152. " version of Unicode than this Python supports")
  153. if to_ascii_status == '[]':
  154. raise
  155. try:
  156. output = idna.encode(source, uts46=True, strict=True, transitional=True).decode('ascii')
  157. if to_ascii_t_status != '[]':
  158. self.fail('encode(transitional=True) did not emit required error {} for {}'.
  159. format(to_ascii_t_status, repr(source)))
  160. self.assertEqual(output, to_ascii_t, 'unexpected encode() output')
  161. except (idna.IDNAError, UnicodeError, ValueError) as exc:
  162. if str(exc).startswith("Unknown"):
  163. raise unittest.SkipTest("Test requires support for a newer"
  164. " version of Unicode than this Python supports")
  165. if to_ascii_t_status == '[]':
  166. raise
  167. def load_tests(loader, tests, pattern):
  168. """Create a suite of all the individual tests."""
  169. suite = unittest.TestSuite()
  170. with open(os.path.join(os.path.dirname(__file__),
  171. 'IdnaTestV2.txt'), 'rb') as tests_file:
  172. suite.addTests(TestIdnaTest(lineno, fields)
  173. for lineno, fields in parse_idna_test_table(tests_file))
  174. return suite