test_emojis.py 8.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243
  1. # std imports
  2. import os
  3. import codecs
  4. # 3rd party
  5. import pytest
  6. try:
  7. # python 2
  8. _ = unichr
  9. except NameError:
  10. # python 3
  11. unichr = chr
  12. # some tests cannot be done on some builds of python, where the internal
  13. # unicode structure is limited to 0x10000 for memory conservation,
  14. # "ValueError: unichr() arg not in range(0x10000) (narrow Python build)"
  15. try:
  16. unichr(0x2fffe)
  17. NARROW_ONLY = False
  18. except ValueError:
  19. NARROW_ONLY = True
  20. # local
  21. import wcwidth
  22. def make_sequence_from_line(line):
  23. # convert '002A FE0F ; ..' -> (0x2a, 0xfe0f) -> chr(0x2a) + chr(0xfe0f)
  24. return ''.join(unichr(int(cp, 16)) for cp in line.split(';', 1)[0].strip().split())
  25. @pytest.mark.skipif(NARROW_ONLY, reason="Test cannot verify on python 'narrow' builds")
  26. def emoji_zwj_sequence():
  27. u"""
  28. Emoji zwj sequence of four codepoints is just 2 cells.
  29. """
  30. phrase = (u"\U0001f469" # Base, Category So, East Asian Width property 'W' -- WOMAN
  31. u"\U0001f3fb" # Modifier, Category Sk, East Asian Width property 'W' -- EMOJI MODIFIER FITZPATRICK TYPE-1-2
  32. u"\u200d" # Joiner, Category Cf, East Asian Width property 'N' -- ZERO WIDTH JOINER
  33. u"\U0001f4bb") # Fused, Category So, East Asian Width peroperty 'W' -- PERSONAL COMPUTER
  34. # This test adapted from https://www.unicode.org/L2/L2023/23107-terminal-suppt.pdf
  35. expect_length_each = (2, 0, 0, 2)
  36. expect_length_phrase = 2
  37. # exercise,
  38. length_each = tuple(map(wcwidth.wcwidth, phrase))
  39. length_phrase = wcwidth.wcswidth(phrase)
  40. # verify.
  41. assert length_each == expect_length_each
  42. assert length_phrase == expect_length_phrase
  43. @pytest.mark.skipif(NARROW_ONLY, reason="Test cannot verify on python 'narrow' builds")
  44. def test_unfinished_zwj_sequence():
  45. u"""
  46. Ensure index-out-of-bounds does not occur for zero-width joiner without any following character
  47. """
  48. phrase = (u"\U0001f469" # Base, Category So, East Asian Width property 'W' -- WOMAN
  49. u"\U0001f3fb" # Modifier, Category Sk, East Asian Width property 'W' -- EMOJI MODIFIER FITZPATRICK TYPE-1-2
  50. u"\u200d") # Joiner, Category Cf, East Asian Width property 'N' -- ZERO WIDTH JOINER
  51. expect_length_each = (2, 0, 0)
  52. expect_length_phrase = 2
  53. # exercise,
  54. length_each = tuple(map(wcwidth.wcwidth, phrase))
  55. length_phrase = wcwidth.wcswidth(phrase)
  56. # verify.
  57. assert length_each == expect_length_each
  58. assert length_phrase == expect_length_phrase
  59. @pytest.mark.skipif(NARROW_ONLY, reason="Test cannot verify on python 'narrow' builds")
  60. def test_non_recommended_zwj_sequence():
  61. """
  62. Verify ZWJ is measured as though successful with characters that cannot be joined, wcwidth does not verify
  63. """
  64. phrase = (u"\U0001f469" # Base, Category So, East Asian Width property 'W' -- WOMAN
  65. u"\U0001f3fb" # Modifier, Category Sk, East Asian Width property 'W' -- EMOJI MODIFIER FITZPATRICK TYPE-1-2
  66. u"\u200d") # Joiner, Category Cf, East Asian Width property 'N' -- ZERO WIDTH JOINER
  67. expect_length_each = (2, 0, 0)
  68. expect_length_phrase = 2
  69. # exercise,
  70. length_each = tuple(map(wcwidth.wcwidth, phrase))
  71. length_phrase = wcwidth.wcswidth(phrase)
  72. # verify.
  73. assert length_each == expect_length_each
  74. assert length_phrase == expect_length_phrase
  75. @pytest.mark.skipif(NARROW_ONLY, reason="Test cannot verify on python 'narrow' builds")
  76. def test_another_emoji_zwj_sequence():
  77. phrase = (
  78. u"\u26F9" # PERSON WITH BALL
  79. u"\U0001F3FB" # EMOJI MODIFIER FITZPATRICK TYPE-1-2
  80. u"\u200D" # ZERO WIDTH JOINER
  81. u"\u2640" # FEMALE SIGN
  82. u"\uFE0F") # VARIATION SELECTOR-16
  83. expect_length_each = (1, 0, 0, 1, 0)
  84. expect_length_phrase = 2
  85. # exercise,
  86. length_each = tuple(map(wcwidth.wcwidth, phrase))
  87. length_phrase = wcwidth.wcswidth(phrase)
  88. # verify.
  89. assert length_each == expect_length_each
  90. assert length_phrase == expect_length_phrase
  91. @pytest.mark.skipif(NARROW_ONLY, reason="Test cannot verify on python 'narrow' builds")
  92. def test_longer_emoji_zwj_sequence():
  93. """
  94. A much longer emoji ZWJ sequence of 10 total codepoints is just 2 cells!
  95. Also test the same sequence in duplicate, verifying multiple VS-16 sequences
  96. in a single function call.
  97. """
  98. # 'Category Code', 'East Asian Width property' -- 'description'
  99. phrase = (u"\U0001F9D1" # 'So', 'W' -- ADULT
  100. u"\U0001F3FB" # 'Sk', 'W' -- EMOJI MODIFIER FITZPATRICK TYPE-1-2
  101. u"\u200d" # 'Cf', 'N' -- ZERO WIDTH JOINER
  102. u"\u2764" # 'So', 'N' -- HEAVY BLACK HEART
  103. u"\uFE0F" # 'Mn', 'A' -- VARIATION SELECTOR-16
  104. u"\u200d" # 'Cf', 'N' -- ZERO WIDTH JOINER
  105. u"\U0001F48B" # 'So', 'W' -- KISS MARK
  106. u"\u200d" # 'Cf', 'N' -- ZERO WIDTH JOINER
  107. u"\U0001F9D1" # 'So', 'W' -- ADULT
  108. u"\U0001F3FD" # 'Sk', 'W' -- EMOJI MODIFIER FITZPATRICK TYPE-4
  109. ) * 2
  110. # This test adapted from https://www.unicode.org/L2/L2023/23107-terminal-suppt.pdf
  111. expect_length_each = (2, 0, 0, 1, 0, 0, 2, 0, 2, 0) * 2
  112. expect_length_phrase = 4
  113. # exercise,
  114. length_each = tuple(map(wcwidth.wcwidth, phrase))
  115. length_phrase = wcwidth.wcswidth(phrase)
  116. # verify.
  117. assert length_each == expect_length_each
  118. assert length_phrase == expect_length_phrase
  119. def read_sequences_from_file(filename):
  120. fp = codecs.open(os.path.join(os.path.dirname(__file__), filename), 'r', encoding='utf-8')
  121. lines = [line.strip()
  122. for line in fp.readlines()
  123. if not line.startswith('#') and line.strip()]
  124. fp.close()
  125. sequences = [make_sequence_from_line(line) for line in lines]
  126. return lines, sequences
  127. @pytest.mark.skipif(NARROW_ONLY, reason="Some sequences in text file are not compatible with 'narrow' builds")
  128. def test_recommended_emoji_zwj_sequences():
  129. """
  130. Test wcswidth of all of the unicode.org-published emoji-zwj-sequences.txt
  131. """
  132. # given,
  133. lines, sequences = read_sequences_from_file('emoji-zwj-sequences.txt')
  134. errors = []
  135. # Exercise, track by zipping with original text file line, a debugging aide
  136. num = 0
  137. for sequence, line in zip(sequences, lines):
  138. num += 1
  139. measured_width = wcwidth.wcswidth(sequence)
  140. if measured_width != 2:
  141. errors.append({
  142. 'expected_width': 2,
  143. 'line': line,
  144. 'measured_width': measured_width,
  145. 'sequence': sequence,
  146. })
  147. # verify
  148. assert errors == []
  149. assert num >= 1468
  150. def test_recommended_variation_16_sequences():
  151. """
  152. Test wcswidth of all of the unicode.org-published emoji-variation-sequences.txt
  153. """
  154. # given,
  155. lines, sequences = read_sequences_from_file('emoji-variation-sequences.txt')
  156. errors = []
  157. num = 0
  158. for sequence, line in zip(sequences, lines):
  159. num += 1
  160. if '\ufe0f' not in sequence:
  161. # filter for only \uFE0F (VS-16)
  162. continue
  163. measured_width = wcwidth.wcswidth(sequence)
  164. if measured_width != 2:
  165. errors.append({
  166. 'expected_width': 2,
  167. 'line': line,
  168. 'measured_width': wcwidth.wcswidth(sequence),
  169. 'sequence': sequence,
  170. })
  171. # verify
  172. assert errors == []
  173. assert num >= 742
  174. def test_unicode_9_vs16():
  175. """Verify effect of VS-16 on unicode_version 9.0 and later"""
  176. phrase = (u"\u2640" # FEMALE SIGN
  177. u"\uFE0F") # VARIATION SELECTOR-16
  178. expect_length_each = (1, 0)
  179. expect_length_phrase = 2
  180. # exercise,
  181. length_each = tuple(wcwidth.wcwidth(w_char, unicode_version='9.0') for w_char in phrase)
  182. length_phrase = wcwidth.wcswidth(phrase, unicode_version='9.0')
  183. # verify.
  184. assert length_each == expect_length_each
  185. assert length_phrase == expect_length_phrase
  186. def test_unicode_8_vs16():
  187. """Verify that VS-16 has no effect on unicode_version 8.0 and earler"""
  188. phrase = (u"\u2640" # FEMALE SIGN
  189. u"\uFE0F") # VARIATION SELECTOR-16
  190. expect_length_each = (1, 0)
  191. expect_length_phrase = 1
  192. # exercise,
  193. length_each = tuple(wcwidth.wcwidth(w_char, unicode_version='8.0') for w_char in phrase)
  194. length_phrase = wcwidth.wcswidth(phrase, unicode_version='8.0')
  195. # verify.
  196. assert length_each == expect_length_each
  197. assert length_phrase == expect_length_phrase