# coding: utf-8
"""Core tests for wcwidth module. isort:skip_file"""
  3. try:
  4. # std import
  5. import importlib.metadata as importmeta
  6. except ImportError:
  7. # 3rd party for python3.7 and earlier
  8. import importlib_metadata as importmeta
  9. # local
  10. import wcwidth
  11. try:
  12. # python 2
  13. _ = unichr
  14. except NameError:
  15. # python 3
  16. unichr = chr
  17. def test_package_version():
  18. """wcwidth.__version__ is expected value."""
  19. # given,
  20. expected = importmeta.version('wcwidth')
  21. # exercise,
  22. result = wcwidth.__version__
  23. # verify.
  24. assert result == expected
  25. def test_empty_string():
  26. """
  27. Test empty string is OK.
  28. https://github.com/jquast/wcwidth/issues/24
  29. """
  30. phrase = ""
  31. expect_length_each = 0
  32. expect_length_phrase = 0
  33. # exercise,
  34. length_each = wcwidth.wcwidth(phrase)
  35. length_phrase = wcwidth.wcswidth(phrase)
  36. # verify.
  37. assert length_each == expect_length_each
  38. assert length_phrase == expect_length_phrase
  39. def basic_string_type():
  40. """
  41. This is a python 2-specific test of the basic "string type"
  42. Such strings cannot contain anything but ascii in python2.
  43. """
  44. # given,
  45. phrase = 'hello\x00world'
  46. expect_length_each = (1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1)
  47. expect_length_phrase = sum(expect_length_each)
  48. # exercise,
  49. length_each = tuple(map(wcwidth.wcwidth, phrase))
  50. length_phrase = wcwidth.wcswidth(phrase)
  51. # verify.
  52. assert length_each == expect_length_each
  53. assert length_phrase == expect_length_phrase
  54. def test_hello_jp():
  55. u"""
  56. Width of Japanese phrase: コンニチハ, セカイ!
  57. Given a phrase of 5 and 3 Katakana ideographs, joined with
  58. 3 English-ASCII punctuation characters, totaling 11, this
  59. phrase consumes 19 cells of a terminal emulator.
  60. """
  61. # given,
  62. phrase = u'コンニチハ, セカイ!'
  63. expect_length_each = (2, 2, 2, 2, 2, 1, 1, 2, 2, 2, 1)
  64. expect_length_phrase = sum(expect_length_each)
  65. # exercise,
  66. length_each = tuple(map(wcwidth.wcwidth, phrase))
  67. length_phrase = wcwidth.wcswidth(phrase)
  68. # verify.
  69. assert length_each == expect_length_each
  70. assert length_phrase == expect_length_phrase
  71. def test_wcswidth_substr():
  72. """
  73. Test wcswidth() optional 2nd parameter, ``n``.
  74. ``n`` determines at which position of the string
  75. to stop counting length.
  76. """
  77. # given,
  78. phrase = u'コンニチハ, セカイ!'
  79. end = 7
  80. expect_length_each = (2, 2, 2, 2, 2, 1, 1,)
  81. expect_length_phrase = sum(expect_length_each)
  82. # exercise,
  83. length_each = tuple(map(wcwidth.wcwidth, phrase))[:end]
  84. length_phrase = wcwidth.wcswidth(phrase, end)
  85. # verify.
  86. assert length_each == expect_length_each
  87. assert length_phrase == expect_length_phrase
  88. def test_null_width_0():
  89. """NULL (0) reports width 0."""
  90. # given,
  91. phrase = u'abc\x00def'
  92. expect_length_each = (1, 1, 1, 0, 1, 1, 1)
  93. expect_length_phrase = sum(expect_length_each)
  94. # exercise,
  95. length_each = tuple(map(wcwidth.wcwidth, phrase))
  96. length_phrase = wcwidth.wcswidth(phrase, len(phrase))
  97. # verify.
  98. assert length_each == expect_length_each
  99. assert length_phrase == expect_length_phrase
  100. def test_control_c0_width_negative_1():
  101. """How the API reacts to CSI (Control sequence initiate).
  102. An example of bad fortune, this terminal sequence is a width of 0
  103. on all terminals, but wcwidth doesn't parse Control-Sequence-Inducer
  104. (CSI) sequences.
  105. Also the "legacy" posix functions wcwidth and wcswidth return -1 for
  106. any string containing the C1 control character \x1b (ESC).
  107. """
  108. # given,
  109. phrase = u'\x1b[0m'
  110. expect_length_each = (-1, 1, 1, 1)
  111. expect_length_phrase = -1
  112. # exercise,
  113. length_each = tuple(map(wcwidth.wcwidth, phrase))
  114. length_phrase = wcwidth.wcswidth(phrase)
  115. # verify, though this is actually *0* width for a terminal emulator
  116. assert length_each == expect_length_each
  117. assert length_phrase == expect_length_phrase
  118. def test_combining_width():
  119. """Simple test combining reports total width of 4."""
  120. # given,
  121. phrase = u'--\u05bf--'
  122. expect_length_each = (1, 1, 0, 1, 1)
  123. expect_length_phrase = 4
  124. # exercise,
  125. length_each = tuple(map(wcwidth.wcwidth, phrase))
  126. length_phrase = wcwidth.wcswidth(phrase)
  127. # verify.
  128. assert length_each == expect_length_each
  129. assert length_phrase == expect_length_phrase
  130. def test_combining_cafe():
  131. u"""Phrase cafe + COMBINING ACUTE ACCENT is café of length 4."""
  132. phrase = u"cafe\u0301"
  133. expect_length_each = (1, 1, 1, 1, 0)
  134. expect_length_phrase = 4
  135. # exercise,
  136. length_each = tuple(map(wcwidth.wcwidth, phrase))
  137. length_phrase = wcwidth.wcswidth(phrase)
  138. # verify.
  139. assert length_each == expect_length_each
  140. assert length_phrase == expect_length_phrase
  141. def test_combining_enclosing():
  142. u"""CYRILLIC CAPITAL LETTER A + COMBINING CYRILLIC HUNDRED THOUSANDS SIGN is of length 1."""
  143. phrase = u"\u0410\u0488"
  144. expect_length_each = (1, 0)
  145. expect_length_phrase = 1
  146. # exercise,
  147. length_each = tuple(map(wcwidth.wcwidth, phrase))
  148. length_phrase = wcwidth.wcswidth(phrase)
  149. # verify.
  150. assert length_each == expect_length_each
  151. assert length_phrase == expect_length_phrase
  152. def test_balinese_script():
  153. u"""
  154. Balinese kapal (ship) is length 3.
  155. This may be an example that is not yet correctly rendered by any terminal so
  156. far, like devanagari.
  157. """
  158. phrase = (u"\u1B13" # Category 'Lo', EAW 'N' -- BALINESE LETTER KA
  159. u"\u1B28" # Category 'Lo', EAW 'N' -- BALINESE LETTER PA KAPAL
  160. u"\u1B2E" # Category 'Lo', EAW 'N' -- BALINESE LETTER LA
  161. u"\u1B44") # Category 'Mc', EAW 'N' -- BALINESE ADEG ADEG
  162. expect_length_each = (1, 1, 1, 0)
  163. expect_length_phrase = 3
  164. # exercise,
  165. length_each = tuple(map(wcwidth.wcwidth, phrase))
  166. length_phrase = wcwidth.wcswidth(phrase)
  167. # verify.
  168. assert length_each == expect_length_each
  169. assert length_phrase == expect_length_phrase
  170. def test_kr_jamo():
  171. """
  172. Test basic combining of HANGUL CHOSEONG and JUNGSEONG
  173. Example and from Raymond Chen's blog post,
  174. https://devblogs.microsoft.com/oldnewthing/20201009-00/?p=104351
  175. """
  176. # This is an example where both characters are "wide" when displayed alone.
  177. #
  178. # But JUNGSEONG (vowel) is designed for combination with a CHOSEONG (consonant).
  179. #
  180. # This wcwidth library understands their width only when combination,
  181. # and not by independent display, like other zero-width characters that may
  182. # only combine with an appropriate preceding character.
  183. phrase = (
  184. u"\u1100" # ᄀ HANGUL CHOSEONG KIYEOK (consonant)
  185. u"\u1161" # ᅡ HANGUL JUNGSEONG A (vowel)
  186. )
  187. expect_length_each = (2, 0)
  188. expect_length_phrase = 2
  189. # exercise,
  190. length_each = tuple(map(wcwidth.wcwidth, phrase))
  191. length_phrase = wcwidth.wcswidth(phrase)
  192. # verify.
  193. assert length_each == expect_length_each
  194. assert length_phrase == expect_length_phrase
  195. def test_kr_jamo_filler():
  196. u"""
  197. Jamo filler is 0 width.
  198. Example from https://www.unicode.org/L2/L2006/06310-hangul-decompose9.pdf
  199. """
  200. phrase = (
  201. u"\u1100" # HANGUL CHOSEONG KIYEOK (consonant)
  202. u"\u1160" # HANGUL JUNGSEONG FILLER (vowel)
  203. )
  204. expect_length_each = (2, 0)
  205. expect_length_phrase = 2
  206. # exercise,
  207. length_each = tuple(map(wcwidth.wcwidth, phrase))
  208. length_phrase = wcwidth.wcswidth(phrase)
  209. # verify.
  210. assert length_each == expect_length_each
  211. assert length_phrase == expect_length_phrase
  212. def test_devanagari_script():
  213. """
  214. Attempt to test the measurement width of Devanagari script.
  215. I believe this 'phrase' should be length 3.
  216. This is a difficult problem, and this library does not yet get it right,
  217. because we interpret the unicode data files programmatically, but they do
  218. not correctly describe how their terminal width is measured.
  219. There are very few Terminals that do!
  220. As of 2023,
  221. - iTerm2: correct length but individual characters are out of order and
  222. horizaontally misplaced as to be unreadable in its language when
  223. using 'Noto Sans' font.
  224. - mlterm: mixed results, it offers several options in the configuration
  225. dialog, "Xft", "Cario", and "Variable Column Width" have some
  226. effect, but with neither 'Noto Sans' or 'unifont', it is not
  227. recognizable as the Devanagari script it is meant to display.
  228. Previous testing with Devanagari documented at address https://benizi.com/vim/devanagari/
  229. See also, https://askubuntu.com/questions/8437/is-there-a-good-mono-spaced-font-for-devanagari-script-in-the-terminal
  230. """
  231. # This test adapted from https://www.unicode.org/L2/L2023/23107-terminal-suppt.pdf
  232. # please note that document correctly points out that the final width cannot be determined
  233. # as a sum of each individual width, as this library currently performs with exception of
  234. # ZWJ, but I think it incorrectly gestures what a stateless call to wcwidth.wcwidth of
  235. # each codepoint *should* return.
  236. phrase = (u"\u0915" # Akhand, Category 'Lo', East Asian Width property 'N' -- DEVANAGARI LETTER KA
  237. u"\u094D" # Joiner, Category 'Mn', East Asian Width property 'N' -- DEVANAGARI SIGN VIRAMA
  238. u"\u0937" # Fused, Category 'Lo', East Asian Width property 'N' -- DEVANAGARI LETTER SSA
  239. u"\u093F") # MatraL, Category 'Mc', East Asian Width property 'N' -- DEVANAGARI VOWEL SIGN I
  240. # 23107-terminal-suppt.pdf suggests wcwidth.wcwidth should return (2, 0, 0, 1)
  241. expect_length_each = (1, 0, 1, 0)
  242. # I believe the final width *should* be 3.
  243. expect_length_phrase = 2
  244. # exercise,
  245. length_each = tuple(map(wcwidth.wcwidth, phrase))
  246. length_phrase = wcwidth.wcswidth(phrase)
  247. # verify.
  248. assert length_each == expect_length_each
  249. assert length_phrase == expect_length_phrase
  250. def test_tamil_script():
  251. # This test adapted from https://www.unicode.org/L2/L2023/23107-terminal-suppt.pdf
  252. phrase = (u"\u0b95" # Akhand, Category 'Lo', East Asian Width property 'N' -- TAMIL LETTER KA
  253. u"\u0bcd" # Joiner, Category 'Mn', East Asian Width property 'N' -- TAMIL SIGN VIRAMA
  254. u"\u0bb7" # Fused, Category 'Lo', East Asian Width property 'N' -- TAMIL LETTER SSA
  255. u"\u0bcc") # MatraLR, Category 'Mc', East Asian Width property 'N' -- TAMIL VOWEL SIGN AU
  256. # 23107-terminal-suppt.pdf suggests wcwidth.wcwidth should return (3, 0, 0, 4)
  257. expect_length_each = (1, 0, 1, 0)
  258. # I believe the final width should be about 5 or 6.
  259. expect_length_phrase = 2
  260. # exercise,
  261. length_each = tuple(map(wcwidth.wcwidth, phrase))
  262. length_phrase = wcwidth.wcswidth(phrase)
  263. # verify.
  264. assert length_each == expect_length_each
  265. assert length_phrase == expect_length_phrase
  266. def test_kannada_script():
  267. # This test adapted from https://www.unicode.org/L2/L2023/23107-terminal-suppt.pdf
  268. # |ರ್ಝೈ|
  269. # |123|
  270. phrase = (u"\u0cb0" # Repha, Category 'Lo', East Asian Width property 'N' -- KANNADA LETTER RA
  271. u"\u0ccd" # Joiner, Category 'Mn', East Asian Width property 'N' -- KANNADA SIGN VIRAMA
  272. u"\u0c9d" # Base, Category 'Lo', East Asian Width property 'N' -- KANNADA LETTER JHA
  273. u"\u0cc8") # MatraUR, Category 'Mc', East Asian Width property 'N' -- KANNADA VOWEL SIGN AI
  274. # 23107-terminal-suppt.pdf suggests should be (2, 0, 3, 1)
  275. expect_length_each = (1, 0, 1, 0)
  276. # I believe the correct final width *should* be 3 or 4.
  277. expect_length_phrase = 2
  278. # exercise,
  279. length_each = tuple(map(wcwidth.wcwidth, phrase))
  280. length_phrase = wcwidth.wcswidth(phrase)
  281. # verify.
  282. assert length_each == expect_length_each
  283. assert length_phrase == expect_length_phrase
  284. def test_kannada_script_2():
  285. # This test adapted from https://www.unicode.org/L2/L2023/23107-terminal-suppt.pdf
  286. # |ರ಼್ಚ|
  287. # |12|
  288. phrase = (u"\u0cb0" # Base, Category 'Lo', East Asian Width property 'N' -- KANNADA LETTER RA
  289. u"\u0cbc" # Nukta, Category 'Mn', East Asian Width property 'N' -- KANNADA SIGN NUKTA
  290. u"\u0ccd" # Joiner, Category 'Lo', East Asian Width property 'N' -- KANNADA SIGN VIRAMA
  291. u"\u0c9a") # Subjoin, Category 'Mc', East Asian Width property 'N' -- KANNADA LETTER CA
  292. # 23107-terminal-suppt.pdf suggests wcwidth.wcwidth should return (2, 0, 0, 1)
  293. expect_length_each = (1, 0, 0, 1)
  294. # I believe the final width is correct, but maybe for the wrong reasons!
  295. expect_length_phrase = 2
  296. # exercise,
  297. length_each = tuple(map(wcwidth.wcwidth, phrase))
  298. length_phrase = wcwidth.wcswidth(phrase)
  299. # verify.
  300. assert length_each == expect_length_each
  301. assert length_phrase == expect_length_phrase
  302. def test_zero_wide_conflict():
  303. # Test characters considered both "wide" and "zero" width
  304. # - (0x03000, 0x0303e,), # Ideographic Space ..Ideographic Variation In
  305. # + (0x03000, 0x03029,), # Ideographic Space ..Hangzhou Numeral Nine
  306. assert wcwidth.wcwidth(unichr(0x03029), unicode_version='4.1.0') == 2
  307. assert wcwidth.wcwidth(unichr(0x0302a), unicode_version='4.1.0') == 0
  308. # - (0x03099, 0x030ff,), # Combining Katakana-hirag..Katakana Digraph Koto
  309. # + (0x0309b, 0x030ff,), # Katakana-hiragana Voiced..Katakana Digraph Koto
  310. assert wcwidth.wcwidth(unichr(0x03099), unicode_version='4.1.0') == 0
  311. assert wcwidth.wcwidth(unichr(0x0309a), unicode_version='4.1.0') == 0
  312. assert wcwidth.wcwidth(unichr(0x0309b), unicode_version='4.1.0') == 2