123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243 |
- # std imports
- import os
- import codecs
- # 3rd party
- import pytest
# Python 2/3 compatibility: make the name ``unichr`` usable on both.
try:
    # python 2: ``unichr`` is a builtin, so this lookup succeeds
    _ = unichr
except NameError:
    # python 3: the lookup above raised NameError, bind ``chr`` to the name
    unichr = chr

# some tests cannot be done on some builds of python, where the internal
# unicode structure is limited to 0x10000 for memory conservation,
# "ValueError: unichr() arg not in range(0x10000) (narrow Python build)"
#
# NARROW_ONLY is True when unichr(0x2fffe) raises ValueError, and is used
# as the condition of the ``skipif`` marks in this module.
try:
    unichr(0x2fffe)
    NARROW_ONLY = False
except ValueError:
    NARROW_ONLY = True
- # local
- import wcwidth
def make_sequence_from_line(line):
    """
    Build the string described by the codepoint field of a data-file line.

    Only the text before the first ';' is used. It is split on whitespace
    into hexadecimal codepoint values and each value is converted with
    ``unichr``, e.g. '002A FE0F ; ..' -> unichr(0x2a) + unichr(0xfe0f).
    """
    hex_field = line.split(';', 1)[0]
    # str.split() without arguments already ignores surrounding whitespace
    codepoints = [int(hex_value, 16) for hex_value in hex_field.split()]
    return ''.join(map(unichr, codepoints))
@pytest.mark.skipif(NARROW_ONLY, reason="Test cannot verify on python 'narrow' builds")
def test_emoji_zwj_sequence():
    u"""
    Emoji zwj sequence of four codepoints is just 2 cells.
    """
    # The function name must begin with ``test_``, otherwise pytest's default
    # discovery rules never collect it and these assertions are never run.
    phrase = (u"\U0001f469"   # Base, Category So, East Asian Width property 'W' -- WOMAN
              u"\U0001f3fb"   # Modifier, Category Sk, East Asian Width property 'W' -- EMOJI MODIFIER FITZPATRICK TYPE-1-2
              u"\u200d"       # Joiner, Category Cf, East Asian Width property 'N' -- ZERO WIDTH JOINER
              u"\U0001f4bb")  # Fused, Category So, East Asian Width property 'W' -- PERSONAL COMPUTER
    # This test adapted from https://www.unicode.org/L2/L2023/23107-terminal-suppt.pdf
    expect_length_each = (2, 0, 0, 2)
    expect_length_phrase = 2

    # exercise,
    length_each = tuple(map(wcwidth.wcwidth, phrase))
    length_phrase = wcwidth.wcswidth(phrase)

    # verify.
    assert length_each == expect_length_each
    assert length_phrase == expect_length_phrase


# Backward-compatible alias for the previous name; it does not start with
# ``test`` and is therefore not collected a second time.
emoji_zwj_sequence = test_emoji_zwj_sequence
@pytest.mark.skipif(NARROW_ONLY, reason="Test cannot verify on python 'narrow' builds")
def test_unfinished_zwj_sequence():
    u"""
    A zero-width joiner that ends the string, with no character following it,
    must be measured without an index-out-of-bounds error.
    """
    woman = u"\U0001f469"      # Base, Category So, East Asian Width 'W' -- WOMAN
    skin_tone = u"\U0001f3fb"  # Modifier, Category Sk, East Asian Width 'W' -- EMOJI MODIFIER FITZPATRICK TYPE-1-2
    joiner = u"\u200d"         # Joiner, Category Cf, East Asian Width 'N' -- ZERO WIDTH JOINER
    phrase = woman + skin_tone + joiner

    # measure each codepoint on its own, then the phrase as a whole
    widths = tuple(wcwidth.wcwidth(ucs) for ucs in phrase)
    total = wcwidth.wcswidth(phrase)

    assert widths == (2, 0, 0)
    assert total == 2
@pytest.mark.skipif(NARROW_ONLY, reason="Test cannot verify on python 'narrow' builds")
def test_non_recommended_zwj_sequence():
    """
    Verify ZWJ is measured as though successful with characters that cannot
    be joined; wcwidth does not verify that a sequence is a recommended one.
    """
    # NOTE(review): this phrase ends at the joiner and is therefore the same
    # input, with the same expectations, as test_unfinished_zwj_sequence --
    # no "cannot be joined" character follows the ZWJ. Confirm whether a
    # trailing character was intended here.
    woman = u"\U0001f469"      # Base, Category So, East Asian Width 'W' -- WOMAN
    skin_tone = u"\U0001f3fb"  # Modifier, Category Sk, East Asian Width 'W' -- EMOJI MODIFIER FITZPATRICK TYPE-1-2
    joiner = u"\u200d"         # Joiner, Category Cf, East Asian Width 'N' -- ZERO WIDTH JOINER
    phrase = woman + skin_tone + joiner

    # measure each codepoint on its own, then the phrase as a whole
    widths = tuple(wcwidth.wcwidth(ucs) for ucs in phrase)
    total = wcwidth.wcswidth(phrase)

    assert widths == (2, 0, 0)
    assert total == 2
@pytest.mark.skipif(NARROW_ONLY, reason="Test cannot verify on python 'narrow' builds")
def test_another_emoji_zwj_sequence():
    """
    A five-codepoint ZWJ sequence that ends in VS-16 measures 2 cells as a phrase.
    """
    codepoints = (
        u"\u26F9",      # PERSON WITH BALL
        u"\U0001F3FB",  # EMOJI MODIFIER FITZPATRICK TYPE-1-2
        u"\u200D",      # ZERO WIDTH JOINER
        u"\u2640",      # FEMALE SIGN
        u"\uFE0F",      # VARIATION SELECTOR-16
    )
    phrase = u"".join(codepoints)

    # measure each codepoint on its own, then the phrase as a whole
    widths = tuple(wcwidth.wcwidth(ucs) for ucs in phrase)
    total = wcwidth.wcswidth(phrase)

    assert widths == (1, 0, 0, 1, 0)
    assert total == 2
@pytest.mark.skipif(NARROW_ONLY, reason="Test cannot verify on python 'narrow' builds")
def test_longer_emoji_zwj_sequence():
    """
    A ZWJ sequence of 10 codepoints occupies only 2 cells.

    The sequence is repeated twice, so that more than one VS-16 sequence is
    measured by a single function call.
    """
    # This test adapted from https://www.unicode.org/L2/L2023/23107-terminal-suppt.pdf
    #
    # columns: 'Category Code', 'East Asian Width property' -- 'description'
    codepoints = (
        u"\U0001F9D1",  # 'So', 'W' -- ADULT
        u"\U0001F3FB",  # 'Sk', 'W' -- EMOJI MODIFIER FITZPATRICK TYPE-1-2
        u"\u200d",      # 'Cf', 'N' -- ZERO WIDTH JOINER
        u"\u2764",      # 'So', 'N' -- HEAVY BLACK HEART
        u"\uFE0F",      # 'Mn', 'A' -- VARIATION SELECTOR-16
        u"\u200d",      # 'Cf', 'N' -- ZERO WIDTH JOINER
        u"\U0001F48B",  # 'So', 'W' -- KISS MARK
        u"\u200d",      # 'Cf', 'N' -- ZERO WIDTH JOINER
        u"\U0001F9D1",  # 'So', 'W' -- ADULT
        u"\U0001F3FD",  # 'Sk', 'W' -- EMOJI MODIFIER FITZPATRICK TYPE-4
    )
    repeats = 2
    phrase = u"".join(codepoints) * repeats

    # measure each codepoint on its own, then the phrase as a whole
    widths = tuple(wcwidth.wcwidth(ucs) for ucs in phrase)
    total = wcwidth.wcswidth(phrase)

    assert widths == (2, 0, 0, 1, 0, 0, 2, 0, 2, 0) * repeats
    assert total == 4
def read_sequences_from_file(filename):
    """
    Read a sequence data file located in the same directory as this module.

    Lines that begin with '#' and lines that are blank are dropped; every
    remaining line is stripped of surrounding whitespace.

    :param filename: name of the file, joined onto this module's directory.
    :returns: tuple ``(lines, sequences)``, where ``sequences[i]`` is the
        result of ``make_sequence_from_line(lines[i])``.
    """
    path = os.path.join(os.path.dirname(__file__), filename)
    # the context manager closes the file even if reading or decoding raises
    with codecs.open(path, 'r', encoding='utf-8') as fp:
        lines = [line.strip()
                 for line in fp.readlines()
                 if not line.startswith('#') and line.strip()]
    sequences = [make_sequence_from_line(line) for line in lines]
    return lines, sequences
@pytest.mark.skipif(NARROW_ONLY, reason="Some sequences in text file are not compatible with 'narrow' builds")
def test_recommended_emoji_zwj_sequences():
    """
    Every sequence read from emoji-zwj-sequences.txt must measure 2 cells
    with wcswidth.
    """
    expected_width = 2

    # given,
    lines, sequences = read_sequences_from_file('emoji-zwj-sequences.txt')

    # exercise, keeping the original text file line with each measurement
    # as a debugging aid
    measurements = [(line, sequence, wcwidth.wcswidth(sequence))
                    for sequence, line in zip(sequences, lines)]
    errors = [{'expected_width': expected_width,
               'line': line,
               'measured_width': measured_width,
               'sequence': sequence}
              for line, sequence, measured_width in measurements
              if measured_width != expected_width]

    # verify: no mismatches, and the data file held at least 1468 sequences
    assert errors == []
    assert len(measurements) >= 1468
def test_recommended_variation_16_sequences():
    """
    Test wcswidth of the VS-16 sequences of the unicode.org-published
    emoji-variation-sequences.txt

    Sequences that do not contain U+FE0F are counted but not measured.
    """
    # given,
    lines, sequences = read_sequences_from_file('emoji-variation-sequences.txt')
    errors = []
    num = 0

    # exercise, track by zipping with original text file line
    for sequence, line in zip(sequences, lines):
        num += 1
        # The explicit u'' prefix matters on Python 2, where a plain '\ufe0f'
        # is the six-character byte string backslash-u-f-e-0-f: it is never
        # found, every sequence is skipped, and the test passes vacuously.
        if u'\ufe0f' not in sequence:
            # filter for only \uFE0F (VS-16)
            continue
        measured_width = wcwidth.wcswidth(sequence)
        if measured_width != 2:
            errors.append({
                'expected_width': 2,
                'line': line,
                # report the value that was compared, do not measure again
                'measured_width': measured_width,
                'sequence': sequence,
            })

    # verify
    assert errors == []
    assert num >= 742
def test_unicode_9_vs16():
    """VS-16 changes the measured phrase width when unicode_version is '9.0'."""
    version = '9.0'
    female_sign = u"\u2640"  # FEMALE SIGN
    vs16 = u"\uFE0F"         # VARIATION SELECTOR-16
    phrase = female_sign + vs16

    # measure each codepoint on its own, then the phrase as a whole
    widths = tuple([wcwidth.wcwidth(ucs, unicode_version=version) for ucs in phrase])
    total = wcwidth.wcswidth(phrase, unicode_version=version)

    assert widths == (1, 0)
    assert total == 2
def test_unicode_8_vs16():
    """VS-16 has no effect on the measured phrase width when unicode_version is '8.0'."""
    version = '8.0'
    female_sign = u"\u2640"  # FEMALE SIGN
    vs16 = u"\uFE0F"         # VARIATION SELECTOR-16
    phrase = female_sign + vs16

    # measure each codepoint on its own, then the phrase as a whole
    widths = tuple([wcwidth.wcwidth(ucs, unicode_version=version) for ucs in phrase])
    total = wcwidth.wcswidth(phrase, unicode_version=version)

    assert widths == (1, 0)
    assert total == 1
|