# std imports
import os
import codecs

# 3rd party
import pytest

try:
    # python 2
    _ = unichr
except NameError:
    # python 3
    unichr = chr

# some tests cannot be done on some builds of python, where the internal
# unicode structure is limited to 0x10000 for memory conservation,
# "ValueError: unichr() arg not in range(0x10000) (narrow Python build)"
try:
    unichr(0x2fffe)
    NARROW_ONLY = False
except ValueError:
    NARROW_ONLY = True

# local
import wcwidth


def make_sequence_from_line(line):
    """
    Build the string of characters named by the codepoint field of a data-file line.

    Only the text before the first ';' is used; it is split on whitespace and
    each token is parsed as a hexadecimal codepoint.
    """
    # convert '002A FE0F  ; ..' -> (0x2a, 0xfe0f) -> chr(0x2a) + chr(0xfe0f)
    return ''.join(unichr(int(cp, 16)) for cp in line.split(';', 1)[0].strip().split())


@pytest.mark.skipif(NARROW_ONLY, reason="Test cannot verify on python 'narrow' builds")
def test_emoji_zwj_sequence():
    u"""
    Emoji zwj sequence of four codepoints is just 2 cells.
    """
    phrase = (u"\U0001f469"   # Base, Category So, East Asian Width property 'W' -- WOMAN
              u"\U0001f3fb"   # Modifier, Category Sk, East Asian Width property 'W' -- EMOJI MODIFIER FITZPATRICK TYPE-1-2
              u"\u200d"       # Joiner, Category Cf, East Asian Width property 'N' -- ZERO WIDTH JOINER
              u"\U0001f4bb")  # Fused, Category So, East Asian Width property 'W' -- PERSONAL COMPUTER
    # This test adapted from https://www.unicode.org/L2/L2023/23107-terminal-suppt.pdf
    expect_length_each = (2, 0, 0, 2)
    expect_length_phrase = 2

    # exercise,
    length_each = tuple(map(wcwidth.wcwidth, phrase))
    length_phrase = wcwidth.wcswidth(phrase)

    # verify.
    assert length_each == expect_length_each
    assert length_phrase == expect_length_phrase


# The function above was previously named without the 'test_' prefix, so pytest
# never collected it. The old name is kept as an alias for backward
# compatibility; it does not match the collection pattern, so the test still
# runs only once.
emoji_zwj_sequence = test_emoji_zwj_sequence


@pytest.mark.skipif(NARROW_ONLY, reason="Test cannot verify on python 'narrow' builds")
def test_unfinished_zwj_sequence():
    u"""
    Ensure index-out-of-bounds does not occur for zero-width joiner without any following character
    """
    phrase = (u"\U0001f469"   # Base, Category So, East Asian Width property 'W' -- WOMAN
              u"\U0001f3fb"   # Modifier, Category Sk, East Asian Width property 'W' -- EMOJI MODIFIER FITZPATRICK TYPE-1-2
              u"\u200d")      # Joiner, Category Cf, East Asian Width property 'N' -- ZERO WIDTH JOINER
    expect_length_each = (2, 0, 0)
    expect_length_phrase = 2

    # exercise,
    length_each = tuple(map(wcwidth.wcwidth, phrase))
    length_phrase = wcwidth.wcswidth(phrase)

    # verify.
    assert length_each == expect_length_each
    assert length_phrase == expect_length_phrase


@pytest.mark.skipif(NARROW_ONLY, reason="Test cannot verify on python 'narrow' builds")
def test_non_recommended_zwj_sequence():
    """
    Verify ZWJ is measured as though successful with characters that cannot be joined, wcwidth does not verify
    """
    # NOTE(review): this phrase is identical to the one in
    # test_unfinished_zwj_sequence and has no character following the joiner;
    # confirm whether a non-joinable trailing character was intended here.
    phrase = (u"\U0001f469"   # Base, Category So, East Asian Width property 'W' -- WOMAN
              u"\U0001f3fb"   # Modifier, Category Sk, East Asian Width property 'W' -- EMOJI MODIFIER FITZPATRICK TYPE-1-2
              u"\u200d")      # Joiner, Category Cf, East Asian Width property 'N' -- ZERO WIDTH JOINER
    expect_length_each = (2, 0, 0)
    expect_length_phrase = 2

    # exercise,
    length_each = tuple(map(wcwidth.wcwidth, phrase))
    length_phrase = wcwidth.wcswidth(phrase)

    # verify.
    assert length_each == expect_length_each
    assert length_phrase == expect_length_phrase


@pytest.mark.skipif(NARROW_ONLY, reason="Test cannot verify on python 'narrow' builds")
def test_another_emoji_zwj_sequence():
    """
    A five-codepoint sequence mixing a modifier, ZWJ and VS-16 measures 2 cells.
    """
    phrase = (
        u"\u26F9"        # PERSON WITH BALL
        u"\U0001F3FB"    # EMOJI MODIFIER FITZPATRICK TYPE-1-2
        u"\u200D"        # ZERO WIDTH JOINER
        u"\u2640"        # FEMALE SIGN
        u"\uFE0F")       # VARIATION SELECTOR-16
    expect_length_each = (1, 0, 0, 1, 0)
    expect_length_phrase = 2

    # exercise,
    length_each = tuple(map(wcwidth.wcwidth, phrase))
    length_phrase = wcwidth.wcswidth(phrase)

    # verify.
    assert length_each == expect_length_each
    assert length_phrase == expect_length_phrase


@pytest.mark.skipif(NARROW_ONLY, reason="Test cannot verify on python 'narrow' builds")
def test_longer_emoji_zwj_sequence():
    """
    A much longer emoji ZWJ sequence of 10 total codepoints is just 2 cells!

    Also test the same sequence in duplicate, verifying multiple VS-16 sequences
    in a single function call.
    """
    # 'Category Code', 'East Asian Width property' -- 'description'
    phrase = (u"\U0001F9D1"   # 'So', 'W' -- ADULT
              u"\U0001F3FB"   # 'Sk', 'W' -- EMOJI MODIFIER FITZPATRICK TYPE-1-2
              u"\u200d"       # 'Cf', 'N' -- ZERO WIDTH JOINER
              u"\u2764"       # 'So', 'N' -- HEAVY BLACK HEART
              u"\uFE0F"       # 'Mn', 'A' -- VARIATION SELECTOR-16
              u"\u200d"       # 'Cf', 'N' -- ZERO WIDTH JOINER
              u"\U0001F48B"   # 'So', 'W' -- KISS MARK
              u"\u200d"       # 'Cf', 'N' -- ZERO WIDTH JOINER
              u"\U0001F9D1"   # 'So', 'W' -- ADULT
              u"\U0001F3FD"   # 'Sk', 'W' -- EMOJI MODIFIER FITZPATRICK TYPE-4
              ) * 2
    # This test adapted from https://www.unicode.org/L2/L2023/23107-terminal-suppt.pdf
    expect_length_each = (2, 0, 0, 1, 0, 0, 2, 0, 2, 0) * 2
    expect_length_phrase = 4

    # exercise,
    length_each = tuple(map(wcwidth.wcwidth, phrase))
    length_phrase = wcwidth.wcswidth(phrase)

    # verify.
    assert length_each == expect_length_each
    assert length_phrase == expect_length_phrase


def read_sequences_from_file(filename):
    """
    Read a data file located beside this module and decode its sequences.

    Lines that start with '#' and lines that are blank after stripping are
    dropped. Returns a pair ``(lines, sequences)`` of equal length, where
    ``lines`` holds the stripped text lines and ``sequences`` the matching
    strings produced by make_sequence_from_line().
    """
    path = os.path.join(os.path.dirname(__file__), filename)
    # context manager guarantees the handle is closed even if reading fails
    with codecs.open(path, 'r', encoding='utf-8') as fp:
        lines = [line.strip()
                 for line in fp.readlines()
                 if not line.startswith('#') and line.strip()]
    sequences = [make_sequence_from_line(line) for line in lines]
    return lines, sequences


@pytest.mark.skipif(NARROW_ONLY, reason="Some sequences in text file are not compatible with 'narrow' builds")
def test_recommended_emoji_zwj_sequences():
    """
    Test wcswidth of all of the unicode.org-published emoji-zwj-sequences.txt
    """
    # given,
    lines, sequences = read_sequences_from_file('emoji-zwj-sequences.txt')

    errors = []
    # Exercise, track by zipping with original text file line, a debugging aid
    num = 0
    for sequence, line in zip(sequences, lines):
        num += 1
        measured_width = wcwidth.wcswidth(sequence)
        if measured_width != 2:
            errors.append({
                'expected_width': 2,
                'line': line,
                'measured_width': measured_width,
                'sequence': sequence,
            })

    # verify
    assert errors == []
    assert num >= 1468


def test_recommended_variation_16_sequences():
    """
    Test wcswidth of all of the unicode.org-published emoji-variation-sequences.txt
    """
    # given,
    lines, sequences = read_sequences_from_file('emoji-variation-sequences.txt')

    errors = []
    num = 0
    for sequence, line in zip(sequences, lines):
        num += 1
        # filter for only \uFE0F (VS-16); the u'' prefix is required so that
        # python 2 compares against the actual codepoint rather than a byte
        # string of literal backslash characters
        if u'\ufe0f' not in sequence:
            continue
        measured_width = wcwidth.wcswidth(sequence)
        if measured_width != 2:
            errors.append({
                'expected_width': 2,
                'line': line,
                'measured_width': measured_width,
                'sequence': sequence,
            })

    # verify
    assert errors == []
    assert num >= 742


def test_unicode_9_vs16():
    """Verify effect of VS-16 on unicode_version 9.0 and later"""
    phrase = (u"\u2640"        # FEMALE SIGN
              u"\uFE0F")       # VARIATION SELECTOR-16

    expect_length_each = (1, 0)
    expect_length_phrase = 2

    # exercise,
    length_each = tuple(wcwidth.wcwidth(w_char, unicode_version='9.0') for w_char in phrase)
    length_phrase = wcwidth.wcswidth(phrase, unicode_version='9.0')

    # verify.
    assert length_each == expect_length_each
    assert length_phrase == expect_length_phrase


def test_unicode_8_vs16():
    """Verify that VS-16 has no effect on unicode_version 8.0 and earlier"""
    phrase = (u"\u2640"        # FEMALE SIGN
              u"\uFE0F")       # VARIATION SELECTOR-16

    expect_length_each = (1, 0)
    expect_length_phrase = 1

    # exercise,
    length_each = tuple(wcwidth.wcwidth(w_char, unicode_version='8.0') for w_char in phrase)
    length_phrase = wcwidth.wcswidth(phrase, unicode_version='8.0')

    # verify.
    assert length_each == expect_length_each
    assert length_phrase == expect_length_phrase