SMusatov
/
ydb
mirror of https://github.com/ydb-platform/ydb.git


			
							123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243
							# std imports
import os
import codecs

# 3rd party
import pytest

try:
    # python 2: the 'unichr' builtin exists, so this name lookup succeeds
    _ = unichr
except NameError:
    # python 3: there is no 'unichr' name, alias it to chr so that the rest
    # of this module can call unichr() on either major version
    unichr = chr

# Python "narrow" builds limit the internal unicode structure to 0x10000 for
# memory conservation, so unichr() fails for larger codepoints with
# "ValueError: unichr() arg not in range(0x10000) (narrow Python build)".
# Probe for that limit once, tests that need wide codepoints are skipped on it.
try:
    unichr(0x2fffe)
except ValueError:
    NARROW_ONLY = True
else:
    NARROW_ONLY = False

# local
import wcwidth


def make_sequence_from_line(line):
    """
    Build the character sequence described by one data-file line.

    The text before the first ';' is read as whitespace-separated hexadecimal
    codepoints, for example '002A FE0F  ; ..' -> (0x2a, 0xfe0f) -> chr(0x2a) + chr(0xfe0f).
    """
    codepoint_field = line.split(';', 1)[0]
    characters = [unichr(int(hex_value, 16))
                  for hex_value in codepoint_field.strip().split()]
    return ''.join(characters)


@pytest.mark.skipif(NARROW_ONLY, reason="Test cannot verify on python 'narrow' builds")
def emoji_zwj_sequence():
    u"""
    A four-codepoint emoji ZWJ sequence occupies just 2 cells.

    NOTE(review): the function name lacks the ``test_`` prefix, so pytest's default
    discovery rules presumably never collect it -- confirm whether that is intended.
    """
    # Adapted from https://www.unicode.org/L2/L2023/23107-terminal-suppt.pdf
    # given,
    phrase = (u"\U0001f469"   # Base, Category So, East Asian Width property 'W' -- WOMAN
              u"\U0001f3fb"   # Modifier, Category Sk, East Asian Width property 'W' -- EMOJI MODIFIER FITZPATRICK TYPE-1-2
              u"\u200d"       # Joiner, Category Cf, East Asian Width property 'N'  -- ZERO WIDTH JOINER
              u"\U0001f4bb")  # Fused, Category So, East Asian Width property 'W' -- PERSONAL COMPUTER

    # exercise, measuring each codepoint alone and then the phrase as a whole
    width_of_each = tuple(wcwidth.wcwidth(char) for char in phrase)
    width_of_phrase = wcwidth.wcswidth(phrase)

    # verify.
    assert width_of_each == (2, 0, 0, 2)
    assert width_of_phrase == 2


@pytest.mark.skipif(NARROW_ONLY, reason="Test cannot verify on python 'narrow' builds")
def test_unfinished_zwj_sequence():
    u"""
    A trailing zero-width joiner with nothing after it must not cause an index-out-of-bounds error.
    """
    # given, a phrase that ends on the joiner itself
    phrase = (u"\U0001f469"   # Base, Category So, East Asian Width property 'W' -- WOMAN
              u"\U0001f3fb"   # Modifier, Category Sk, East Asian Width property 'W' -- EMOJI MODIFIER FITZPATRICK TYPE-1-2
              u"\u200d")      # Joiner, Category Cf, East Asian Width property 'N'  -- ZERO WIDTH JOINER

    # exercise,
    width_of_each = tuple(wcwidth.wcwidth(char) for char in phrase)
    width_of_phrase = wcwidth.wcswidth(phrase)

    # verify.
    assert width_of_each == (2, 0, 0)
    assert width_of_phrase == 2


@pytest.mark.skipif(NARROW_ONLY, reason="Test cannot verify on python 'narrow' builds")
def test_non_recommended_zwj_sequence():
    """
    ZWJ is measured as though the join succeeded, wcwidth does not verify that characters can be joined.

    NOTE(review): the phrase below ends at the ZWJ with no character after it, the same
    input as test_unfinished_zwj_sequence, so no non-joinable pair is actually exercised
    here -- confirm whether a trailing character was meant to be included.
    """
    # given,
    phrase = (u"\U0001f469"   # Base, Category So, East Asian Width property 'W' -- WOMAN
              u"\U0001f3fb"   # Modifier, Category Sk, East Asian Width property 'W' -- EMOJI MODIFIER FITZPATRICK TYPE-1-2
              u"\u200d")      # Joiner, Category Cf, East Asian Width property 'N'  -- ZERO WIDTH JOINER

    # exercise,
    width_of_each = tuple(wcwidth.wcwidth(char) for char in phrase)
    width_of_phrase = wcwidth.wcswidth(phrase)

    # verify.
    assert width_of_each == (2, 0, 0)
    assert width_of_phrase == 2


@pytest.mark.skipif(NARROW_ONLY, reason="Test cannot verify on python 'narrow' builds")
def test_another_emoji_zwj_sequence():
    """
    A ZWJ sequence built from narrow base characters and VS-16 is expected to measure 2 cells.
    """
    # given,
    phrase = (
        u"\u26F9"        # PERSON WITH BALL
        u"\U0001F3FB"    # EMOJI MODIFIER FITZPATRICK TYPE-1-2
        u"\u200D"        # ZERO WIDTH JOINER
        u"\u2640"        # FEMALE SIGN
        u"\uFE0F")       # VARIATION SELECTOR-16

    # exercise,
    width_of_each = tuple(wcwidth.wcwidth(char) for char in phrase)
    width_of_phrase = wcwidth.wcswidth(phrase)

    # verify.
    assert width_of_each == (1, 0, 0, 1, 0)
    assert width_of_phrase == 2


@pytest.mark.skipif(NARROW_ONLY, reason="Test cannot verify on python 'narrow' builds")
def test_longer_emoji_zwj_sequence():
    """
    A longer emoji ZWJ sequence, 10 codepoints in total, still measures just 2 cells.

    The sequence is repeated twice in one string so that several VS-16
    sequences are handled within a single function call.
    """
    # Adapted from https://www.unicode.org/L2/L2023/23107-terminal-suppt.pdf
    # given, annotated as: 'Category Code', 'East Asian Width property' -- 'description'
    single_sequence = (
        u"\U0001F9D1"   # 'So', 'W' -- ADULT
        u"\U0001F3FB"   # 'Sk', 'W' -- EMOJI MODIFIER FITZPATRICK TYPE-1-2
        u"\u200d"       # 'Cf', 'N' -- ZERO WIDTH JOINER
        u"\u2764"       # 'So', 'N' -- HEAVY BLACK HEART
        u"\uFE0F"       # 'Mn', 'A' -- VARIATION SELECTOR-16
        u"\u200d"       # 'Cf', 'N' -- ZERO WIDTH JOINER
        u"\U0001F48B"   # 'So', 'W' -- KISS MARK
        u"\u200d"       # 'Cf', 'N' -- ZERO WIDTH JOINER
        u"\U0001F9D1"   # 'So', 'W' -- ADULT
        u"\U0001F3FD"   # 'Sk', 'W' -- EMOJI MODIFIER FITZPATRICK TYPE-4
    )
    phrase = single_sequence * 2
    widths_for_single_sequence = (2, 0, 0, 1, 0, 0, 2, 0, 2, 0)

    # exercise,
    width_of_each = tuple(wcwidth.wcwidth(char) for char in phrase)
    width_of_phrase = wcwidth.wcswidth(phrase)

    # verify.
    assert width_of_each == widths_for_single_sequence * 2
    assert width_of_phrase == 4


def read_sequences_from_file(filename):
    """
    Load a sequence data file stored in the same directory as this module.

    :param filename: name of the data file, relative to this module's directory.
    :returns: tuple ``(lines, sequences)``. ``lines`` holds the stripped text of
        every line that is not blank and does not start with '#'. ``sequences``
        holds the result of make_sequence_from_line() for each of those lines,
        in the same order.
    """
    path = os.path.join(os.path.dirname(__file__), filename)
    # The context manager closes the file even when reading or decoding raises.
    with codecs.open(path, 'r', encoding='utf-8') as fp:
        lines = [line.strip()
                 for line in fp.readlines()
                 if not line.startswith('#') and line.strip()]
    sequences = [make_sequence_from_line(line) for line in lines]
    return lines, sequences


@pytest.mark.skipif(NARROW_ONLY, reason="Some sequences in text file are not compatible with 'narrow' builds")
def test_recommended_emoji_zwj_sequences():
    """
    Every sequence in the unicode.org-published emoji-zwj-sequences.txt must measure 2 cells.
    """
    # given,
    lines, sequences = read_sequences_from_file('emoji-zwj-sequences.txt')

    # exercise, each failure keeps its original text file line as a debugging aid
    failures = []
    total = 0
    for total, (sequence, line) in enumerate(zip(sequences, lines), 1):
        width = wcwidth.wcswidth(sequence)
        if width == 2:
            continue
        failures.append({
            'expected_width': 2,
            'line': line,
            'measured_width': width,
            'sequence': sequence,
        })

    # verify, both the measurements and that the whole data file was read
    assert failures == []
    assert total >= 1468


def test_recommended_variation_16_sequences():
    """
    Test wcswidth of all of the unicode.org-published emoji-variation-sequences.txt

    Only sequences containing VARIATION SELECTOR-16 (U+FE0F) are measured, and
    each of them is expected to occupy 2 cells. Every line read from the file
    is counted, including the ones that are filtered out.
    """
    # given,
    lines, sequences = read_sequences_from_file('emoji-variation-sequences.txt')
    # Must be a unicode literal: in a python 2 byte string '\u' is not an escape,
    # so a plain '\ufe0f' would never match and every sequence would be skipped.
    vs16 = u'\ufe0f'

    errors = []
    num = 0
    for sequence, line in zip(sequences, lines):
        num += 1
        if vs16 not in sequence:
            # filter for only \uFE0F (VS-16)
            continue
        measured_width = wcwidth.wcswidth(sequence)
        if measured_width != 2:
            errors.append({
                'expected_width': 2,
                'line': line,
                # report the width that was tested, rather than measuring again
                'measured_width': measured_width,
                'sequence': sequence,
            })

    # verify
    assert errors == []
    assert num >= 742


def test_unicode_9_vs16():
    """VS-16 is expected to widen the phrase to 2 cells for unicode_version 9.0 and later."""
    # given,
    phrase = (u"\u2640"        # FEMALE SIGN
              u"\uFE0F")       # VARIATION SELECTOR-16

    # exercise,
    width_of_each = tuple(wcwidth.wcwidth(char, unicode_version='9.0') for char in phrase)
    width_of_phrase = wcwidth.wcswidth(phrase, unicode_version='9.0')

    # verify.
    assert width_of_each == (1, 0)
    assert width_of_phrase == 2

def test_unicode_8_vs16():
    """VS-16 is expected to have no effect on the width for unicode_version 8.0 and earlier."""
    # given,
    phrase = (u"\u2640"        # FEMALE SIGN
              u"\uFE0F")       # VARIATION SELECTOR-16

    # exercise,
    width_of_each = tuple(wcwidth.wcwidth(char, unicode_version='8.0') for char in phrase)
    width_of_phrase = wcwidth.wcswidth(phrase, unicode_version='8.0')

    # verify.
    assert width_of_each == (1, 0)
    assert width_of_phrase == 1