# test_cardinality.py
  1. from typing import Collection, Optional, Sequence
  2. import pytest
  3. from sentry.ratelimits.cardinality import (
  4. GrantedQuota,
  5. Quota,
  6. RedisCardinalityLimiter,
  7. RequestedQuota,
  8. )
  9. @pytest.fixture
  10. def limiter():
  11. return RedisCardinalityLimiter()
  12. class LimiterHelper:
  13. """
  14. Wrapper interface around the rate limiter, with specialized, stateful and
  15. primitive interface for more readable tests.
  16. """
  17. def __init__(self, limiter: RedisCardinalityLimiter):
  18. self.limiter = limiter
  19. self.quota = Quota(window_seconds=3600, granularity_seconds=60, limit=10)
  20. self.timestamp = 3600
  21. def add_value(self, value: int) -> Optional[int]:
  22. values = self.add_values([value])
  23. if values:
  24. (value,) = values
  25. return value
  26. def add_values(self, values: Sequence[int]) -> Collection[int]:
  27. request = RequestedQuota(prefix="hello", unit_hashes=values, quota=self.quota)
  28. new_timestamp, grants = self.limiter.check_within_quotas(
  29. [request], timestamp=self.timestamp
  30. )
  31. self.limiter.use_quotas(grants, new_timestamp)
  32. (grant,) = grants
  33. return grant.granted_unit_hashes
  34. def test_basic(limiter: RedisCardinalityLimiter):
  35. helper = LimiterHelper(limiter)
  36. for _ in range(20):
  37. assert helper.add_value(1) == 1
  38. for _ in range(20):
  39. assert helper.add_value(2) == 2
  40. assert [helper.add_value(10 + i) for i in range(100)] == list(range(10, 18)) + [None] * 92
  41. helper.timestamp += 3600
  42. # an hour has passed, we should be able to admit 10 new keys
  43. #
  44. # note: we only virtually advanced the timestamp. The
  45. # `cardinality:timeseries` keys for 1, 2 still exist in this test setup
  46. # (and we would admit them on top of 10..20), but they won't in a
  47. # real-world scenario
  48. assert [helper.add_value(10 + i) for i in range(100)] == list(range(10, 20)) + [None] * 90
  49. def test_multiple_prefixes(limiter: RedisCardinalityLimiter):
  50. """
  51. Test multiple prefixes/organizations and just make sure we're not leaking
  52. state between prefixes.
  53. * `a` only consumes 5 of the quota first and runs out of quota in the
  54. second `check_within_quotas` call
  55. * `b` immediately exceeds the quota.
  56. * `c` fits comfortably into the quota at first (fills out the limit exactly)
  57. """
  58. quota = Quota(window_seconds=3600, granularity_seconds=60, limit=10)
  59. requests = [
  60. RequestedQuota(prefix="a", unit_hashes={1, 2, 3, 4, 5}, quota=quota),
  61. RequestedQuota(prefix="b", unit_hashes={1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}, quota=quota),
  62. RequestedQuota(
  63. prefix="c", unit_hashes={11, 12, 13, 14, 15, 16, 17, 18, 19, 20}, quota=quota
  64. ),
  65. ]
  66. new_timestamp, grants = limiter.check_within_quotas(requests)
  67. assert grants == [
  68. GrantedQuota(request=requests[0], granted_unit_hashes=[1, 2, 3, 4, 5], reached_quota=None),
  69. GrantedQuota(
  70. request=requests[1],
  71. granted_unit_hashes=[1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
  72. reached_quota=quota,
  73. ),
  74. GrantedQuota(
  75. request=requests[2],
  76. granted_unit_hashes=[11, 12, 13, 14, 15, 16, 17, 18, 19, 20],
  77. reached_quota=None,
  78. ),
  79. ]
  80. limiter.use_quotas(grants, new_timestamp)
  81. requests = [
  82. RequestedQuota(prefix="a", unit_hashes={6, 7, 8, 9, 10, 11}, quota=quota),
  83. RequestedQuota(prefix="b", unit_hashes={1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}, quota=quota),
  84. RequestedQuota(
  85. prefix="c", unit_hashes={11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21}, quota=quota
  86. ),
  87. ]
  88. new_timestamp, grants = limiter.check_within_quotas(requests)
  89. assert grants == [
  90. GrantedQuota(
  91. request=requests[0], granted_unit_hashes=[6, 7, 8, 9, 10], reached_quota=quota
  92. ),
  93. GrantedQuota(
  94. request=requests[1],
  95. granted_unit_hashes=[1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
  96. reached_quota=quota,
  97. ),
  98. GrantedQuota(
  99. request=requests[2],
  100. granted_unit_hashes=[11, 12, 13, 14, 15, 16, 17, 18, 19, 20],
  101. reached_quota=quota,
  102. ),
  103. ]
  104. limiter.use_quotas(grants, new_timestamp)
  105. def test_sliding(limiter: RedisCardinalityLimiter):
  106. """
  107. Our rate limiter has a sliding window of [now - 1 hour ; now], with a
  108. granularity of 1 hour.
  109. What that means is that, as time moves on, old hashes should be forgotten
  110. _one by one_, and the quota budget they occupy should become _gradually_
  111. available to newer, never-seen-before items.
  112. """
  113. helper = LimiterHelper(limiter)
  114. admissions = []
  115. # We start with a limit of 10 new hashes per hour. We add a new hash and
  116. # advance time by 6 minutes, _100 times_
  117. for i in range(100):
  118. admissions.append(helper.add_value(i))
  119. helper.timestamp += 360
  120. # We assert that _all 100 items_ are admitted/accepted. This is because we
  121. # have advanced time between each item. We have "slept" for 6 minutes a 100
  122. # times, so we actually added 100 hashes over a span of 10 hours. That
  123. # should totally fit into our limit.
  124. assert admissions == list(range(100))
  125. admissions = []
  126. expected = []
  127. # 100 hashes over 10 hours is "basically" 10 hashes over 1 hour. Since we
  128. # added items over a span of 10 hours, the limiter should've forgotten
  129. # about 90% of items already, meaning that in a real-world scenario, we
  130. # should accept 90 new hashes.
  131. #
  132. # But since we only advanced time virtually (and not in Redis for TTL
  133. # purposes), we actually only accept 10 items... a flaw in this test.
  134. #
  135. # Anyway, in the previous loop we added an item every 6 minutes. Now we're
  136. # adding an item 10 times per 6 minutes. So we should see every 10th item
  137. # being admitted.
  138. for i in range(100, 200):
  139. admissions.append(helper.add_value(i))
  140. expected.append(i if i % 10 == 0 else None)
  141. helper.timestamp += 36
  142. assert admissions == expected
  143. def test_sampling(limiter: RedisCardinalityLimiter):
  144. """
  145. demonstrate behavior when "shard sampling" is active. If one out of 10
  146. shards for an organization are stored, it is still possible to limit the
  147. exactly correct amount of hashes, for certain hash values.
  148. """
  149. limiter.num_physical_shards = 1
  150. limiter.num_shards = 10
  151. helper = LimiterHelper(limiter)
  152. # when adding "hashes" 0..10 in ascending order, the first hash will fill up the physical shard
  153. admissions = [helper.add_value(i) for i in reversed(range(10))]
  154. assert admissions == list(reversed(range(10)))
  155. # we have stored only one shard out of 10, meaning the set count reported
  156. # from redis is 1, but the total counts are extrapolated correctly. like
  157. # without sampling, assert that the limit of 10 hashes is correctly applied
  158. # and we no longer accept additional hashes beyond 10.
  159. admissions = [helper.add_value(i) for i in range(100, 110)]
  160. assert admissions == [None] * 10
  161. def test_sampling_going_bad(limiter: RedisCardinalityLimiter):
  162. """
  163. test an edgecase of set sampling in the cardinality limiter. it is not
  164. exactly desired behavior but a known sampling artifact
  165. """
  166. limiter.num_physical_shards = 1
  167. limiter.num_shards = 10
  168. helper = LimiterHelper(limiter)
  169. # when adding "hashes" 0..10 in ascending order, the first hash will fill
  170. # up the physical shard, and a total count of 10 is extrapolated from that
  171. admissions = [helper.add_value(i) for i in range(10)]
  172. assert admissions == [0] + [None] * 9
  173. def test_regression_mixed_order(limiter: RedisCardinalityLimiter):
  174. """
  175. Regression test to assert we still accept hashes after dropping some
  176. within the same request, regardless of set order.
  177. """
  178. helper = LimiterHelper(limiter)
  179. # this hash certainly fits into the default limit of 10 hashes
  180. assert helper.add_value(5) == 5
  181. # here, only 10 should be limited, as it is the 11th item being fed to the indexer.
  182. # 5 was admitted in an earlier call, and 0..9 are admitted right before it.
  183. # there used to be a bug where anything after 10 (i.e. 5) was dropped as
  184. # well (due to a wrong `break` somewhere in a loop)
  185. assert helper.add_values([0, 1, 2, 3, 4, 6, 7, 8, 9, 10, 5]) == [0, 1, 2, 3, 4, 6, 7, 8, 9, 5]