# test_cardinality.py
  1. from typing import Collection, Optional, Sequence
  2. import pytest
  3. from sentry.ratelimits.cardinality import (
  4. GrantedQuota,
  5. Quota,
  6. RedisCardinalityLimiter,
  7. RequestedQuota,
  8. )
  9. @pytest.fixture
  10. def limiter():
  11. return RedisCardinalityLimiter()
  12. class LimiterHelper:
  13. """
  14. Wrapper interface around the rate limiter, with specialized, stateful and
  15. primitive interface for more readable tests.
  16. """
  17. def __init__(self, limiter: RedisCardinalityLimiter):
  18. self.limiter = limiter
  19. self.quota = Quota(window_seconds=3600, granularity_seconds=60, limit=10)
  20. self.timestamp = 3600
  21. def add_value(self, value: int) -> Optional[int]:
  22. values = self.add_values([value])
  23. if values:
  24. (value,) = values
  25. return value
  26. def add_values(self, values: Sequence[int]) -> Collection[int]:
  27. request = RequestedQuota(prefix="hello", unit_hashes=values, quota=self.quota)
  28. new_timestamp, grants = self.limiter.check_within_quotas(
  29. [request], timestamp=self.timestamp
  30. )
  31. self.limiter.use_quotas(grants, new_timestamp)
  32. (grant,) = grants
  33. return grant.granted_unit_hashes
  34. def test_basic(limiter: RedisCardinalityLimiter):
  35. helper = LimiterHelper(limiter)
  36. for _ in range(20):
  37. assert helper.add_value(1) == 1
  38. for _ in range(20):
  39. assert helper.add_value(2) == 2
  40. assert [helper.add_value(10 + i) for i in range(100)] == list(range(10, 18)) + [None] * 92
  41. helper.timestamp += 3600
  42. # an hour has passed, we should be able to admit 10 new keys
  43. #
  44. # note: we only virtually advanced the timestamp. The
  45. # `cardinality:timeseries` keys for 1, 2 still exist in this test setup
  46. # (and we would admit them on top of 10..20), but they won't in a
  47. # real-world scenario
  48. assert [helper.add_value(10 + i) for i in range(100)] == list(range(10, 20)) + [None] * 90
  49. def test_multiple_prefixes(limiter: RedisCardinalityLimiter):
  50. """
  51. Test multiple prefixes/organizations and just make sure we're not leaking
  52. state between prefixes.
  53. * `a` only consumes 5 of the quota first and runs out of quota in the
  54. second `check_within_quotas` call
  55. * `b` immediately exceeds the quota.
  56. * `c` fits comfortably into the quota at first (fills out the limit exactly)
  57. """
  58. quota = Quota(window_seconds=3600, granularity_seconds=60, limit=10)
  59. requests = [
  60. RequestedQuota(prefix="a", unit_hashes={1, 2, 3, 4, 5}, quota=quota),
  61. RequestedQuota(prefix="b", unit_hashes={1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}, quota=quota),
  62. RequestedQuota(
  63. prefix="c", unit_hashes={11, 12, 13, 14, 15, 16, 17, 18, 19, 20}, quota=quota
  64. ),
  65. ]
  66. new_timestamp, grants = limiter.check_within_quotas(requests)
  67. assert grants == [
  68. GrantedQuota(request=requests[0], granted_unit_hashes=[1, 2, 3, 4, 5], reached_quota=None),
  69. GrantedQuota(
  70. request=requests[1],
  71. granted_unit_hashes=[1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
  72. reached_quota=quota,
  73. ),
  74. GrantedQuota(
  75. request=requests[2],
  76. granted_unit_hashes=[11, 12, 13, 14, 15, 16, 17, 18, 19, 20],
  77. reached_quota=None,
  78. ),
  79. ]
  80. limiter.use_quotas(grants, new_timestamp)
  81. requests = [
  82. RequestedQuota(prefix="a", unit_hashes={6, 7, 8, 9, 10, 11}, quota=quota),
  83. RequestedQuota(prefix="b", unit_hashes={1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}, quota=quota),
  84. RequestedQuota(
  85. prefix="c", unit_hashes={11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21}, quota=quota
  86. ),
  87. ]
  88. new_timestamp, grants = limiter.check_within_quotas(requests)
  89. assert grants == [
  90. GrantedQuota(
  91. request=requests[0], granted_unit_hashes=[6, 7, 8, 9, 10], reached_quota=quota
  92. ),
  93. GrantedQuota(
  94. request=requests[1],
  95. granted_unit_hashes=[1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
  96. reached_quota=quota,
  97. ),
  98. GrantedQuota(
  99. request=requests[2],
  100. granted_unit_hashes=[11, 12, 13, 14, 15, 16, 17, 18, 19, 20],
  101. reached_quota=quota,
  102. ),
  103. ]
  104. limiter.use_quotas(grants, new_timestamp)
  105. def test_sliding(limiter: RedisCardinalityLimiter):
  106. """
  107. Our rate limiter has a sliding window of [now - 1 hour ; now], with a
  108. granularity of 1 hour.
  109. What that means is that, as time moves on, old hashes should be forgotten
  110. _one by one_, and the quota budget they occupy should become _gradually_
  111. available to newer, never-seen-before items.
  112. """
  113. helper = LimiterHelper(limiter)
  114. admissions = []
  115. # We start with a limit of 10 new hashes per hour. We add a new hash and
  116. # advance time by 6 minutes, _100 times_
  117. for i in range(100):
  118. admissions.append(helper.add_value(i))
  119. helper.timestamp += 360
  120. # We assert that _all 100 items_ are admitted/accepted. This is because we
  121. # have advanced time between each item. We have "slept" for 6 minutes a 100
  122. # times, so we actually added 100 hashes over a span of 10 hours. That
  123. # should totally fit into our limit.
  124. assert admissions == list(range(100))
  125. admissions = []
  126. expected = []
  127. # 100 hashes over 10 hours is "basically" 10 hashes over 1 hour. Since we
  128. # added items over a span of 10 hours, the limiter should've forgotten
  129. # about 90% of items already, meaning that in a real-world scenario, we
  130. # should accept 90 new hashes.
  131. #
  132. # But since we only advanced time virtually (and not in Redis for TTL
  133. # purposes), we actually only accept 10 items... a flaw in this test.
  134. #
  135. # Anyway, in the previous loop we added an item every 6 minutes. Now we're
  136. # adding an item 10 times per 6 minutes. So we should see every 10th item
  137. # being admitted.
  138. for i in range(100, 200):
  139. admissions.append(helper.add_value(i))
  140. expected.append(i if i % 10 == 0 else None)
  141. helper.timestamp += 36
  142. assert admissions == expected
  143. def test_sampling(limiter: RedisCardinalityLimiter):
  144. """
  145. demonstrate behavior when "shard sampling" is active. If one out of 10
  146. shards for an organization are stored, it is still possible to limit the
  147. exactly correct amount of hashes, for certain hash values.
  148. """
  149. limiter.num_physical_shards = 1
  150. limiter.num_shards = 10
  151. helper = LimiterHelper(limiter)
  152. # when adding "hashes" 0..10 in ascending order, the first hash will fill up the physical shard
  153. admissions = [helper.add_value(i) for i in reversed(range(10))]
  154. assert admissions == list(reversed(range(10)))
  155. # we have stored only one shard out of 10, meaning the set count reported
  156. # from redis is 1, but the total counts are extrapolated correctly. like
  157. # without sampling, assert that the limit of 10 hashes is correctly applied
  158. # and we no longer accept additional hashes beyond 10.
  159. admissions = [helper.add_value(i) for i in range(100, 110)]
  160. assert admissions == [None] * 10
  161. def test_sampling_going_bad(limiter: RedisCardinalityLimiter):
  162. """
  163. test an edgecase of set sampling in the cardinality limiter. it is not
  164. exactly desired behavior but a known sampling artifact
  165. """
  166. limiter.num_physical_shards = 1
  167. limiter.num_shards = 10
  168. helper = LimiterHelper(limiter)
  169. # when adding "hashes" 0..10 in ascending order, the first hash will fill
  170. # up the physical shard, and a total count of 10 is extrapolated from that
  171. admissions = [helper.add_value(i) for i in range(10)]
  172. assert admissions == [0] + [None] * 9
  173. def test_regression_mixed_order(limiter: RedisCardinalityLimiter):
  174. """
  175. Regression test to assert we still accept hashes after dropping some
  176. within the same request, regardless of set order.
  177. """
  178. helper = LimiterHelper(limiter)
  179. # this hash certainly fits into the default limit of 10 hashes
  180. assert helper.add_value(5) == 5
  181. # here, only 10 should be limited, as it is the 11th item being fed to the indexer.
  182. # 5 was admitted in an earlier call, and 0..9 are admitted right before it.
  183. # there used to be a bug where anything after 10 (i.e. 5) was dropped as
  184. # well (due to a wrong `break` somewhere in a loop)
  185. assert helper.add_values([0, 1, 2, 3, 4, 6, 7, 8, 9, 10, 5]) == [0, 1, 2, 3, 4, 6, 7, 8, 9, 5]