test_cloudspanner.py

import random
import string
from datetime import datetime
from unittest.mock import patch

import pytest
from google.cloud import spanner

from sentry.sentry_metrics.configuration import UseCaseKey
from sentry.sentry_metrics.indexer.base import KeyResult, KeyResults
from sentry.sentry_metrics.indexer.cloudspanner.cloudspanner import (
    CloudSpannerIndexer,
    IdCodec,
    RawCloudSpannerIndexer,
    SpannerIndexerModel,
)
from sentry.sentry_metrics.indexer.id_generator import get_id


@pytest.fixture(scope="module")
def testing_indexer():
    # TODO: Provide a real instance_id and database_id when running these tests.
    indexer = RawCloudSpannerIndexer(instance_id="", database_id="")
    indexer.validate()
    return indexer
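

# These tests need a real Spanner database to run against. One option (an
# assumption, not something this module sets up) is the Cloud Spanner
# emulator: the google-cloud-spanner client honors the SPANNER_EMULATOR_HOST
# environment variable, so the fixture above could be pointed at a local
# emulator instance.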


@pytest.mark.parametrize(
    "value",
    (
        12345,
        0,  # smallest supported id
        2**63 - 1,  # largest supported id
        get_id(),  # randomly generated id
    ),
)
def test_id_codec(value) -> None:
    codec = IdCodec()
    encoded = codec.encode(value)

    # Ensure the encoded value fits Cloud Spanner's signed INT64 range.
    assert encoded >= -9223372036854775808
    assert encoded <= 9223372036854775807

    assert value == codec.decode(encoded)
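

# A rough sketch of the round trip exercised above, assuming IdCodec simply
# offsets unsigned 64-bit ids into Spanner's signed INT64 range (the actual
# implementation may differ):
#
#     encoded = value - 2**63    # maps [0, 2**64) onto [-2**63, 2**63)
#     decoded = encoded + 2**63  # restores the original id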


@pytest.mark.skip(reason="TODO: Implement it correctly")
def test_spanner_indexer_service():
    # TODO: Provide instance_id and database_id when running the test
    span_indexer = CloudSpannerIndexer(instance_id="", database_id="")
    span_indexer.validate()


def get_random_string(length: int) -> str:
    return "".join(random.choice(string.ascii_letters) for _ in range(length))


@pytest.mark.skip(reason="TODO: Implement it correctly")
def test_spanner_indexer_implementation_basic(testing_indexer):
    """
    Test the basic implementation of the CloudSpannerIndexer by performing a
    record operation and then performing resolve and reverse resolve
    operations to validate the stored record.
    """
    codec = IdCodec()

    record = {"org_id": 55555, "string": get_random_string(10)}
    testing_indexer.record(
        use_case_id=UseCaseKey.PERFORMANCE, org_id=record["org_id"], string=record["string"]
    )

    # Read the raw encoded id back through the unique (org_id, string) index.
    with testing_indexer.database.snapshot() as snapshot:
        result = snapshot.read(
            testing_indexer._get_table_name(UseCaseKey.PERFORMANCE),
            columns=["id"],
            keyset=spanner.KeySet(keys=[[record["org_id"], record["string"]]]),
            index=testing_indexer._get_unique_org_string_index_name(UseCaseKey.PERFORMANCE),
        )
        all_results = list(result)

    assert len(all_results) == 1
    encoded_id = all_results[0][0]
    decoded_id = codec.decode(encoded_id)

    indexer_resolved_id = testing_indexer.resolve(
        use_case_id=UseCaseKey.PERFORMANCE, org_id=record["org_id"], string=record["string"]
    )
    assert indexer_resolved_id is not None
    assert indexer_resolved_id == decoded_id

    indexer_reverse_resolved_string = testing_indexer.reverse_resolve(
        use_case_id=UseCaseKey.PERFORMANCE, id=encoded_id
    )
    assert indexer_reverse_resolved_string is not None
    assert indexer_reverse_resolved_string == record["string"]


@pytest.mark.skip(reason="TODO: Implement it correctly")
def test_spanner_indexer_implementation_bulk_insert_twice_gives_same_result(testing_indexer):
    """
    When performing a record operation twice (in separate transactions), the
    result returned should be the same, since the second call fetches the
    existing record from the database.
    """
    record = {"org_id": 55555, "string": get_random_string(10)}
    record1_int = testing_indexer.record(
        use_case_id=UseCaseKey.PERFORMANCE, org_id=record["org_id"], string=record["string"]
    )

    # Record the same org_id/string again to validate that the returned id is
    # the one we got from the first insert.
    record2_int = testing_indexer.record(
        use_case_id=UseCaseKey.PERFORMANCE, org_id=record["org_id"], string=record["string"]
    )
    assert record1_int == record2_int


@patch(
    "sentry.sentry_metrics.indexer.cloudspanner.cloudspanner.RawCloudSpannerIndexer._insert_collisions_handled"
)
@pytest.mark.skip(reason="TODO: Implement it correctly")
def test_spanner_indexer_insert_batch_no_conflict_does_not_trigger_individual_inserts(
    mock, testing_indexer
):
    """
    Test that when a batch insert has no conflicting records, the individual
    (collision-handling) insert API is not called.
    """
    codec = IdCodec()

    model1_id = get_id()
    key_results1 = KeyResults()
    model1 = SpannerIndexerModel(
        id=codec.encode(model1_id),
        decoded_id=model1_id,
        string=get_random_string(10),
        organization_id=55555,
        date_added=datetime.now(),
        last_seen=datetime.now(),
        retention_days=55,
    )
    testing_indexer._insert_db_records(UseCaseKey.PERFORMANCE, [model1], key_results1)

    # Insert a second record with a different string and organization, so the
    # batch insert does not collide with model1.
    key_results2 = KeyResults()
    model2_id = get_id()
    model2 = SpannerIndexerModel(
        id=codec.encode(model2_id),
        decoded_id=model2_id,
        string=get_random_string(10),
        organization_id=55556,
        date_added=datetime.now(),
        last_seen=datetime.now(),
        retention_days=55,
    )
    testing_indexer._insert_db_records(UseCaseKey.PERFORMANCE, [model2], key_results2)
    assert mock.call_count == 0, "Insert with collisions should not be called"


@patch(
    "sentry.sentry_metrics.indexer.cloudspanner.cloudspanner.RawCloudSpannerIndexer._insert_collisions_handled"
)
@pytest.mark.skip(reason="TODO: Implement it correctly")
def test_spanner_indexer_insert_batch_conflict_triggers_individual_transactions(
    mock, testing_indexer
):
    """
    Test that when a record already exists in the database, the individual
    (collision-handling) insert API is called.
    """
    codec = IdCodec()

    indexed_string = get_random_string(10)
    model1_id = get_id()
    key_results1 = KeyResults()
    model1 = SpannerIndexerModel(
        id=codec.encode(model1_id),
        decoded_id=model1_id,
        string=indexed_string,
        organization_id=55555,
        date_added=datetime.now(),
        last_seen=datetime.now(),
        retention_days=55,
    )
    testing_indexer._insert_db_records(UseCaseKey.PERFORMANCE, [model1], key_results1)

    # Insert the same org_id/string with a different id; the batch insert
    # conflicts with model1 and must fall back to individual transactions.
    key_results2 = KeyResults()
    model2_id = get_id()
    model2 = SpannerIndexerModel(
        id=codec.encode(model2_id),
        decoded_id=model2_id,
        string=indexed_string,
        organization_id=55555,
        date_added=datetime.now(),
        last_seen=datetime.now(),
        retention_days=55,
    )
    testing_indexer._insert_db_records(UseCaseKey.PERFORMANCE, [model2], key_results2)
    assert mock.call_count == 1, "Insert with collisions should be called"
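

# Taken together, the two tests above suggest that _insert_db_records first
# attempts a batch insert and only falls back to _insert_collisions_handled
# (per-record transactions) when the batch hits a uniqueness conflict on
# (organization_id, string). This is inferred from the assertions here, not
# from the indexer implementation itself.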


@pytest.mark.skip(reason="TODO: Implement it correctly")
def test_spanner_indexer_individual_insert(testing_indexer):
    """
    Test that when a record already exists in the database, trying to insert
    a record with the same org_id and string (but a different id) returns the
    existing id from the database, not the id of the duplicate record being
    inserted.
    """
    codec = IdCodec()

    indexed_string = get_random_string(10)
    model1_id = get_id()
    expected_key_result = KeyResults()
    expected_key_result.add_key_result(KeyResult(org_id=55555, string=indexed_string, id=model1_id))

    key_results1 = KeyResults()
    model1 = SpannerIndexerModel(
        id=codec.encode(model1_id),
        decoded_id=model1_id,
        string=indexed_string,
        organization_id=55555,
        date_added=datetime.now(),
        last_seen=datetime.now(),
        retention_days=55,
    )
    testing_indexer._insert_collisions_handled(UseCaseKey.PERFORMANCE, [model1], key_results1)
    assert (
        key_results1.get_mapped_key_strings_to_ints()
        == expected_key_result.get_mapped_key_strings_to_ints()
    )

    # Insert the same org_id/string with a different id; the key result should
    # still map to the id of model1.
    key_results2 = KeyResults()
    model2_id = get_id()
    model2 = SpannerIndexerModel(
        id=codec.encode(model2_id),
        decoded_id=model2_id,
        string=indexed_string,
        organization_id=55555,
        date_added=datetime.now(),
        last_seen=datetime.now(),
        retention_days=55,
    )
    testing_indexer._insert_collisions_handled(UseCaseKey.PERFORMANCE, [model2], key_results2)
    assert (
        key_results2.get_mapped_key_strings_to_ints()
        == expected_key_result.get_mapped_key_strings_to_ints()
    )