test_cloudspanner.py

import random
import string
from datetime import datetime
from unittest.mock import patch

import pytest
from google.cloud import spanner

from sentry.sentry_metrics.configuration import UseCaseKey
from sentry.sentry_metrics.indexer.base import KeyResult, KeyResults
from sentry.sentry_metrics.indexer.cloudspanner.cloudspanner import (
    CloudSpannerIndexer,
    IdCodec,
    RawCloudSpannerIndexer,
    SpannerIndexerModel,
)
from sentry.sentry_metrics.indexer.id_generator import get_id


@pytest.fixture(scope="module")
def testing_indexer():
    # TODO: Provide a real instance_id and database_id when running these tests.
    indexer = RawCloudSpannerIndexer(instance_id="", database_id="")
    indexer.validate()
    return indexer
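

# These tests need a real Spanner database to run against. One option (an
# assumption, not something this module sets up) is the Cloud Spanner
# emulator: the google-cloud-spanner client honors the SPANNER_EMULATOR_HOST
# environment variable, so the fixture above could be pointed at a local
# emulator instance.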


@pytest.mark.parametrize(
    "value",
    (
        12345,
        0,  # smallest supported id
        2**63 - 1,  # largest supported id
        get_id(),  # randomly generated id
    ),
)
def test_id_codec(value) -> None:
    codec = IdCodec()
    encoded = codec.encode(value)

    # Ensure the encoded value fits Cloud Spanner's signed INT64 range.
    assert encoded >= -9223372036854775808
    assert encoded <= 9223372036854775807

    assert value == codec.decode(encoded)
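

# A rough sketch of the round trip exercised above, assuming IdCodec simply
# offsets unsigned 64-bit ids into Spanner's signed INT64 range (the actual
# implementation may differ):
#
#     encoded = value - 2**63    # maps [0, 2**64) onto [-2**63, 2**63)
#     decoded = encoded + 2**63  # restores the original id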


@pytest.mark.skip(reason="TODO: Implement it correctly")
def test_spanner_indexer_service():
    # TODO: Provide instance_id and database_id when running the test
    span_indexer = CloudSpannerIndexer(instance_id="", database_id="")
    span_indexer.validate()


def get_random_string(length: int) -> str:
    return "".join(random.choice(string.ascii_letters) for _ in range(length))


@pytest.mark.skip(reason="TODO: Implement it correctly")
def test_spanner_indexer_implementation_basic(testing_indexer):
    """
    Test the basic implementation of the CloudSpannerIndexer by performing a
    record operation and then performing resolve and reverse resolve
    operations to validate the stored record.
    """
    codec = IdCodec()

    record = {"org_id": 55555, "string": get_random_string(10)}
    testing_indexer.record(
        use_case_id=UseCaseKey.PERFORMANCE, org_id=record["org_id"], string=record["string"]
    )

    # Read the raw encoded id back through the unique (org_id, string) index.
    with testing_indexer.database.snapshot() as snapshot:
        result = snapshot.read(
            testing_indexer._get_table_name(UseCaseKey.PERFORMANCE),
            columns=["id"],
            keyset=spanner.KeySet(keys=[[record["org_id"], record["string"]]]),
            index=testing_indexer._get_unique_org_string_index_name(UseCaseKey.PERFORMANCE),
        )
        all_results = list(result)

    assert len(all_results) == 1
    encoded_id = all_results[0][0]
    decoded_id = codec.decode(encoded_id)

    indexer_resolved_id = testing_indexer.resolve(
        use_case_id=UseCaseKey.PERFORMANCE, org_id=record["org_id"], string=record["string"]
    )
    assert indexer_resolved_id is not None
    assert indexer_resolved_id == decoded_id

    indexer_reverse_resolved_string = testing_indexer.reverse_resolve(
        use_case_id=UseCaseKey.PERFORMANCE, id=encoded_id
    )
    assert indexer_reverse_resolved_string is not None
    assert indexer_reverse_resolved_string == record["string"]


@pytest.mark.skip(reason="TODO: Implement it correctly")
def test_spanner_indexer_implementation_bulk_insert_twice_gives_same_result(testing_indexer):
    """
    When performing a record operation twice (in separate transactions), the
    result returned should be the same, since the second call fetches the
    existing record from the database.
    """
    record = {"org_id": 55555, "string": get_random_string(10)}
    record1_int = testing_indexer.record(
        use_case_id=UseCaseKey.PERFORMANCE, org_id=record["org_id"], string=record["string"]
    )

    # Record the same org_id/string again to validate that the returned id is
    # the one we got from the first insert.
    record2_int = testing_indexer.record(
        use_case_id=UseCaseKey.PERFORMANCE, org_id=record["org_id"], string=record["string"]
    )
    assert record1_int == record2_int


@patch(
    "sentry.sentry_metrics.indexer.cloudspanner.cloudspanner.RawCloudSpannerIndexer._insert_collisions_handled"
)
@pytest.mark.skip(reason="TODO: Implement it correctly")
def test_spanner_indexer_insert_batch_no_conflict_does_not_trigger_individual_inserts(
    mock, testing_indexer
):
    """
    Test that when a batch insert has no conflicting records, the individual
    (collision-handling) insert API is not called.
    """
    codec = IdCodec()

    model1_id = get_id()
    key_results1 = KeyResults()
    model1 = SpannerIndexerModel(
        id=codec.encode(model1_id),
        decoded_id=model1_id,
        string=get_random_string(10),
        organization_id=55555,
        date_added=datetime.now(),
        last_seen=datetime.now(),
        retention_days=55,
    )
    testing_indexer._insert_db_records(UseCaseKey.PERFORMANCE, [model1], key_results1)

    # Insert a second record with a different string and organization, so the
    # batch insert does not collide with model1.
    key_results2 = KeyResults()
    model2_id = get_id()
    model2 = SpannerIndexerModel(
        id=codec.encode(model2_id),
        decoded_id=model2_id,
        string=get_random_string(10),
        organization_id=55556,
        date_added=datetime.now(),
        last_seen=datetime.now(),
        retention_days=55,
    )
    testing_indexer._insert_db_records(UseCaseKey.PERFORMANCE, [model2], key_results2)
    assert mock.call_count == 0, "Insert with collisions should not be called"


@patch(
    "sentry.sentry_metrics.indexer.cloudspanner.cloudspanner.RawCloudSpannerIndexer._insert_collisions_handled"
)
@pytest.mark.skip(reason="TODO: Implement it correctly")
def test_spanner_indexer_insert_batch_conflict_triggers_individual_transactions(
    mock, testing_indexer
):
    """
    Test that when a record already exists in the database, the individual
    (collision-handling) insert API is called.
    """
    codec = IdCodec()

    indexed_string = get_random_string(10)
    model1_id = get_id()
    key_results1 = KeyResults()
    model1 = SpannerIndexerModel(
        id=codec.encode(model1_id),
        decoded_id=model1_id,
        string=indexed_string,
        organization_id=55555,
        date_added=datetime.now(),
        last_seen=datetime.now(),
        retention_days=55,
    )
    testing_indexer._insert_db_records(UseCaseKey.PERFORMANCE, [model1], key_results1)

    # Insert the same org_id/string with a different id; the batch insert
    # conflicts with model1 and must fall back to individual transactions.
    key_results2 = KeyResults()
    model2_id = get_id()
    model2 = SpannerIndexerModel(
        id=codec.encode(model2_id),
        decoded_id=model2_id,
        string=indexed_string,
        organization_id=55555,
        date_added=datetime.now(),
        last_seen=datetime.now(),
        retention_days=55,
    )
    testing_indexer._insert_db_records(UseCaseKey.PERFORMANCE, [model2], key_results2)
    assert mock.call_count == 1, "Insert with collisions should be called"
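

# Taken together, the two tests above suggest that _insert_db_records first
# attempts a batch insert and only falls back to _insert_collisions_handled
# (per-record transactions) when the batch hits a uniqueness conflict on
# (organization_id, string). This is inferred from the assertions here, not
# from the indexer implementation itself.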


@pytest.mark.skip(reason="TODO: Implement it correctly")
def test_spanner_indexer_individual_insert(testing_indexer):
    """
    Test that when a record already exists in the database, trying to insert
    a record with the same org_id and string (but a different id) returns the
    existing id from the database, not the id of the duplicate record being
    inserted.
    """
    codec = IdCodec()

    indexed_string = get_random_string(10)
    model1_id = get_id()
    expected_key_result = KeyResults()
    expected_key_result.add_key_result(KeyResult(org_id=55555, string=indexed_string, id=model1_id))

    key_results1 = KeyResults()
    model1 = SpannerIndexerModel(
        id=codec.encode(model1_id),
        decoded_id=model1_id,
        string=indexed_string,
        organization_id=55555,
        date_added=datetime.now(),
        last_seen=datetime.now(),
        retention_days=55,
    )
    testing_indexer._insert_collisions_handled(UseCaseKey.PERFORMANCE, [model1], key_results1)
    assert (
        key_results1.get_mapped_key_strings_to_ints()
        == expected_key_result.get_mapped_key_strings_to_ints()
    )

    # Insert the same org_id/string with a different id; the key result should
    # still map to the id of model1.
    key_results2 = KeyResults()
    model2_id = get_id()
    model2 = SpannerIndexerModel(
        id=codec.encode(model2_id),
        decoded_id=model2_id,
        string=indexed_string,
        organization_id=55555,
        date_added=datetime.now(),
        last_seen=datetime.now(),
        retention_days=55,
    )
    testing_indexer._insert_collisions_handled(UseCaseKey.PERFORMANCE, [model2], key_results2)
    assert (
        key_results2.get_mapped_key_strings_to_ints()
        == expected_key_result.get_mapped_key_strings_to_ints()
    )