stat_udf.h 2.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475
  1. #pragma once
  2. #include <yql/essentials/public/udf/udf_helpers.h>
  3. #include <yql/essentials/public/udf/udf_value_builder.h>
  4. #include <library/cpp/tdigest/tdigest.h>
  5. using namespace NYql;
  6. using namespace NUdf;
  7. namespace {
  8. extern const char DigestResourceName[] = "Stat.TDigestResource";
  9. typedef TBoxedResource<TDigest, DigestResourceName> TDigestResource;
  10. typedef TRefCountedPtr<TDigestResource> TDigestResourcePtr;
  11. SIMPLE_UDF_WITH_OPTIONAL_ARGS(TTDigest_Create, TResource<DigestResourceName>(double, TOptional<double>, TOptional<double>), 2) {
  12. Y_UNUSED(valueBuilder);
  13. const double delta = args[1].GetOrDefault<double>(0.01);
  14. const double K = args[2].GetOrDefault<double>(25.0);
  15. if (delta == 0 || K / delta < 1) {
  16. UdfTerminate((TStringBuilder() << GetPos() << " Invalid combination of delta/K values").data());
  17. }
  18. return TUnboxedValuePod(new TDigestResource(delta, K, args[0].Get<double>()));
  19. }
  20. SIMPLE_STRICT_UDF(TTDigest_AddValue, TResource<DigestResourceName>(TResource<DigestResourceName>, double)) {
  21. Y_UNUSED(valueBuilder);
  22. TDigestResource::Validate(args[0]);
  23. TDigestResource* resource = static_cast<TDigestResource*>(args[0].AsBoxed().Get());
  24. resource->Get()->AddValue(args[1].Get<double>());
  25. return TUnboxedValuePod(resource);
  26. }
  27. SIMPLE_STRICT_UDF(TTDigest_GetPercentile, double(TResource<DigestResourceName>, double)) {
  28. Y_UNUSED(valueBuilder);
  29. TDigestResource::Validate(args[0]);
  30. return TUnboxedValuePod(static_cast<TDigestResource*>(args[0].AsBoxed().Get())->Get()->GetPercentile(args[1].Get<double>()));
  31. }
  32. SIMPLE_STRICT_UDF(TTDigest_Serialize, char*(TResource<DigestResourceName>)) {
  33. TDigestResource::Validate(args[0]);
  34. return valueBuilder->NewString(static_cast<TDigestResource*>(args[0].AsBoxed().Get())->Get()->Serialize());
  35. }
  36. SIMPLE_UDF(TTDigest_Deserialize, TResource<DigestResourceName>(char*)) {
  37. Y_UNUSED(valueBuilder);
  38. return TUnboxedValuePod(new TDigestResource(TString(args[0].AsStringRef())));
  39. }
  40. SIMPLE_STRICT_UDF(TTDigest_Merge, TResource<DigestResourceName>(TResource<DigestResourceName>, TResource<DigestResourceName>)) {
  41. Y_UNUSED(valueBuilder);
  42. TDigestResource::Validate(args[0]);
  43. TDigestResource::Validate(args[1]);
  44. return TUnboxedValuePod(new TDigestResource(static_cast<TDigestResource*>(args[0].AsBoxed().Get())->Get(), static_cast<TDigestResource*>(args[1].AsBoxed().Get())->Get()));
  45. }
  46. /*
  47. *
  48. * TODO: Memory tracking
  49. *
  50. *
  51. *
  52. */
  53. SIMPLE_MODULE(TStatModule,
  54. TTDigest_Create,
  55. TTDigest_AddValue,
  56. TTDigest_GetPercentile,
  57. TTDigest_Serialize,
  58. TTDigest_Deserialize,
  59. TTDigest_Merge)
  60. }