gd_stats.h 2.1 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879
  1. #pragma once
  2. #include <util/generic/ymath.h>
  3. #include <util/generic/algorithm.h>
  4. #include <util/generic/yexception.h>
  5. namespace NGreedyDict {
  // Selects the formula that Score() uses to rate a dictionary entry.
  enum EEntryScore {
      ES_COUNT = 0,      // raw occurrence count
      ES_LEN_COUNT = 1,  // occurrence count multiplied by the entry length
      ES_SIMPLE = 2,     // SimpleTest() value
      ES_LEN_SIMPLE = 3, // SimpleTest() value multiplied by the entry length
      ES_SOLAR = 4       // SolarTest() value
  };
  // Selects the statistical test that StatTest() applies to a candidate pair.
  // NOTE(review): value 1 is not assigned here -- presumably reserved or retired;
  // confirm before renumbering.
  enum EEntryStatTest {
      // No test: StatTest() yields 1 for any non-empty sample.
      EST_NONE = 0,
      // Normalized excess of observed over model probability (SimpleTestNorm()).
      EST_SIMPLE_NORM = 2
  };
  17. inline float ModelP(ui32 countA, ui32 countB, ui32 total) {
  18. return float(countA) * countB / total / total;
  19. }
  20. // P (ab | dependent)
  21. inline float SimpleTest(float modelp, ui32 countAB, ui32 total) {
  22. float realp = float(countAB) / total;
  23. return modelp >= realp ? 0 : (realp - modelp);
  24. }
  25. inline float SolarTest(float modelp, ui32 countAB, ui32 total) {
  26. float realp = float(countAB) / total;
  27. return modelp >= realp ? 0 : (modelp + realp * (log(realp / modelp) - 1));
  28. }
  29. // P (ab | dependent) / P (ab)
  30. inline float SimpleTestNorm(float modelp, ui32 countAB, ui32 total) {
  31. float realp = float(countAB) / total;
  32. return modelp >= realp ? 0 : (realp - modelp) / realp;
  33. }
  34. inline float StatTest(EEntryStatTest test, float modelp, ui32 countAB, ui32 total) {
  35. if (!total) {
  36. return 0;
  37. }
  38. switch (test) {
  39. case EST_NONE:
  40. return 1;
  41. case EST_SIMPLE_NORM:
  42. return SimpleTestNorm(modelp, countAB, total);
  43. }
  44. Y_ABORT("no way!");
  45. return 0;
  46. }
  47. inline float Score(EEntryScore score, ui32 len, float modelp, ui32 count, ui32 total) {
  48. if (!total) {
  49. return 0;
  50. }
  51. ui32 m = 1;
  52. switch (score) {
  53. case ES_LEN_COUNT:
  54. m = len;
  55. [[fallthrough]];
  56. case ES_COUNT:
  57. return m * count;
  58. case ES_LEN_SIMPLE:
  59. m = len;
  60. [[fallthrough]];
  61. case ES_SIMPLE:
  62. return m * SimpleTest(modelp, count, total);
  63. case ES_SOLAR:
  64. return SolarTest(modelp, count, total);
  65. }
  66. Y_ABORT("no way!");
  67. return 0;
  68. }
  69. }