yql-7237.sql 1.8 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768
  1. /* syntax version 1 */
  2. USE plato;
  3. $push_final_data = AsList(
  4. AsStruct("manufacturer" AS manufacturer, "state" AS state));
  5. INSERT INTO @push_final
  6. SELECT * FROM AS_TABLE($push_final_data);
  7. COMMIT;
  8. $manufacturer_name_fix = ($manufacturer) -> {
  9. $lowered_manufacturer = CAST(Unicode::ToLower(CAST(String::Strip($manufacturer) AS Utf8)) AS String);
  10. $in = AsList(
  11. "oysters", -- bullshit in naming
  12. "qumo", -- bullshit in naming
  13. "texet", -- bullshit in naming
  14. "alcatel", -- bullshit in naming
  15. "dexp", -- bullshit in naming
  16. "haier", -- bullshit in naming
  17. "dexp", -- bullshit in naming
  18. "asus", -- ASUSTek Computer Inc & ASUS both usable
  19. "yota", -- Yota Devices & Yota Devices Limited ...
  20. "ark" -- "ark" & "ark electronic technology" & "ark_electronic_technology"
  21. );
  22. $lambda = ($substring) -> {
  23. RETURN FIND($lowered_manufacturer, $substring) IS NULL;
  24. };
  25. $list = ListSkipWhile($in, $lambda);
  26. RETURN IF(ListHasItems($list), $list[0], $lowered_manufacturer);
  27. };
  28. $manufacturers_whitelist = (
  29. SELECT
  30. man as manufacturer
  31. FROM
  32. (
  33. SELECT
  34. man, COUNT(*) as cnt
  35. FROM
  36. @push_final
  37. GROUP BY
  38. $manufacturer_name_fix(manufacturer) as man
  39. )
  40. WHERE
  41. cnt > 1000
  42. );
  43. $push_final_preprocessing = (
  44. SELECT
  45. $manufacturer_name_fix(manufacturer) as manufacturer,
  46. state
  47. FROM
  48. @push_final
  49. );
  50. SELECT
  51. COALESCE(fixed_manufacturer, "other") as manufacturer,
  52. L.* WITHOUT L.manufacturer
  53. FROM
  54. $push_final_preprocessing as L
  55. LEFT JOIN
  56. (
  57. SELECT
  58. manufacturer as fixed_manufacturer
  59. FROM
  60. $manufacturers_whitelist
  61. ) as R
  62. ON(L.manufacturer = R.fixed_manufacturer);