ip_base_udf.h 13 KB


  1. #pragma once
  2. #include <yql/essentials/public/udf/udf_helpers.h>
  3. #include <library/cpp/ipv6_address/ipv6_address.h>
  4. #include <library/cpp/ipmath/ipmath.h>
  5. #include <util/generic/buffer.h>
  6. namespace {
  7. using TAutoMapString = NKikimr::NUdf::TAutoMap<char*>;
  8. using TOptionalString = NKikimr::NUdf::TOptional<char*>;
  9. using TOptionalByte = NKikimr::NUdf::TOptional<ui8>;
  10. using TStringRef = NKikimr::NUdf::TStringRef;
  11. using TUnboxedValue = NKikimr::NUdf::TUnboxedValue;
  12. using TUnboxedValuePod = NKikimr::NUdf::TUnboxedValuePod;
  13. ui8 GetAddressRangePrefix(const TIpAddressRange& range) {
  14. if (range.Contains(TIpv6Address(ui128(0), TIpv6Address::Ipv6)) && range.Contains(TIpv6Address(ui128(-1), TIpv6Address::Ipv6))) {
  15. return 0;
  16. }
  17. if (range.Size() == 0) {
  18. return range.Type() == TIpv6Address::Ipv4 ? 32 : 128;
  19. }
  20. ui128 size = range.Size();
  21. size_t sizeLog = MostSignificantBit(size);
  22. return ui8((range.Type() == TIpv6Address::Ipv4 ? 32 : 128) - sizeLog);
  23. }
  24. struct TRawIp4 {
  25. ui8 a, b, c, d;
  26. static TRawIp4 FromIpAddress(const TIpv6Address& addr) {
  27. ui128 x = addr;
  28. return {
  29. ui8(x >> 24 & 0xff),
  30. ui8(x >> 16 & 0xff),
  31. ui8(x >> 8 & 0xff),
  32. ui8(x & 0xff)
  33. };
  34. }
  35. static TRawIp4 MaskFromPrefix(ui8 prefix) {
  36. ui128 x = ui128(-1) << int(32 - prefix);
  37. x &= ui128(ui32(-1));
  38. return FromIpAddress({x, TIpv6Address::Ipv4});
  39. }
  40. TIpv6Address ToIpAddress() const {
  41. return {a, b, c, d};
  42. }
  43. std::pair<TRawIp4, TRawIp4> ApplyMask(const TRawIp4& mask) const {
  44. return {{
  45. ui8(a & mask.a),
  46. ui8(b & mask.b),
  47. ui8(c & mask.c),
  48. ui8(d & mask.d)
  49. },{
  50. ui8(a | ~mask.a),
  51. ui8(b | ~mask.b),
  52. ui8(c | ~mask.c),
  53. ui8(d | ~mask.d)
  54. }};
  55. }
  56. };
  57. struct TRawIp4Subnet {
  58. TRawIp4 base, mask;
  59. static TRawIp4Subnet FromIpRange(const TIpAddressRange& range) {
  60. return {TRawIp4::FromIpAddress(*range.Begin()), TRawIp4::MaskFromPrefix(GetAddressRangePrefix(range))};
  61. }
  62. TIpAddressRange ToIpRange() const {
  63. auto range = base.ApplyMask(mask);
  64. return {range.first.ToIpAddress(), range.second.ToIpAddress()};
  65. }
  66. };
  67. struct TRawIp6 {
  68. ui8 a1, a0, b1, b0, c1, c0, d1, d0, e1, e0, f1, f0, g1, g0, h1, h0;
  69. static TRawIp6 FromIpAddress(const TIpv6Address& addr) {
  70. ui128 x = addr;
  71. return {
  72. ui8(x >> 120 & 0xff), ui8(x >> 112 & 0xff),
  73. ui8(x >> 104 & 0xff), ui8(x >> 96 & 0xff),
  74. ui8(x >> 88 & 0xff), ui8(x >> 80 & 0xff),
  75. ui8(x >> 72 & 0xff), ui8(x >> 64 & 0xff),
  76. ui8(x >> 56 & 0xff), ui8(x >> 48 & 0xff),
  77. ui8(x >> 40 & 0xff), ui8(x >> 32 & 0xff),
  78. ui8(x >> 24 & 0xff), ui8(x >> 16 & 0xff),
  79. ui8(x >> 8 & 0xff), ui8(x & 0xff)
  80. };
  81. }
  82. static TRawIp6 MaskFromPrefix(ui8 prefix) {
  83. ui128 x = prefix == 0 ? ui128(0) : ui128(-1) << int(128 - prefix);
  84. return FromIpAddress({x, TIpv6Address::Ipv6});
  85. }
  86. TIpv6Address ToIpAddress() const {
  87. return {ui16(ui32(a1) << ui32(8) | ui32(a0)),
  88. ui16(ui32(b1) << ui32(8) | ui32(b0)),
  89. ui16(ui32(c1) << ui32(8) | ui32(c0)),
  90. ui16(ui32(d1) << ui32(8) | ui32(d0)),
  91. ui16(ui32(e1) << ui32(8) | ui32(e0)),
  92. ui16(ui32(f1) << ui32(8) | ui32(f0)),
  93. ui16(ui32(g1) << ui32(8) | ui32(g0)),
  94. ui16(ui32(h1) << ui32(8) | ui32(h0)),
  95. };
  96. }
  97. std::pair<TRawIp6, TRawIp6> ApplyMask(const TRawIp6& mask) const {
  98. return { {
  99. ui8(a1 & mask.a1),
  100. ui8(a0 & mask.a0),
  101. ui8(b1 & mask.b1),
  102. ui8(b0 & mask.b0),
  103. ui8(c1 & mask.c1),
  104. ui8(c0 & mask.c0),
  105. ui8(d1 & mask.d1),
  106. ui8(d0 & mask.d0),
  107. ui8(e1 & mask.e1),
  108. ui8(e0 & mask.e0),
  109. ui8(f1 & mask.f1),
  110. ui8(f0 & mask.f0),
  111. ui8(g1 & mask.g1),
  112. ui8(g0 & mask.g0),
  113. ui8(h1 & mask.h1),
  114. ui8(h0 & mask.h0)
  115. }, {
  116. ui8(a1 | ~mask.a1),
  117. ui8(a0 | ~mask.a0),
  118. ui8(b1 | ~mask.b1),
  119. ui8(b0 | ~mask.b0),
  120. ui8(c1 | ~mask.c1),
  121. ui8(c0 | ~mask.c0),
  122. ui8(d1 | ~mask.d1),
  123. ui8(d0 | ~mask.d0),
  124. ui8(e1 | ~mask.e1),
  125. ui8(e0 | ~mask.e0),
  126. ui8(f1 | ~mask.f1),
  127. ui8(f0 | ~mask.f0),
  128. ui8(g1 | ~mask.g1),
  129. ui8(g0 | ~mask.g0),
  130. ui8(h1 | ~mask.h1),
  131. ui8(h0 | ~mask.h0)
  132. }};
  133. }
  134. };
  135. struct TRawIp6Subnet {
  136. TRawIp6 base, mask;
  137. static TRawIp6Subnet FromIpRange(const TIpAddressRange& range) {
  138. return {TRawIp6::FromIpAddress(*range.Begin()), TRawIp6::MaskFromPrefix(GetAddressRangePrefix(range))};
  139. }
  140. TIpAddressRange ToIpRange() const {
  141. auto range = base.ApplyMask(mask);
  142. return {range.first.ToIpAddress(), range.second.ToIpAddress()};
  143. }
  144. };
  145. TIpv6Address DeserializeAddress(const TStringRef& str) {
  146. TIpv6Address addr;
  147. if (str.Size() == 4) {
  148. TRawIp4 addr4;
  149. memcpy(&addr4, str.Data(), sizeof addr4);
  150. addr = addr4.ToIpAddress();
  151. } else if (str.Size() == 16) {
  152. TRawIp6 addr6;
  153. memcpy(&addr6, str.Data(), sizeof addr6);
  154. addr = addr6.ToIpAddress();
  155. } else {
  156. ythrow yexception() << "Incorrect size of input, expected "
  157. << "4 or 16, got " << str.Size();
  158. }
  159. return addr;
  160. }
  161. TIpAddressRange DeserializeSubnet(const TStringRef& str) {
  162. TIpAddressRange range;
  163. if (str.Size() == sizeof(TRawIp4Subnet)) {
  164. TRawIp4Subnet subnet4;
  165. memcpy(&subnet4, str.Data(), sizeof subnet4);
  166. range = subnet4.ToIpRange();
  167. } else if (str.Size() == sizeof(TRawIp6Subnet)) {
  168. TRawIp6Subnet subnet6;
  169. memcpy(&subnet6, str.Data(), sizeof subnet6);
  170. range = subnet6.ToIpRange();
  171. } else {
  172. ythrow yexception() << "Invalid binary representation";
  173. }
  174. return range;
  175. }
  176. TString SerializeAddress(const TIpv6Address& addr) {
  177. Y_ENSURE(addr.Type() == TIpv6Address::Ipv4 || addr.Type() == TIpv6Address::Ipv6);
  178. TString res;
  179. if (addr.Type() == TIpv6Address::Ipv4) {
  180. auto addr4 = TRawIp4::FromIpAddress(addr);
  181. res = TString(reinterpret_cast<const char *>(&addr4), sizeof addr4);
  182. } else if (addr.Type() == TIpv6Address::Ipv6) {
  183. auto addr6 = TRawIp6::FromIpAddress(addr);
  184. res = TString(reinterpret_cast<const char *>(&addr6), sizeof addr6);
  185. }
  186. return res;
  187. }
  188. TString SerializeSubnet(const TIpAddressRange& range) {
  189. TString res;
  190. if (range.Type() == TIpv6Address::Ipv4) {
  191. auto subnet4 = TRawIp4Subnet::FromIpRange(range);
  192. res = TString(reinterpret_cast<const char *>(&subnet4), sizeof subnet4);
  193. } else if (range.Type() == TIpv6Address::Ipv6) {
  194. auto subnet6 = TRawIp6Subnet::FromIpRange(range);
  195. res = TString(reinterpret_cast<const char *>(&subnet6), sizeof subnet6);
  196. }
  197. return res;
  198. }
  199. SIMPLE_STRICT_UDF(TFromString, TOptionalString(TAutoMapString)) {
  200. TIpv6Address addr = TIpv6Address::FromString(args[0].AsStringRef());
  201. if (addr.Type() != TIpv6Address::Ipv4 && addr.Type() != TIpv6Address::Ipv6) {
  202. return TUnboxedValue();
  203. }
  204. return valueBuilder->NewString(SerializeAddress(addr));
  205. }
  206. SIMPLE_STRICT_UDF(TSubnetFromString, TOptionalString(TAutoMapString)) {
  207. TIpAddressRange range = TIpAddressRange::FromCompactString(args[0].AsStringRef());
  208. auto res = SerializeSubnet(range);
  209. return res ? valueBuilder->NewString(res) : TUnboxedValue(TUnboxedValuePod());
  210. }
  211. SIMPLE_UDF(TToString, char*(TAutoMapString)) {
  212. return valueBuilder->NewString(DeserializeAddress(args[0].AsStringRef()).ToString(false));
  213. }
  214. SIMPLE_UDF(TSubnetToString, char*(TAutoMapString)) {
  215. TStringBuilder result;
  216. auto range = DeserializeSubnet(args[0].AsStringRef());
  217. result << (*range.Begin()).ToString(false);
  218. result << '/';
  219. result << ToString(GetAddressRangePrefix(range));
  220. return valueBuilder->NewString(result);
  221. }
  222. SIMPLE_UDF(TSubnetMatch, bool(TAutoMapString, TAutoMapString)) {
  223. Y_UNUSED(valueBuilder);
  224. auto range1 = DeserializeSubnet(args[0].AsStringRef());
  225. if (args[1].AsStringRef().Size() == sizeof(TRawIp4) || args[1].AsStringRef().Size() == sizeof(TRawIp6)) {
  226. auto addr2 = DeserializeAddress(args[1].AsStringRef());
  227. return TUnboxedValuePod(range1.Contains(addr2));
  228. } else { // second argument is a whole subnet, not a single address
  229. auto range2 = DeserializeSubnet(args[1].AsStringRef());
  230. return TUnboxedValuePod(range1.Contains(range2));
  231. }
  232. }
  233. SIMPLE_STRICT_UDF(TIsIPv4, bool(TOptionalString)) {
  234. Y_UNUSED(valueBuilder);
  235. bool result = false;
  236. if (args[0]) {
  237. const auto ref = args[0].AsStringRef();
  238. result = ref.Size() == 4;
  239. }
  240. return TUnboxedValuePod(result);
  241. }
  242. SIMPLE_STRICT_UDF(TIsIPv6, bool(TOptionalString)) {
  243. Y_UNUSED(valueBuilder);
  244. bool result = false;
  245. if (args[0]) {
  246. const auto ref = args[0].AsStringRef();
  247. result = ref.Size() == 16;
  248. }
  249. return TUnboxedValuePod(result);
  250. }
  251. SIMPLE_STRICT_UDF(TIsEmbeddedIPv4, bool(TOptionalString)) {
  252. Y_UNUSED(valueBuilder);
  253. bool result = false;
  254. if (args[0]) {
  255. const auto ref = args[0].AsStringRef();
  256. if (ref.Size() == 16) {
  257. result = DeserializeAddress(ref).Isv4MappedTov6();
  258. }
  259. }
  260. return TUnboxedValuePod(result);
  261. }
  262. SIMPLE_UDF(TConvertToIPv6, char*(TAutoMapString)) {
  263. const auto& ref = args[0].AsStringRef();
  264. if (ref.Size() == 16) {
  265. return valueBuilder->NewString(ref);
  266. } else if (ref.Size() == 4) {
  267. TIpv6Address addr4 = DeserializeAddress(ref);
  268. auto addr6 = TIpv6Address(ui128(addr4) | ui128(0xFFFF) << 32, TIpv6Address::Ipv6);
  269. return valueBuilder->NewString(SerializeAddress(addr6));
  270. } else {
  271. ythrow yexception() << "Incorrect size of input, expected "
  272. << "4 or 16, got " << ref.Size();
  273. }
  274. }
  275. SIMPLE_UDF_WITH_OPTIONAL_ARGS(TGetSubnet, char*(TAutoMapString, TOptionalByte), 1) {
  276. const auto ref = args[0].AsStringRef();
  277. ui8 subnetSize = args[1].GetOrDefault<ui8>(0);
  278. TIpv6Address addr = DeserializeAddress(ref);
  279. if (ref.Size() == 4) {
  280. if (!subnetSize) {
  281. subnetSize = 24;
  282. }
  283. if (subnetSize > 32) {
  284. subnetSize = 32;
  285. }
  286. } else if (ref.Size() == 16) {
  287. if (!subnetSize) {
  288. subnetSize = 64;
  289. }
  290. if (subnetSize > 128) {
  291. subnetSize = 128;
  292. }
  293. } else {
  294. ythrow yexception() << "Incorrect size of input, expected "
  295. << "4 or 16, got " << ref.Size();
  296. }
  297. TIpv6Address beg = LowerBoundForPrefix(addr, subnetSize);
  298. return valueBuilder->NewString(SerializeAddress(beg));
  299. }
  300. SIMPLE_UDF(TGetSubnetByMask, char*(TAutoMapString, TAutoMapString)) {
  301. const auto refBase = args[0].AsStringRef();
  302. const auto refMask = args[1].AsStringRef();
  303. TIpv6Address addrBase = DeserializeAddress(refBase);
  304. TIpv6Address addrMask = DeserializeAddress(refMask);
  305. if (addrBase.Type() != addrMask.Type()) {
  306. ythrow yexception() << "Base and mask differ in length";
  307. }
  308. return valueBuilder->NewString(SerializeAddress(TIpv6Address(ui128(addrBase) & ui128(addrMask), addrBase.Type())));
  309. }
  // Comma-separated list of every UDF class defined in this header.
  #define EXPORTED_IP_BASE_UDF \
      TFromString,             \
      TSubnetFromString,       \
      TToString,               \
      TSubnetToString,         \
      TIsIPv4,                 \
      TIsIPv6,                 \
      TIsEmbeddedIPv4,         \
      TConvertToIPv6,          \
      TGetSubnet,              \
      TSubnetMatch,            \
      TGetSubnetByMask
  322. }