value_builder.cpp 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346
  1. #include "type_builder.h"
  2. #include "value_builder.h"
  3. #include <yql/essentials/public/udf/udf_value.h>
  4. #include <yql/essentials/public/udf/udf_value_builder.h>
  5. #include <yql/essentials/public/udf/udf_terminator.h>
  6. #include <yql/essentials/minikql/mkql_node_cast.h>
  7. #include <yql/essentials/minikql/mkql_node.h>
  8. namespace NYql {
  9. namespace NUdf {
  10. using namespace NProtoBuf;
  11. TProtobufValue::TProtobufValue(const TProtoInfo& info)
  12. : Info_(info)
  13. {
  14. }
  15. TProtobufValue::~TProtobufValue()
  16. { }
  17. TUnboxedValue TProtobufValue::Run(
  18. const IValueBuilder* valueBuilder,
  19. const TUnboxedValuePod* args) const
  20. {
  21. auto blob = args[0].AsStringRef();
  22. try {
  23. auto result = this->Parse(TStringBuf(blob.Data(), blob.Size()));
  24. if (result == nullptr) {
  25. return TUnboxedValue();
  26. }
  27. auto proto(result);
  28. return FillValueFromProto(*proto.Get(), valueBuilder, Info_);
  29. } catch (const std::exception& e) {
  30. UdfTerminate(e.what());
  31. }
  32. }
  33. TProtobufSerialize::TProtobufSerialize(const TProtoInfo& info)
  34. : Info_(info)
  35. {
  36. }
  37. TProtobufSerialize::~TProtobufSerialize()
  38. { }
  39. TUnboxedValue TProtobufSerialize::Run(
  40. const IValueBuilder* valueBuilder,
  41. const TUnboxedValuePod* args) const
  42. {
  43. try {
  44. TAutoPtr<Message> proto = MakeProto();
  45. FillProtoFromValue(args[0], *proto, Info_);
  46. TMaybe<TString> result = this->Serialize(*proto);
  47. if (!result) {
  48. return TUnboxedValue();
  49. }
  50. return valueBuilder->NewString(*result);
  51. } catch (const std::exception& e) {
  52. UdfTerminate(e.what());
  53. }
  54. }
  55. namespace {
  56. static TUnboxedValuePod CreateEnumValue(
  57. const IValueBuilder* valueBuilder,
  58. const NProtoBuf::EnumValueDescriptor* desc,
  59. const EEnumFormat format,
  60. TFlags<EFieldFlag> fieldFlags)
  61. {
  62. if (fieldFlags.HasFlags(EFieldFlag::EnumInt)) {
  63. return TUnboxedValuePod((i64)desc->number());
  64. } else if (fieldFlags.HasFlags(EFieldFlag::EnumString)) {
  65. return valueBuilder->NewString(desc->name()).Release();
  66. }
  67. switch (format) {
  68. case EEnumFormat::Number:
  69. return TUnboxedValuePod((i32)desc->number());
  70. case EEnumFormat::Name:
  71. return valueBuilder->NewString(desc->name()).Release();
  72. case EEnumFormat::FullName:
  73. return valueBuilder->NewString(desc->full_name()).Release();
  74. }
  75. Y_UNREACHABLE();
  76. }
  77. static TUnboxedValuePod CreateSingleField(
  78. const IValueBuilder* valueBuilder,
  79. const Message& proto,
  80. const FieldDescriptor* fd,
  81. const TProtoInfo& info,
  82. TFlags<EFieldFlag> fieldFlags)
  83. {
  84. auto r = proto.GetReflection();
  85. #define FIELD_TO_VALUE(EProtoCppType, ProtoGet) \
  86. case FieldDescriptor::EProtoCppType: { \
  87. return TUnboxedValuePod(r->ProtoGet(proto, fd)); \
  88. }
  89. switch (fd->cpp_type()) {
  90. FIELD_TO_VALUE(CPPTYPE_INT32, GetInt32);
  91. FIELD_TO_VALUE(CPPTYPE_INT64, GetInt64);
  92. FIELD_TO_VALUE(CPPTYPE_UINT32, GetUInt32);
  93. FIELD_TO_VALUE(CPPTYPE_UINT64, GetUInt64);
  94. FIELD_TO_VALUE(CPPTYPE_DOUBLE, GetDouble);
  95. FIELD_TO_VALUE(CPPTYPE_BOOL, GetBool);
  96. case FieldDescriptor::CPPTYPE_FLOAT: {
  97. const auto f = r->GetFloat(proto, fd);
  98. return info.YtMode ? TUnboxedValuePod(double(f)) : TUnboxedValuePod(f);
  99. }
  100. case FieldDescriptor::CPPTYPE_ENUM: {
  101. return CreateEnumValue(valueBuilder, r->GetEnum(proto, fd), info.EnumFormat, fieldFlags);
  102. }
  103. case FieldDescriptor::CPPTYPE_STRING: {
  104. return valueBuilder->NewString(r->GetString(proto, fd)).Release();
  105. }
  106. case FieldDescriptor::CPPTYPE_MESSAGE: {
  107. const auto& protoField = r->GetMessage(proto, fd);
  108. if (fieldFlags.HasFlags(EFieldFlag::Binary)) {
  109. return valueBuilder->NewString(protoField.SerializeAsString()).Release();
  110. } else {
  111. auto msg = FillValueFromProto(protoField, valueBuilder, info);
  112. return fd->is_optional() ? msg.Release().MakeOptional() : msg.Release();
  113. }
  114. }
  115. }
  116. #undef FIELD_TO_VALUE
  117. return TUnboxedValuePod();
  118. }
  119. static TUnboxedValuePod CreateDefaultValue(
  120. const IValueBuilder* valueBuilder,
  121. const FieldDescriptor* fd,
  122. const TProtoInfo& info,
  123. TFlags<EFieldFlag> fieldFlags)
  124. {
  125. #define DEFAULT_TO_VALUE(EProtoCppType, ValueGet) \
  126. case FieldDescriptor::EProtoCppType: { \
  127. return TUnboxedValuePod(fd->ValueGet()); \
  128. break; \
  129. }
  130. switch (fd->cpp_type()) {
  131. DEFAULT_TO_VALUE(CPPTYPE_INT32, default_value_int32);
  132. DEFAULT_TO_VALUE(CPPTYPE_INT64, default_value_int64);
  133. DEFAULT_TO_VALUE(CPPTYPE_UINT32, default_value_uint32);
  134. DEFAULT_TO_VALUE(CPPTYPE_UINT64, default_value_uint64);
  135. DEFAULT_TO_VALUE(CPPTYPE_DOUBLE, default_value_double);
  136. DEFAULT_TO_VALUE(CPPTYPE_BOOL, default_value_bool);
  137. case FieldDescriptor::CPPTYPE_FLOAT: {
  138. const auto f = fd->default_value_float();
  139. return info.YtMode ? TUnboxedValuePod(double(f)) : TUnboxedValuePod(f);
  140. }
  141. case FieldDescriptor::CPPTYPE_ENUM:
  142. return CreateEnumValue(valueBuilder, fd->default_value_enum(), info.EnumFormat, fieldFlags);
  143. case FieldDescriptor::CPPTYPE_STRING:
  144. return valueBuilder->NewString(fd->default_value_string()).Release();
  145. default:
  146. return TUnboxedValuePod();
  147. }
  148. #undef DEFAULT_TO_VALUE
  149. }
  150. static TUnboxedValuePod CreateRepeatedField(
  151. const IValueBuilder* valueBuilder,
  152. const Message& proto,
  153. const FieldDescriptor* fd,
  154. const TProtoInfo& info,
  155. TFlags<EFieldFlag> fieldFlags)
  156. {
  157. auto r = proto.GetReflection();
  158. #define REPEATED_FIELD_TO_VALUE(EProtoCppType, ProtoGet) \
  159. case FieldDescriptor::EProtoCppType: { \
  160. for (int i = 0; i < endI; ++i) { \
  161. *items++ = TUnboxedValuePod(r->ProtoGet(proto, fd, i)); \
  162. } \
  163. break; \
  164. }
  165. const auto endI = r->FieldSize(proto, fd);
  166. NUdf::TUnboxedValue *items = nullptr;
  167. auto list = valueBuilder->NewArray(endI, items);
  168. switch (fd->cpp_type()) {
  169. REPEATED_FIELD_TO_VALUE(CPPTYPE_INT32, GetRepeatedInt32);
  170. REPEATED_FIELD_TO_VALUE(CPPTYPE_INT64, GetRepeatedInt64);
  171. REPEATED_FIELD_TO_VALUE(CPPTYPE_UINT32, GetRepeatedUInt32);
  172. REPEATED_FIELD_TO_VALUE(CPPTYPE_UINT64, GetRepeatedUInt64);
  173. REPEATED_FIELD_TO_VALUE(CPPTYPE_DOUBLE, GetRepeatedDouble);
  174. REPEATED_FIELD_TO_VALUE(CPPTYPE_BOOL, GetRepeatedBool);
  175. case FieldDescriptor::CPPTYPE_FLOAT:
  176. for (int i = 0; i < endI; ++i) {
  177. const auto f = r->GetRepeatedFloat(proto, fd, i);
  178. *items++ = info.YtMode ? TUnboxedValuePod(double(f)) : TUnboxedValuePod(f);
  179. }
  180. break;
  181. case FieldDescriptor::CPPTYPE_ENUM:
  182. for (int i = 0; i < endI; ++i) {
  183. *items++ = CreateEnumValue(valueBuilder, r->GetRepeatedEnum(proto, fd, i), info.EnumFormat, fieldFlags);
  184. }
  185. break;
  186. case FieldDescriptor::CPPTYPE_STRING:
  187. for (int i = 0; i < endI; ++i) {
  188. *items++ = valueBuilder->NewString(r->GetRepeatedString(proto, fd, i));
  189. }
  190. break;
  191. case FieldDescriptor::CPPTYPE_MESSAGE:
  192. for (int i = 0; i < endI; ++i) {
  193. const auto& protoFieldElement = r->GetRepeatedMessage(proto, fd, i);
  194. if (fieldFlags.HasFlags(EFieldFlag::Binary)) {
  195. *items++ = valueBuilder->NewString(protoFieldElement.SerializeAsString());
  196. } else {
  197. *items++ = FillValueFromProto(protoFieldElement, valueBuilder, info);
  198. }
  199. }
  200. break;
  201. }
  202. #undef REPEATED_FIELD_TO_VALUE
  203. return list.Release();
  204. }
  205. static TUnboxedValuePod CreateMapField(
  206. const IValueBuilder* valueBuilder,
  207. const Message& proto,
  208. const FieldDescriptor* fd,
  209. const TProtoInfo& info,
  210. const TMessageInfo& msgInfo,
  211. TFlags<EFieldFlag> fieldFlags)
  212. {
  213. auto r = proto.GetReflection();
  214. auto dictType = msgInfo.DictTypes.Value(fd->number(), nullptr);
  215. Y_ENSURE(dictType);
  216. auto dictBuilder = valueBuilder->NewDict(dictType, TDictFlags::Hashed);
  217. const auto noBinaryFlags = TFlags<EFieldFlag>(fieldFlags).RemoveFlags(EFieldFlag::Binary);
  218. for (int i = 0, end = r->FieldSize(proto, fd); i < end; ++i) {
  219. const auto& protoDictElement = r->GetRepeatedMessage(proto, fd, i);
  220. dictBuilder->Add(
  221. TUnboxedValue(CreateSingleField(valueBuilder, protoDictElement, fd->message_type()->map_key(), info, noBinaryFlags)),
  222. TUnboxedValue(CreateSingleField(valueBuilder, protoDictElement, fd->message_type()->map_value(), info, fieldFlags))
  223. );
  224. }
  225. return dictBuilder->Build().Release();
  226. }
  227. }
  228. TUnboxedValue FillValueFromProto(
  229. const Message& proto,
  230. const IValueBuilder* valueBuilder,
  231. const TProtoInfo& info)
  232. {
  233. const auto d = proto.GetDescriptor();
  234. const auto r = proto.GetReflection();
  235. const auto mi = info.Messages.find(d->full_name());
  236. if (mi == info.Messages.end()) {
  237. ythrow yexception() << "unknown message " << d->full_name();
  238. }
  239. const auto msgInfo = mi->second;
  240. TUnboxedValue* items = nullptr;
  241. auto value = valueBuilder->NewArray(msgInfo->FieldsCount, items);
  242. auto makeValue = [&](const FieldDescriptor* fd, const TMessageInfo::TFieldInfo& fInfo) -> TUnboxedValuePod {
  243. if (fInfo.Flags.HasFlags(EFieldFlag::Void)) {
  244. return TUnboxedValuePod::Void();
  245. }
  246. if (fd->is_map() && fInfo.Flags.HasFlags(EFieldFlag::Dict)) {
  247. if (r->FieldSize(proto, fd) == 0 && fInfo.Flags.HasFlags(EFieldFlag::OptionalContainer)) {
  248. return TUnboxedValuePod();
  249. } else {
  250. return CreateMapField(valueBuilder, proto, fd, info, *msgInfo, fInfo.Flags);
  251. }
  252. } else if (fd->is_optional()) {
  253. if (r->HasField(proto, fd)) {
  254. return CreateSingleField(valueBuilder, proto, fd, info, fInfo.Flags);
  255. } else if (fd->has_default_value() || AvoidOptionalScalars(info.SyntaxAware, fd)) {
  256. return CreateDefaultValue(valueBuilder, fd, info, fInfo.Flags);
  257. } else {
  258. return TUnboxedValuePod();
  259. }
  260. } else if (fd->is_repeated()) {
  261. if (r->FieldSize(proto, fd) > 0) {
  262. return CreateRepeatedField(valueBuilder, proto, fd, info, fInfo.Flags);
  263. } else {
  264. if (info.OptionalLists || fInfo.Flags.HasFlags(EFieldFlag::OptionalContainer)) {
  265. return TUnboxedValuePod();
  266. } else {
  267. return valueBuilder->NewEmptyList().Release();
  268. }
  269. }
  270. } else if (fd->is_required()) {
  271. if (r->HasField(proto, fd)) {
  272. return CreateSingleField(valueBuilder, proto, fd, info, fInfo.Flags);
  273. } else {
  274. ythrow yexception() << "required field " << fd->name() << " has no value";
  275. }
  276. }
  277. return TUnboxedValuePod();
  278. };
  279. THashSet<const OneofDescriptor*> visitedOneofs;
  280. for (int i = 0, end = d->field_count(); i < end; ++i) {
  281. const FieldDescriptor* fd = d->field(i);
  282. const auto& fInfo = msgInfo->Fields[fd->number()];
  283. if (auto oneofDescriptor = fd->containing_oneof(); info.YtMode && oneofDescriptor && fInfo.Flags.HasFlags(EFieldFlag::Variant)) {
  284. if (visitedOneofs.insert(oneofDescriptor).second) {
  285. items[fInfo.Pos] = TUnboxedValuePod();
  286. if (auto ofd = r->GetOneofFieldDescriptor(proto, oneofDescriptor)) {
  287. const auto& ofInfo = msgInfo->Fields[ofd->number()];
  288. if (fInfo.Pos != ofInfo.Pos) {
  289. ythrow yexception() << "mismatch of oneof field " << ofd->name() << " position";
  290. }
  291. const ui32* varIndex = msgInfo->VariantIndicies.FindPtr(ofd->number());
  292. if (!varIndex) {
  293. ythrow yexception() << "missing oneof field " << ofd->name() << " index";
  294. }
  295. items[ofInfo.Pos] = valueBuilder->NewVariant(*varIndex, TUnboxedValue(makeValue(ofd, ofInfo))).Release().MakeOptional();
  296. }
  297. }
  298. } else {
  299. items[fInfo.Pos] = makeValue(fd, fInfo);
  300. }
  301. }
  302. return value;
  303. }
  304. } // namespace NUdf
  305. } // namespace NYql