// udf_resolver.cpp
  1. #include "discover.h"
  2. #include <yql/essentials/providers/common/schema/mkql/yql_mkql_schema.h>
  3. #include <yql/essentials/providers/common/proto/udf_resolver.pb.h>
  4. #include <yql/essentials/core/yql_type_annotation.h>
  5. #include <yql/essentials/utils/backtrace/backtrace.h>
  6. #include <yql/essentials/utils/sys/become_user.h>
  7. #include <yql/essentials/utils/sys/linux_version.h>
  8. #include <yql/essentials/minikql/mkql_function_registry.h>
  9. #include <yql/essentials/minikql/mkql_node.h>
  10. #include <yql/essentials/minikql/mkql_type_builder.h>
  11. #include <yql/essentials/minikql/mkql_program_builder.h>
  12. #include <yql/essentials/minikql/mkql_utils.h>
  13. #include <library/cpp/getopt/last_getopt.h>
  14. #include <util/generic/yexception.h>
  15. #include <util/generic/string.h>
  16. #include <util/generic/strbuf.h>
  17. #include <util/generic/hash_set.h>
  18. #include <util/generic/hash.h>
  19. #include <util/system/env.h>
  20. #include <util/system/fs.h>
  21. #include <util/stream/output.h>
  22. #include <util/string/builder.h>
  23. #ifdef _linux_
  24. #include <sys/types.h>
  25. #include <sys/prctl.h>
  26. #include <sys/resource.h>
  27. #include <sys/syscall.h>
  28. #include <sys/socket.h>
  29. #include <sys/stat.h>
  30. #ifndef GRND_RANDOM
  31. #include <sys/random.h>
  32. #endif
  33. #include <linux/filter.h>
  34. #include <linux/seccomp.h>
  35. #include <linux/audit.h>
  36. #ifndef GRND_RANDOM
  37. #include <linux/random.h>
  38. #endif
  39. #ifndef __SI_MAX_SIZE
  40. #define __SI_MAX_SIZE 128
  41. #endif
  42. #ifndef __SI_PAD_SIZE
  43. #if __WORDSIZE == 64
  44. # define __SI_PAD_SIZE ((__SI_MAX_SIZE / sizeof (int)) - 4)
  45. #else
  46. # define __SI_PAD_SIZE ((__SI_MAX_SIZE / sizeof (int)) - 3)
  47. #endif
  48. #endif
  49. #if !defined(SYS_newfstatat)
  50. #if defined(__x86_64__)
  51. #define SYS_newfstatat 262
  52. #elif defined(__i386__)
  53. #error Unsupported syscall
  54. #elif defined(__aarch64__)
  55. #define SYS_newfstatat 79
  56. #elif defined(__arm__)
  57. #error Unsupported syscall
  58. #elif defined(__powerpc__)
  59. #define SYS_newfstatat 291
  60. #else
  61. #error Unsupported platform
  62. #endif
  63. #endif
  64. #if !defined(SYS_clone3)
  65. #define SYS_clone3 435
  66. #endif
  67. #if !defined(SYS_rseq)
  68. #if defined(__x86_64__)
  69. #define SYS_rseq 334
  70. #elif defined(__i386__)
  71. #define SYS_rseq 386
  72. #elif defined(__aarch64__)
  73. #define SYS_rseq 293
  74. #elif defined(__arm__)
  75. #define SYS_rseq 398
  76. #elif defined(__powerpc__)
  77. #define SYS_rseq 387
  78. #else
  79. #error Unsupported platform
  80. #endif
  81. #endif
  82. #endif
  83. using namespace NKikimr;
  84. using namespace NKikimr::NMiniKQL;
  85. void ResolveUDFs() {
  86. NYql::TResolve inMsg;
  87. if (!inMsg.ParseFromArcadiaStream(&Cin)) {
  88. throw yexception() << "Bad input TResolve proto message";
  89. }
  90. NYql::TResolveResult outMsg;
  91. auto functionRegistry = CreateFunctionRegistry(IBuiltinFunctionRegistry::TPtr());
  92. auto newRegistry = functionRegistry->Clone();
  93. newRegistry->SetBackTraceCallback(&NYql::NBacktrace::KikimrBackTrace);
  94. TScopedAlloc alloc(__LOCATION__);
  95. TTypeEnvironment env(alloc);
  96. NUdf::ITypeInfoHelper::TPtr typeInfoHelper(new TTypeInfoHelper);
  97. TSet<TString> loadedPaths;
  98. // Load system imports first
  99. for (auto& import : inMsg.GetImports()) {
  100. if (import.GetSystem()) {
  101. if (!loadedPaths.emplace(import.GetPath()).second) {
  102. continue;
  103. }
  104. newRegistry->LoadUdfs(import.GetPath(), {}, NUdf::IRegistrator::TFlags::TypesOnly);
  105. }
  106. }
  107. THashMap<std::pair<TString, TString>, THashSet<TString>> cachedModules;
  108. for (auto& import : inMsg.GetImports()) {
  109. if (!import.GetSystem()) {
  110. auto importRes = outMsg.AddImports();
  111. importRes->SetFileAlias(import.GetFileAlias());
  112. importRes->SetCustomUdfPrefix(import.GetCustomUdfPrefix());
  113. auto [it, inserted] = cachedModules.emplace(std::make_pair(import.GetPath(), import.GetCustomUdfPrefix()), THashSet<TString>());
  114. if (inserted) {
  115. THashSet<TString> modules;
  116. newRegistry->LoadUdfs(import.GetPath(),
  117. {},
  118. NUdf::IRegistrator::TFlags::TypesOnly,
  119. import.GetCustomUdfPrefix(),
  120. &modules);
  121. NUdfResolver::FillImportResultModules(modules, *importRes);
  122. it->second = modules;
  123. } else {
  124. NUdfResolver::FillImportResultModules(it->second, *importRes);
  125. }
  126. }
  127. }
  128. for (size_t i = 0; i < inMsg.UdfsSize(); ++i) {
  129. auto& udf = inMsg.GetUdfs(i);
  130. auto udfRes = outMsg.AddUdfs();
  131. try {
  132. TProgramBuilder pgmBuilder(env, *newRegistry);
  133. TType* mkqlUserType = nullptr;
  134. if (udf.HasUserType()) {
  135. TStringStream err;
  136. mkqlUserType = NYql::NCommon::ParseTypeFromYson(TStringBuf{udf.GetUserType()}, pgmBuilder, err);
  137. if (!mkqlUserType) {
  138. udfRes->SetError(TStringBuilder() << "Invalid user type for function: "
  139. << udf.GetName() << ", error: " << err.Str());
  140. continue;
  141. }
  142. }
  143. TFunctionTypeInfo funcInfo;
  144. auto status = newRegistry->FindFunctionTypeInfo(env, typeInfoHelper, nullptr,
  145. udf.GetName(), mkqlUserType, udf.GetTypeConfig(), NUdf::IUdfModule::TFlags::TypesOnly, {}, nullptr, &funcInfo);
  146. if (!status.IsOk()) {
  147. udfRes->SetError(TStringBuilder() << "Failed to find UDF function: " << udf.GetName()
  148. << ", reason: " << status.GetError());
  149. continue;
  150. }
  151. udfRes->SetCallableType(NYql::NCommon::WriteTypeToYson(funcInfo.FunctionType));
  152. if (funcInfo.RunConfigType) {
  153. udfRes->SetRunConfigType(NYql::NCommon::WriteTypeToYson(funcInfo.RunConfigType));
  154. }
  155. if (funcInfo.UserType) {
  156. udfRes->SetNormalizedUserType(NYql::NCommon::WriteTypeToYson(funcInfo.UserType));
  157. }
  158. udfRes->SetSupportsBlocks(funcInfo.SupportsBlocks);
  159. udfRes->SetIsStrict(funcInfo.IsStrict);
  160. } catch (yexception& e) {
  161. udfRes->SetError(TStringBuilder()
  162. << "Internal error was found when udf metadata is loading for function: " << udf.GetName()
  163. << ", reason: " << e.what());
  164. }
  165. }
  166. outMsg.SerializeToArcadiaStream(&Cout);
  167. }
  168. void ListModules(const TString& dir) {
  169. TVector<TString> udfPaths;
  170. NMiniKQL::FindUdfsInDir(dir, &udfPaths);
  171. auto funcRegistry = CreateFunctionRegistry(&NYql::NBacktrace::KikimrBackTrace, IBuiltinFunctionRegistry::TPtr(), false, udfPaths,
  172. NUdf::IRegistrator::TFlags::TypesOnly);
  173. for (auto& m : funcRegistry->GetAllModuleNames()) {
  174. auto path = *funcRegistry->FindUdfPath(m);
  175. Cout << m << '\t' << path << Endl;
  176. }
  177. }
  178. // NOLINTBEGIN(readability-identifier-naming)
  179. #ifdef _linux_
  180. struct my_siginfo_t
  181. {
  182. int si_signo; /* Signal number. */
  183. #if __SI_ERRNO_THEN_CODE
  184. int si_errno; /* If non-zero, an errno value associated with
  185. this signal, as defined in <errno.h>. */
  186. int si_code; /* Signal code. */
  187. #else
  188. int si_code;
  189. int si_errno;
  190. #endif
  191. #if __WORDSIZE == 64
  192. int __pad0; /* Explicit padding. */
  193. #endif
  194. union
  195. {
  196. int _pad[__SI_PAD_SIZE];
  197. struct
  198. {
  199. void *_call_addr; /* Calling user insn. */
  200. int _syscall; /* Triggering system call number. */
  201. unsigned int _arch; /* AUDIT_ARCH_* of syscall. */
  202. } _sigsys;
  203. } _sifields;
  204. };
  205. // NOLINTEND(readability-identifier-naming)
  206. void SigSysHandler(int sig, my_siginfo_t *info, void *) {
  207. Cerr << "SigSysHandler: " << sig << ", code: " << info->si_code << ", errno: " <<
  208. info->si_errno << ", call: " << info->_sifields._sigsys._syscall << ", arch:" << info->_sifields._sigsys._arch << "\n";
  209. // repeat SIGSYS signal (this will kill current process)
  210. raise(sig);
  211. }
  212. #endif
  213. int main(int argc, char **argv) {
  214. NYql::NBacktrace::RegisterKikimrFatalActions();
  215. NYql::NBacktrace::EnableKikimrSymbolize();
  216. try {
  217. #ifdef _linux_
  218. struct sigaction sa;
  219. memset(&sa, 0, sizeof(sa));
  220. sa.sa_flags = SA_RESETHAND | SA_SIGINFO;
  221. typedef void (*TSigSysHandler)(int, siginfo_t *, void *);
  222. sa.sa_sigaction = (TSigSysHandler)SigSysHandler;
  223. sigfillset(&sa.sa_mask);
  224. if (sigaction(SIGSYS, &sa, nullptr) == -1) {
  225. ythrow TSystemError() << "Cannot set handler for signal " << strsignal(SIGSYS);
  226. }
  227. #endif
  228. TString path;
  229. TString user;
  230. TString group;
  231. bool printAsProto = true;
  232. NLastGetopt::TOpts opts = NLastGetopt::TOpts::Default();
  233. opts.AddLongOption('L', "list", "List UDF modules in specified directory")
  234. .Optional()
  235. .RequiredArgument("DIR")
  236. .StoreResult(&path);
  237. opts.AddLongOption('D', "discover-path", "Discover UDFs in folder or single file")
  238. .Optional()
  239. .RequiredArgument("DIR")
  240. .StoreResult(&path);
  241. opts.AddLongOption('P', "discover-proto", "Discover UDFs according to TResolve proto from cin")
  242. .Optional()
  243. .NoArgument();
  244. opts.AddLongOption('A', "as-proto", "Print result in protobuf format")
  245. .Optional()
  246. .DefaultValue(true)
  247. .StoreResult(&printAsProto);
  248. opts.AddLongOption('U', "user", "Run as user")
  249. .Optional()
  250. .StoreResult(&user);
  251. opts.AddLongOption('G', "group", "Run as group")
  252. .Optional()
  253. .StoreResult(&group);
  254. opts.AddLongOption('F', "filter-syscalls", "Filter syscalls")
  255. .Optional()
  256. .NoArgument();
  257. opts.SetFreeArgsNum(0);
  258. NLastGetopt::TOptsParseResult res(&opts, argc, argv);
  259. SetEnv("USER", user ? user : "udf_resolver");
  260. if (user && !NYql::IsLinuxKernelBelow4_3()) {
  261. // since we are going to load untrusted modules
  262. // we have to switch into another user and drop privileges (capabilities)
  263. NYql::BecomeUser(user, group);
  264. NYql::TurnOffBecomeUserAbility();
  265. }
  266. NYql::SendSignalOnParentThreadExit(SIGTERM);
  267. #ifdef _linux_
  268. if (rlimit limit = {0, 0}; setrlimit(RLIMIT_CORE, &limit) != 0) {
  269. ythrow TSystemError() << "Failed to set RLIMIT_CORE";
  270. }
  271. #endif
  272. if (res.Has("filter-syscalls")) {
  273. #ifdef _linux_
  274. #define ArchField offsetof(struct seccomp_data, arch) // NOLINT(readability-identifier-naming)
  275. // NOLINTNEXTLINE(readability-identifier-naming)
  276. #define Allow(syscall) \
  277. BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, SYS_##syscall, 0, 1), \
  278. BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_ALLOW)
  279. struct sock_filter filter[] = {
  280. /* validate arch */
  281. BPF_STMT(BPF_LD+BPF_W+BPF_ABS, ArchField),
  282. BPF_JUMP( BPF_JMP+BPF_JEQ+BPF_K, AUDIT_ARCH_X86_64, 1, 0),
  283. BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_TRAP),
  284. /* load syscall */
  285. BPF_STMT(BPF_LD+BPF_W+BPF_ABS, offsetof(struct seccomp_data, nr)),
  286. /* list of allowed syscalls */
  287. #ifndef _arm64_
  288. Allow(access),
  289. #endif
  290. Allow(brk),
  291. Allow(chdir),
  292. Allow(clock_gettime),
  293. Allow(clock_nanosleep),
  294. Allow(clone),
  295. Allow(clone3),
  296. Allow(close),
  297. #ifndef _arm64_
  298. Allow(creat),
  299. #endif
  300. Allow(dup),
  301. #ifndef _arm64_
  302. Allow(dup2),
  303. #endif
  304. Allow(dup3),
  305. Allow(eventfd2),
  306. Allow(exit),
  307. Allow(exit_group),
  308. Allow(fadvise64),
  309. Allow(fallocate),
  310. Allow(flock),
  311. Allow(fstat),
  312. Allow(fsync),
  313. Allow(ftruncate),
  314. Allow(futex),
  315. Allow(get_robust_list),
  316. Allow(getcwd),
  317. #ifndef _arm64_
  318. Allow(getdents),
  319. #endif
  320. Allow(getdents64),
  321. Allow(getegid),
  322. Allow(geteuid),
  323. Allow(getgid),
  324. Allow(getgroups),
  325. Allow(getpgid),
  326. #ifndef _arm64_
  327. Allow(getpgrp),
  328. #endif
  329. Allow(getpid),
  330. Allow(getppid),
  331. Allow(getpriority),
  332. Allow(getrandom),
  333. Allow(getrlimit),
  334. Allow(getrusage),
  335. Allow(getsid),
  336. Allow(gettid),
  337. Allow(gettimeofday),
  338. Allow(getuid),
  339. Allow(getxattr),
  340. Allow(ioctl),
  341. Allow(lgetxattr),
  342. #ifndef _arm64_
  343. Allow(link),
  344. #endif
  345. Allow(listxattr),
  346. Allow(llistxattr),
  347. Allow(lremovexattr),
  348. Allow(lseek),
  349. Allow(lsetxattr),
  350. #ifndef _arm64_
  351. Allow(lstat),
  352. #endif
  353. Allow(madvise),
  354. #ifndef _arm64_
  355. Allow(mkdir),
  356. #endif
  357. Allow(mkdirat),
  358. Allow(mlock),
  359. Allow(mlockall),
  360. Allow(mmap),
  361. Allow(mprotect),
  362. Allow(munlock),
  363. Allow(munlockall),
  364. Allow(munmap),
  365. Allow(nanosleep),
  366. Allow(newfstatat),
  367. #ifndef _arm64_
  368. Allow(open),
  369. #endif
  370. Allow(openat),
  371. Allow(pipe2),
  372. Allow(prctl),
  373. Allow(pread64),
  374. Allow(pwrite64),
  375. Allow(read),
  376. #ifndef _arm64_
  377. Allow(readlink),
  378. #endif
  379. Allow(readv),
  380. Allow(removexattr),
  381. #ifndef _arm64_
  382. Allow(rename),
  383. #endif
  384. Allow(renameat),
  385. #ifndef _arm64_
  386. Allow(rmdir),
  387. #endif
  388. Allow(rseq),
  389. Allow(rt_sigaction),
  390. Allow(rt_sigpending),
  391. Allow(rt_sigprocmask),
  392. Allow(rt_sigqueueinfo),
  393. Allow(rt_sigreturn),
  394. Allow(rt_sigsuspend),
  395. Allow(rt_sigtimedwait),
  396. Allow(rt_tgsigqueueinfo),
  397. Allow(sched_getaffinity),
  398. Allow(sched_setaffinity),
  399. Allow(set_robust_list),
  400. Allow(setxattr),
  401. #ifndef _arm64_
  402. Allow(stat),
  403. #endif
  404. Allow(sysinfo),
  405. Allow(sigaltstack),
  406. Allow(uname),
  407. #ifndef _arm64_
  408. Allow(unlink),
  409. #endif
  410. Allow(unlinkat),
  411. Allow(write),
  412. Allow(writev),
  413. /* and if we don't match above, die */
  414. BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_TRAP),
  415. };
  416. struct sock_fprog filterprog = {
  417. .len = sizeof(filter)/sizeof(filter[0]),
  418. .filter = filter
  419. };
  420. if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) == -1) {
  421. ythrow yexception() << "prctl(PR_SET_NO_NEW_PRIVS, 1, ...) failed with: " << LastSystemErrorText();
  422. }
  423. if (prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &filterprog) == -1) {
  424. ythrow yexception() << "prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, ...) failed with: " << LastSystemErrorText();
  425. }
  426. #endif
  427. }
  428. Y_UNUSED(NUdf::GetStaticSymbols());
  429. if (res.Has("list")) {
  430. ListModules(path);
  431. return 0;
  432. }
  433. if (res.Has("discover-path")) {
  434. NFs::EnsureExists(path);
  435. TFileStat fstat(path);
  436. if (fstat.IsDir()) {
  437. NUdfResolver::DiscoverInDir(path, Cout, printAsProto);
  438. } else {
  439. NUdfResolver::DiscoverInFile(path, Cout, printAsProto);
  440. }
  441. return 0;
  442. }
  443. if (res.Has("discover-proto")) {
  444. NUdfResolver::Discover(Cin, Cout, printAsProto);
  445. return 0;
  446. }
  447. ResolveUDFs();
  448. return 0;
  449. } catch (...) {
  450. Cerr << CurrentExceptionMessage() << Endl;
  451. return 1;
  452. }
  453. }