LocalServer.cpp 38 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995
  1. #include "LocalServer.h"
  2. #include <sys/resource.h>
  3. #include <Common/logger_useful.h>
  4. #include <Common/formatReadable.h>
  5. #include <base/getMemoryAmount.h>
  6. #include <base/errnoToString.h>
  7. #include <Poco/Util/XMLConfiguration.h>
  8. #include <Poco/String.h>
  9. #include <Poco/Logger.h>
  10. #include <Poco/NullChannel.h>
  11. #include <Poco/SimpleFileChannel.h>
  12. #include <Databases/DatabaseFilesystem.h>
  13. #include <Databases/DatabaseMemory.h>
  14. #include <Databases/DatabasesOverlay.h>
  15. #include <Storages/System/attachSystemTables.h>
  16. #include <Storages/System/attachInformationSchemaTables.h>
  17. #include <Interpreters/DatabaseCatalog.h>
  18. #include <Interpreters/JIT/CompiledExpressionCache.h>
  19. #include <Interpreters/ProcessList.h>
  20. #include <Interpreters/loadMetadata.h>
  21. #include <base/getFQDNOrHostName.h>
  22. #include <Common/scope_guard_safe.h>
  23. #include <Interpreters/Session.h>
  24. #include <Access/AccessControl.h>
  25. #include <Common/Exception.h>
  26. #include <Common/Macros.h>
  27. #include <Common/Config/ConfigProcessor.h>
  28. #include <Common/ThreadStatus.h>
  29. #include <Common/TLDListsHolder.h>
  30. #include <Common/quoteString.h>
  31. #include <Common/randomSeed.h>
  32. #include <Common/ThreadPool.h>
  33. #include <Loggers/Loggers.h>
  34. #include <IO/ReadBufferFromFile.h>
  35. #include <IO/ReadBufferFromString.h>
  36. #include <IO/WriteBufferFromFileDescriptor.h>
  37. #include <IO/UseSSL.h>
  38. #include <IO/SharedThreadPools.h>
  39. #include <Parsers/IAST.h>
  40. #include <Parsers/ASTInsertQuery.h>
  41. #include <Common/ErrorHandlers.h>
  42. #include <Functions/UserDefined/IUserDefinedSQLObjectsLoader.h>
  43. #include <Functions/registerFunctions.h>
  44. #include <AggregateFunctions/registerAggregateFunctions.h>
  45. #include <TableFunctions/registerTableFunctions.h>
  46. #include <Storages/registerStorages.h>
  47. #include <Dictionaries/registerDictionaries.h>
  48. #include <Disks/registerDisks.h>
  49. #include <Formats/registerFormats.h>
  50. #include <Formats/FormatFactory.h>
  51. #include <boost/algorithm/string/replace.hpp>
  52. #include <boost/program_options/options_description.hpp>
  53. #include <base/argsToConfig.h>
  54. #include <filesystem>
  55. #include "clickhouse_config.h"
  56. #if defined(FUZZING_MODE)
  57. #include <Functions/getFuzzerData.h>
  58. #endif
  /// Declares Azure::Storage::_internal::XmlGlobalDeinitialize(), which LocalServer::initialize()
  /// registers as a GlobalThreadPool destroy callback when Azure blob storage support is enabled.
  #if USE_AZURE_BLOB_STORAGE
  #   include <azure/storage/common/internal/xml_wrapper.hpp>
  #endif
  62. namespace fs = std::filesystem;
  63. namespace DB
  64. {
  65. namespace ErrorCodes
  66. {
  67. extern const int BAD_ARGUMENTS;
  68. extern const int CANNOT_LOAD_CONFIG;
  69. extern const int FILE_ALREADY_EXISTS;
  70. }
  71. void applySettingsOverridesForLocal(ContextMutablePtr context)
  72. {
  73. Settings settings = context->getSettings();
  74. settings.allow_introspection_functions = true;
  75. settings.storage_file_read_method = LocalFSReadMethod::mmap;
  76. context->setSettings(settings);
  77. }
  78. void LocalServer::processError(const String &) const
  79. {
  80. if (ignore_error)
  81. return;
  82. if (is_interactive)
  83. {
  84. String message;
  85. if (server_exception)
  86. {
  87. message = getExceptionMessage(*server_exception, print_stack_trace, true);
  88. }
  89. else if (client_exception)
  90. {
  91. message = client_exception->message();
  92. }
  93. fmt::print(stderr, "Received exception:\n{}\n", message);
  94. fmt::print(stderr, "\n");
  95. }
  96. else
  97. {
  98. if (server_exception)
  99. server_exception->rethrow();
  100. if (client_exception)
  101. client_exception->rethrow();
  102. }
  103. }
  104. void LocalServer::initialize(Poco::Util::Application & self)
  105. {
  106. Poco::Util::Application::initialize(self);
  107. /// Load config files if exists
  108. if (config().has("config-file") || fs::exists("config.xml"))
  109. {
  110. const auto config_path = config().getString("config-file", "config.xml");
  111. ConfigProcessor config_processor(config_path, false, true);
  112. config_processor.setConfigPath(fs::path(config_path).parent_path());
  113. auto loaded_config = config_processor.loadConfig();
  114. config().add(loaded_config.configuration.duplicate(), PRIO_DEFAULT, false);
  115. }
  116. GlobalThreadPool::initialize(
  117. config().getUInt("max_thread_pool_size", 10000),
  118. config().getUInt("max_thread_pool_free_size", 1000),
  119. config().getUInt("thread_pool_queue_size", 10000)
  120. );
  121. #if USE_AZURE_BLOB_STORAGE
  122. /// See the explanation near the same line in Server.cpp
  123. GlobalThreadPool::instance().addOnDestroyCallback([]
  124. {
  125. Azure::Storage::_internal::XmlGlobalDeinitialize();
  126. });
  127. #endif
  128. getIOThreadPool().initialize(
  129. config().getUInt("max_io_thread_pool_size", 100),
  130. config().getUInt("max_io_thread_pool_free_size", 0),
  131. config().getUInt("io_thread_pool_queue_size", 10000));
  132. const size_t active_parts_loading_threads = config().getUInt("max_active_parts_loading_thread_pool_size", 64);
  133. getActivePartsLoadingThreadPool().initialize(
  134. active_parts_loading_threads,
  135. 0, // We don't need any threads one all the parts will be loaded
  136. active_parts_loading_threads);
  137. const size_t outdated_parts_loading_threads = config().getUInt("max_outdated_parts_loading_thread_pool_size", 32);
  138. getOutdatedPartsLoadingThreadPool().initialize(
  139. outdated_parts_loading_threads,
  140. 0, // We don't need any threads one all the parts will be loaded
  141. outdated_parts_loading_threads);
  142. getOutdatedPartsLoadingThreadPool().setMaxTurboThreads(active_parts_loading_threads);
  143. const size_t cleanup_threads = config().getUInt("max_parts_cleaning_thread_pool_size", 128);
  144. getPartsCleaningThreadPool().initialize(
  145. cleanup_threads,
  146. 0, // We don't need any threads one all the parts will be deleted
  147. cleanup_threads);
  148. }
  149. static DatabasePtr createMemoryDatabaseIfNotExists(ContextPtr context, const String & database_name)
  150. {
  151. DatabasePtr system_database = DatabaseCatalog::instance().tryGetDatabase(database_name);
  152. if (!system_database)
  153. {
  154. /// TODO: add attachTableDelayed into DatabaseMemory to speedup loading
  155. system_database = std::make_shared<DatabaseMemory>(database_name, context);
  156. DatabaseCatalog::instance().attachDatabase(database_name, system_database);
  157. }
  158. return system_database;
  159. }
  160. static DatabasePtr createClickHouseLocalDatabaseOverlay(const String & name_, ContextPtr context_)
  161. {
  162. auto databaseCombiner = std::make_shared<DatabasesOverlay>(name_, context_);
  163. databaseCombiner->registerNextDatabase(std::make_shared<DatabaseFilesystem>(name_, "", context_));
  164. databaseCombiner->registerNextDatabase(std::make_shared<DatabaseMemory>(name_, context_));
  165. return databaseCombiner;
  166. }
  167. /// If path is specified and not empty, will try to setup server environment and load existing metadata
  168. void LocalServer::tryInitPath()
  169. {
  170. std::string path;
  171. if (config().has("path"))
  172. {
  173. // User-supplied path.
  174. path = config().getString("path");
  175. Poco::trimInPlace(path);
  176. if (path.empty())
  177. {
  178. throw Exception(ErrorCodes::BAD_ARGUMENTS,
  179. "Cannot work with empty storage path that is explicitly specified"
  180. " by the --path option. Please check the program options and"
  181. " correct the --path.");
  182. }
  183. }
  184. else
  185. {
  186. // The path is not provided explicitly - use a unique path in the system temporary directory
  187. // (or in the current dir if temporary don't exist)
  188. Poco::Logger * log = &logger();
  189. std::filesystem::path parent_folder;
  190. std::filesystem::path default_path;
  191. try
  192. {
  193. // try to guess a tmp folder name, and check if it's a directory (throw exception otherwise)
  194. parent_folder = std::filesystem::temp_directory_path();
  195. }
  196. catch (const fs::filesystem_error & e)
  197. {
  198. // The tmp folder doesn't exist? Is it a misconfiguration? Or chroot?
  199. LOG_DEBUG(log, "Can not get temporary folder: {}", e.what());
  200. parent_folder = std::filesystem::current_path();
  201. std::filesystem::is_directory(parent_folder); // that will throw an exception if it's not a directory
  202. LOG_DEBUG(log, "Will create working directory inside current directory: {}", parent_folder.string());
  203. }
  204. /// we can have another clickhouse-local running simultaneously, even with the same PID (for ex. - several dockers mounting the same folder)
  205. /// or it can be some leftovers from other clickhouse-local runs
  206. /// as we can't accurately distinguish those situations we don't touch any existent folders
  207. /// we just try to pick some free name for our working folder
  208. default_path = parent_folder / fmt::format("clickhouse-local-{}-{}-{}", getpid(), time(nullptr), randomSeed());
  209. if (exists(default_path))
  210. throw Exception(ErrorCodes::FILE_ALREADY_EXISTS, "Unsuccessful attempt to create working directory: {} exist!", default_path.string());
  211. create_directory(default_path);
  212. temporary_directory_to_delete = default_path;
  213. path = default_path.string();
  214. LOG_DEBUG(log, "Working directory created: {}", path);
  215. }
  216. if (path.back() != '/')
  217. path += '/';
  218. fs::create_directories(fs::path(path) / "user_defined/");
  219. fs::create_directories(fs::path(path) / "data/");
  220. fs::create_directories(fs::path(path) / "metadata/");
  221. fs::create_directories(fs::path(path) / "metadata_dropped/");
  222. global_context->setPath(path);
  223. global_context->setTemporaryStoragePath(path + "tmp/", 0);
  224. global_context->setFlagsPath(path + "flags");
  225. global_context->setUserFilesPath(""); // user's files are everywhere
  226. std::string user_scripts_path = config().getString("user_scripts_path", fs::path(path) / "user_scripts/");
  227. global_context->setUserScriptsPath(user_scripts_path);
  228. fs::create_directories(user_scripts_path);
  229. /// top_level_domains_lists
  230. const std::string & top_level_domains_path = config().getString("top_level_domains_path", path + "top_level_domains/");
  231. if (!top_level_domains_path.empty())
  232. TLDListsHolder::getInstance().parseConfig(fs::path(top_level_domains_path) / "", config());
  233. }
  234. void LocalServer::cleanup()
  235. {
  236. try
  237. {
  238. connection.reset();
  239. if (global_context)
  240. {
  241. global_context->shutdown();
  242. global_context.reset();
  243. }
  244. /// thread status should be destructed before shared context because it relies on process list.
  245. status.reset();
  246. // Delete the temporary directory if needed.
  247. if (temporary_directory_to_delete)
  248. {
  249. const auto dir = *temporary_directory_to_delete;
  250. temporary_directory_to_delete.reset();
  251. LOG_DEBUG(&logger(), "Removing temporary directory: {}", dir.string());
  252. remove_all(dir);
  253. }
  254. }
  255. catch (...)
  256. {
  257. tryLogCurrentException(__PRETTY_FUNCTION__);
  258. }
  259. }
  /// Returns true if fstat() succeeds on the standard input descriptor and reports a regular file.
  static bool checkIfStdinIsRegularFile()
  {
      struct stat stdin_stat;
      if (fstat(STDIN_FILENO, &stdin_stat) != 0)
          return false;
      return S_ISREG(stdin_stat.st_mode);
  }
  265. std::string LocalServer::getInitialCreateTableQuery()
  266. {
  267. if (!config().has("table-structure") && !config().has("table-file") && !config().has("table-data-format") && (!checkIfStdinIsRegularFile() || !config().has("query")))
  268. return {};
  269. auto table_name = backQuoteIfNeed(config().getString("table-name", "table"));
  270. auto table_structure = config().getString("table-structure", "auto");
  271. String table_file;
  272. String format_from_file_name;
  273. if (!config().has("table-file") || config().getString("table-file") == "-")
  274. {
  275. /// Use Unix tools stdin naming convention
  276. table_file = "stdin";
  277. format_from_file_name = FormatFactory::instance().getFormatFromFileDescriptor(STDIN_FILENO);
  278. }
  279. else
  280. {
  281. /// Use regular file
  282. auto file_name = config().getString("table-file");
  283. table_file = quoteString(file_name);
  284. format_from_file_name = FormatFactory::instance().getFormatFromFileName(file_name, false);
  285. }
  286. auto data_format = backQuoteIfNeed(
  287. config().getString("table-data-format", config().getString("format", format_from_file_name.empty() ? "TSV" : format_from_file_name)));
  288. if (table_structure == "auto")
  289. table_structure = "";
  290. else
  291. table_structure = "(" + table_structure + ")";
  292. return fmt::format("CREATE TABLE {} {} ENGINE = File({}, {});",
  293. table_name, table_structure, data_format, table_file);
  294. }
  295. static ConfigurationPtr getConfigurationFromXMLString(const char * xml_data)
  296. {
  297. std::stringstream ss{std::string{xml_data}}; // STYLE_CHECK_ALLOW_STD_STRING_STREAM
  298. Poco::XML::InputSource input_source{ss};
  299. return {new Poco::Util::XMLConfiguration{&input_source}};
  300. }
  301. void LocalServer::setupUsers()
  302. {
  303. static const char * minimal_default_user_xml =
  304. "<clickhouse>"
  305. " <profiles>"
  306. " <default></default>"
  307. " </profiles>"
  308. " <users>"
  309. " <default>"
  310. " <password></password>"
  311. " <networks>"
  312. " <ip>::/0</ip>"
  313. " </networks>"
  314. " <profile>default</profile>"
  315. " <quota>default</quota>"
  316. " </default>"
  317. " </users>"
  318. " <quotas>"
  319. " <default></default>"
  320. " </quotas>"
  321. "</clickhouse>";
  322. ConfigurationPtr users_config;
  323. auto & access_control = global_context->getAccessControl();
  324. access_control.setNoPasswordAllowed(config().getBool("allow_no_password", true));
  325. access_control.setPlaintextPasswordAllowed(config().getBool("allow_plaintext_password", true));
  326. if (config().has("config-file") || fs::exists("config.xml"))
  327. {
  328. String config_path = config().getString("config-file", "");
  329. bool has_user_directories = config().has("user_directories");
  330. const auto config_dir = fs::path{config_path}.remove_filename().string();
  331. String users_config_path = config().getString("users_config", "");
  332. if (users_config_path.empty() && has_user_directories)
  333. {
  334. users_config_path = config().getString("user_directories.users_xml.path");
  335. if (fs::path(users_config_path).is_relative() && fs::exists(fs::path(config_dir) / users_config_path))
  336. users_config_path = fs::path(config_dir) / users_config_path;
  337. }
  338. if (users_config_path.empty())
  339. users_config = getConfigurationFromXMLString(minimal_default_user_xml);
  340. else
  341. {
  342. ConfigProcessor config_processor(users_config_path);
  343. const auto loaded_config = config_processor.loadConfig();
  344. users_config = loaded_config.configuration;
  345. }
  346. }
  347. else
  348. users_config = getConfigurationFromXMLString(minimal_default_user_xml);
  349. if (users_config)
  350. global_context->setUsersConfig(users_config);
  351. else
  352. throw Exception(ErrorCodes::CANNOT_LOAD_CONFIG, "Can't load config for users");
  353. }
  354. void LocalServer::connect()
  355. {
  356. connection_parameters = ConnectionParameters(config());
  357. connection = LocalConnection::createConnection(
  358. connection_parameters, global_context, need_render_progress, need_render_profile_events, server_display_name);
  359. }
  360. int LocalServer::main(const std::vector<std::string> & /*args*/)
  361. try
  362. {
  363. UseSSL use_ssl;
  364. thread_status.emplace();
  365. StackTrace::setShowAddresses(config().getBool("show_addresses_in_stack_traces", true));
  366. setupSignalHandler();
  367. std::cout << std::fixed << std::setprecision(3);
  368. std::cerr << std::fixed << std::setprecision(3);
  369. /// Try to increase limit on number of open files.
  370. {
  371. rlimit rlim;
  372. if (getrlimit(RLIMIT_NOFILE, &rlim))
  373. throw Poco::Exception("Cannot getrlimit");
  374. if (rlim.rlim_cur < rlim.rlim_max)
  375. {
  376. rlim.rlim_cur = config().getUInt("max_open_files", static_cast<unsigned>(rlim.rlim_max));
  377. int rc = setrlimit(RLIMIT_NOFILE, &rlim);
  378. if (rc != 0)
  379. std::cerr << fmt::format("Cannot set max number of file descriptors to {}. Try to specify max_open_files according to your system limits. error: {}", rlim.rlim_cur, errnoToString()) << '\n';
  380. }
  381. }
  382. #if defined(FUZZING_MODE)
  383. static bool first_time = true;
  384. if (first_time)
  385. {
  386. if (queries_files.empty() && !config().has("query"))
  387. {
  388. std::cerr << "\033[31m" << "ClickHouse compiled in fuzzing mode." << "\033[0m" << std::endl;
  389. std::cerr << "\033[31m" << "You have to provide a query with --query or --queries-file option." << "\033[0m" << std::endl;
  390. std::cerr << "\033[31m" << "The query have to use function getFuzzerData() inside." << "\033[0m" << std::endl;
  391. exit(1);
  392. }
  393. is_interactive = false;
  394. #else
  395. is_interactive = stdin_is_a_tty
  396. && (config().hasOption("interactive")
  397. || (!config().has("query") && !config().has("table-structure") && queries_files.empty() && !config().has("table-file")));
  398. #endif
  399. if (!is_interactive)
  400. {
  401. /// We will terminate process on error
  402. static KillingErrorHandler error_handler;
  403. Poco::ErrorHandler::set(&error_handler);
  404. }
  405. /// Don't initialize DateLUT
  406. registerFunctions();
  407. registerAggregateFunctions();
  408. registerTableFunctions();
  409. registerStorages();
  410. registerDictionaries();
  411. registerDisks(/* global_skip_access_check= */ true);
  412. registerFormats();
  413. processConfig();
  414. initTtyBuffer(toProgressOption(config().getString("progress", "default")));
  415. applyCmdSettings(global_context);
  416. /// try to load user defined executable functions, throw on error and die
  417. try
  418. {
  419. global_context->loadOrReloadUserDefinedExecutableFunctions(config());
  420. }
  421. catch (...)
  422. {
  423. tryLogCurrentException(&logger(), "Caught exception while loading user defined executable functions.");
  424. throw;
  425. }
  426. if (is_interactive)
  427. {
  428. clearTerminal();
  429. showClientVersion();
  430. std::cerr << std::endl;
  431. }
  432. connect();
  433. #ifdef FUZZING_MODE
  434. first_time = false;
  435. }
  436. #endif
  437. String initial_query = getInitialCreateTableQuery();
  438. if (!initial_query.empty())
  439. processQueryText(initial_query);
  440. if (is_interactive && !delayed_interactive)
  441. {
  442. runInteractive();
  443. }
  444. else
  445. {
  446. runNonInteractive();
  447. if (delayed_interactive)
  448. runInteractive();
  449. }
  450. #ifndef FUZZING_MODE
  451. cleanup();
  452. #endif
  453. return Application::EXIT_OK;
  454. }
  455. catch (const DB::Exception & e)
  456. {
  457. cleanup();
  458. bool need_print_stack_trace = config().getBool("stacktrace", false);
  459. std::cerr << getExceptionMessage(e, need_print_stack_trace, true) << std::endl;
  460. return e.code() ? e.code() : -1;
  461. }
  462. catch (...)
  463. {
  464. cleanup();
  465. std::cerr << getCurrentExceptionMessage(false) << std::endl;
  466. return getCurrentExceptionCode();
  467. }
  468. void LocalServer::updateLoggerLevel(const String & logs_level)
  469. {
  470. if (!logging_initialized)
  471. return;
  472. config().setString("logger.level", logs_level);
  473. updateLevels(config(), logger());
  474. }
  475. void LocalServer::processConfig()
  476. {
  477. if (config().has("query") && config().has("queries-file"))
  478. throw Exception(ErrorCodes::BAD_ARGUMENTS, "Options '--query' and '--queries-file' cannot be specified at the same time");
  479. delayed_interactive = config().has("interactive") && (config().has("query") || config().has("queries-file"));
  480. if (is_interactive && !delayed_interactive)
  481. {
  482. if (config().has("multiquery"))
  483. is_multiquery = true;
  484. }
  485. else
  486. {
  487. echo_queries = config().hasOption("echo") || config().hasOption("verbose");
  488. ignore_error = config().getBool("ignore-error", false);
  489. is_multiquery = true;
  490. }
  491. print_stack_trace = config().getBool("stacktrace", false);
  492. const std::string clickhouse_dialect{"clickhouse"};
  493. load_suggestions = (is_interactive || delayed_interactive) && !config().getBool("disable_suggestion", false)
  494. && config().getString("dialect", clickhouse_dialect) == clickhouse_dialect;
  495. auto logging = (config().has("logger.console")
  496. || config().has("logger.level")
  497. || config().has("log-level")
  498. || config().has("send_logs_level")
  499. || config().has("logger.log"));
  500. auto level = config().getString("log-level", "trace");
  501. if (config().has("server_logs_file"))
  502. {
  503. auto poco_logs_level = Poco::Logger::parseLevel(level);
  504. Poco::Logger::root().setLevel(poco_logs_level);
  505. Poco::Logger::root().setChannel(Poco::AutoPtr<Poco::SimpleFileChannel>(new Poco::SimpleFileChannel(server_logs_file)));
  506. logging_initialized = true;
  507. }
  508. else if (logging || is_interactive)
  509. {
  510. config().setString("logger", "logger");
  511. auto log_level_default = is_interactive && !logging ? "none" : level;
  512. config().setString("logger.level", config().getString("log-level", config().getString("send_logs_level", log_level_default)));
  513. buildLoggers(config(), logger(), "clickhouse-local");
  514. logging_initialized = true;
  515. }
  516. else
  517. {
  518. Poco::Logger::root().setLevel("none");
  519. Poco::Logger::root().setChannel(Poco::AutoPtr<Poco::NullChannel>(new Poco::NullChannel()));
  520. logging_initialized = false;
  521. }
  522. shared_context = Context::createShared();
  523. global_context = Context::createGlobal(shared_context.get());
  524. global_context->makeGlobalContext();
  525. global_context->setApplicationType(Context::ApplicationType::LOCAL);
  526. tryInitPath();
  527. Poco::Logger * log = &logger();
  528. /// Maybe useless
  529. if (config().has("macros"))
  530. global_context->setMacros(std::make_unique<Macros>(config(), "macros", log));
  531. format = config().getString("output-format", config().getString("format", is_interactive ? "PrettyCompact" : "TSV"));
  532. insert_format = "Values";
  533. /// Setting value from cmd arg overrides one from config
  534. if (global_context->getSettingsRef().max_insert_block_size.changed)
  535. {
  536. insert_format_max_block_size = global_context->getSettingsRef().max_insert_block_size;
  537. }
  538. else
  539. {
  540. insert_format_max_block_size = config().getUInt64("insert_format_max_block_size",
  541. global_context->getSettingsRef().max_insert_block_size);
  542. }
  543. /// Sets external authenticators config (LDAP, Kerberos).
  544. global_context->setExternalAuthenticatorsConfig(config());
  545. setupUsers();
  546. /// Limit on total number of concurrently executing queries.
  547. /// There is no need for concurrent queries, override max_concurrent_queries.
  548. global_context->getProcessList().setMaxSize(0);
  549. const size_t physical_server_memory = getMemoryAmount();
  550. const double cache_size_to_ram_max_ratio = config().getDouble("cache_size_to_ram_max_ratio", 0.5);
  551. const size_t max_cache_size = static_cast<size_t>(physical_server_memory * cache_size_to_ram_max_ratio);
  552. String uncompressed_cache_policy = config().getString("uncompressed_cache_policy", DEFAULT_UNCOMPRESSED_CACHE_POLICY);
  553. size_t uncompressed_cache_size = config().getUInt64("uncompressed_cache_size", DEFAULT_UNCOMPRESSED_CACHE_MAX_SIZE);
  554. double uncompressed_cache_size_ratio = config().getDouble("uncompressed_cache_size_ratio", DEFAULT_UNCOMPRESSED_CACHE_SIZE_RATIO);
  555. if (uncompressed_cache_size > max_cache_size)
  556. {
  557. uncompressed_cache_size = max_cache_size;
  558. LOG_INFO(log, "Lowered uncompressed cache size to {} because the system has limited RAM", formatReadableSizeWithBinarySuffix(uncompressed_cache_size));
  559. }
  560. global_context->setUncompressedCache(uncompressed_cache_policy, uncompressed_cache_size, uncompressed_cache_size_ratio);
  561. String mark_cache_policy = config().getString("mark_cache_policy", DEFAULT_MARK_CACHE_POLICY);
  562. size_t mark_cache_size = config().getUInt64("mark_cache_size", DEFAULT_MARK_CACHE_MAX_SIZE);
  563. double mark_cache_size_ratio = config().getDouble("mark_cache_size_ratio", DEFAULT_MARK_CACHE_SIZE_RATIO);
  564. if (!mark_cache_size)
  565. LOG_ERROR(log, "Too low mark cache size will lead to severe performance degradation.");
  566. if (mark_cache_size > max_cache_size)
  567. {
  568. mark_cache_size = max_cache_size;
  569. LOG_INFO(log, "Lowered mark cache size to {} because the system has limited RAM", formatReadableSizeWithBinarySuffix(mark_cache_size));
  570. }
  571. global_context->setMarkCache(mark_cache_policy, mark_cache_size, mark_cache_size_ratio);
  572. String index_uncompressed_cache_policy = config().getString("index_uncompressed_cache_policy", DEFAULT_INDEX_UNCOMPRESSED_CACHE_POLICY);
  573. size_t index_uncompressed_cache_size = config().getUInt64("index_uncompressed_cache_size", DEFAULT_INDEX_UNCOMPRESSED_CACHE_MAX_SIZE);
  574. double index_uncompressed_cache_size_ratio = config().getDouble("index_uncompressed_cache_size_ratio", DEFAULT_INDEX_UNCOMPRESSED_CACHE_SIZE_RATIO);
  575. if (index_uncompressed_cache_size > max_cache_size)
  576. {
  577. index_uncompressed_cache_size = max_cache_size;
  578. LOG_INFO(log, "Lowered index uncompressed cache size to {} because the system has limited RAM", formatReadableSizeWithBinarySuffix(uncompressed_cache_size));
  579. }
  580. global_context->setIndexUncompressedCache(index_uncompressed_cache_policy, index_uncompressed_cache_size, index_uncompressed_cache_size_ratio);
  581. String index_mark_cache_policy = config().getString("index_mark_cache_policy", DEFAULT_INDEX_MARK_CACHE_POLICY);
  582. size_t index_mark_cache_size = config().getUInt64("index_mark_cache_size", DEFAULT_INDEX_MARK_CACHE_MAX_SIZE);
  583. double index_mark_cache_size_ratio = config().getDouble("index_mark_cache_size_ratio", DEFAULT_INDEX_MARK_CACHE_SIZE_RATIO);
  584. if (index_mark_cache_size > max_cache_size)
  585. {
  586. index_mark_cache_size = max_cache_size;
  587. LOG_INFO(log, "Lowered index mark cache size to {} because the system has limited RAM", formatReadableSizeWithBinarySuffix(uncompressed_cache_size));
  588. }
  589. global_context->setIndexMarkCache(index_mark_cache_policy, index_mark_cache_size, index_mark_cache_size_ratio);
  590. size_t mmap_cache_size = config().getUInt64("mmap_cache_size", DEFAULT_MMAP_CACHE_MAX_SIZE);
  591. if (mmap_cache_size > max_cache_size)
  592. {
  593. mmap_cache_size = max_cache_size;
  594. LOG_INFO(log, "Lowered mmap file cache size to {} because the system has limited RAM", formatReadableSizeWithBinarySuffix(uncompressed_cache_size));
  595. }
  596. global_context->setMMappedFileCache(mmap_cache_size);
  597. /// Initialize a dummy query cache.
  598. global_context->setQueryCache(0, 0, 0, 0);
  599. #if USE_EMBEDDED_COMPILER
  600. size_t compiled_expression_cache_max_size_in_bytes = config().getUInt64("compiled_expression_cache_size", DEFAULT_COMPILED_EXPRESSION_CACHE_MAX_SIZE);
  601. size_t compiled_expression_cache_max_elements = config().getUInt64("compiled_expression_cache_elements_size", DEFAULT_COMPILED_EXPRESSION_CACHE_MAX_ENTRIES);
  602. CompiledExpressionCacheFactory::instance().init(compiled_expression_cache_max_size_in_bytes, compiled_expression_cache_max_elements);
  603. #endif
  604. /// NOTE: it is important to apply any overrides before
  605. /// setDefaultProfiles() calls since it will copy current context (i.e.
  606. /// there is separate context for Buffer tables).
  607. applySettingsOverridesForLocal(global_context);
  608. applyCmdOptions(global_context);
  609. /// Load global settings from default_profile and system_profile.
  610. global_context->setDefaultProfiles(config());
  611. /// We load temporary database first, because projections need it.
  612. DatabaseCatalog::instance().initializeAndLoadTemporaryDatabase();
  613. /** Init dummy default DB
  614. * NOTE: We force using isolated default database to avoid conflicts with default database from server environment
  615. * Otherwise, metadata of temporary File(format, EXPLICIT_PATH) tables will pollute metadata/ directory;
  616. * if such tables will not be dropped, clickhouse-server will not be able to load them due to security reasons.
  617. */
  618. std::string default_database = config().getString("default_database", "_local");
  619. DatabaseCatalog::instance().attachDatabase(default_database, createClickHouseLocalDatabaseOverlay(default_database, global_context));
  620. global_context->setCurrentDatabase(default_database);
  621. if (config().has("path"))
  622. {
  623. String path = global_context->getPath();
  624. /// Lock path directory before read
  625. status.emplace(fs::path(path) / "status", StatusFile::write_full_info);
  626. LOG_DEBUG(log, "Loading metadata from {}", path);
  627. loadMetadataSystem(global_context);
  628. attachSystemTablesLocal(global_context, *createMemoryDatabaseIfNotExists(global_context, DatabaseCatalog::SYSTEM_DATABASE));
  629. attachInformationSchema(global_context, *createMemoryDatabaseIfNotExists(global_context, DatabaseCatalog::INFORMATION_SCHEMA));
  630. attachInformationSchema(global_context, *createMemoryDatabaseIfNotExists(global_context, DatabaseCatalog::INFORMATION_SCHEMA_UPPERCASE));
  631. startupSystemTables();
  632. if (!config().has("only-system-tables"))
  633. {
  634. DatabaseCatalog::instance().createBackgroundTasks();
  635. loadMetadata(global_context);
  636. DatabaseCatalog::instance().startupBackgroundCleanup();
  637. }
  638. /// For ClickHouse local if path is not set the loader will be disabled.
  639. global_context->getUserDefinedSQLObjectsLoader().loadObjects();
  640. LOG_DEBUG(log, "Loaded metadata.");
  641. }
  642. else if (!config().has("no-system-tables"))
  643. {
  644. attachSystemTablesLocal(global_context, *createMemoryDatabaseIfNotExists(global_context, DatabaseCatalog::SYSTEM_DATABASE));
  645. attachInformationSchema(global_context, *createMemoryDatabaseIfNotExists(global_context, DatabaseCatalog::INFORMATION_SCHEMA));
  646. attachInformationSchema(global_context, *createMemoryDatabaseIfNotExists(global_context, DatabaseCatalog::INFORMATION_SCHEMA_UPPERCASE));
  647. }
  648. server_display_name = config().getString("display_name", getFQDNOrHostName());
  649. prompt_by_server_display_name = config().getRawString("prompt_by_server_display_name.default", "{display_name} :) ");
  650. std::map<String, String> prompt_substitutions{{"display_name", server_display_name}};
  651. for (const auto & [key, value] : prompt_substitutions)
  652. boost::replace_all(prompt_by_server_display_name, "{" + key + "}", value);
  653. global_context->setQueryKindInitial();
  654. global_context->setQueryKind(query_kind);
  655. }
  656. [[ maybe_unused ]] static std::string getHelpHeader()
  657. {
  658. return
  659. "usage: clickhouse-local [initial table definition] [--query <query>]\n"
  660. "clickhouse-local allows to execute SQL queries on your data files via single command line call."
  661. " To do so, initially you need to define your data source and its format."
  662. " After you can execute your SQL queries in usual manner.\n"
  663. "There are two ways to define initial table keeping your data."
  664. " Either just in first query like this:\n"
  665. " CREATE TABLE <table> (<structure>) ENGINE = File(<input-format>, <file>);\n"
  666. "Either through corresponding command line parameters --table --structure --input-format and --file.";
  667. }
  668. [[ maybe_unused ]] static std::string getHelpFooter()
  669. {
  670. return
  671. "Example printing memory used by each Unix user:\n"
  672. "ps aux | tail -n +2 | awk '{ printf(\"%s\\t%s\\n\", $1, $4) }' | "
  673. "clickhouse-local -S \"user String, mem Float64\" -q"
  674. " \"SELECT user, round(sum(mem), 2) as mem_total FROM table GROUP BY user ORDER"
  675. " BY mem_total DESC FORMAT PrettyCompact\"";
  676. }
  677. void LocalServer::printHelpMessage([[maybe_unused]] const OptionsDescription & options_description)
  678. {
  679. #if defined(FUZZING_MODE)
  680. std::cout <<
  681. "usage: clickhouse <clickhouse-local arguments> -- <libfuzzer arguments>\n"
  682. "Note: It is important not to use only one letter keys with single dash for \n"
  683. "for clickhouse-local arguments. It may work incorrectly.\n"
  684. "ClickHouse is build with coverage guided fuzzer (libfuzzer) inside it.\n"
  685. "You have to provide a query which contains getFuzzerData function.\n"
  686. "This will take the data from fuzzing engine, pass it to getFuzzerData function and execute a query.\n"
  687. "Each time the data will be different, and it will last until some segfault or sanitizer assertion is found. \n";
  688. #else
  689. std::cout << getHelpHeader() << "\n";
  690. std::cout << options_description.main_description.value() << "\n";
  691. std::cout << getHelpFooter() << "\n";
  692. #endif
  693. }
  694. void LocalServer::addOptions(OptionsDescription & options_description)
  695. {
  696. options_description.main_description->add_options()
  697. ("table,N", po::value<std::string>(), "name of the initial table")
  698. /// If structure argument is omitted then initial query is not generated
  699. ("structure,S", po::value<std::string>(), "structure of the initial table (list of column and type names)")
  700. ("file,f", po::value<std::string>(), "path to file with data of the initial table (stdin if not specified)")
  701. ("input-format", po::value<std::string>(), "input format of the initial table data")
  702. ("output-format", po::value<std::string>(), "default output format")
  703. ("logger.console", po::value<bool>()->implicit_value(true), "Log to console")
  704. ("logger.log", po::value<std::string>(), "Log file name")
  705. ("logger.level", po::value<std::string>(), "Log level")
  706. ("no-system-tables", "do not attach system tables (better startup time)")
  707. ("path", po::value<std::string>(), "Storage path")
  708. ("only-system-tables", "attach only system tables from specified path")
  709. ("top_level_domains_path", po::value<std::string>(), "Path to lists with custom TLDs")
  710. ;
  711. }
  712. void LocalServer::applyCmdSettings(ContextMutablePtr context)
  713. {
  714. context->applySettingsChanges(cmd_settings.changes());
  715. }
  716. void LocalServer::applyCmdOptions(ContextMutablePtr context)
  717. {
  718. context->setDefaultFormat(config().getString("output-format", config().getString("format", is_interactive ? "PrettyCompact" : "TSV")));
  719. applyCmdSettings(context);
  720. }
  721. void LocalServer::processOptions(const OptionsDescription &, const CommandLineOptions & options, const std::vector<Arguments> &, const std::vector<Arguments> &)
  722. {
  723. if (options.count("table"))
  724. config().setString("table-name", options["table"].as<std::string>());
  725. if (options.count("file"))
  726. config().setString("table-file", options["file"].as<std::string>());
  727. if (options.count("structure"))
  728. config().setString("table-structure", options["structure"].as<std::string>());
  729. if (options.count("no-system-tables"))
  730. config().setBool("no-system-tables", true);
  731. if (options.count("only-system-tables"))
  732. config().setBool("only-system-tables", true);
  733. if (options.count("input-format"))
  734. config().setString("table-data-format", options["input-format"].as<std::string>());
  735. if (options.count("output-format"))
  736. config().setString("output-format", options["output-format"].as<std::string>());
  737. if (options.count("logger.console"))
  738. config().setBool("logger.console", options["logger.console"].as<bool>());
  739. if (options.count("logger.log"))
  740. config().setString("logger.log", options["logger.log"].as<std::string>());
  741. if (options.count("logger.level"))
  742. config().setString("logger.level", options["logger.level"].as<std::string>());
  743. if (options.count("send_logs_level"))
  744. config().setString("send_logs_level", options["send_logs_level"].as<std::string>());
  745. }
  746. void LocalServer::readArguments(int argc, char ** argv, Arguments & common_arguments, std::vector<Arguments> &, std::vector<Arguments> &)
  747. {
  748. for (int arg_num = 1; arg_num < argc; ++arg_num)
  749. {
  750. std::string_view arg = argv[arg_num];
  751. if (arg == "--multiquery" && (arg_num + 1) < argc && !std::string_view(argv[arg_num + 1]).starts_with('-'))
  752. {
  753. /// Transform the abbreviated syntax '--multiquery <SQL>' into the full syntax '--multiquery -q <SQL>'
  754. ++arg_num;
  755. arg = argv[arg_num];
  756. addMultiquery(arg, common_arguments);
  757. }
  758. else
  759. common_arguments.emplace_back(arg);
  760. }
  761. }
  762. }
  763. #pragma GCC diagnostic ignored "-Wunused-function"
  764. #pragma GCC diagnostic ignored "-Wmissing-declarations"
  765. int mainEntryClickHouseLocal(int argc, char ** argv)
  766. {
  767. try
  768. {
  769. DB::LocalServer app;
  770. app.init(argc, argv);
  771. return app.run();
  772. }
  773. catch (const DB::Exception & e)
  774. {
  775. std::cerr << DB::getExceptionMessage(e, false) << std::endl;
  776. auto code = DB::getCurrentExceptionCode();
  777. return code ? code : 1;
  778. }
  779. catch (const boost::program_options::error & e)
  780. {
  781. std::cerr << "Bad arguments: " << e.what() << std::endl;
  782. return DB::ErrorCodes::BAD_ARGUMENTS;
  783. }
  784. catch (...)
  785. {
  786. std::cerr << DB::getCurrentExceptionMessage(true) << '\n';
  787. auto code = DB::getCurrentExceptionCode();
  788. return code ? code : 1;
  789. }
  790. }
  791. #if defined(FUZZING_MODE)
  792. std::optional<DB::LocalServer> fuzz_app;
  793. extern "C" int LLVMFuzzerInitialize(int * pargc, char *** pargv)
  794. {
  795. int & argc = *pargc;
  796. char ** argv = *pargv;
  797. /// As a user you can add flags to clickhouse binary in fuzzing mode as follows
  798. /// clickhouse <set of clickhouse-local specific flag> -- <set of libfuzzer flags>
  799. /// Calculate the position of delimiter "--" that separates arguments
  800. /// of clickhouse-local and libfuzzer
  801. int pos_delim = argc;
  802. for (int i = 0; i < argc; ++i)
  803. {
  804. if (strcmp(argv[i], "--") == 0)
  805. {
  806. pos_delim = i;
  807. break;
  808. }
  809. }
  810. /// Initialize clickhouse-local app
  811. fuzz_app.emplace();
  812. fuzz_app->init(pos_delim, argv);
  813. /// We will leave clickhouse-local specific arguments as is, because libfuzzer will ignore
  814. /// all keys starting with --
  815. return 0;
  816. }
  817. extern "C" int LLVMFuzzerTestOneInput(const uint8_t * data, size_t size)
  818. try
  819. {
  820. auto input = String(reinterpret_cast<const char *>(data), size);
  821. DB::FunctionGetFuzzerData::update(input);
  822. fuzz_app->run();
  823. return 0;
  824. }
  825. catch (...)
  826. {
  827. return 1;
  828. }
  829. #endif