asyncclient.py 38 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672
  1. import asyncio
  2. import io
  3. import os
  4. from concurrent.futures.thread import ThreadPoolExecutor
  5. from datetime import tzinfo
  6. from typing import Optional, Union, Dict, Any, Sequence, Iterable, Generator, BinaryIO
  7. from clickhouse_connect.driver.client import Client
  8. from clickhouse_connect.driver.common import StreamContext
  9. from clickhouse_connect.driver.httpclient import HttpClient
  10. from clickhouse_connect.driver.external import ExternalData
  11. from clickhouse_connect.driver.query import QueryContext, QueryResult
  12. from clickhouse_connect.driver.summary import QuerySummary
  13. from clickhouse_connect.datatypes.base import ClickHouseType
  14. from clickhouse_connect.driver.insert import InsertContext
  15. # pylint: disable=too-many-public-methods,too-many-instance-attributes,too-many-arguments,too-many-positional-arguments,too-many-locals
  16. class AsyncClient:
  17. """
  18. AsyncClient is a wrapper around the ClickHouse Client object that allows for async calls to the ClickHouse server.
  19. Internally, each of the methods that uses IO is wrapped in a call to EventLoop.run_in_executor.
  20. """
  def __init__(self, *, client: Client, executor_threads: int = 0):
      """
      Wrap a synchronous Client and create the thread pool that runs its blocking calls.
      :param client: Synchronous Client that the methods of this wrapper delegate to
      :param executor_threads: Maximum worker threads for the internal ThreadPoolExecutor. A value of 0 selects
        min(32, cpu_count + 4)
      """
      if isinstance(client, HttpClient):
          # Re-tag the User-Agent so requests sent through this wrapper advertise async mode
          sync_agent = client.headers['User-Agent']
          client.headers['User-Agent'] = sync_agent.replace('mode:sync;', 'mode:async;')
      self.client = client
      # 0 means "not specified": size the pool with the formula below (mimics the default behavior)
      worker_count = min(32, (os.cpu_count() or 1) + 4) if executor_threads == 0 else executor_threads
      self.executor = ThreadPoolExecutor(max_workers=worker_count)
  28. def set_client_setting(self, key, value):
  29. """
  30. Set a clickhouse setting for the client after initialization. If a setting is not recognized by ClickHouse,
  31. or the setting is identified as "read_only", this call will either throw a Programming exception or attempt
  32. to send the setting anyway based on the common setting 'invalid_setting_action'.
  33. :param key: ClickHouse setting name
  34. :param value: ClickHouse setting value
  35. """
  36. self.client.set_client_setting(key=key, value=value)
  37. def get_client_setting(self, key) -> Optional[str]:
  38. """
  39. :param key: The setting key
  40. :return: The string value of the setting, if it exists, or None
  41. """
  42. return self.client.get_client_setting(key=key)
  43. def min_version(self, version_str: str) -> bool:
  44. """
  45. Determine whether the connected server is at least the submitted version
  46. For Altinity Stable versions like 22.8.15.25.altinitystable
  47. the last condition in the first list comprehension expression is added
  48. :param version_str: A version string consisting of up to 4 integers delimited by dots
  49. :return: True if version_str is greater than the server_version, False if less than
  50. """
  51. return self.client.min_version(version_str)
  52. def close(self):
  53. """
  54. Subclass implementation to close the connection to the server/deallocate the client
  55. """
  56. self.client.close()
  57. async def query(self,
  58. query: Optional[str] = None,
  59. parameters: Optional[Union[Sequence, Dict[str, Any]]] = None,
  60. settings: Optional[Dict[str, Any]] = None,
  61. query_formats: Optional[Dict[str, str]] = None,
  62. column_formats: Optional[Dict[str, Union[str, Dict[str, str]]]] = None,
  63. encoding: Optional[str] = None,
  64. use_none: Optional[bool] = None,
  65. column_oriented: Optional[bool] = None,
  66. use_numpy: Optional[bool] = None,
  67. max_str_len: Optional[int] = None,
  68. context: QueryContext = None,
  69. query_tz: Optional[Union[str, tzinfo]] = None,
  70. column_tzs: Optional[Dict[str, Union[str, tzinfo]]] = None,
  71. external_data: Optional[ExternalData] = None) -> QueryResult:
  72. """
  73. Main query method for SELECT, DESCRIBE and other SQL statements that return a result matrix.
  74. For parameters, see the create_query_context method.
  75. :return: QueryResult -- data and metadata from response
  76. """
  77. def _query():
  78. return self.client.query(query=query, parameters=parameters, settings=settings, query_formats=query_formats,
  79. column_formats=column_formats, encoding=encoding, use_none=use_none,
  80. column_oriented=column_oriented, use_numpy=use_numpy, max_str_len=max_str_len,
  81. context=context, query_tz=query_tz, column_tzs=column_tzs,
  82. external_data=external_data)
  83. loop = asyncio.get_running_loop()
  84. result = await loop.run_in_executor(self.executor, _query)
  85. return result
  86. async def query_column_block_stream(self,
  87. query: Optional[str] = None,
  88. parameters: Optional[Union[Sequence, Dict[str, Any]]] = None,
  89. settings: Optional[Dict[str, Any]] = None,
  90. query_formats: Optional[Dict[str, str]] = None,
  91. column_formats: Optional[Dict[str, Union[str, Dict[str, str]]]] = None,
  92. encoding: Optional[str] = None,
  93. use_none: Optional[bool] = None,
  94. context: QueryContext = None,
  95. query_tz: Optional[Union[str, tzinfo]] = None,
  96. column_tzs: Optional[Dict[str, Union[str, tzinfo]]] = None,
  97. external_data: Optional[ExternalData] = None) -> StreamContext:
  98. """
  99. Variation of main query method that returns a stream of column oriented blocks.
  100. For parameters, see the create_query_context method.
  101. :return: StreamContext -- Iterable stream context that returns column oriented blocks
  102. """
  103. def _query_column_block_stream():
  104. return self.client.query_column_block_stream(query=query, parameters=parameters, settings=settings,
  105. query_formats=query_formats, column_formats=column_formats,
  106. encoding=encoding, use_none=use_none, context=context,
  107. query_tz=query_tz, column_tzs=column_tzs,
  108. external_data=external_data)
  109. loop = asyncio.get_running_loop()
  110. result = await loop.run_in_executor(self.executor, _query_column_block_stream)
  111. return result
  112. async def query_row_block_stream(self,
  113. query: Optional[str] = None,
  114. parameters: Optional[Union[Sequence, Dict[str, Any]]] = None,
  115. settings: Optional[Dict[str, Any]] = None,
  116. query_formats: Optional[Dict[str, str]] = None,
  117. column_formats: Optional[Dict[str, Union[str, Dict[str, str]]]] = None,
  118. encoding: Optional[str] = None,
  119. use_none: Optional[bool] = None,
  120. context: QueryContext = None,
  121. query_tz: Optional[Union[str, tzinfo]] = None,
  122. column_tzs: Optional[Dict[str, Union[str, tzinfo]]] = None,
  123. external_data: Optional[ExternalData] = None) -> StreamContext:
  124. """
  125. Variation of main query method that returns a stream of row oriented blocks.
  126. For parameters, see the create_query_context method.
  127. :return: StreamContext -- Iterable stream context that returns blocks of rows
  128. """
  129. def _query_row_block_stream():
  130. return self.client.query_row_block_stream(query=query, parameters=parameters, settings=settings,
  131. query_formats=query_formats, column_formats=column_formats,
  132. encoding=encoding, use_none=use_none, context=context,
  133. query_tz=query_tz, column_tzs=column_tzs,
  134. external_data=external_data)
  135. loop = asyncio.get_running_loop()
  136. result = await loop.run_in_executor(self.executor, _query_row_block_stream)
  137. return result
  138. async def query_rows_stream(self,
  139. query: Optional[str] = None,
  140. parameters: Optional[Union[Sequence, Dict[str, Any]]] = None,
  141. settings: Optional[Dict[str, Any]] = None,
  142. query_formats: Optional[Dict[str, str]] = None,
  143. column_formats: Optional[Dict[str, Union[str, Dict[str, str]]]] = None,
  144. encoding: Optional[str] = None,
  145. use_none: Optional[bool] = None,
  146. context: QueryContext = None,
  147. query_tz: Optional[Union[str, tzinfo]] = None,
  148. column_tzs: Optional[Dict[str, Union[str, tzinfo]]] = None,
  149. external_data: Optional[ExternalData] = None) -> StreamContext:
  150. """
  151. Variation of main query method that returns a stream of row oriented blocks.
  152. For parameters, see the create_query_context method.
  153. :return: StreamContext -- Iterable stream context that returns blocks of rows
  154. """
  155. def _query_rows_stream():
  156. return self.client.query_rows_stream(query=query, parameters=parameters, settings=settings,
  157. query_formats=query_formats, column_formats=column_formats,
  158. encoding=encoding, use_none=use_none, context=context,
  159. query_tz=query_tz, column_tzs=column_tzs,
  160. external_data=external_data)
  161. loop = asyncio.get_running_loop()
  162. result = await loop.run_in_executor(self.executor, _query_rows_stream)
  163. return result
  164. async def raw_query(self,
  165. query: str,
  166. parameters: Optional[Union[Sequence, Dict[str, Any]]] = None,
  167. settings: Optional[Dict[str, Any]] = None,
  168. fmt: str = None,
  169. use_database: bool = True,
  170. external_data: Optional[ExternalData] = None) -> bytes:
  171. """
  172. Query method that simply returns the raw ClickHouse format bytes.
  173. :param query: Query statement/format string
  174. :param parameters: Optional dictionary used to format the query
  175. :param settings: Optional dictionary of ClickHouse settings (key/string values)
  176. :param fmt: ClickHouse output format
  177. :param use_database Send the database parameter to ClickHouse so the command will be executed in the client
  178. database context
  179. :param external_data External data to send with the query
  180. :return: bytes representing raw ClickHouse return value based on format
  181. """
  182. def _raw_query():
  183. return self.client.raw_query(query=query, parameters=parameters, settings=settings, fmt=fmt,
  184. use_database=use_database, external_data=external_data)
  185. loop = asyncio.get_running_loop()
  186. result = await loop.run_in_executor(self.executor, _raw_query)
  187. return result
  188. async def raw_stream(self, query: str,
  189. parameters: Optional[Union[Sequence, Dict[str, Any]]] = None,
  190. settings: Optional[Dict[str, Any]] = None,
  191. fmt: str = None,
  192. use_database: bool = True,
  193. external_data: Optional[ExternalData] = None) -> io.IOBase:
  194. """
  195. Query method that returns the result as an io.IOBase iterator.
  196. :param query: Query statement/format string
  197. :param parameters: Optional dictionary used to format the query
  198. :param settings: Optional dictionary of ClickHouse settings (key/string values)
  199. :param fmt: ClickHouse output format
  200. :param use_database Send the database parameter to ClickHouse so the command will be executed in the client
  201. database context
  202. :param external_data External data to send with the query
  203. :return: io.IOBase stream/iterator for the result
  204. """
  205. def _raw_stream():
  206. return self.client.raw_stream(query=query, parameters=parameters, settings=settings, fmt=fmt,
  207. use_database=use_database, external_data=external_data)
  208. loop = asyncio.get_running_loop()
  209. result = await loop.run_in_executor(self.executor, _raw_stream)
  210. return result
  211. async def query_np(self,
  212. query: Optional[str] = None,
  213. parameters: Optional[Union[Sequence, Dict[str, Any]]] = None,
  214. settings: Optional[Dict[str, Any]] = None,
  215. query_formats: Optional[Dict[str, str]] = None,
  216. column_formats: Optional[Dict[str, str]] = None,
  217. encoding: Optional[str] = None,
  218. use_none: Optional[bool] = None,
  219. max_str_len: Optional[int] = None,
  220. context: QueryContext = None,
  221. external_data: Optional[ExternalData] = None):
  222. """
  223. Query method that returns the results as a numpy array.
  224. For parameter values, see the create_query_context method.
  225. :return: Numpy array representing the result set
  226. """
  227. def _query_np():
  228. return self.client.query_np(query=query, parameters=parameters, settings=settings,
  229. query_formats=query_formats, column_formats=column_formats, encoding=encoding,
  230. use_none=use_none, max_str_len=max_str_len, context=context,
  231. external_data=external_data)
  232. loop = asyncio.get_running_loop()
  233. result = await loop.run_in_executor(self.executor, _query_np)
  234. return result
  235. async def query_np_stream(self,
  236. query: Optional[str] = None,
  237. parameters: Optional[Union[Sequence, Dict[str, Any]]] = None,
  238. settings: Optional[Dict[str, Any]] = None,
  239. query_formats: Optional[Dict[str, str]] = None,
  240. column_formats: Optional[Dict[str, str]] = None,
  241. encoding: Optional[str] = None,
  242. use_none: Optional[bool] = None,
  243. max_str_len: Optional[int] = None,
  244. context: QueryContext = None,
  245. external_data: Optional[ExternalData] = None) -> StreamContext:
  246. """
  247. Query method that returns the results as a stream of numpy arrays.
  248. For parameter values, see the create_query_context method.
  249. :return: Generator that yield a numpy array per block representing the result set
  250. """
  251. def _query_np_stream():
  252. return self.client.query_np_stream(query=query, parameters=parameters, settings=settings,
  253. query_formats=query_formats, column_formats=column_formats,
  254. encoding=encoding, use_none=use_none, max_str_len=max_str_len,
  255. context=context, external_data=external_data)
  256. loop = asyncio.get_running_loop()
  257. result = await loop.run_in_executor(self.executor, _query_np_stream)
  258. return result
  259. async def query_df(self,
  260. query: Optional[str] = None,
  261. parameters: Optional[Union[Sequence, Dict[str, Any]]] = None,
  262. settings: Optional[Dict[str, Any]] = None,
  263. query_formats: Optional[Dict[str, str]] = None,
  264. column_formats: Optional[Dict[str, str]] = None,
  265. encoding: Optional[str] = None,
  266. use_none: Optional[bool] = None,
  267. max_str_len: Optional[int] = None,
  268. use_na_values: Optional[bool] = None,
  269. query_tz: Optional[str] = None,
  270. column_tzs: Optional[Dict[str, Union[str, tzinfo]]] = None,
  271. context: QueryContext = None,
  272. external_data: Optional[ExternalData] = None,
  273. use_extended_dtypes: Optional[bool] = None):
  274. """
  275. Query method that results the results as a pandas dataframe.
  276. For parameter values, see the create_query_context method.
  277. :return: Pandas dataframe representing the result set
  278. """
  279. def _query_df():
  280. return self.client.query_df(query=query, parameters=parameters, settings=settings,
  281. query_formats=query_formats, column_formats=column_formats, encoding=encoding,
  282. use_none=use_none, max_str_len=max_str_len, use_na_values=use_na_values,
  283. query_tz=query_tz, column_tzs=column_tzs, context=context,
  284. external_data=external_data, use_extended_dtypes=use_extended_dtypes)
  285. loop = asyncio.get_running_loop()
  286. result = await loop.run_in_executor(self.executor, _query_df)
  287. return result
  288. async def query_df_stream(self,
  289. query: Optional[str] = None,
  290. parameters: Optional[Union[Sequence, Dict[str, Any]]] = None,
  291. settings: Optional[Dict[str, Any]] = None,
  292. query_formats: Optional[Dict[str, str]] = None,
  293. column_formats: Optional[Dict[str, str]] = None,
  294. encoding: Optional[str] = None,
  295. use_none: Optional[bool] = None,
  296. max_str_len: Optional[int] = None,
  297. use_na_values: Optional[bool] = None,
  298. query_tz: Optional[str] = None,
  299. column_tzs: Optional[Dict[str, Union[str, tzinfo]]] = None,
  300. context: QueryContext = None,
  301. external_data: Optional[ExternalData] = None,
  302. use_extended_dtypes: Optional[bool] = None) -> StreamContext:
  303. """
  304. Query method that returns the results as a StreamContext.
  305. For parameter values, see the create_query_context method.
  306. :return: Generator that yields a Pandas dataframe per block representing the result set
  307. """
  308. def _query_df_stream():
  309. return self.client.query_df_stream(query=query, parameters=parameters, settings=settings,
  310. query_formats=query_formats, column_formats=column_formats,
  311. encoding=encoding,
  312. use_none=use_none, max_str_len=max_str_len, use_na_values=use_na_values,
  313. query_tz=query_tz, column_tzs=column_tzs, context=context,
  314. external_data=external_data, use_extended_dtypes=use_extended_dtypes)
  315. loop = asyncio.get_running_loop()
  316. result = await loop.run_in_executor(self.executor, _query_df_stream)
  317. return result
  318. def create_query_context(self,
  319. query: Optional[Union[str, bytes]] = None,
  320. parameters: Optional[Union[Sequence, Dict[str, Any]]] = None,
  321. settings: Optional[Dict[str, Any]] = None,
  322. query_formats: Optional[Dict[str, str]] = None,
  323. column_formats: Optional[Dict[str, Union[str, Dict[str, str]]]] = None,
  324. encoding: Optional[str] = None,
  325. use_none: Optional[bool] = None,
  326. column_oriented: Optional[bool] = None,
  327. use_numpy: Optional[bool] = False,
  328. max_str_len: Optional[int] = 0,
  329. context: Optional[QueryContext] = None,
  330. query_tz: Optional[Union[str, tzinfo]] = None,
  331. column_tzs: Optional[Dict[str, Union[str, tzinfo]]] = None,
  332. use_na_values: Optional[bool] = None,
  333. streaming: bool = False,
  334. as_pandas: bool = False,
  335. external_data: Optional[ExternalData] = None,
  336. use_extended_dtypes: Optional[bool] = None) -> QueryContext:
  337. """
  338. Creates or updates a reusable QueryContext object
  339. :param query: Query statement/format string
  340. :param parameters: Optional dictionary used to format the query
  341. :param settings: Optional dictionary of ClickHouse settings (key/string values)
  342. :param query_formats: See QueryContext __init__ docstring
  343. :param column_formats: See QueryContext __init__ docstring
  344. :param encoding: See QueryContext __init__ docstring
  345. :param use_none: Use None for ClickHouse NULL instead of default values. Note that using None in Numpy
  346. arrays will force the numpy array dtype to 'object', which is often inefficient. This effect also
  347. will impact the performance of Pandas dataframes.
  348. :param column_oriented: Deprecated. Controls orientation of the QueryResult result_set property
  349. :param use_numpy: Return QueryResult columns as one-dimensional numpy arrays
  350. :param max_str_len: Limit returned ClickHouse String values to this length, which allows a Numpy
  351. structured array even with ClickHouse variable length String columns. If 0, Numpy arrays for
  352. String columns will always be object arrays
  353. :param context: An existing QueryContext to be updated with any provided parameter values
  354. :param query_tz Either a string or a pytz tzinfo object. (Strings will be converted to tzinfo objects).
  355. Values for any DateTime or DateTime64 column in the query will be converted to Python datetime.datetime
  356. objects with the selected timezone
  357. :param column_tzs A dictionary of column names to tzinfo objects (or strings that will be converted to
  358. tzinfo objects). The timezone will be applied to datetime objects returned in the query
  359. :param use_na_values: Deprecated alias for use_advanced_dtypes
  360. :param as_pandas Return the result columns as pandas.Series objects
  361. :param streaming Marker used to correctly configure streaming queries
  362. :param external_data ClickHouse "external data" to send with query
  363. :param use_extended_dtypes: Only relevant to Pandas Dataframe queries. Use Pandas "missing types", such as
  364. pandas.NA and pandas.NaT for ClickHouse NULL values, as well as extended Pandas dtypes such as IntegerArray
  365. and StringArray. Defaulted to True for query_df methods
  366. :return: Reusable QueryContext
  367. """
  368. return self.client.create_query_context(query=query, parameters=parameters, settings=settings,
  369. query_formats=query_formats, column_formats=column_formats,
  370. encoding=encoding, use_none=use_none,
  371. column_oriented=column_oriented,
  372. use_numpy=use_numpy, max_str_len=max_str_len, context=context,
  373. query_tz=query_tz, column_tzs=column_tzs,
  374. use_na_values=use_na_values,
  375. streaming=streaming, as_pandas=as_pandas,
  376. external_data=external_data,
  377. use_extended_dtypes=use_extended_dtypes)
  378. async def query_arrow(self,
  379. query: str,
  380. parameters: Optional[Union[Sequence, Dict[str, Any]]] = None,
  381. settings: Optional[Dict[str, Any]] = None,
  382. use_strings: Optional[bool] = None,
  383. external_data: Optional[ExternalData] = None):
  384. """
  385. Query method using the ClickHouse Arrow format to return a PyArrow table
  386. :param query: Query statement/format string
  387. :param parameters: Optional dictionary used to format the query
  388. :param settings: Optional dictionary of ClickHouse settings (key/string values)
  389. :param use_strings: Convert ClickHouse String type to Arrow string type (instead of binary)
  390. :param external_data ClickHouse "external data" to send with query
  391. :return: PyArrow.Table
  392. """
  393. def _query_arrow():
  394. return self.client.query_arrow(query=query, parameters=parameters, settings=settings,
  395. use_strings=use_strings, external_data=external_data)
  396. loop = asyncio.get_running_loop()
  397. result = await loop.run_in_executor(self.executor, _query_arrow)
  398. return result
  399. async def query_arrow_stream(self,
  400. query: str,
  401. parameters: Optional[Union[Sequence, Dict[str, Any]]] = None,
  402. settings: Optional[Dict[str, Any]] = None,
  403. use_strings: Optional[bool] = None,
  404. external_data: Optional[ExternalData] = None) -> StreamContext:
  405. """
  406. Query method that returns the results as a stream of Arrow tables
  407. :param query: Query statement/format string
  408. :param parameters: Optional dictionary used to format the query
  409. :param settings: Optional dictionary of ClickHouse settings (key/string values)
  410. :param use_strings: Convert ClickHouse String type to Arrow string type (instead of binary)
  411. :param external_data ClickHouse "external data" to send with query
  412. :return: Generator that yields a PyArrow.Table for per block representing the result set
  413. """
  414. def _query_arrow_stream():
  415. return self.client.query_arrow_stream(query=query, parameters=parameters, settings=settings,
  416. use_strings=use_strings, external_data=external_data)
  417. loop = asyncio.get_running_loop()
  418. result = await loop.run_in_executor(self.executor, _query_arrow_stream)
  419. return result
  420. async def command(self,
  421. cmd: str,
  422. parameters: Optional[Union[Sequence, Dict[str, Any]]] = None,
  423. data: Union[str, bytes] = None,
  424. settings: Dict[str, Any] = None,
  425. use_database: bool = True,
  426. external_data: Optional[ExternalData] = None) -> Union[str, int, Sequence[str], QuerySummary]:
  427. """
  428. Client method that returns a single value instead of a result set
  429. :param cmd: ClickHouse query/command as a python format string
  430. :param parameters: Optional dictionary of key/values pairs to be formatted
  431. :param data: Optional 'data' for the command (for INSERT INTO in particular)
  432. :param settings: Optional dictionary of ClickHouse settings (key/string values)
  433. :param use_database: Send the database parameter to ClickHouse so the command will be executed in the client
  434. database context. Otherwise, no database will be specified with the command. This is useful for determining
  435. the default user database
  436. :param external_data ClickHouse "external data" to send with command/query
  437. :return: Decoded response from ClickHouse as either a string, int, or sequence of strings, or QuerySummary
  438. if no data returned
  439. """
  440. def _command():
  441. return self.client.command(cmd=cmd, parameters=parameters, data=data, settings=settings,
  442. use_database=use_database, external_data=external_data)
  443. loop = asyncio.get_running_loop()
  444. result = await loop.run_in_executor(self.executor, _command)
  445. return result
  446. async def ping(self) -> bool:
  447. """
  448. Validate the connection, does not throw an Exception (see debug logs)
  449. :return: ClickHouse server is up and reachable
  450. """
  451. def _ping():
  452. return self.client.ping()
  453. loop = asyncio.get_running_loop()
  454. result = await loop.run_in_executor(self.executor, _ping)
  455. return result
  456. async def insert(self,
  457. table: Optional[str] = None,
  458. data: Sequence[Sequence[Any]] = None,
  459. column_names: Union[str, Iterable[str]] = '*',
  460. database: Optional[str] = None,
  461. column_types: Sequence[ClickHouseType] = None,
  462. column_type_names: Sequence[str] = None,
  463. column_oriented: bool = False,
  464. settings: Optional[Dict[str, Any]] = None,
  465. context: InsertContext = None) -> QuerySummary:
  466. """
  467. Method to insert multiple rows/data matrix of native Python objects. If context is specified arguments
  468. other than data are ignored
  469. :param table: Target table
  470. :param data: Sequence of sequences of Python data
  471. :param column_names: Ordered list of column names or '*' if column types should be retrieved from the
  472. ClickHouse table definition
  473. :param database: Target database -- will use client default database if not specified.
  474. :param column_types: ClickHouse column types. If set then column data does not need to be retrieved from
  475. the server
  476. :param column_type_names: ClickHouse column type names. If set then column data does not need to be
  477. retrieved from the server
  478. :param column_oriented: If true the data is already "pivoted" in column form
  479. :param settings: Optional dictionary of ClickHouse settings (key/string values)
  480. :param context: Optional reusable insert context to allow repeated inserts into the same table with
  481. different data batches
  482. :return: QuerySummary with summary information, throws exception if insert fails
  483. """
  484. def _insert():
  485. return self.client.insert(table=table, data=data, column_names=column_names, database=database,
  486. column_types=column_types, column_type_names=column_type_names,
  487. column_oriented=column_oriented, settings=settings, context=context)
  488. loop = asyncio.get_running_loop()
  489. result = await loop.run_in_executor(self.executor, _insert)
  490. return result
  491. async def insert_df(self, table: str = None,
  492. df=None,
  493. database: Optional[str] = None,
  494. settings: Optional[Dict] = None,
  495. column_names: Optional[Sequence[str]] = None,
  496. column_types: Sequence[ClickHouseType] = None,
  497. column_type_names: Sequence[str] = None,
  498. context: InsertContext = None) -> QuerySummary:
  499. """
  500. Insert a pandas DataFrame into ClickHouse. If context is specified arguments other than df are ignored
  501. :param table: ClickHouse table
  502. :param df: two-dimensional pandas dataframe
  503. :param database: Optional ClickHouse database
  504. :param settings: Optional dictionary of ClickHouse settings (key/string values)
  505. :param column_names: An optional list of ClickHouse column names. If not set, the DataFrame column names
  506. will be used
  507. :param column_types: ClickHouse column types. If set then column data does not need to be retrieved from
  508. the server
  509. :param column_type_names: ClickHouse column type names. If set then column data does not need to be
  510. retrieved from the server
  511. :param context: Optional reusable insert context to allow repeated inserts into the same table with
  512. different data batches
  513. :return: QuerySummary with summary information, throws exception if insert fails
  514. """
  515. def _insert_df():
  516. return self.client.insert_df(table=table, df=df, database=database, settings=settings,
  517. column_names=column_names,
  518. column_types=column_types, column_type_names=column_type_names,
  519. context=context)
  520. loop = asyncio.get_running_loop()
  521. result = await loop.run_in_executor(self.executor, _insert_df)
  522. return result
  523. async def insert_arrow(self, table: str,
  524. arrow_table, database: str = None,
  525. settings: Optional[Dict] = None) -> QuerySummary:
  526. """
  527. Insert a PyArrow table DataFrame into ClickHouse using raw Arrow format
  528. :param table: ClickHouse table
  529. :param arrow_table: PyArrow Table object
  530. :param database: Optional ClickHouse database
  531. :param settings: Optional dictionary of ClickHouse settings (key/string values)
  532. :return: QuerySummary with summary information, throws exception if insert fails
  533. """
  534. def _insert_arrow():
  535. return self.client.insert_arrow(table=table, arrow_table=arrow_table, database=database, settings=settings)
  536. loop = asyncio.get_running_loop()
  537. result = await loop.run_in_executor(self.executor, _insert_arrow)
  538. return result
  539. async def create_insert_context(self,
  540. table: str,
  541. column_names: Optional[Union[str, Sequence[str]]] = None,
  542. database: Optional[str] = None,
  543. column_types: Sequence[ClickHouseType] = None,
  544. column_type_names: Sequence[str] = None,
  545. column_oriented: bool = False,
  546. settings: Optional[Dict[str, Any]] = None,
  547. data: Optional[Sequence[Sequence[Any]]] = None) -> InsertContext:
  548. """
  549. Builds a reusable insert context to hold state for a duration of an insert
  550. :param table: Target table
  551. :param database: Target database. If not set, uses the client default database
  552. :param column_names: Optional ordered list of column names. If not set, all columns ('*') will be assumed
  553. in the order specified by the table definition
  554. :param database: Target database -- will use client default database if not specified
  555. :param column_types: ClickHouse column types. Optional Sequence of ClickHouseType objects. If neither column
  556. types nor column type names are set, actual column types will be retrieved from the server.
  557. :param column_type_names: ClickHouse column type names. Specified column types by name string
  558. :param column_oriented: If true the data is already "pivoted" in column form
  559. :param settings: Optional dictionary of ClickHouse settings (key/string values)
  560. :param data: Initial dataset for insert
  561. :return Reusable insert context
  562. """
  563. def _create_insert_context():
  564. return self.client.create_insert_context(table=table, column_names=column_names, database=database,
  565. column_types=column_types, column_type_names=column_type_names,
  566. column_oriented=column_oriented, settings=settings, data=data)
  567. loop = asyncio.get_running_loop()
  568. result = await loop.run_in_executor(self.executor, _create_insert_context)
  569. return result
  570. async def data_insert(self, context: InsertContext) -> QuerySummary:
  571. """
  572. Subclass implementation of the data insert
  573. :context: InsertContext parameter object
  574. :return: No return, throws an exception if the insert fails
  575. """
  576. def _data_insert():
  577. return self.client.data_insert(context=context)
  578. loop = asyncio.get_running_loop()
  579. result = await loop.run_in_executor(self.executor, _data_insert)
  580. return result
  581. async def raw_insert(self, table: str,
  582. column_names: Optional[Sequence[str]] = None,
  583. insert_block: Union[str, bytes, Generator[bytes, None, None], BinaryIO] = None,
  584. settings: Optional[Dict] = None,
  585. fmt: Optional[str] = None,
  586. compression: Optional[str] = None) -> QuerySummary:
  587. """
  588. Insert data already formatted in a bytes object
  589. :param table: Table name (whether qualified with the database name or not)
  590. :param column_names: Sequence of column names
  591. :param insert_block: Binary or string data already in a recognized ClickHouse format
  592. :param settings: Optional dictionary of ClickHouse settings (key/string values)
  593. :param compression: Recognized ClickHouse `Accept-Encoding` header compression value
  594. :param fmt: Valid clickhouse format
  595. """
  596. def _raw_insert():
  597. return self.client.raw_insert(table=table, column_names=column_names, insert_block=insert_block,
  598. settings=settings, fmt=fmt, compression=compression)
  599. loop = asyncio.get_running_loop()
  600. result = await loop.run_in_executor(self.executor, _raw_insert)
  601. return result