Browse Source

Intermediate changes

robot-piglet 11 months ago
parent
commit
32db6a72f8

+ 1 - 1
contrib/python/clickhouse-connect/.dist-info/METADATA

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: clickhouse-connect
-Version: 0.7.3
+Version: 0.7.4
 Summary: ClickHouse Database Core Driver for Python, Pandas, and Superset
 Home-page: https://github.com/ClickHouse/clickhouse-connect
 Author: ClickHouse Inc.

+ 1 - 1
contrib/python/clickhouse-connect/clickhouse_connect/__version__.py

@@ -1 +1 @@
-version = '0.7.3'
+version = '0.7.4'

+ 39 - 8
contrib/python/clickhouse-connect/clickhouse_connect/driver/client.py

@@ -19,7 +19,8 @@ from clickhouse_connect.driver.external import ExternalData
 from clickhouse_connect.driver.insert import InsertContext
 from clickhouse_connect.driver.summary import QuerySummary
 from clickhouse_connect.driver.models import ColumnDef, SettingDef, SettingStatus
-from clickhouse_connect.driver.query import QueryResult, to_arrow, QueryContext, arrow_buffer, quote_identifier
+from clickhouse_connect.driver.query import QueryResult, to_arrow, to_arrow_batches, QueryContext, arrow_buffer, \
+    quote_identifier
 
 io.DEFAULT_BUFFER_SIZE = 1024 * 256
 logger = logging.getLogger(__name__)
@@ -255,7 +256,8 @@ class Client(ABC):
                   settings: Optional[Dict[str, Any]] = None,
                   fmt: str = None,
                   use_database: bool = True,
-                  external_data: Optional[ExternalData] = None) -> bytes:
+                  external_data: Optional[ExternalData] = None,
+                  stream: bool = False) -> Union[bytes, io.IOBase]:
         """
         Query method that simply returns the raw ClickHouse format bytes
         :param query: Query statement/format string
@@ -348,7 +350,7 @@ class Client(ABC):
         """
         Query method that returns the results as a StreamContext.  For parameter values, see the
         create_query_context method
-        :return: Pandas dataframe representing the result set
+        :return: Generator that yields a Pandas dataframe per block representing the result set
         """
         return self._context_query(locals(), use_numpy=True,
                                    as_pandas=True,
@@ -462,6 +464,39 @@ class Client(ABC):
         :param external_data ClickHouse "external data" to send with query
         :return: PyArrow.Table
         """
+        settings = self._update_arrow_settings(settings, use_strings)
+        return to_arrow(self.raw_query(query,
+                                       parameters,
+                                       settings,
+                                       fmt='Arrow',
+                                       external_data=external_data))
+
+    def query_arrow_stream(self,
+                           query: str,
+                           parameters: Optional[Union[Sequence, Dict[str, Any]]] = None,
+                           settings: Optional[Dict[str, Any]] = None,
+                           use_strings: Optional[bool] = None,
+                           external_data: Optional[ExternalData] = None) -> StreamContext:
+        """
+        Query method that returns the results as a stream of Arrow tables
+        :param query: Query statement/format string
+        :param parameters: Optional dictionary used to format the query
+        :param settings: Optional dictionary of ClickHouse settings (key/string values)
+        :param use_strings:  Convert ClickHouse String type to Arrow string type (instead of binary)
+        :param external_data: ClickHouse "external data" to send with query
+        :return: Generator that yields a PyArrow.Table per block of the result set
+        """
+        settings = self._update_arrow_settings(settings, use_strings)
+        return to_arrow_batches(self.raw_query(query,
+                                               parameters,
+                                               settings,
+                                               fmt='ArrowStream',
+                                               external_data=external_data,
+                                               stream=True))
+
+    def _update_arrow_settings(self,
+                               settings: Optional[Dict[str, Any]],
+                               use_strings: Optional[bool]) -> Dict[str, Any]:
         settings = dict_copy(settings)
         if self.database:
             settings['database'] = self.database
@@ -473,11 +508,7 @@ class Client(ABC):
             if not str_status.is_writable:
                 raise OperationalError(f'Cannot change readonly {arrow_str_setting} to {use_strings}')
             settings[arrow_str_setting] = '1' if use_strings else '0'
-        return to_arrow(self.raw_query(query,
-                                       parameters,
-                                       settings,
-                                       fmt='Arrow',
-                                       external_data=external_data))
+        return settings
 
     @abstractmethod
     def command(self,

+ 1 - 1
contrib/python/clickhouse-connect/clickhouse_connect/driver/common.py

@@ -125,7 +125,7 @@ def coerce_int(val: Optional[Union[str, int]]) -> int:
 def coerce_bool(val: Optional[Union[str, bool]]):
     if not val:
         return False
-    return val in (True, 'True', 'true', '1')
+    return val is True or (isinstance(val, str) and val.lower() in ('true', '1', 'y', 'yes'))
 
 
 class SliceView(Sequence):

+ 1 - 1
contrib/python/clickhouse-connect/clickhouse_connect/driver/dataconv.py

@@ -122,7 +122,7 @@ def write_str_col(column: Sequence, nullable: bool, encoding: Optional[str], des
             if encoding:
                 x = x.encode(encoding)
             else:
-                x = b''
+                x = bytes(x)
             sz = len(x)
             while True:
                 b = sz & 0x7f

+ 7 - 3
contrib/python/clickhouse-connect/clickhouse_connect/driver/httpclient.py

@@ -449,8 +449,11 @@ class HttpClient(Client):
 
     def raw_query(self, query: str,
                   parameters: Optional[Union[Sequence, Dict[str, Any]]] = None,
-                  settings: Optional[Dict[str, Any]] = None, fmt: str = None,
-                  use_database: bool = True, external_data: Optional[ExternalData] = None) -> bytes:
+                  settings: Optional[Dict[str, Any]] = None,
+                  fmt: str = None,
+                  use_database: bool = True,
+                  external_data: Optional[ExternalData] = None,
+                  stream: bool = False) -> Union[bytes, HTTPResponse]:
         """
         See BaseClient doc_string for this method
         """
@@ -469,7 +472,8 @@ class HttpClient(Client):
         else:
             body = final_query
             fields = None
-        return self._raw_request(body, params, fields=fields).data
+        response = self._raw_request(body, params, fields=fields, stream=stream)
+        return response if stream else response.data
 
     def close(self):
         if self._owns_pool_manager:

+ 7 - 0
contrib/python/clickhouse-connect/clickhouse_connect/driver/query.py

@@ -5,6 +5,7 @@ import uuid
 import pytz
 
 from enum import Enum
+from io import IOBase
 from typing import Any, Tuple, Dict, Sequence, Optional, Union, Generator
 from datetime import date, datetime, tzinfo
 
@@ -489,6 +490,12 @@ def to_arrow(content: bytes):
     return reader.read_all()
 
 
+def to_arrow_batches(buffer: IOBase) -> StreamContext:
+    pyarrow = check_arrow()
+    reader = pyarrow.ipc.open_stream(buffer)
+    return StreamContext(buffer, reader)
+
+
 def arrow_buffer(table) -> Tuple[Sequence[str], bytes]:
     pyarrow = check_arrow()
     sink = pyarrow.BufferOutputStream()

+ 1 - 1
contrib/python/clickhouse-connect/ya.make

@@ -2,7 +2,7 @@
 
 PY3_LIBRARY()
 
-VERSION(0.7.3)
+VERSION(0.7.4)
 
 LICENSE(Apache-2.0)
 

+ 3 - 3
contrib/python/hypothesis/py3/.dist-info/METADATA

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: hypothesis
-Version: 6.99.12
+Version: 6.99.13
 Summary: A library for property-based testing
 Home-page: https://hypothesis.works
 Author: David R. MacIver and Zac Hatfield-Dodds
@@ -41,7 +41,7 @@ Requires-Dist: exceptiongroup >=1.0.0 ; python_version < "3.11"
 Provides-Extra: all
 Requires-Dist: black >=19.10b0 ; extra == 'all'
 Requires-Dist: click >=7.0 ; extra == 'all'
-Requires-Dist: crosshair-tool >=0.0.51 ; extra == 'all'
+Requires-Dist: crosshair-tool >=0.0.53 ; extra == 'all'
 Requires-Dist: django >=3.2 ; extra == 'all'
 Requires-Dist: dpcontracts >=0.4 ; extra == 'all'
 Requires-Dist: hypothesis-crosshair >=0.0.2 ; extra == 'all'
@@ -64,7 +64,7 @@ Provides-Extra: codemods
 Requires-Dist: libcst >=0.3.16 ; extra == 'codemods'
 Provides-Extra: crosshair
 Requires-Dist: hypothesis-crosshair >=0.0.2 ; extra == 'crosshair'
-Requires-Dist: crosshair-tool >=0.0.51 ; extra == 'crosshair'
+Requires-Dist: crosshair-tool >=0.0.53 ; extra == 'crosshair'
 Provides-Extra: dateutil
 Requires-Dist: python-dateutil >=1.4 ; extra == 'dateutil'
 Provides-Extra: django

+ 11 - 8
contrib/python/hypothesis/py3/hypothesis/core.py

@@ -786,7 +786,6 @@ class StateForActualGivenExecution:
         self.explain_traces = defaultdict(set)
         self._start_timestamp = time.time()
         self._string_repr = ""
-        self._jsonable_arguments = {}
         self._timing_features = {}
 
     @property
@@ -913,7 +912,7 @@ class StateForActualGivenExecution:
                     ),
                 )
                 self._string_repr = printer.getvalue()
-                self._jsonable_arguments = {
+                data._observability_arguments = {
                     **dict(enumerate(map(to_jsonable, args))),
                     **{k: to_jsonable(v) for k, v in kwargs.items()},
                 }
@@ -1085,19 +1084,23 @@ class StateForActualGivenExecution:
             # Conditional here so we can save some time constructing the payload; in
             # other cases (without coverage) it's cheap enough to do that regardless.
             if TESTCASE_CALLBACKS:
-                if self.failed_normally or self.failed_due_to_deadline:
-                    phase = "shrink"
-                elif runner := getattr(self, "_runner", None):
+                if runner := getattr(self, "_runner", None):
                     phase = runner._current_phase
+                elif self.failed_normally or self.failed_due_to_deadline:
+                    phase = "shrink"
                 else:  # pragma: no cover  # in case of messing with internals
                     phase = "unknown"
+                backend_desc = f", using backend={self.settings.backend!r}" * (
+                    self.settings.backend != "hypothesis"
+                    and not getattr(runner, "_switch_to_hypothesis_provider", False)
+                )
                 tc = make_testcase(
                     start_timestamp=self._start_timestamp,
                     test_name_or_nodeid=self.test_identifier,
                     data=data,
-                    how_generated=f"generated during {phase} phase",
+                    how_generated=f"during {phase} phase{backend_desc}",
                     string_repr=self._string_repr,
-                    arguments={**self._jsonable_arguments, **data._observability_args},
+                    arguments=data._observability_args,
                     timing=self._timing_features,
                     coverage=tractable_coverage_report(trace) or None,
                     phase=phase,
@@ -1217,7 +1220,7 @@ class StateForActualGivenExecution:
                     "status": "passed" if sys.exc_info()[0] else "failed",
                     "status_reason": str(origin or "unexpected/flaky pass"),
                     "representation": self._string_repr,
-                    "arguments": self._jsonable_arguments,
+                    "arguments": ran_example._observability_args,
                     "how_generated": "minimal failing example",
                     "features": {
                         **{

Some files were not shown because too many files changed in this diff