Browse Source

Intermediate changes

robot-piglet 11 months ago
parent
commit
32db6a72f8

+ 1 - 1
contrib/python/clickhouse-connect/.dist-info/METADATA

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Metadata-Version: 2.1
 Name: clickhouse-connect
 Name: clickhouse-connect
-Version: 0.7.3
+Version: 0.7.4
 Summary: ClickHouse Database Core Driver for Python, Pandas, and Superset
 Summary: ClickHouse Database Core Driver for Python, Pandas, and Superset
 Home-page: https://github.com/ClickHouse/clickhouse-connect
 Home-page: https://github.com/ClickHouse/clickhouse-connect
 Author: ClickHouse Inc.
 Author: ClickHouse Inc.

+ 1 - 1
contrib/python/clickhouse-connect/clickhouse_connect/__version__.py

@@ -1 +1 @@
-version = '0.7.3'
+version = '0.7.4'

+ 39 - 8
contrib/python/clickhouse-connect/clickhouse_connect/driver/client.py

@@ -19,7 +19,8 @@ from clickhouse_connect.driver.external import ExternalData
 from clickhouse_connect.driver.insert import InsertContext
 from clickhouse_connect.driver.insert import InsertContext
 from clickhouse_connect.driver.summary import QuerySummary
 from clickhouse_connect.driver.summary import QuerySummary
 from clickhouse_connect.driver.models import ColumnDef, SettingDef, SettingStatus
 from clickhouse_connect.driver.models import ColumnDef, SettingDef, SettingStatus
-from clickhouse_connect.driver.query import QueryResult, to_arrow, QueryContext, arrow_buffer, quote_identifier
+from clickhouse_connect.driver.query import QueryResult, to_arrow, to_arrow_batches, QueryContext, arrow_buffer, \
+    quote_identifier
 
 
 io.DEFAULT_BUFFER_SIZE = 1024 * 256
 io.DEFAULT_BUFFER_SIZE = 1024 * 256
 logger = logging.getLogger(__name__)
 logger = logging.getLogger(__name__)
@@ -255,7 +256,8 @@ class Client(ABC):
                   settings: Optional[Dict[str, Any]] = None,
                   settings: Optional[Dict[str, Any]] = None,
                   fmt: str = None,
                   fmt: str = None,
                   use_database: bool = True,
                   use_database: bool = True,
-                  external_data: Optional[ExternalData] = None) -> bytes:
+                  external_data: Optional[ExternalData] = None,
+                  stream: bool = False) -> Union[bytes, io.IOBase]:
         """
         """
         Query method that simply returns the raw ClickHouse format bytes
         Query method that simply returns the raw ClickHouse format bytes
         :param query: Query statement/format string
         :param query: Query statement/format string
@@ -348,7 +350,7 @@ class Client(ABC):
         """
         """
         Query method that returns the results as a StreamContext.  For parameter values, see the
         Query method that returns the results as a StreamContext.  For parameter values, see the
         create_query_context method
         create_query_context method
-        :return: Pandas dataframe representing the result set
+        :return: Generator that yields a Pandas dataframe per block representing the result set
         """
         """
         return self._context_query(locals(), use_numpy=True,
         return self._context_query(locals(), use_numpy=True,
                                    as_pandas=True,
                                    as_pandas=True,
@@ -462,6 +464,39 @@ class Client(ABC):
         :param external_data ClickHouse "external data" to send with query
         :param external_data ClickHouse "external data" to send with query
         :return: PyArrow.Table
         :return: PyArrow.Table
         """
         """
+        settings = self._update_arrow_settings(settings, use_strings)
+        return to_arrow(self.raw_query(query,
+                                       parameters,
+                                       settings,
+                                       fmt='Arrow',
+                                       external_data=external_data))
+
+    def query_arrow_stream(self,
+                           query: str,
+                           parameters: Optional[Union[Sequence, Dict[str, Any]]] = None,
+                           settings: Optional[Dict[str, Any]] = None,
+                           use_strings: Optional[bool] = None,
+                           external_data: Optional[ExternalData] = None) -> StreamContext:
+        """
+        Query method that returns the results as a stream of Arrow tables
+        :param query: Query statement/format string
+        :param parameters: Optional dictionary used to format the query
+        :param settings: Optional dictionary of ClickHouse settings (key/string values)
+        :param use_strings:  Convert ClickHouse String type to Arrow string type (instead of binary)
+        :param external_data: ClickHouse "external data" to send with query
+        :return: Generator that yields a PyArrow.Table per block representing the result set
+        """
+        settings = self._update_arrow_settings(settings, use_strings)
+        return to_arrow_batches(self.raw_query(query,
+                                               parameters,
+                                               settings,
+                                               fmt='ArrowStream',
+                                               external_data=external_data,
+                                               stream=True))
+
+    def _update_arrow_settings(self,
+                               settings: Optional[Dict[str, Any]],
+                               use_strings: Optional[bool]) -> Dict[str, Any]:
         settings = dict_copy(settings)
         settings = dict_copy(settings)
         if self.database:
         if self.database:
             settings['database'] = self.database
             settings['database'] = self.database
@@ -473,11 +508,7 @@ class Client(ABC):
             if not str_status.is_writable:
             if not str_status.is_writable:
                 raise OperationalError(f'Cannot change readonly {arrow_str_setting} to {use_strings}')
                 raise OperationalError(f'Cannot change readonly {arrow_str_setting} to {use_strings}')
             settings[arrow_str_setting] = '1' if use_strings else '0'
             settings[arrow_str_setting] = '1' if use_strings else '0'
-        return to_arrow(self.raw_query(query,
-                                       parameters,
-                                       settings,
-                                       fmt='Arrow',
-                                       external_data=external_data))
+        return settings
 
 
     @abstractmethod
     @abstractmethod
     def command(self,
     def command(self,

+ 1 - 1
contrib/python/clickhouse-connect/clickhouse_connect/driver/common.py

@@ -125,7 +125,7 @@ def coerce_int(val: Optional[Union[str, int]]) -> int:
 def coerce_bool(val: Optional[Union[str, bool]]):
 def coerce_bool(val: Optional[Union[str, bool]]):
     if not val:
     if not val:
         return False
         return False
-    return val in (True, 'True', 'true', '1')
+    return val is True or (isinstance(val, str) and val.lower() in ('true', '1', 'y', 'yes'))
 
 
 
 
 class SliceView(Sequence):
 class SliceView(Sequence):

+ 1 - 1
contrib/python/clickhouse-connect/clickhouse_connect/driver/dataconv.py

@@ -122,7 +122,7 @@ def write_str_col(column: Sequence, nullable: bool, encoding: Optional[str], des
             if encoding:
             if encoding:
                 x = x.encode(encoding)
                 x = x.encode(encoding)
             else:
             else:
-                x = b''
+                x = bytes(x)
             sz = len(x)
             sz = len(x)
             while True:
             while True:
                 b = sz & 0x7f
                 b = sz & 0x7f

+ 7 - 3
contrib/python/clickhouse-connect/clickhouse_connect/driver/httpclient.py

@@ -449,8 +449,11 @@ class HttpClient(Client):
 
 
     def raw_query(self, query: str,
     def raw_query(self, query: str,
                   parameters: Optional[Union[Sequence, Dict[str, Any]]] = None,
                   parameters: Optional[Union[Sequence, Dict[str, Any]]] = None,
-                  settings: Optional[Dict[str, Any]] = None, fmt: str = None,
-                  use_database: bool = True, external_data: Optional[ExternalData] = None) -> bytes:
+                  settings: Optional[Dict[str, Any]] = None,
+                  fmt: str = None,
+                  use_database: bool = True,
+                  external_data: Optional[ExternalData] = None,
+                  stream: bool = False) -> Union[bytes, HTTPResponse]:
         """
         """
         See BaseClient doc_string for this method
         See BaseClient doc_string for this method
         """
         """
@@ -469,7 +472,8 @@ class HttpClient(Client):
         else:
         else:
             body = final_query
             body = final_query
             fields = None
             fields = None
-        return self._raw_request(body, params, fields=fields).data
+        response = self._raw_request(body, params, fields=fields, stream=stream)
+        return response if stream else response.data
 
 
     def close(self):
     def close(self):
         if self._owns_pool_manager:
         if self._owns_pool_manager:

+ 7 - 0
contrib/python/clickhouse-connect/clickhouse_connect/driver/query.py

@@ -5,6 +5,7 @@ import uuid
 import pytz
 import pytz
 
 
 from enum import Enum
 from enum import Enum
+from io import IOBase
 from typing import Any, Tuple, Dict, Sequence, Optional, Union, Generator
 from typing import Any, Tuple, Dict, Sequence, Optional, Union, Generator
 from datetime import date, datetime, tzinfo
 from datetime import date, datetime, tzinfo
 
 
@@ -489,6 +490,12 @@ def to_arrow(content: bytes):
     return reader.read_all()
     return reader.read_all()
 
 
 
 
+def to_arrow_batches(buffer: IOBase) -> StreamContext:
+    pyarrow = check_arrow()
+    reader = pyarrow.ipc.open_stream(buffer)
+    return StreamContext(buffer, reader)
+
+
 def arrow_buffer(table) -> Tuple[Sequence[str], bytes]:
 def arrow_buffer(table) -> Tuple[Sequence[str], bytes]:
     pyarrow = check_arrow()
     pyarrow = check_arrow()
     sink = pyarrow.BufferOutputStream()
     sink = pyarrow.BufferOutputStream()

+ 1 - 1
contrib/python/clickhouse-connect/ya.make

@@ -2,7 +2,7 @@
 
 
 PY3_LIBRARY()
 PY3_LIBRARY()
 
 
-VERSION(0.7.3)
+VERSION(0.7.4)
 
 
 LICENSE(Apache-2.0)
 LICENSE(Apache-2.0)
 
 

+ 3 - 3
contrib/python/hypothesis/py3/.dist-info/METADATA

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Metadata-Version: 2.1
 Name: hypothesis
 Name: hypothesis
-Version: 6.99.12
+Version: 6.99.13
 Summary: A library for property-based testing
 Summary: A library for property-based testing
 Home-page: https://hypothesis.works
 Home-page: https://hypothesis.works
 Author: David R. MacIver and Zac Hatfield-Dodds
 Author: David R. MacIver and Zac Hatfield-Dodds
@@ -41,7 +41,7 @@ Requires-Dist: exceptiongroup >=1.0.0 ; python_version < "3.11"
 Provides-Extra: all
 Provides-Extra: all
 Requires-Dist: black >=19.10b0 ; extra == 'all'
 Requires-Dist: black >=19.10b0 ; extra == 'all'
 Requires-Dist: click >=7.0 ; extra == 'all'
 Requires-Dist: click >=7.0 ; extra == 'all'
-Requires-Dist: crosshair-tool >=0.0.51 ; extra == 'all'
+Requires-Dist: crosshair-tool >=0.0.53 ; extra == 'all'
 Requires-Dist: django >=3.2 ; extra == 'all'
 Requires-Dist: django >=3.2 ; extra == 'all'
 Requires-Dist: dpcontracts >=0.4 ; extra == 'all'
 Requires-Dist: dpcontracts >=0.4 ; extra == 'all'
 Requires-Dist: hypothesis-crosshair >=0.0.2 ; extra == 'all'
 Requires-Dist: hypothesis-crosshair >=0.0.2 ; extra == 'all'
@@ -64,7 +64,7 @@ Provides-Extra: codemods
 Requires-Dist: libcst >=0.3.16 ; extra == 'codemods'
 Requires-Dist: libcst >=0.3.16 ; extra == 'codemods'
 Provides-Extra: crosshair
 Provides-Extra: crosshair
 Requires-Dist: hypothesis-crosshair >=0.0.2 ; extra == 'crosshair'
 Requires-Dist: hypothesis-crosshair >=0.0.2 ; extra == 'crosshair'
-Requires-Dist: crosshair-tool >=0.0.51 ; extra == 'crosshair'
+Requires-Dist: crosshair-tool >=0.0.53 ; extra == 'crosshair'
 Provides-Extra: dateutil
 Provides-Extra: dateutil
 Requires-Dist: python-dateutil >=1.4 ; extra == 'dateutil'
 Requires-Dist: python-dateutil >=1.4 ; extra == 'dateutil'
 Provides-Extra: django
 Provides-Extra: django

+ 11 - 8
contrib/python/hypothesis/py3/hypothesis/core.py

@@ -786,7 +786,6 @@ class StateForActualGivenExecution:
         self.explain_traces = defaultdict(set)
         self.explain_traces = defaultdict(set)
         self._start_timestamp = time.time()
         self._start_timestamp = time.time()
         self._string_repr = ""
         self._string_repr = ""
-        self._jsonable_arguments = {}
         self._timing_features = {}
         self._timing_features = {}
 
 
     @property
     @property
@@ -913,7 +912,7 @@ class StateForActualGivenExecution:
                     ),
                     ),
                 )
                 )
                 self._string_repr = printer.getvalue()
                 self._string_repr = printer.getvalue()
-                self._jsonable_arguments = {
+                data._observability_arguments = {
                     **dict(enumerate(map(to_jsonable, args))),
                     **dict(enumerate(map(to_jsonable, args))),
                     **{k: to_jsonable(v) for k, v in kwargs.items()},
                     **{k: to_jsonable(v) for k, v in kwargs.items()},
                 }
                 }
@@ -1085,19 +1084,23 @@ class StateForActualGivenExecution:
             # Conditional here so we can save some time constructing the payload; in
             # Conditional here so we can save some time constructing the payload; in
             # other cases (without coverage) it's cheap enough to do that regardless.
             # other cases (without coverage) it's cheap enough to do that regardless.
             if TESTCASE_CALLBACKS:
             if TESTCASE_CALLBACKS:
-                if self.failed_normally or self.failed_due_to_deadline:
-                    phase = "shrink"
-                elif runner := getattr(self, "_runner", None):
+                if runner := getattr(self, "_runner", None):
                     phase = runner._current_phase
                     phase = runner._current_phase
+                elif self.failed_normally or self.failed_due_to_deadline:
+                    phase = "shrink"
                 else:  # pragma: no cover  # in case of messing with internals
                 else:  # pragma: no cover  # in case of messing with internals
                     phase = "unknown"
                     phase = "unknown"
+                backend_desc = f", using backend={self.settings.backend!r}" * (
+                    self.settings.backend != "hypothesis"
+                    and not getattr(runner, "_switch_to_hypothesis_provider", False)
+                )
                 tc = make_testcase(
                 tc = make_testcase(
                     start_timestamp=self._start_timestamp,
                     start_timestamp=self._start_timestamp,
                     test_name_or_nodeid=self.test_identifier,
                     test_name_or_nodeid=self.test_identifier,
                     data=data,
                     data=data,
-                    how_generated=f"generated during {phase} phase",
+                    how_generated=f"during {phase} phase{backend_desc}",
                     string_repr=self._string_repr,
                     string_repr=self._string_repr,
-                    arguments={**self._jsonable_arguments, **data._observability_args},
+                    arguments=data._observability_args,
                     timing=self._timing_features,
                     timing=self._timing_features,
                     coverage=tractable_coverage_report(trace) or None,
                     coverage=tractable_coverage_report(trace) or None,
                     phase=phase,
                     phase=phase,
@@ -1217,7 +1220,7 @@ class StateForActualGivenExecution:
                     "status": "passed" if sys.exc_info()[0] else "failed",
                     "status": "passed" if sys.exc_info()[0] else "failed",
                     "status_reason": str(origin or "unexpected/flaky pass"),
                     "status_reason": str(origin or "unexpected/flaky pass"),
                     "representation": self._string_repr,
                     "representation": self._string_repr,
-                    "arguments": self._jsonable_arguments,
+                    "arguments": ran_example._observability_args,
                     "how_generated": "minimal failing example",
                     "how_generated": "minimal failing example",
                     "features": {
                     "features": {
                         **{
                         **{

Some files were not shown because too many files changed in this diff