Browse Source

Intermediate changes

robot-piglet 10 months ago
parent
commit
f22ed1308c

+ 3 - 1
contrib/python/clickhouse-connect/.dist-info/METADATA

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: clickhouse-connect
-Version: 0.7.7
+Version: 0.7.8
 Summary: ClickHouse Database Core Driver for Python, Pandas, and Superset
 Home-page: https://github.com/ClickHouse/clickhouse-connect
 Author: ClickHouse Inc.
@@ -33,6 +33,8 @@ Provides-Extra: pandas
 Requires-Dist: pandas ; extra == 'pandas'
 Provides-Extra: sqlalchemy
 Requires-Dist: sqlalchemy <2.0,>1.3.21 ; extra == 'sqlalchemy'
+Provides-Extra: tzlocal
+Requires-Dist: tzlocal ; extra == 'tzlocal'
 
 ## ClickHouse Connect
 

+ 1 - 1
contrib/python/clickhouse-connect/clickhouse_connect/__version__.py

@@ -1 +1 @@
-version = '0.7.7'
+version = '0.7.8'

+ 13 - 6
contrib/python/clickhouse-connect/clickhouse_connect/driver/client.py

@@ -1,6 +1,6 @@
 import io
 import logging
-from datetime import tzinfo, datetime
+from datetime import tzinfo
 
 import pytz
 
@@ -12,6 +12,7 @@ from clickhouse_connect import common
 from clickhouse_connect.common import version
 from clickhouse_connect.datatypes.registry import get_from_name
 from clickhouse_connect.datatypes.base import ClickHouseType
+from clickhouse_connect.driver import tzutil
 from clickhouse_connect.driver.common import dict_copy, StreamContext, coerce_int, coerce_bool
 from clickhouse_connect.driver.constants import CH_VERSION_WITH_PROTOCOL, PROTOCOL_VERSION_WITH_LOW_CARD
 from clickhouse_connect.driver.exceptions import ProgrammingError, OperationalError
@@ -39,6 +40,7 @@ class Client(ABC):
     optional_transport_settings = set()
     database = None
     max_error_message = 0
+    apply_server_timezone = False
 
     def __init__(self,
                  database: str,
@@ -56,16 +58,21 @@ class Client(ABC):
         self.query_limit = coerce_int(query_limit)
         self.query_retries = coerce_int(query_retries)
         self.server_host_name = server_host_name
-        self.server_tz = pytz.UTC
+        self.server_tz, dst_safe = pytz.UTC, True
         self.server_version, server_tz = \
             tuple(self.command('SELECT version(), timezone()', use_database=False))
         try:
-            self.server_tz = pytz.timezone(server_tz)
+            server_tz = pytz.timezone(server_tz)
+            server_tz, dst_safe = tzutil.normalize_timezone(server_tz)
+            if apply_server_timezone is None:
+                apply_server_timezone = dst_safe
+            self.apply_server_timezone = apply_server_timezone == 'always' or coerce_bool(apply_server_timezone)
         except UnknownTimeZoneError:
             logger.warning('Warning, server is using an unrecognized timezone %s, will use UTC default', server_tz)
-        offsets_differ = datetime.now().astimezone().utcoffset() != datetime.now(tz=self.server_tz).utcoffset()
-        self.apply_server_timezone = apply_server_timezone == 'always' or (
-                coerce_bool(apply_server_timezone) and offsets_differ)
+
+        if not self.apply_server_timezone and not tzutil.local_tz_dst_safe:
+            logger.warning('local timezone %s may return unexpected times due to Daylight Savings Time/' +
+                           'Summer Time differences', tzutil.local_tz.tzname())
         readonly = 'readonly'
         if not self.min_version('19.17'):
             readonly = common.get_setting('readonly')

+ 0 - 14
contrib/python/clickhouse-connect/clickhouse_connect/driver/context.py

@@ -1,10 +1,7 @@
 import logging
 import re
-from datetime import datetime
 from typing import Optional, Dict, Union, Any
 
-import pytz
-
 logger = logging.getLogger(__name__)
 
 _empty_map = {}
@@ -12,7 +9,6 @@ _empty_map = {}
 
 # pylint: disable=too-many-instance-attributes
 class BaseQueryContext:
-    local_tz: pytz.timezone
 
     def __init__(self,
                  settings: Optional[Dict[str, Any]] = None,
@@ -60,13 +56,3 @@ class BaseQueryContext:
             if type_pattern.match(ch_type):
                 return fmt
         return None
-
-
-def _init_context_cls():
-    local_tz = datetime.now().astimezone().tzinfo
-    if local_tz.tzname(datetime.now()) in ('UTC', 'GMT', 'Universal', 'GMT-0', 'Zulu', 'Greenwich'):
-        local_tz = pytz.UTC
-    BaseQueryContext.local_tz = local_tz
-
-
-_init_context_cls()

+ 1 - 1
contrib/python/clickhouse-connect/clickhouse_connect/driver/httpclient.py

@@ -68,7 +68,7 @@ class HttpClient(Client):
                  http_proxy: Optional[str] = None,
                  https_proxy: Optional[str] = None,
                  server_host_name: Optional[str] = None,
-                 apply_server_timezone: Optional[Union[str, bool]] = True):
+                 apply_server_timezone: Optional[Union[str, bool]] = None):
         """
         Create an HTTP ClickHouse Connect client
         See clickhouse_connect.get_client for parameters

+ 5 - 2
contrib/python/clickhouse-connect/clickhouse_connect/driver/npquery.py

@@ -96,10 +96,13 @@ class NumpyResult(Closable):
     def close_df(self):
         if self._block_gen is None:
             raise StreamClosedError
-        chains = [itertools.chain(b) for b in zip(*self._block_gen)]
+        bg = self._block_gen
+        chain = itertools.chain
+        chains = [chain(b) for b in zip(*bg)]
         new_df_series = []
         for c in chains:
-            new_df_series.append(pd.concat([pd.Series(piece, copy=False) for piece in c], copy=False))
+            new_df_series.append(pd.concat([pd.Series(piece, copy=False) for piece in c],
+                                           copy=False, ignore_index=True))
         self._df_result = pd.DataFrame(dict(zip(self.column_names, new_df_series)))
         self.close()
         return self

+ 3 - 3
contrib/python/clickhouse-connect/clickhouse_connect/driver/query.py

@@ -12,6 +12,7 @@ from datetime import date, datetime, tzinfo
 from pytz.exceptions import UnknownTimeZoneError
 
 from clickhouse_connect import common
+from clickhouse_connect.driver import tzutil
 from clickhouse_connect.driver.common import dict_copy, empty_gen, StreamContext
 from clickhouse_connect.driver.external import ExternalData
 from clickhouse_connect.driver.types import Matrix, Closable
@@ -170,7 +171,7 @@ class QueryContext(BaseQueryContext):
         elif self.apply_server_tz:
             active_tz = self.server_tz
         else:
-            active_tz = self.local_tz
+            active_tz = tzutil.local_tz
         if active_tz == pytz.UTC:
             return None
         return active_tz
@@ -440,8 +441,7 @@ def format_bind_value(value: Any, server_tz: tzinfo = pytz.UTC, top_level: bool
             return escape_str(value)
         return format_str(value)
     if isinstance(value, datetime):
-        if value.tzinfo is None:
-            value = value.replace(tzinfo=server_tz)
+        value = value.astimezone(server_tz)
         val = value.strftime('%Y-%m-%d %H:%M:%S')
         if top_level:
             return val

+ 41 - 0
contrib/python/clickhouse-connect/clickhouse_connect/driver/tzutil.py

@@ -0,0 +1,41 @@
+import os
+from datetime import datetime
+from typing import Tuple
+
+import pytz
+
+tzlocal = None
+try:
+    import tzlocal  # Maybe we can use the tzlocal module to get a safe timezone
+except ImportError:
+    pass
+
+# Set the local timezone for DateTime conversions.  Note in most cases we want to use either UTC or the server
+# timezone, but if someone insists on using the local timezone we will try to convert.  The problem is we
+# never have anything but an epoch timestamp returned from ClickHouse, so attempts to convert times when the
+# local timezone is "DST" aware (like 'CEST' vs 'CET') will be wrong approximately half the time
+local_tz: pytz.timezone
+local_tz_dst_safe: bool = False
+
+
+def normalize_timezone(timezone: pytz.timezone) -> Tuple[pytz.timezone, bool]:
+    if timezone.tzname(None) in ('UTC', 'GMT', 'Universal', 'GMT-0', 'Zulu', 'Greenwich'):
+        return pytz.UTC, True
+
+    if timezone.tzname(None) in pytz.common_timezones:
+        return timezone, True
+
+    if tzlocal is not None:  # Maybe we can use the tzlocal module to get a safe timezone
+        local_name = tzlocal.get_localzone_name()
+        if local_name in pytz.common_timezones:
+            return pytz.timezone(local_name), True
+
+    return timezone, False
+
+
+try:
+    local_tz = pytz.timezone(os.environ.get('TZ', ''))
+except pytz.UnknownTimeZoneError:
+    local_tz = datetime.now().astimezone().tzinfo
+
+local_tz, local_tz_dst_safe = normalize_timezone(local_tz)

+ 2 - 1
contrib/python/clickhouse-connect/ya.make

@@ -2,7 +2,7 @@
 
 PY3_LIBRARY()
 
-VERSION(0.7.7)
+VERSION(0.7.8)
 
 LICENSE(Apache-2.0)
 
@@ -78,6 +78,7 @@ PY_SRCS(
     clickhouse_connect/driver/tools.py
     clickhouse_connect/driver/transform.py
     clickhouse_connect/driver/types.py
+    clickhouse_connect/driver/tzutil.py
     clickhouse_connect/driverc/__init__.py
     clickhouse_connect/entry_points.py
     clickhouse_connect/json_impl.py